storkit: merge 445_bug_rate_limited_mergemaster_exits_advance_stories_to_done_without_merging
This commit is contained in:
@@ -498,6 +498,50 @@ impl AgentPool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mergemaster agents have their own completion path via
|
||||||
|
// start_merge_agent_work / run_merge_pipeline and must NOT go
|
||||||
|
// through server-owned gates. When a mergemaster exits early
|
||||||
|
// (e.g. rate-limited before calling start_merge_agent_work) the
|
||||||
|
// feature-branch worktree compiles fine and post-merge tests on
|
||||||
|
// master pass (nothing changed), which would wrongly advance the
|
||||||
|
// story to 5_done/ without any squash merge having occurred.
|
||||||
|
// Instead: just remove the agent from the pool and let
|
||||||
|
// auto-assign restart a new mergemaster for the story.
|
||||||
|
let stage = config_clone
|
||||||
|
.find_agent(&aname)
|
||||||
|
.map(agent_config_stage)
|
||||||
|
.unwrap_or_else(|| pipeline_stage(&aname));
|
||||||
|
if stage == PipelineStage::Mergemaster {
|
||||||
|
let (tx_done, done_session_id) = {
|
||||||
|
let mut lock = match agents_ref.lock() {
|
||||||
|
Ok(a) => a,
|
||||||
|
Err(_) => return,
|
||||||
|
};
|
||||||
|
if let Some(agent) = lock.remove(&key_clone) {
|
||||||
|
(agent.tx, agent.session_id.or(result.session_id))
|
||||||
|
} else {
|
||||||
|
(tx_clone.clone(), result.session_id)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let _ = tx_done.send(AgentEvent::Done {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
session_id: done_session_id,
|
||||||
|
});
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
// Send a WorkItem event so the auto-assign watcher loop
|
||||||
|
// re-dispatches a new mergemaster if the story still needs
|
||||||
|
// merging. This avoids an async call to start_agent inside
|
||||||
|
// a tokio::spawn (which would require Send).
|
||||||
|
let _ = watcher_tx_clone.send(
|
||||||
|
crate::io::watcher::WatcherEvent::WorkItem {
|
||||||
|
stage: "4_merge".to_string(),
|
||||||
|
item_id: sid.clone(),
|
||||||
|
action: "reassign".to_string(),
|
||||||
|
commit_msg: String::new(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
} else {
|
||||||
// Server-owned completion: run acceptance gates automatically
|
// Server-owned completion: run acceptance gates automatically
|
||||||
// when the agent process exits normally.
|
// when the agent process exits normally.
|
||||||
super::pipeline::run_server_owned_completion(
|
super::pipeline::run_server_owned_completion(
|
||||||
@@ -511,6 +555,7 @@ impl AgentPool {
|
|||||||
.await;
|
.await;
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
|
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
|
||||||
let event = AgentEvent::Error {
|
let event = AgentEvent::Error {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::collections::HashMap;
|
|||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use tokio::sync::broadcast;
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
use super::super::super::{AgentEvent, AgentStatus, CompletionReport};
|
use super::super::super::{AgentEvent, AgentStatus, CompletionReport, PipelineStage, pipeline_stage};
|
||||||
use super::super::{AgentPool, StoryAgent, composite_key};
|
use super::super::{AgentPool, StoryAgent, composite_key};
|
||||||
use super::advance::spawn_pipeline_advance;
|
use super::advance::spawn_pipeline_advance;
|
||||||
|
|
||||||
@@ -155,6 +155,21 @@ pub(in crate::agents::pool) async fn run_server_owned_completion(
|
|||||||
) {
|
) {
|
||||||
let key = composite_key(story_id, agent_name);
|
let key = composite_key(story_id, agent_name);
|
||||||
|
|
||||||
|
// Guard: mergemaster agents have their own completion path via
|
||||||
|
// start_merge_agent_work / run_merge_pipeline. Running server-owned gates
|
||||||
|
// for a mergemaster would wrongly advance the story to 5_done/ even when
|
||||||
|
// no squash merge has occurred (e.g. rate-limited exit before the agent
|
||||||
|
// called start_merge_agent_work). The lifecycle caller is responsible for
|
||||||
|
// cleaning up the agent entry and triggering auto-assign.
|
||||||
|
if pipeline_stage(agent_name) == PipelineStage::Mergemaster {
|
||||||
|
slog!(
|
||||||
|
"[agents] run_server_owned_completion skipped for mergemaster \
|
||||||
|
'{story_id}:{agent_name}'; mergemaster completion is handled by \
|
||||||
|
start_merge_agent_work."
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Guard: skip if completion was already recorded (legacy path).
|
// Guard: skip if completion was already recorded (legacy path).
|
||||||
{
|
{
|
||||||
let lock = match agents.lock() {
|
let lock = match agents.lock() {
|
||||||
@@ -516,4 +531,83 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression test for bug 445: a rate-limited mergemaster exits before
|
||||||
|
/// calling start_merge_agent_work. run_server_owned_completion must be a
|
||||||
|
/// no-op for mergemaster agents — it must not run acceptance gates and must
|
||||||
|
/// not advance the story to 5_done/ even when a passing script/test exists.
|
||||||
|
///
|
||||||
|
/// Before the fix: run_server_owned_completion would call run_pipeline_advance
|
||||||
|
/// for the Mergemaster stage, which ran post-merge tests on master (they pass
|
||||||
|
/// because nothing changed), then called move_story_to_done — advancing the
|
||||||
|
/// story without any squash merge having occurred.
|
||||||
|
#[cfg(unix)]
|
||||||
|
#[tokio::test]
|
||||||
|
async fn server_owned_completion_is_noop_for_mergemaster() {
|
||||||
|
use std::fs;
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
let tmp = tempdir().unwrap();
|
||||||
|
let root = tmp.path();
|
||||||
|
init_git_repo(root);
|
||||||
|
|
||||||
|
// Create a passing script/test so post-merge tests would succeed if
|
||||||
|
// run_pipeline_advance were incorrectly called for this mergemaster.
|
||||||
|
let script_dir = root.join("script");
|
||||||
|
fs::create_dir_all(&script_dir).unwrap();
|
||||||
|
let script_test = script_dir.join("test");
|
||||||
|
fs::write(&script_test, "#!/usr/bin/env sh\nexit 0\n").unwrap();
|
||||||
|
let mut perms = fs::metadata(&script_test).unwrap().permissions();
|
||||||
|
perms.set_mode(0o755);
|
||||||
|
fs::set_permissions(&script_test, perms).unwrap();
|
||||||
|
|
||||||
|
// Story in 4_merge/ — must NOT be moved to 5_done/.
|
||||||
|
let merge_dir = root.join(".storkit/work/4_merge");
|
||||||
|
fs::create_dir_all(&merge_dir).unwrap();
|
||||||
|
let story_path = merge_dir.join("99_story_merge445.md");
|
||||||
|
fs::write(&story_path, "---\nname: Merge 445 Test\n---\n").unwrap();
|
||||||
|
|
||||||
|
let pool = AgentPool::new_test(3001);
|
||||||
|
pool.inject_test_agent_with_path(
|
||||||
|
"99_story_merge445",
|
||||||
|
"mergemaster",
|
||||||
|
AgentStatus::Running,
|
||||||
|
root.to_path_buf(),
|
||||||
|
);
|
||||||
|
|
||||||
|
run_server_owned_completion(
|
||||||
|
&pool.agents,
|
||||||
|
pool.port,
|
||||||
|
"99_story_merge445",
|
||||||
|
"mergemaster",
|
||||||
|
None,
|
||||||
|
pool.watcher_tx.clone(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Wait briefly in case any background task fires.
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(150)).await;
|
||||||
|
|
||||||
|
// Story must remain in 4_merge/ — not moved to 5_done/.
|
||||||
|
let done_path = root.join(".storkit/work/5_done/99_story_merge445.md");
|
||||||
|
assert!(
|
||||||
|
!done_path.exists(),
|
||||||
|
"Story must NOT be moved to 5_done/ when run_server_owned_completion \
|
||||||
|
is (incorrectly) called for a mergemaster agent"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
story_path.exists(),
|
||||||
|
"Story must remain in 4_merge/ when mergemaster completion is a no-op"
|
||||||
|
);
|
||||||
|
|
||||||
|
// The agent entry should remain in the pool (lifecycle cleanup is the
|
||||||
|
// caller's responsibility, not run_server_owned_completion's).
|
||||||
|
let agents = pool.agents.lock().unwrap();
|
||||||
|
let key = composite_key("99_story_merge445", "mergemaster");
|
||||||
|
assert!(
|
||||||
|
agents.get(&key).is_some(),
|
||||||
|
"Agent must remain in pool — run_server_owned_completion is a no-op for mergemaster"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user