storkit: merge 445_bug_rate_limited_mergemaster_exits_advance_stories_to_done_without_merging
This commit is contained in:
@@ -498,18 +498,63 @@ impl AgentPool {
|
||||
}
|
||||
}
|
||||
|
||||
// Server-owned completion: run acceptance gates automatically
|
||||
// when the agent process exits normally.
|
||||
super::pipeline::run_server_owned_completion(
|
||||
&agents_ref,
|
||||
port_for_task,
|
||||
&sid,
|
||||
&aname,
|
||||
result.session_id,
|
||||
watcher_tx_clone.clone(),
|
||||
)
|
||||
.await;
|
||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||
// Mergemaster agents have their own completion path via
|
||||
// start_merge_agent_work / run_merge_pipeline and must NOT go
|
||||
// through server-owned gates. When a mergemaster exits early
|
||||
// (e.g. rate-limited before calling start_merge_agent_work) the
|
||||
// feature-branch worktree compiles fine and post-merge tests on
|
||||
// master pass (nothing changed), which would wrongly advance the
|
||||
// story to 5_done/ without any squash merge having occurred.
|
||||
// Instead: just remove the agent from the pool and let
|
||||
// auto-assign restart a new mergemaster for the story.
|
||||
let stage = config_clone
|
||||
.find_agent(&aname)
|
||||
.map(agent_config_stage)
|
||||
.unwrap_or_else(|| pipeline_stage(&aname));
|
||||
if stage == PipelineStage::Mergemaster {
|
||||
let (tx_done, done_session_id) = {
|
||||
let mut lock = match agents_ref.lock() {
|
||||
Ok(a) => a,
|
||||
Err(_) => return,
|
||||
};
|
||||
if let Some(agent) = lock.remove(&key_clone) {
|
||||
(agent.tx, agent.session_id.or(result.session_id))
|
||||
} else {
|
||||
(tx_clone.clone(), result.session_id)
|
||||
}
|
||||
};
|
||||
let _ = tx_done.send(AgentEvent::Done {
|
||||
story_id: sid.clone(),
|
||||
agent_name: aname.clone(),
|
||||
session_id: done_session_id,
|
||||
});
|
||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||
// Send a WorkItem event so the auto-assign watcher loop
|
||||
// re-dispatches a new mergemaster if the story still needs
|
||||
// merging. This avoids an async call to start_agent inside
|
||||
// a tokio::spawn (which would require Send).
|
||||
let _ = watcher_tx_clone.send(
|
||||
crate::io::watcher::WatcherEvent::WorkItem {
|
||||
stage: "4_merge".to_string(),
|
||||
item_id: sid.clone(),
|
||||
action: "reassign".to_string(),
|
||||
commit_msg: String::new(),
|
||||
},
|
||||
);
|
||||
} else {
|
||||
// Server-owned completion: run acceptance gates automatically
|
||||
// when the agent process exits normally.
|
||||
super::pipeline::run_server_owned_completion(
|
||||
&agents_ref,
|
||||
port_for_task,
|
||||
&sid,
|
||||
&aname,
|
||||
result.session_id,
|
||||
watcher_tx_clone.clone(),
|
||||
)
|
||||
.await;
|
||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use super::super::super::{AgentEvent, AgentStatus, CompletionReport};
|
||||
use super::super::super::{AgentEvent, AgentStatus, CompletionReport, PipelineStage, pipeline_stage};
|
||||
use super::super::{AgentPool, StoryAgent, composite_key};
|
||||
use super::advance::spawn_pipeline_advance;
|
||||
|
||||
@@ -155,6 +155,21 @@ pub(in crate::agents::pool) async fn run_server_owned_completion(
|
||||
) {
|
||||
let key = composite_key(story_id, agent_name);
|
||||
|
||||
// Guard: mergemaster agents have their own completion path via
|
||||
// start_merge_agent_work / run_merge_pipeline. Running server-owned gates
|
||||
// for a mergemaster would wrongly advance the story to 5_done/ even when
|
||||
// no squash merge has occurred (e.g. rate-limited exit before the agent
|
||||
// called start_merge_agent_work). The lifecycle caller is responsible for
|
||||
// cleaning up the agent entry and triggering auto-assign.
|
||||
if pipeline_stage(agent_name) == PipelineStage::Mergemaster {
|
||||
slog!(
|
||||
"[agents] run_server_owned_completion skipped for mergemaster \
|
||||
'{story_id}:{agent_name}'; mergemaster completion is handled by \
|
||||
start_merge_agent_work."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Guard: skip if completion was already recorded (legacy path).
|
||||
{
|
||||
let lock = match agents.lock() {
|
||||
@@ -516,4 +531,83 @@ mod tests {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Regression test for bug 445: a rate-limited mergemaster exits before
|
||||
/// calling start_merge_agent_work. run_server_owned_completion must be a
|
||||
/// no-op for mergemaster agents — it must not run acceptance gates and must
|
||||
/// not advance the story to 5_done/ even when a passing script/test exists.
|
||||
///
|
||||
/// Before the fix: run_server_owned_completion would call run_pipeline_advance
|
||||
/// for the Mergemaster stage, which ran post-merge tests on master (they pass
|
||||
/// because nothing changed), then called move_story_to_done — advancing the
|
||||
/// story without any squash merge having occurred.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test]
|
||||
async fn server_owned_completion_is_noop_for_mergemaster() {
|
||||
use std::fs;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use tempfile::tempdir;
|
||||
|
||||
let tmp = tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
init_git_repo(root);
|
||||
|
||||
// Create a passing script/test so post-merge tests would succeed if
|
||||
// run_pipeline_advance were incorrectly called for this mergemaster.
|
||||
let script_dir = root.join("script");
|
||||
fs::create_dir_all(&script_dir).unwrap();
|
||||
let script_test = script_dir.join("test");
|
||||
fs::write(&script_test, "#!/usr/bin/env sh\nexit 0\n").unwrap();
|
||||
let mut perms = fs::metadata(&script_test).unwrap().permissions();
|
||||
perms.set_mode(0o755);
|
||||
fs::set_permissions(&script_test, perms).unwrap();
|
||||
|
||||
// Story in 4_merge/ — must NOT be moved to 5_done/.
|
||||
let merge_dir = root.join(".storkit/work/4_merge");
|
||||
fs::create_dir_all(&merge_dir).unwrap();
|
||||
let story_path = merge_dir.join("99_story_merge445.md");
|
||||
fs::write(&story_path, "---\nname: Merge 445 Test\n---\n").unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.inject_test_agent_with_path(
|
||||
"99_story_merge445",
|
||||
"mergemaster",
|
||||
AgentStatus::Running,
|
||||
root.to_path_buf(),
|
||||
);
|
||||
|
||||
run_server_owned_completion(
|
||||
&pool.agents,
|
||||
pool.port,
|
||||
"99_story_merge445",
|
||||
"mergemaster",
|
||||
None,
|
||||
pool.watcher_tx.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Wait briefly in case any background task fires.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(150)).await;
|
||||
|
||||
// Story must remain in 4_merge/ — not moved to 5_done/.
|
||||
let done_path = root.join(".storkit/work/5_done/99_story_merge445.md");
|
||||
assert!(
|
||||
!done_path.exists(),
|
||||
"Story must NOT be moved to 5_done/ when run_server_owned_completion \
|
||||
is (incorrectly) called for a mergemaster agent"
|
||||
);
|
||||
assert!(
|
||||
story_path.exists(),
|
||||
"Story must remain in 4_merge/ when mergemaster completion is a no-op"
|
||||
);
|
||||
|
||||
// The agent entry should remain in the pool (lifecycle cleanup is the
|
||||
// caller's responsibility, not run_server_owned_completion's).
|
||||
let agents = pool.agents.lock().unwrap();
|
||||
let key = composite_key("99_story_merge445", "mergemaster");
|
||||
assert!(
|
||||
agents.get(&key).is_some(),
|
||||
"Agent must remain in pool — run_server_owned_completion is a no-op for mergemaster"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user