huskies: merge 498_bug_stale_merge_job_lock_prevents_new_merges_after_agent_dies
This commit is contained in:
@@ -212,6 +212,70 @@ mod tests {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// ── bug 498: stale Running job blocks retry ───────────────────────────────
|
||||
|
||||
/// Regression test for bug 498: a Running merge job left behind by a killed
|
||||
/// mergemaster must not block the next call to start_merge_agent_work.
|
||||
///
|
||||
/// Before the fix: start_merge_agent_work would return "Merge already in
|
||||
/// progress" when a Running entry existed, even after the mergemaster died.
|
||||
/// After the fix: the entry is cleared when the mergemaster exits, so a new
|
||||
/// call succeeds.
|
||||
#[tokio::test]
|
||||
async fn stale_running_merge_job_is_cleared_and_retry_succeeds() {
|
||||
use tempfile::tempdir;
|
||||
|
||||
let tmp = tempdir().unwrap();
|
||||
let repo = tmp.path();
|
||||
init_git_repo(repo);
|
||||
|
||||
let pool = Arc::new(AgentPool::new_test(3001));
|
||||
|
||||
// Inject a stale Running entry, simulating a mergemaster that died
|
||||
// before the merge pipeline completed.
|
||||
{
|
||||
let mut jobs = pool.merge_jobs.lock().unwrap();
|
||||
jobs.insert(
|
||||
"77_story_stale".to_string(),
|
||||
MergeJob {
|
||||
story_id: "77_story_stale".to_string(),
|
||||
status: MergeJobStatus::Running,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// With a stale Running entry, start_merge_agent_work must be blocked.
|
||||
let blocked = pool.start_merge_agent_work(repo, "77_story_stale");
|
||||
assert!(
|
||||
blocked.is_err(),
|
||||
"start_merge_agent_work must be blocked while Running job exists"
|
||||
);
|
||||
let err_msg = blocked.unwrap_err();
|
||||
assert!(
|
||||
err_msg.contains("already in progress"),
|
||||
"unexpected error: {err_msg}"
|
||||
);
|
||||
|
||||
// Simulate the mergemaster exit path: clear the stale Running entry.
|
||||
{
|
||||
let mut jobs = pool.merge_jobs.lock().unwrap();
|
||||
if let Some(job) = jobs.get("77_story_stale")
|
||||
&& matches!(job.status, MergeJobStatus::Running)
|
||||
{
|
||||
jobs.remove("77_story_stale");
|
||||
}
|
||||
}
|
||||
|
||||
// After clearing, start_merge_agent_work must succeed (it will fail
|
||||
// the pipeline because there's no feature branch, but it must not be
|
||||
// blocked by "Merge already in progress").
|
||||
let result = pool.start_merge_agent_work(repo, "77_story_stale");
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"start_merge_agent_work must succeed after stale Running job is cleared; got: {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
// ── merge_agent_work tests ────────────────────────────────────────────────
|
||||
|
||||
/// Helper: start a merge and poll until terminal state.
|
||||
|
||||
@@ -320,6 +320,7 @@ impl AgentPool {
|
||||
let log_writer_clone = log_writer.clone();
|
||||
let child_killers_clone = self.child_killers.clone();
|
||||
let watcher_tx_clone = self.watcher_tx.clone();
|
||||
let merge_jobs_clone = Arc::clone(&self.merge_jobs);
|
||||
|
||||
// Spawn the background task. Worktree creation and agent launch happen here
|
||||
// so `start_agent` returns immediately after registering the agent as
|
||||
@@ -524,6 +525,15 @@ impl AgentPool {
|
||||
(tx_clone.clone(), result.session_id)
|
||||
}
|
||||
};
|
||||
// Clear any stale Running merge job so the next mergemaster
|
||||
// can call start_merge_agent_work without hitting "Merge
|
||||
// already in progress" (bug 498).
|
||||
if let Ok(mut jobs) = merge_jobs_clone.lock()
|
||||
&& let Some(job) = jobs.get(&sid)
|
||||
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
|
||||
{
|
||||
jobs.remove(&sid);
|
||||
}
|
||||
let _ = tx_done.send(AgentEvent::Done {
|
||||
story_id: sid.clone(),
|
||||
agent_name: aname.clone(),
|
||||
|
||||
Reference in New Issue
Block a user