huskies: merge 1100 bug Multiple LLM agents can run concurrently on the same story (coder + mergemaster + others) — enforce one-agent-per-story invariant

This commit is contained in:
dave
2026-05-15 20:19:35 +00:00
parent 9f4f493486
commit 4216ced493
7 changed files with 429 additions and 13 deletions
@@ -602,6 +602,266 @@ async fn start_agent_allows_correct_stage_agent() {
}
}
// ── story-1100: cross-stage LLM agent rejection ─────────────────────────
/// Story 1100: with `coder-1` already `Running` on a story, asking the pool to
/// start `mergemaster` on that same story must fail, and the error text must
/// mention either "active LLM agent" or "stale agent".
#[tokio::test]
async fn start_agent_rejects_mergemaster_when_coder_running_same_story() {
    // Minimal project config: one coder-stage agent, one mergemaster-stage agent.
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();
    let config_dir = project_root.join(".huskies");
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(
        config_dir.join("project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
         [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
    )
    .unwrap();

    // Pre-seed the pool with a coder that is still running on the story.
    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("999_story_cross", "coder-1", AgentStatus::Running);

    // The start request must be refused; keep the error for inspection.
    let err = match agent_pool
        .start_agent(project_root, "999_story_cross", Some("mergemaster"), None, None)
        .await
    {
        Err(err) => err,
        Ok(_) => {
            panic!("mergemaster must be rejected when coder-1 is still running on same story")
        }
    };

    let mentions_conflict = ["active LLM agent", "stale agent"]
        .iter()
        .any(|&marker| err.contains(marker));
    assert!(
        mentions_conflict,
        "error must mention active LLM agent conflict, got: '{err}'"
    );
}
/// Story 1100, mirror case: with `mergemaster` already `Running` on a story,
/// asking the pool to start `coder-1` on that same story must fail, and the
/// error text must mention either "active LLM agent" or "stale agent".
#[tokio::test]
async fn start_agent_rejects_coder_when_mergemaster_running_same_story() {
    // Minimal project config: one coder-stage agent, one mergemaster-stage agent.
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();
    let config_dir = project_root.join(".huskies");
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(
        config_dir.join("project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
         [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
    )
    .unwrap();

    // Pre-seed the pool with a mergemaster that is still running on the story.
    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("888_story_cross2", "mergemaster", AgentStatus::Running);

    // The start request must be refused; keep the error for inspection.
    let err = match agent_pool
        .start_agent(project_root, "888_story_cross2", Some("coder-1"), None, None)
        .await
    {
        Err(err) => err,
        Ok(_) => panic!("coder-1 must be rejected when mergemaster is running on same story"),
    };

    let mentions_conflict = ["active LLM agent", "stale agent"]
        .iter()
        .any(|&marker| err.contains(marker));
    assert!(
        mentions_conflict,
        "error must mention active LLM agent conflict, got: '{err}'"
    );
}
/// Story 1100: the conflict guard is per story. With `mergemaster` running on
/// `111_story_x`, starting `coder-1` on `777_story_other` is allowed to fail
/// for other reasons, but the error must never be the "active LLM agent" /
/// "stale agent" conflict. A successful start also passes the test.
#[tokio::test]
async fn start_agent_cross_stage_does_not_block_different_stories() {
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();

    // Lay out the backlog directory, the agent config, and the target story file.
    std::fs::create_dir_all(project_root.join(".huskies").join("work/1_backlog")).unwrap();
    std::fs::write(
        project_root.join(".huskies/project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
         [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
    )
    .unwrap();
    std::fs::write(
        project_root.join(".huskies/work/1_backlog/777_story_other.md"),
        "---\nname: Other\n---\n",
    )
    .unwrap();

    // mergemaster running on story-x should NOT block coder on story-y.
    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("111_story_x", "mergemaster", AgentStatus::Running);

    let outcome = agent_pool
        .start_agent(project_root, "777_story_other", Some("coder-1"), None, None)
        .await;

    // Only an error is inspected; it must not be the cross-stage conflict.
    if let Err(e) = &outcome {
        let guard_fired = e.contains("active LLM agent") || e.contains("stale agent");
        assert!(
            !guard_fired,
            "cross-stage guard must not fire for agents on different stories, got: '{e}'"
        );
    }
}
/// The story is written to the content store in the `qa` stage while `coder-1`
/// is still `Running` on it. After `reconcile_canonical_agents`, the pool must
/// no longer list `coder-1` as Running or Pending for that story.
#[tokio::test]
async fn reconcile_canonical_agents_stops_stale_coder_in_qa_stage() {
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();
    let config_dir = project_root.join(".huskies");
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(
        config_dir.join("project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
    )
    .unwrap();

    // Write story to CRDT in QA stage: canonical = Qa, but coder-1 is Running.
    crate::db::ensure_content_store();
    crate::db::write_item_with_content(
        "777_story_reconcile",
        "qa",
        "---\nname: Reconcile Test\n---\n",
        crate::db::ItemMeta::named("Reconcile Test"),
    );

    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("777_story_reconcile", "coder-1", AgentStatus::Running);

    // Sanity check: the injected coder is visible as active before reconciling.
    let snapshot_before = agent_pool.list_agents().unwrap();
    let coder_listed_active = snapshot_before.iter().any(|a| {
        matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
            && a.agent_name == "coder-1"
    });
    assert!(
        coder_listed_active,
        "coder-1 should be Running before reconciliation"
    );

    agent_pool.reconcile_canonical_agents(project_root).await;

    // No entry may remain that is coder-1 on this story in an active state.
    let snapshot_after = agent_pool.list_agents().unwrap();
    let coder_stopped = snapshot_after.iter().all(|a| {
        a.story_id != "777_story_reconcile"
            || a.agent_name != "coder-1"
            || !matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
    });
    assert!(
        coder_stopped,
        "reconciler must have stopped coder-1 (CRDT stage is QA, coder is wrong stage)"
    );
}
/// The story is written to the content store in the `coding` stage and
/// `coder-1` is `Running` on it. After `reconcile_canonical_agents`, the pool
/// must still list `coder-1` as Running or Pending for that story.
#[tokio::test]
async fn reconcile_canonical_agents_leaves_correct_stage_agent_alone() {
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();
    let config_dir = project_root.join(".huskies");
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(
        config_dir.join("project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
    )
    .unwrap();

    // Story is in coding stage: canonical = Coder. coder-1 is correct.
    crate::db::ensure_content_store();
    crate::db::write_item_with_content(
        "555_story_correct",
        "coding",
        "---\nname: Correct Stage\n---\n",
        crate::db::ItemMeta::named("Correct Stage"),
    );

    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("555_story_correct", "coder-1", AgentStatus::Running);

    agent_pool.reconcile_canonical_agents(project_root).await;

    // The matching-stage coder must survive reconciliation in an active state.
    let snapshot = agent_pool.list_agents().unwrap();
    let coder_still_active = snapshot.iter().any(|a| {
        let is_target = a.agent_name == "coder-1" && a.story_id == "555_story_correct";
        is_target && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
    });
    assert!(
        coder_still_active,
        "reconciler must NOT stop coder-1 when it matches the canonical stage"
    );
}
/// Regression test for story 1100. A coder that is still `Running` on a story
/// must cause both of these start requests on the same story to be rejected:
///
/// * `coder-2`, with an error mentioning "same pipeline stage";
/// * `mergemaster`, with an error mentioning "active LLM agent" or
///   "stale agent".
///
/// After `reconcile_canonical_agents` runs, `coder-1` must no longer be listed
/// as Running or Pending for the story.
#[tokio::test]
async fn regression_1100_stale_coder_blocks_mergemaster_then_reconciler_clears() {
    let workspace = tempfile::tempdir().unwrap();
    let project_root = workspace.path();
    let config_dir = project_root.join(".huskies");
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(
        config_dir.join("project.toml"),
        "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
         [[agent]]\nname = \"coder-2\"\nstage = \"coder\"\n\n\
         [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
    )
    .unwrap();

    // Simulate coder-1 still Running after the story advanced past the coding stage.
    let agent_pool = AgentPool::new_test(3099);
    agent_pool.inject_test_agent("1100_reg", "coder-1", AgentStatus::Running);

    // coder-2 blocked by same-stage check (both are Coder stage).
    let same_stage_err = match agent_pool
        .start_agent(project_root, "1100_reg", Some("coder-2"), None, None)
        .await
    {
        Err(rejection) => rejection,
        Ok(_) => panic!("coder-2 must be rejected by same-stage guard"),
    };
    assert!(
        same_stage_err.contains("same pipeline stage"),
        "same-stage check must fire for coder-2"
    );

    // mergemaster blocked by cross-stage LLM guard (coder-1 is a different LLM stage).
    let r2_err = match agent_pool
        .start_agent(project_root, "1100_reg", Some("mergemaster"), None, None)
        .await
    {
        Err(rejection) => rejection,
        Ok(_) => panic!(
            "mergemaster must be rejected because coder-1 (different LLM stage) is still running"
        ),
    };
    let cross_stage_rejected = ["active LLM agent", "stale agent"]
        .iter()
        .any(|&marker| r2_err.contains(marker));
    assert!(
        cross_stage_rejected,
        "cross-stage rejection expected, got: '{r2_err}'"
    );

    // Reconciler: story "1100_reg" has no CRDT entry → canonical = None → stop coder-1.
    agent_pool.reconcile_canonical_agents(project_root).await;

    // coder-1 must be gone from the active pool.
    let snapshot = agent_pool.list_agents().unwrap();
    let coder_cleared = snapshot.iter().all(|a| {
        a.story_id != "1100_reg"
            || a.agent_name != "coder-1"
            || !matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
    });
    assert!(
        coder_cleared,
        "reconciler must have removed stale coder-1 from the active pool"
    );
}
/// Bug 502: when start_agent is called for a non-Coder agent (mergemaster
/// or qa) on a story that's in 4_merge/, the unconditional
/// move_story_to_current at the top of start_agent must NOT fire — even