huskies: merge 681_refactor_decompose_server_src_agents_pool_pipeline_advance_mod_rs_1509_lines

This commit is contained in:
dave
2026-04-27 17:29:11 +00:00
parent 875096b3ec
commit ed8646f0d9
3 changed files with 1016 additions and 1010 deletions
@@ -0,0 +1,775 @@
//! Regression tests for pipeline advance (bugs 295, 519, 529, 645, 668).
use super::super::super::{AgentPool, composite_key};
use crate::agents::{AgentStatus, CompletionReport};
use crate::io::watcher::WatcherEvent;
// ── story 519: mergemaster pre-flight blocks when no commits ahead ──
/// Regression test for story 519: when the feature branch has zero commits
/// ahead of master, mergemaster must not spawn a Claude session. A no-op
/// session spent $0.82 in the 2026-04-09 incident because the worktree was
/// reset to master before mergemaster ran.
///
/// After `run_pipeline_advance` returns, the test checks that the story is
/// still readable from the content store, that a `StoryBlocked` event for the
/// story was broadcast, and that no agent whose name contains "mergemaster"
/// is present in the pool.
#[tokio::test]
async fn mergemaster_blocks_and_sends_story_blocked_when_no_commits_ahead() {
    use std::fs;

    /// Runs `git <args>` inside `dir` and panics if git cannot be spawned or
    /// exits non-zero. Checking the exit status keeps the test from passing
    /// vacuously when the fixture repository was never actually built.
    fn git(dir: &std::path::Path, args: &[&str]) {
        let out = std::process::Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("git must be installed and spawnable for this test");
        assert!(
            out.status.success(),
            "fixture command `git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&out.stderr)
        );
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Init a regular (non-bare) git repo with one empty commit.
    git(root, &["init"]);
    git(root, &["config", "user.email", "test@test.com"]);
    git(root, &["config", "user.name", "Test"]);
    git(root, &["commit", "--allow-empty", "-m", "init"]);

    // Create a feature branch that points at the current HEAD (zero commits
    // ahead). This replicates the incident where the worktree was reset to
    // master.
    git(root, &["branch", "feature/story-9919_story_no_commits"]);

    // Set up pipeline dirs and story file.
    let current = root.join(".huskies/work/2_current");
    fs::create_dir_all(&current).unwrap();
    fs::create_dir_all(root.join(".huskies/work/4_merge")).unwrap();
    fs::write(
        current.join("9919_story_no_commits.md"),
        "---\nname: Test\n---\n",
    )
    .unwrap();
    crate::db::ensure_content_store();
    crate::db::write_content("9919_story_no_commits", "---\nname: Test\n---\n");

    let pool = AgentPool::new_test(3001);
    // Subscribe before the advance so events emitted during it are captured.
    let mut rx = pool.watcher_tx.subscribe();

    // Simulate coder completing with gates passed (qa: server → goes to merge).
    pool.run_pipeline_advance(
        "9919_story_no_commits",
        "coder-1",
        CompletionReport {
            summary: "done".to_string(),
            gates_passed: true,
            gate_output: String::new(),
        },
        Some(root.to_path_buf()),
        None,
        false,
        None,
    )
    .await;

    // Story should still exist in the content store after the advance.
    assert!(
        crate::db::read_content("9919_story_no_commits").is_some(),
        "story should remain in content store — not removed"
    );

    // A StoryBlocked event must have been emitted (triggers chat failure notice,
    // not the success 🎉 emoji). Drain whatever is queued and look for ours.
    let mut got_blocked = false;
    while let Ok(evt) = rx.try_recv() {
        if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
            && story_id == "9919_story_no_commits"
        {
            got_blocked = true;
            break;
        }
    }
    assert!(
        got_blocked,
        "StoryBlocked event must be sent when feature branch has no commits ahead of master"
    );

    // No mergemaster agent should have been started.
    let agents = pool.agents.lock().unwrap();
    let mergemaster_started = agents
        .values()
        .any(|a| a.agent_name.contains("mergemaster"));
    assert!(
        !mergemaster_started,
        "mergemaster agent must NOT be started when no commits ahead of master"
    );
}
// ── bug 295: pipeline advance picks up waiting QA stories ──────────
/// Regression test for bug 295: once the QA agent finishes one story, the
/// pipeline advance must hand the now-free QA agent to another story that was
/// waiting in the QA stage without an agent.
#[tokio::test]
async fn pipeline_advance_picks_up_waiting_qa_stories_after_completion() {
    use super::super::super::auto_assign::is_agent_free;
    use std::fs;

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Project config declaring a single agent named "qa" for the qa stage.
    let huskies_dir = root.join(".huskies");
    fs::create_dir_all(&huskies_dir).unwrap();
    fs::write(
        huskies_dir.join("project.toml"),
        "\n[[agent]]\nname = \"qa\"\nstage = \"qa\"\n",
    )
    .unwrap();

    // Seed both stories into the 3_qa stage. Story 292 is the one whose QA
    // run "completes" below; story 293 has no agent, which is the stuck
    // state described by bug 295.
    crate::db::ensure_content_store();
    for (id, body) in [
        ("292_story_first", "---\nname: First\nqa: human\n---\n"),
        ("293_story_second", "---\nname: Second\nqa: human\n---\n"),
    ] {
        crate::db::write_item_with_content(id, "3_qa", body);
    }

    let pool = AgentPool::new_test(3001);

    // Mark the QA agent as running on story 292.
    pool.inject_test_agent("292_story_first", "qa", AgentStatus::Running);

    // Sanity check: while 292 holds the QA agent, it must not be reported free.
    let qa_is_free = {
        let agents = pool.agents.lock().unwrap();
        is_agent_free(&agents, "qa")
    };
    assert!(!qa_is_free, "qa should be busy on story 292");

    // Simulate QA completing on story 292: drop its pool entry first (as
    // run_server_owned_completion does), then run the pipeline advance.
    pool.agents
        .lock()
        .unwrap()
        .remove(&composite_key("292_story_first", "qa"));

    let report = CompletionReport {
        summary: "QA done".to_string(),
        gates_passed: true,
        gate_output: String::new(),
    };
    pool.run_pipeline_advance(
        "292_story_first",
        "qa",
        report,
        Some(root.to_path_buf()),
        None,
        false,
        None,
    )
    .await;

    // After the advance a "qa" agent must be pending or running again. Story
    // 292's entry was removed above, so any such agent was newly assigned.
    let agents = pool.agents.lock().unwrap();
    let qa_on_293 = agents
        .values()
        .filter(|agent| agent.agent_name == "qa")
        .any(|agent| matches!(agent.status, AgentStatus::Pending | AgentStatus::Running));
    assert!(
        qa_on_293,
        "auto_assign should have started qa for story 293 after 292's QA completed, \
         but no qa agent is pending/running. Pool: {:?}",
        agents
            .iter()
            .map(|(k, a)| format!("{k}: {} ({})", a.agent_name, a.status))
            .collect::<Vec<_>>()
    );
}
// ── bug 529: stale mergemaster advance for a done story is a no-op ──
/// Regression test for bug 529: a stale mergemaster advance that fires after
/// the story already sits in 5_done must do nothing.
///
/// The test verifies that the agent pool stays empty, that no `StoryBlocked`
/// event is broadcast, and (when the typed pipeline state can be read) that
/// the story's stage is still `5_done`.
#[tokio::test]
async fn stale_mergemaster_advance_for_done_story_is_noop() {
    use std::fs;

    // Spawns `git <argv>` in `cwd`; only a spawn failure panics, the exit
    // status is not inspected.
    fn run_git(cwd: &std::path::Path, argv: &[&str]) {
        std::process::Command::new("git")
            .args(argv)
            .current_dir(cwd)
            .output()
            .unwrap();
    }

    // The CRDT has to be initialised for `read_typed` to work.
    crate::crdt_state::init_for_test();
    crate::db::ensure_content_store();

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // A git repo is provided so that post-merge tests would pass if they ran.
    for argv in [
        &["init"][..],
        &["config", "user.email", "test@test.com"][..],
        &["config", "user.name", "Test"][..],
        &["commit", "--allow-empty", "-m", "init"][..],
    ] {
        run_git(root, argv);
    }

    // Pipeline directory for the done stage.
    fs::create_dir_all(root.join(".huskies/work/5_done")).unwrap();

    // Seed the story directly in 5_done through the DB layer.
    let story_id = "9929_story_zombie_merge";
    let content = "---\nname: Zombie Merge Test\n---\n";
    crate::db::write_content(story_id, content);
    crate::db::write_item_with_content(story_id, "5_done", content);

    let pool = AgentPool::new_test(3001);
    let mut rx = pool.watcher_tx.subscribe();

    // Fire the stale mergemaster advance for the already-done story.
    let stale_report = CompletionReport {
        summary: "stale advance".to_string(),
        gates_passed: true,
        gate_output: String::new(),
    };
    pool.run_pipeline_advance(
        story_id,
        "mergemaster",
        stale_report,
        Some(root.to_path_buf()),
        None,
        false,
        None,
    )
    .await;

    // The pool must still be empty; the guard is released at the end of the
    // scope.
    {
        let agents = pool.agents.lock().unwrap();
        assert!(
            agents.is_empty(),
            "No agents should be started for a stale advance on a done story. \
             Pool: {:?}",
            agents.keys().collect::<Vec<_>>()
        );
    }

    // Drain every queued event and count StoryBlocked ones. Other event kinds
    // are not inspected here.
    let mut blocked_events = 0usize;
    while let Ok(evt) = rx.try_recv() {
        if matches!(evt, WatcherEvent::StoryBlocked { .. }) {
            blocked_events += 1;
        }
    }
    assert!(
        blocked_events == 0,
        "No StoryBlocked event should be emitted for a stale advance"
    );

    // The story should still be in 5_done (not moved elsewhere).
    // NOTE(review): this check is skipped silently when `read_typed` returns
    // `Err` or `Ok(None)` — confirm whether that leniency is intentional.
    let typed_item = crate::pipeline_state::read_typed(story_id).ok().flatten();
    if let Some(item) = typed_item {
        assert_eq!(
            item.stage.dir_name(),
            "5_done",
            "Story should remain in 5_done after stale mergemaster advance"
        );
    }
}
// ── bug 645: work-survived check advances to QA instead of blocking ──
/// Integration test for bug 645: a coder agent reports failed gates, but its
/// committed work is still on the feature branch and passing `run_tests`
/// evidence was recorded. The story must be advanced rather than retried or
/// blocked. This models an agent that commits work and then dies mid-output.
#[tokio::test]
async fn work_survived_advances_to_qa_instead_of_blocking() {
    use std::fs;

    // Spawns `git <argv>` in `cwd`; only a spawn failure panics, the exit
    // status is not inspected.
    fn run_git(cwd: &std::path::Path, argv: &[&str]) {
        std::process::Command::new("git")
            .args(argv)
            .current_dir(cwd)
            .output()
            .unwrap();
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Repository holding a minimal Cargo library project.
    run_git(root, &["init"]);
    run_git(root, &["config", "user.email", "test@test.com"]);
    run_git(root, &["config", "user.name", "Test"]);
    fs::write(
        root.join("Cargo.toml"),
        "[package]\nname = \"test_proj\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
    )
    .unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
    run_git(root, &["add", "."]);
    run_git(root, &["commit", "-m", "init"]);

    // Feature-branch worktree next to the main checkout.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy().into_owned();
    run_git(
        root,
        &[
            "worktree",
            "add",
            &wt_arg,
            "-b",
            "feature/story-9945_story_survived",
        ],
    );

    // Valid code committed on the feature branch: this is the surviving work.
    fs::write(wt_path.join("src/lib.rs"), "pub fn survived() {}\n").unwrap();
    run_git(&wt_path, &["add", "."]);
    run_git(&wt_path, &["commit", "-m", "add survived fn"]);

    // Register the story in the content store, in the 2_current stage.
    let story = "9945_story_survived";
    let front_matter = "---\nname: Survived Test\n---\n";
    crate::db::ensure_content_store();
    crate::db::write_content(story, front_matter);
    crate::db::write_item_with_content(story, "2_current", front_matter);

    // Simulate a passing run_tests call during the agent's session (bug 668):
    // the agent ran script/test, it passed, and the server captured the evidence.
    crate::db::write_content("9945_story_survived:run_tests_ok", "1");

    let pool = AgentPool::new_test(3001);

    // The coder reports failed gates (e.g. agent crashed, dirty worktree).
    let crash_report = CompletionReport {
        summary: "Agent crashed".to_string(),
        gates_passed: false,
        gate_output: "Worktree has uncommitted changes".to_string(),
    };
    pool.run_pipeline_advance(
        story,
        "coder-1",
        crash_report,
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // The stored story content must carry neither a "blocked" marker nor a
    // "retry_count" entry: the work-survived path advances the story instead
    // of recording a failed attempt.
    let content = crate::db::read_content("9945_story_survived")
        .expect("story should exist in content store");
    assert!(
        !content.contains("blocked"),
        "story should NOT be blocked when committed work survives: {content}"
    );
    assert!(
        !content.contains("retry_count"),
        "story should NOT have retry_count when work survived: {content}"
    );
}
/// Backwards-compat: agents that die WITHOUT committed work still get
/// the existing retry/block treatment.
///
/// The fixture creates a feature-branch worktree that has no commits beyond
/// the initial one, configures `max_retries = 1`, and then expects a
/// `StoryBlocked` event for the story after a failed-gates advance.
#[tokio::test]
async fn no_committed_work_still_retries_and_blocks() {
    use std::fs;

    /// Runs `git <args>` inside `dir` and panics if git cannot be spawned or
    /// exits non-zero. Without the status check a failed `worktree add` would
    /// go unnoticed and the test would no longer exercise the intended state.
    fn git(dir: &std::path::Path, args: &[&str]) {
        let out = std::process::Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("git must be installed and spawnable for this test");
        assert!(
            out.status.success(),
            "fixture command `git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&out.stderr)
        );
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Init a git repo (no Cargo project needed — cargo check will fail).
    git(root, &["init"]);
    git(root, &["config", "user.email", "test@test.com"]);
    git(root, &["config", "user.name", "Test"]);
    git(root, &["commit", "--allow-empty", "-m", "init"]);

    // Create a worktree with NO commits on the feature branch.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy().into_owned();
    git(
        root,
        &[
            "worktree",
            "add",
            &wt_arg,
            "-b",
            "feature/story-9946_story_nowork",
        ],
    );

    // Register the story in the content store, in the 2_current stage.
    crate::db::ensure_content_store();
    crate::db::write_content("9946_story_nowork", "---\nname: No Work Test\n---\n");
    crate::db::write_item_with_content(
        "9946_story_nowork",
        "2_current",
        "---\nname: No Work Test\n---\n",
    );

    // Write a project.toml with max_retries = 1 so the first failure blocks.
    fs::create_dir_all(root.join(".huskies")).unwrap();
    fs::write(
        root.join(".huskies/project.toml"),
        "max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
    )
    .unwrap();

    let pool = AgentPool::new_test(3001);
    // Subscribe before the advance so events emitted during it are captured.
    let mut rx = pool.watcher_tx.subscribe();

    // Simulate coder failing gates with NO committed work on the worktree.
    pool.run_pipeline_advance(
        "9946_story_nowork",
        "coder-1",
        CompletionReport {
            summary: "Agent crashed".to_string(),
            gates_passed: false,
            gate_output: "Tests failed".to_string(),
        },
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // With no committed work and max_retries=1, the story should be blocked.
    let mut got_blocked = false;
    while let Ok(evt) = rx.try_recv() {
        if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
            && story_id == "9946_story_nowork"
        {
            got_blocked = true;
            break;
        }
    }
    assert!(
        got_blocked,
        "Story with no committed work should be blocked after exceeding retry limit"
    );
}
// ── bug 668: pipeline must NOT advance when gates_passed=false and no test evidence ──
/// Path (a): gates_passed=false with committed work but NO captured run_tests
/// evidence → story stays in coding (retries), does NOT advance to QA/merge.
///
/// The test is only meaningful if the feature branch really carries a commit,
/// so every fixture git command is checked for success. The outcome is
/// observed through a `StoryBlocked` event, which `max_retries = 1` is
/// configured to trigger on the first failure.
#[tokio::test]
async fn gates_failed_no_test_evidence_does_not_advance() {
    use std::fs;

    /// Runs `git <args>` inside `dir` and panics if git cannot be spawned or
    /// exits non-zero. If the feature-branch commit failed silently, the story
    /// could block for lack of committed work and this test would pass without
    /// exercising the missing-evidence path.
    fn git(dir: &std::path::Path, args: &[&str]) {
        let out = std::process::Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("git must be installed and spawnable for this test");
        assert!(
            out.status.success(),
            "fixture command `git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&out.stderr)
        );
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Init a git repo containing a minimal Cargo library project.
    git(root, &["init"]);
    git(root, &["config", "user.email", "test@test.com"]);
    git(root, &["config", "user.name", "Test"]);
    fs::write(
        root.join("Cargo.toml"),
        "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
    )
    .unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
    git(root, &["add", "."]);
    git(root, &["commit", "-m", "init"]);

    // Create a worktree with committed work on the feature branch.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy().into_owned();
    git(
        root,
        &[
            "worktree",
            "add",
            &wt_arg,
            "-b",
            "feature/story-9947_story_no_evidence",
        ],
    );
    fs::write(wt_path.join("src/lib.rs"), "pub fn added() {}\n").unwrap();
    git(&wt_path, &["add", "."]);
    git(&wt_path, &["commit", "-m", "add fn"]);

    // Register the story in the content store, in the 2_current stage.
    crate::db::ensure_content_store();
    crate::db::write_content(
        "9947_story_no_evidence",
        "---\nname: No Evidence Test\n---\n",
    );
    crate::db::write_item_with_content(
        "9947_story_no_evidence",
        "2_current",
        "---\nname: No Evidence Test\n---\n",
    );

    // Explicitly ensure no test evidence exists for this story.
    crate::db::delete_content("9947_story_no_evidence:run_tests_ok");

    // max_retries=1 so the retry/block outcome is observable after one failure.
    fs::create_dir_all(root.join(".huskies")).unwrap();
    fs::write(
        root.join(".huskies/project.toml"),
        "max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
    )
    .unwrap();

    let pool = AgentPool::new_test(3001);
    // Subscribe before the advance so events emitted during it are captured.
    let mut rx = pool.watcher_tx.subscribe();

    // gates_passed=false, no run_tests evidence, but committed work exists.
    pool.run_pipeline_advance(
        "9947_story_no_evidence",
        "coder-1",
        CompletionReport {
            summary: "Gates failed".to_string(),
            gates_passed: false,
            gate_output: "Tests failed".to_string(),
        },
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // Story must NOT advance — it should be blocked (max_retries=1 means
    // first failure triggers block) rather than moving to QA/merge.
    let mut got_blocked = false;
    while let Ok(evt) = rx.try_recv() {
        if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
            && story_id == "9947_story_no_evidence"
        {
            got_blocked = true;
            break;
        }
    }
    assert!(
        got_blocked,
        "gates_passed=false without run_tests evidence must NOT advance to QA/merge — \
         story should stay in coding (bug 668)"
    );
}
/// Path (b): gates_passed=false, but passing run_tests evidence was captured
/// AND work is committed on the feature branch → the story advances to
/// QA/merge. This is the legitimate bug-645 salvage case, where the agent ran
/// passing tests and then crashed before server gates could confirm them.
///
/// The test also checks that the evidence entry is gone from the content
/// store once the advance has run.
#[tokio::test]
async fn gates_failed_with_test_evidence_and_committed_work_advances() {
    use std::fs;

    // Spawns `git <argv>` in `cwd`; only a spawn failure panics, the exit
    // status is not inspected.
    fn run_git(cwd: &std::path::Path, argv: &[&str]) {
        std::process::Command::new("git")
            .args(argv)
            .current_dir(cwd)
            .output()
            .unwrap();
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Repository holding a minimal Cargo library project.
    run_git(root, &["init"]);
    run_git(root, &["config", "user.email", "test@test.com"]);
    run_git(root, &["config", "user.name", "Test"]);
    fs::write(
        root.join("Cargo.toml"),
        "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
    )
    .unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
    run_git(root, &["add", "."]);
    run_git(root, &["commit", "-m", "init"]);

    // Feature-branch worktree with one commit of salvageable work.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy().into_owned();
    run_git(
        root,
        &[
            "worktree",
            "add",
            &wt_arg,
            "-b",
            "feature/story-9948_story_with_evidence",
        ],
    );
    fs::write(wt_path.join("src/lib.rs"), "pub fn salvaged() {}\n").unwrap();
    run_git(&wt_path, &["add", "."]);
    run_git(&wt_path, &["commit", "-m", "add salvaged fn"]);

    // Register the story in the content store, in the 2_current stage.
    let story = "9948_story_with_evidence";
    let front_matter = "---\nname: With Evidence Test\n---\n";
    crate::db::ensure_content_store();
    crate::db::write_content(story, front_matter);
    crate::db::write_item_with_content(story, "2_current", front_matter);

    // Write the run_tests evidence — simulates the agent having called run_tests
    // MCP and getting a passing result before it crashed.
    crate::db::write_content("9948_story_with_evidence:run_tests_ok", "1");

    let pool = AgentPool::new_test(3001);

    // gates_passed=false (agent crashed), but test evidence exists.
    let crash_report = CompletionReport {
        summary: "Agent crashed".to_string(),
        gates_passed: false,
        gate_output: "PTY write assertion failed".to_string(),
    };
    pool.run_pipeline_advance(
        story,
        "coder-1",
        crash_report,
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // The stored story content must carry neither a "blocked" marker nor a
    // "retry_count" entry.
    let content = crate::db::read_content("9948_story_with_evidence")
        .expect("story must exist in content store");
    assert!(
        !content.contains("blocked"),
        "story must NOT be blocked when test evidence exists and work committed: {content}"
    );
    assert!(
        !content.contains("retry_count"),
        "story must NOT have retry_count when salvaged via test evidence: {content}"
    );

    // The evidence entry must have been consumed (cleared) by the advance.
    let leftover_evidence = crate::db::read_content("9948_story_with_evidence:run_tests_ok");
    assert!(
        leftover_evidence.is_none(),
        "run_tests evidence must be cleared after pipeline advance consumes it"
    );
}