huskies: merge 668_bug_pipeline_advances_coder_work_to_merge_when_gates_passed_false

This commit is contained in:
dave
2026-04-27 11:33:36 +00:00
parent 65d2fb210c
commit 5da29c3d91
4 changed files with 299 additions and 13 deletions
+3 -2
View File
@@ -47,8 +47,9 @@ pub(crate) fn worktree_has_committed_work(wt_path: &Path) -> bool {
/// is evaluated, then restores them afterward. Uncommitted work in worktrees is /// is evaluated, then restores them afterward. Uncommitted work in worktrees is
/// never junk — it may be the next agent session's starting point (bug 651). /// never junk — it may be the next agent session's starting point (bug 651).
/// ///
/// Used as part of the "work survived" check when an agent crashes mid-output /// No longer called from main pipeline code (bug 668 replaced cargo-check with
/// (bug 645). /// run_tests evidence), but retained for the bug-651 stash/restore regression test.
#[cfg(test)]
pub(crate) fn cargo_check_in_worktree(wt_path: &Path) -> bool { pub(crate) fn cargo_check_in_worktree(wt_path: &Path) -> bool {
// Stash uncommitted changes (including untracked files) so cargo check // Stash uncommitted changes (including untracked files) so cargo check
// evaluates only committed code. We restore them afterward. // evaluates only committed code. We restore them afterward.
+274 -9
View File
@@ -116,20 +116,28 @@ impl AgentPool {
} }
} }
} else { } else {
// Bug 645: Before retry/block, check if the agent left committed // Bug 645 / 668: Before retry/block, check if the agent left committed
// work that compiles. An agent may crash mid-output (e.g. Claude // work AND the agent had a passing run_tests result captured during its
// Code CLI PTY write assertion) after having already committed valid // session. An agent may crash mid-output (e.g. Claude Code CLI PTY write
// code. When committed work survives and `cargo check` passes, // assertion) after having already committed valid code and run tests.
// advance to QA instead of wasting retries. // We require positive test evidence (not just cargo check) so that only
let work_survived = worktree_path.as_ref().is_some_and(|wt_path| { // stories with genuinely passing test suites are salvaged.
//
// The `run_tests` MCP tool writes `{story_id}:run_tests_ok` to the DB
// whenever script/test exits 0 inside a story worktree. Consume the
// evidence here so it does not persist to the next agent session.
let has_test_evidence =
crate::db::read_content(&format!("{story_id}:run_tests_ok")).is_some();
crate::db::delete_content(&format!("{story_id}:run_tests_ok"));
let work_survived = has_test_evidence
&& worktree_path.as_ref().is_some_and(|wt_path| {
crate::agents::gates::worktree_has_committed_work(wt_path) crate::agents::gates::worktree_has_committed_work(wt_path)
&& crate::agents::gates::cargo_check_in_worktree(wt_path)
}); });
if work_survived { if work_survived {
slog!( slog!(
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \ "[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
committed work survives and compiles. Advancing to QA instead of \ committed work survives with captured passing tests. Advancing to QA \
retrying (bug 645)." instead of retrying (bug 645)."
); );
let qa_mode = { let qa_mode = {
let item_type = crate::agents::lifecycle::item_type_from_id(story_id); let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
@@ -1111,6 +1119,10 @@ stage = "qa"
"---\nname: Survived Test\n---\n", "---\nname: Survived Test\n---\n",
); );
// Simulate a passing run_tests call during the agent's session (bug 668):
// the agent ran script/test, it passed, and the server captured the evidence.
crate::db::write_content("9945_story_survived:run_tests_ok", "1");
let pool = AgentPool::new_test(3001); let pool = AgentPool::new_test(3001);
// Simulate coder failing gates (e.g. agent crashed, dirty worktree). // Simulate coder failing gates (e.g. agent crashed, dirty worktree).
@@ -1241,4 +1253,257 @@ stage = "qa"
"Story with no committed work should be blocked after exceeding retry limit" "Story with no committed work should be blocked after exceeding retry limit"
); );
} }
// ── bug 668: pipeline must NOT advance when gates_passed=false and no test evidence ──
/// Path (a): `gates_passed=false` with committed work but NO captured
/// `run_tests` evidence → the story is not salvaged to QA/merge. With
/// `max_retries = 1` the observable outcome is a `StoryBlocked` event
/// (bug 668).
#[tokio::test]
async fn gates_failed_no_test_evidence_does_not_advance() {
    use std::fs;
    use std::path::Path;
    use std::process::Command;

    /// Runs `git <args>` in `dir`, panicking if git cannot be spawned or
    /// exits non-zero. Checking the exit status matters here: if a commit
    /// silently failed, the worktree would have no committed work and the
    /// story would be blocked for the wrong reason, making this test pass
    /// without exercising the test-evidence gate.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {args:?} failed in {}: {}",
            dir.display(),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Init a git repo with an initial commit on the default branch.
    git(root, &["init"]);
    git(root, &["config", "user.email", "test@test.com"]);
    git(root, &["config", "user.name", "Test"]);
    fs::write(
        root.join("Cargo.toml"),
        "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
    )
    .unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
    git(root, &["add", "."]);
    git(root, &["commit", "-m", "init"]);

    // Create a worktree with committed work on a feature branch.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy();
    git(
        root,
        &[
            "worktree",
            "add",
            &*wt_arg,
            "-b",
            "feature/story-9947_story_no_evidence",
        ],
    );
    fs::write(wt_path.join("src/lib.rs"), "pub fn added() {}\n").unwrap();
    git(&wt_path, &["add", "."]);
    git(&wt_path, &["commit", "-m", "add fn"]);

    // Register the story in the content store under `2_current`.
    crate::db::ensure_content_store();
    crate::db::write_content(
        "9947_story_no_evidence",
        "---\nname: No Evidence Test\n---\n",
    );
    crate::db::write_item_with_content(
        "9947_story_no_evidence",
        "2_current",
        "---\nname: No Evidence Test\n---\n",
    );
    // Explicitly ensure no test evidence exists for this story.
    crate::db::delete_content("9947_story_no_evidence:run_tests_ok");

    // max_retries=1 so the first failure is observable as a block.
    fs::create_dir_all(root.join(".huskies")).unwrap();
    fs::write(
        root.join(".huskies/project.toml"),
        "max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
    )
    .unwrap();

    let pool = AgentPool::new_test(3001);
    // Subscribe before advancing so the block event is not missed.
    let mut rx = pool.watcher_tx.subscribe();

    // gates_passed=false, no run_tests evidence, but committed work exists.
    pool.run_pipeline_advance(
        "9947_story_no_evidence",
        "coder-1",
        CompletionReport {
            summary: "Gates failed".to_string(),
            gates_passed: false,
            gate_output: "Tests failed".to_string(),
        },
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // Story must NOT advance — it should be blocked (max_retries=1 means
    // first failure triggers block) rather than moving to QA/merge.
    let mut got_blocked = false;
    while let Ok(evt) = rx.try_recv() {
        if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
            && story_id == "9947_story_no_evidence"
        {
            got_blocked = true;
            break;
        }
    }
    assert!(
        got_blocked,
        "gates_passed=false without run_tests evidence must NOT advance to QA/merge — \
story should stay in coding (bug 668)"
    );
}
/// Path (b): `gates_passed=false` WITH captured `run_tests` evidence AND
/// committed work → advances to QA/merge (the legitimate bug-645 salvage
/// case). This is the case where the agent ran passing tests then crashed
/// before server gates could confirm results.
#[tokio::test]
async fn gates_failed_with_test_evidence_and_committed_work_advances() {
    use std::fs;
    use std::path::Path;
    use std::process::Command;

    /// Runs `git <args>` in `dir`, panicking if git cannot be spawned or
    /// exits non-zero, so a broken fixture fails with the real git error
    /// instead of a misleading "story was blocked" assertion further down.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {args:?} failed in {}: {}",
            dir.display(),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Init a git repo with an initial commit on the default branch.
    git(root, &["init"]);
    git(root, &["config", "user.email", "test@test.com"]);
    git(root, &["config", "user.name", "Test"]);
    fs::write(
        root.join("Cargo.toml"),
        "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
    )
    .unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
    git(root, &["add", "."]);
    git(root, &["commit", "-m", "init"]);

    // Create a worktree with committed work on a feature branch.
    let wt_path = tmp.path().join("wt");
    let wt_arg = wt_path.to_string_lossy();
    git(
        root,
        &[
            "worktree",
            "add",
            &*wt_arg,
            "-b",
            "feature/story-9948_story_with_evidence",
        ],
    );
    fs::write(wt_path.join("src/lib.rs"), "pub fn salvaged() {}\n").unwrap();
    git(&wt_path, &["add", "."]);
    git(&wt_path, &["commit", "-m", "add salvaged fn"]);

    // Register the story in the content store under `2_current`.
    crate::db::ensure_content_store();
    crate::db::write_content(
        "9948_story_with_evidence",
        "---\nname: With Evidence Test\n---\n",
    );
    crate::db::write_item_with_content(
        "9948_story_with_evidence",
        "2_current",
        "---\nname: With Evidence Test\n---\n",
    );
    // Write the run_tests evidence — simulates the agent having called run_tests
    // MCP and getting a passing result before it crashed.
    crate::db::write_content("9948_story_with_evidence:run_tests_ok", "1");

    let pool = AgentPool::new_test(3001);

    // gates_passed=false (agent crashed), but test evidence exists.
    pool.run_pipeline_advance(
        "9948_story_with_evidence",
        "coder-1",
        CompletionReport {
            summary: "Agent crashed".to_string(),
            gates_passed: false,
            gate_output: "PTY write assertion failed".to_string(),
        },
        Some(root.to_path_buf()),
        Some(wt_path),
        false,
        None,
    )
    .await;

    // Story should advance (not blocked, no retry_count).
    let content = crate::db::read_content("9948_story_with_evidence")
        .expect("story must exist in content store");
    assert!(
        !content.contains("blocked"),
        "story must NOT be blocked when test evidence exists and work committed: {content}"
    );
    assert!(
        !content.contains("retry_count"),
        "story must NOT have retry_count when salvaged via test evidence: {content}"
    );

    // Evidence must be consumed (cleared) after use.
    assert!(
        crate::db::read_content("9948_story_with_evidence:run_tests_ok").is_none(),
        "run_tests evidence must be cleared after pipeline advance consumes it"
    );
}
} }
+11
View File
@@ -119,6 +119,17 @@ pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
pid, pid,
passed passed
); );
// Capture positive test evidence in the DB so the pipeline
// advance salvage path (bug 645/668) can confirm the agent
// ran passing tests before it died. Only written when running
// in a story worktree (worktree_path arg provided); extract
// the story ID from the last path component.
if passed
&& args.get("worktree_path").is_some()
&& let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str())
{
crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1");
}
return serde_json::to_string_pretty(&json!({ return serde_json::to_string_pretty(&json!({
"passed": passed, "passed": passed,
"exit_code": exit_code, "exit_code": exit_code,
+9
View File
@@ -60,6 +60,10 @@ pub struct StoryMetadata {
/// When `true`, the story is frozen: auto-assign skips it, the pipeline /// When `true`, the story is frozen: auto-assign skips it, the pipeline
/// does not advance it, and no mergemaster is spawned. /// does not advance it, and no mergemaster is spawned.
pub frozen: Option<bool>, pub frozen: Option<bool>,
/// Set to `true` when an agent's `run_tests` call returns `passed=true`.
/// Used by the bug-645 salvage path to require real test evidence, not just
/// compilation success.
pub run_tests_passed: Option<bool>,
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@@ -94,6 +98,10 @@ struct FrontMatter {
depends_on: Option<Vec<u32>>, depends_on: Option<Vec<u32>>,
/// When `true`, the story is frozen. /// When `true`, the story is frozen.
frozen: Option<bool>, frozen: Option<bool>,
/// Set to `true` when an agent's `run_tests` call returns `passed=true`.
/// Used by the bug-645 salvage path to distinguish a genuine test-passing
/// session from one that merely compiled.
run_tests_passed: Option<bool>,
} }
pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> { pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> {
@@ -135,6 +143,7 @@ fn build_metadata(front: FrontMatter) -> StoryMetadata {
blocked: front.blocked, blocked: front.blocked,
depends_on: front.depends_on, depends_on: front.depends_on,
frozen: front.frozen, frozen: front.frozen,
run_tests_passed: front.run_tests_passed,
} }
} }