huskies: merge 668_bug_pipeline_advances_coder_work_to_merge_when_gates_passed_false
This commit is contained in:
@@ -47,8 +47,9 @@ pub(crate) fn worktree_has_committed_work(wt_path: &Path) -> bool {
|
||||
/// is evaluated, then restores them afterward. Uncommitted work in worktrees is
|
||||
/// never junk — it may be the next agent session's starting point (bug 651).
|
||||
///
|
||||
/// Used as part of the "work survived" check when an agent crashes mid-output
|
||||
/// (bug 645).
|
||||
/// No longer called from main pipeline code (bug 668 replaced cargo-check with
|
||||
/// run_tests evidence), but retained for the bug-651 stash/restore regression test.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn cargo_check_in_worktree(wt_path: &Path) -> bool {
|
||||
// Stash uncommitted changes (including untracked files) so cargo check
|
||||
// evaluates only committed code. We restore them afterward.
|
||||
|
||||
@@ -116,20 +116,28 @@ impl AgentPool {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Bug 645: Before retry/block, check if the agent left committed
|
||||
// work that compiles. An agent may crash mid-output (e.g. Claude
|
||||
// Code CLI PTY write assertion) after having already committed valid
|
||||
// code. When committed work survives and `cargo check` passes,
|
||||
// advance to QA instead of wasting retries.
|
||||
let work_survived = worktree_path.as_ref().is_some_and(|wt_path| {
|
||||
// Bug 645 / 668: Before retry/block, check if the agent left committed
|
||||
// work AND the agent had a passing run_tests result captured during its
|
||||
// session. An agent may crash mid-output (e.g. Claude Code CLI PTY write
|
||||
// assertion) after having already committed valid code and run tests.
|
||||
// We require positive test evidence (not just cargo check) so that only
|
||||
// stories with genuinely passing test suites are salvaged.
|
||||
//
|
||||
// The `run_tests` MCP tool writes `{story_id}:run_tests_ok` to the DB
|
||||
// whenever script/test exits 0 inside a story worktree. Consume the
|
||||
// evidence here so it does not persist to the next agent session.
|
||||
let has_test_evidence =
|
||||
crate::db::read_content(&format!("{story_id}:run_tests_ok")).is_some();
|
||||
crate::db::delete_content(&format!("{story_id}:run_tests_ok"));
|
||||
let work_survived = has_test_evidence
|
||||
&& worktree_path.as_ref().is_some_and(|wt_path| {
|
||||
crate::agents::gates::worktree_has_committed_work(wt_path)
|
||||
&& crate::agents::gates::cargo_check_in_worktree(wt_path)
|
||||
});
|
||||
if work_survived {
|
||||
slog!(
|
||||
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
|
||||
committed work survives and compiles. Advancing to QA instead of \
|
||||
retrying (bug 645)."
|
||||
committed work survives with captured passing tests. Advancing to QA \
|
||||
instead of retrying (bug 645)."
|
||||
);
|
||||
let qa_mode = {
|
||||
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
|
||||
@@ -1111,6 +1119,10 @@ stage = "qa"
|
||||
"---\nname: Survived Test\n---\n",
|
||||
);
|
||||
|
||||
// Simulate a passing run_tests call during the agent's session (bug 668):
|
||||
// the agent ran script/test, it passed, and the server captured the evidence.
|
||||
crate::db::write_content("9945_story_survived:run_tests_ok", "1");
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
|
||||
// Simulate coder failing gates (e.g. agent crashed, dirty worktree).
|
||||
@@ -1241,4 +1253,257 @@ stage = "qa"
|
||||
"Story with no committed work should be blocked after exceeding retry limit"
|
||||
);
|
||||
}
|
||||
|
||||
// ── bug 668: pipeline must NOT advance when gates_passed=false and no test evidence ──
|
||||
|
||||
/// Path (a): gates_passed=false with committed work but NO captured run_tests
|
||||
/// evidence → story stays in coding (retries), does NOT advance to QA/merge.
|
||||
#[tokio::test]
|
||||
async fn gates_failed_no_test_evidence_does_not_advance() {
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
|
||||
// Init a git repo with committed work on a feature branch.
|
||||
Command::new("git")
|
||||
.args(["init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.email", "test@test.com"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.name", "Test"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
fs::write(
|
||||
root.join("Cargo.toml"),
|
||||
"[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::create_dir_all(root.join("src")).unwrap();
|
||||
fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Create a worktree with committed work on feature branch.
|
||||
let wt_path = tmp.path().join("wt");
|
||||
Command::new("git")
|
||||
.args([
|
||||
"worktree",
|
||||
"add",
|
||||
&wt_path.to_string_lossy(),
|
||||
"-b",
|
||||
"feature/story-9947_story_no_evidence",
|
||||
])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
fs::write(wt_path.join("src/lib.rs"), "pub fn added() {}\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "add fn"])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Set up the story with max_retries=1 so we can observe the retry/block.
|
||||
crate::db::ensure_content_store();
|
||||
crate::db::write_content(
|
||||
"9947_story_no_evidence",
|
||||
"---\nname: No Evidence Test\n---\n",
|
||||
);
|
||||
crate::db::write_item_with_content(
|
||||
"9947_story_no_evidence",
|
||||
"2_current",
|
||||
"---\nname: No Evidence Test\n---\n",
|
||||
);
|
||||
|
||||
// Explicitly ensure no test evidence exists for this story.
|
||||
crate::db::delete_content("9947_story_no_evidence:run_tests_ok");
|
||||
|
||||
fs::create_dir_all(root.join(".huskies")).unwrap();
|
||||
fs::write(
|
||||
root.join(".huskies/project.toml"),
|
||||
"max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
let mut rx = pool.watcher_tx.subscribe();
|
||||
|
||||
// gates_passed=false, no run_tests evidence, but committed work exists.
|
||||
pool.run_pipeline_advance(
|
||||
"9947_story_no_evidence",
|
||||
"coder-1",
|
||||
CompletionReport {
|
||||
summary: "Gates failed".to_string(),
|
||||
gates_passed: false,
|
||||
gate_output: "Tests failed".to_string(),
|
||||
},
|
||||
Some(root.to_path_buf()),
|
||||
Some(wt_path),
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Story must NOT advance — it should be blocked (max_retries=1 means
|
||||
// first failure triggers block) rather than moving to QA/merge.
|
||||
let mut got_blocked = false;
|
||||
while let Ok(evt) = rx.try_recv() {
|
||||
if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
|
||||
&& story_id == "9947_story_no_evidence"
|
||||
{
|
||||
got_blocked = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
got_blocked,
|
||||
"gates_passed=false without run_tests evidence must NOT advance to QA/merge — \
|
||||
story should stay in coding (bug 668)"
|
||||
);
|
||||
}
|
||||
|
||||
/// Path (b): gates_passed=false WITH captured run_tests evidence AND committed
|
||||
/// work → advances to QA/merge (the legitimate bug-645 salvage case).
|
||||
/// This is the case where the agent ran passing tests then crashed before server
|
||||
/// gates could confirm results.
|
||||
#[tokio::test]
|
||||
async fn gates_failed_with_test_evidence_and_committed_work_advances() {
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
|
||||
// Init a git repo with committed work.
|
||||
Command::new("git")
|
||||
.args(["init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.email", "test@test.com"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.name", "Test"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
fs::write(
|
||||
root.join("Cargo.toml"),
|
||||
"[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::create_dir_all(root.join("src")).unwrap();
|
||||
fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let wt_path = tmp.path().join("wt");
|
||||
Command::new("git")
|
||||
.args([
|
||||
"worktree",
|
||||
"add",
|
||||
&wt_path.to_string_lossy(),
|
||||
"-b",
|
||||
"feature/story-9948_story_with_evidence",
|
||||
])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
fs::write(wt_path.join("src/lib.rs"), "pub fn salvaged() {}\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "add salvaged fn"])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
crate::db::ensure_content_store();
|
||||
crate::db::write_content(
|
||||
"9948_story_with_evidence",
|
||||
"---\nname: With Evidence Test\n---\n",
|
||||
);
|
||||
crate::db::write_item_with_content(
|
||||
"9948_story_with_evidence",
|
||||
"2_current",
|
||||
"---\nname: With Evidence Test\n---\n",
|
||||
);
|
||||
|
||||
// Write the run_tests evidence — simulates the agent having called run_tests
|
||||
// MCP and getting a passing result before it crashed.
|
||||
crate::db::write_content("9948_story_with_evidence:run_tests_ok", "1");
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
|
||||
// gates_passed=false (agent crashed), but test evidence exists.
|
||||
pool.run_pipeline_advance(
|
||||
"9948_story_with_evidence",
|
||||
"coder-1",
|
||||
CompletionReport {
|
||||
summary: "Agent crashed".to_string(),
|
||||
gates_passed: false,
|
||||
gate_output: "PTY write assertion failed".to_string(),
|
||||
},
|
||||
Some(root.to_path_buf()),
|
||||
Some(wt_path),
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Story should advance (not blocked, no retry_count).
|
||||
let content = crate::db::read_content("9948_story_with_evidence")
|
||||
.expect("story must exist in content store");
|
||||
assert!(
|
||||
!content.contains("blocked"),
|
||||
"story must NOT be blocked when test evidence exists and work committed: {content}"
|
||||
);
|
||||
assert!(
|
||||
!content.contains("retry_count"),
|
||||
"story must NOT have retry_count when salvaged via test evidence: {content}"
|
||||
);
|
||||
// Evidence must be consumed (cleared) after use.
|
||||
assert!(
|
||||
crate::db::read_content("9948_story_with_evidence:run_tests_ok").is_none(),
|
||||
"run_tests evidence must be cleared after pipeline advance consumes it"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,6 +119,17 @@ pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
pid,
|
||||
passed
|
||||
);
|
||||
// Capture positive test evidence in the DB so the pipeline
|
||||
// advance salvage path (bug 645/668) can confirm the agent
|
||||
// ran passing tests before it died. Only written when running
|
||||
// in a story worktree (worktree_path arg provided); extract
|
||||
// the story ID from the last path component.
|
||||
if passed
|
||||
&& args.get("worktree_path").is_some()
|
||||
&& let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str())
|
||||
{
|
||||
crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1");
|
||||
}
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"passed": passed,
|
||||
"exit_code": exit_code,
|
||||
|
||||
@@ -60,6 +60,10 @@ pub struct StoryMetadata {
|
||||
/// When `true`, the story is frozen: auto-assign skips it, the pipeline
|
||||
/// does not advance it, and no mergemaster is spawned.
|
||||
pub frozen: Option<bool>,
|
||||
/// Set to `true` when an agent's `run_tests` call returns `passed=true`.
|
||||
/// Used by the bug-645 salvage path to require real test evidence, not just
|
||||
/// compilation success.
|
||||
pub run_tests_passed: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -94,6 +98,10 @@ struct FrontMatter {
|
||||
depends_on: Option<Vec<u32>>,
|
||||
/// When `true`, the story is frozen.
|
||||
frozen: Option<bool>,
|
||||
/// Set to `true` when an agent's `run_tests` call returns `passed=true`.
|
||||
/// Used by the bug-645 salvage path to distinguish a genuine test-passing
|
||||
/// session from one that merely compiled.
|
||||
run_tests_passed: Option<bool>,
|
||||
}
|
||||
|
||||
pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> {
|
||||
@@ -135,6 +143,7 @@ fn build_metadata(front: FrontMatter) -> StoryMetadata {
|
||||
blocked: front.blocked,
|
||||
depends_on: front.depends_on,
|
||||
frozen: front.frozen,
|
||||
run_tests_passed: front.run_tests_passed,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user