huskies: merge 668_bug_pipeline_advances_coder_work_to_merge_when_gates_passed_false

2026-04-27 11:33:36 +00:00
parent 65d2fb210c
commit 5da29c3d91
4 changed files with 299 additions and 13 deletions
@@ -47,8 +47,9 @@ pub(crate) fn worktree_has_committed_work(wt_path: &Path) -> bool {
 /// is evaluated, then restores them afterward.  Uncommitted work in worktrees is
 /// never junk — it may be the next agent session's starting point (bug 651).
 ///
-/// Used as part of the "work survived" check when an agent crashes mid-output
-/// (bug 645).
+/// No longer called from main pipeline code (bug 668 replaced cargo-check with
+/// run_tests evidence), but retained for the bug-651 stash/restore regression test.
+#[cfg(test)]
 pub(crate) fn cargo_check_in_worktree(wt_path: &Path) -> bool {
    // Stash uncommitted changes (including untracked files) so cargo check
    // evaluates only committed code.  We restore them afterward.
@@ -116,20 +116,28 @@ impl AgentPool {
                        }
                    }
                } else {
-                    // Bug 645: Before retry/block, check if the agent left committed
-                    // work that compiles.  An agent may crash mid-output (e.g. Claude
-                    // Code CLI PTY write assertion) after having already committed valid
-                    // code.  When committed work survives and `cargo check` passes,
-                    // advance to QA instead of wasting retries.
-                    let work_survived = worktree_path.as_ref().is_some_and(|wt_path| {
+                    // Bug 645 / 668: Before retry/block, check if the agent left committed
+                    // work AND the agent had a passing run_tests result captured during its
+                    // session.  An agent may crash mid-output (e.g. Claude Code CLI PTY write
+                    // assertion) after having already committed valid code and run tests.
+                    // We require positive test evidence (not just cargo check) so that only
+                    // stories with genuinely passing test suites are salvaged.
+                    //
+                    // The `run_tests` MCP tool writes `{story_id}:run_tests_ok` to the DB
+                    // whenever script/test exits 0 inside a story worktree.  Consume the
+                    // evidence here so it does not persist to the next agent session.
+                    let has_test_evidence =
+                        crate::db::read_content(&format!("{story_id}:run_tests_ok")).is_some();
+                    crate::db::delete_content(&format!("{story_id}:run_tests_ok"));
+                    let work_survived = has_test_evidence
+                        && worktree_path.as_ref().is_some_and(|wt_path| {
                            crate::agents::gates::worktree_has_committed_work(wt_path)
-                            && crate::agents::gates::cargo_check_in_worktree(wt_path)
                        });
                    if work_survived {
                        slog!(
                            "[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
-                             committed work survives and compiles. Advancing to QA instead of \
-                             retrying (bug 645)."
+                             committed work survives with captured passing tests. Advancing to QA \
+                             instead of retrying (bug 645)."
                        );
                        let qa_mode = {
                            let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
@@ -1111,6 +1119,10 @@ stage = "qa"
            "---\nname: Survived Test\n---\n",
        );

+        // Simulate a passing run_tests call during the agent's session (bug 668):
+        // the agent ran script/test, it passed, and the server captured the evidence.
+        crate::db::write_content("9945_story_survived:run_tests_ok", "1");
+
        let pool = AgentPool::new_test(3001);

        // Simulate coder failing gates (e.g. agent crashed, dirty worktree).
@@ -1241,4 +1253,257 @@ stage = "qa"
            "Story with no committed work should be blocked after exceeding retry limit"
        );
    }
+
+    // ── bug 668: pipeline must NOT advance when gates_passed=false and no test evidence ──
+
+    /// Path (a): gates_passed=false with committed work but NO captured run_tests
+    /// evidence → story stays in coding (retries), does NOT advance to QA/merge.
+    #[tokio::test]
+    async fn gates_failed_no_test_evidence_does_not_advance() {
+        use std::fs;
+        use std::process::Command;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+
+        // Init a git repo with committed work on a feature branch.
+        Command::new("git")
+            .args(["init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        fs::write(
+            root.join("Cargo.toml"),
+            "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
+        )
+        .unwrap();
+        fs::create_dir_all(root.join("src")).unwrap();
+        fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+
+        // Create a worktree with committed work on feature branch.
+        let wt_path = tmp.path().join("wt");
+        Command::new("git")
+            .args([
+                "worktree",
+                "add",
+                &wt_path.to_string_lossy(),
+                "-b",
+                "feature/story-9947_story_no_evidence",
+            ])
+            .current_dir(root)
+            .output()
+            .unwrap();
+
+        fs::write(wt_path.join("src/lib.rs"), "pub fn added() {}\n").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "add fn"])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+
+        // Set up the story with max_retries=1 so we can observe the retry/block.
+        crate::db::ensure_content_store();
+        crate::db::write_content(
+            "9947_story_no_evidence",
+            "---\nname: No Evidence Test\n---\n",
+        );
+        crate::db::write_item_with_content(
+            "9947_story_no_evidence",
+            "2_current",
+            "---\nname: No Evidence Test\n---\n",
+        );
+
+        // Explicitly ensure no test evidence exists for this story.
+        crate::db::delete_content("9947_story_no_evidence:run_tests_ok");
+
+        fs::create_dir_all(root.join(".huskies")).unwrap();
+        fs::write(
+            root.join(".huskies/project.toml"),
+            "max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
+        )
+        .unwrap();
+
+        let pool = AgentPool::new_test(3001);
+        let mut rx = pool.watcher_tx.subscribe();
+
+        // gates_passed=false, no run_tests evidence, but committed work exists.
+        pool.run_pipeline_advance(
+            "9947_story_no_evidence",
+            "coder-1",
+            CompletionReport {
+                summary: "Gates failed".to_string(),
+                gates_passed: false,
+                gate_output: "Tests failed".to_string(),
+            },
+            Some(root.to_path_buf()),
+            Some(wt_path),
+            false,
+            None,
+        )
+        .await;
+
+        // Story must NOT advance — it should be blocked (max_retries=1 means
+        // first failure triggers block) rather than moving to QA/merge.
+        let mut got_blocked = false;
+        while let Ok(evt) = rx.try_recv() {
+            if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
+                && story_id == "9947_story_no_evidence"
+            {
+                got_blocked = true;
+                break;
+            }
+        }
+        assert!(
+            got_blocked,
+            "gates_passed=false without run_tests evidence must NOT advance to QA/merge — \
+             story should stay in coding (bug 668)"
+        );
+    }
+
+    /// Path (b): gates_passed=false WITH captured run_tests evidence AND committed
+    /// work → advances to QA/merge (the legitimate bug-645 salvage case).
+    /// This is the case where the agent ran passing tests then crashed before server
+    /// gates could confirm results.
+    #[tokio::test]
+    async fn gates_failed_with_test_evidence_and_committed_work_advances() {
+        use std::fs;
+        use std::process::Command;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+
+        // Init a git repo with committed work.
+        Command::new("git")
+            .args(["init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        fs::write(
+            root.join("Cargo.toml"),
+            "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
+        )
+        .unwrap();
+        fs::create_dir_all(root.join("src")).unwrap();
+        fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+
+        let wt_path = tmp.path().join("wt");
+        Command::new("git")
+            .args([
+                "worktree",
+                "add",
+                &wt_path.to_string_lossy(),
+                "-b",
+                "feature/story-9948_story_with_evidence",
+            ])
+            .current_dir(root)
+            .output()
+            .unwrap();
+
+        fs::write(wt_path.join("src/lib.rs"), "pub fn salvaged() {}\n").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "add salvaged fn"])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+
+        crate::db::ensure_content_store();
+        crate::db::write_content(
+            "9948_story_with_evidence",
+            "---\nname: With Evidence Test\n---\n",
+        );
+        crate::db::write_item_with_content(
+            "9948_story_with_evidence",
+            "2_current",
+            "---\nname: With Evidence Test\n---\n",
+        );
+
+        // Write the run_tests evidence — simulates the agent having called run_tests
+        // MCP and getting a passing result before it crashed.
+        crate::db::write_content("9948_story_with_evidence:run_tests_ok", "1");
+
+        let pool = AgentPool::new_test(3001);
+
+        // gates_passed=false (agent crashed), but test evidence exists.
+        pool.run_pipeline_advance(
+            "9948_story_with_evidence",
+            "coder-1",
+            CompletionReport {
+                summary: "Agent crashed".to_string(),
+                gates_passed: false,
+                gate_output: "PTY write assertion failed".to_string(),
+            },
+            Some(root.to_path_buf()),
+            Some(wt_path),
+            false,
+            None,
+        )
+        .await;
+
+        // Story should advance (not blocked, no retry_count).
+        let content = crate::db::read_content("9948_story_with_evidence")
+            .expect("story must exist in content store");
+        assert!(
+            !content.contains("blocked"),
+            "story must NOT be blocked when test evidence exists and work committed: {content}"
+        );
+        assert!(
+            !content.contains("retry_count"),
+            "story must NOT have retry_count when salvaged via test evidence: {content}"
+        );
+        // Evidence must be consumed (cleared) after use.
+        assert!(
+            crate::db::read_content("9948_story_with_evidence:run_tests_ok").is_none(),
+            "run_tests evidence must be cleared after pipeline advance consumes it"
+        );
+    }
 }
@@ -119,6 +119,17 @@ pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
                        pid,
                        passed
                    );
+                    // Capture positive test evidence in the DB so the pipeline
+                    // advance salvage path (bug 645/668) can confirm the agent
+                    // ran passing tests before it died.  Only written when running
+                    // in a story worktree (worktree_path arg provided); extract
+                    // the story ID from the last path component.
+                    if passed
+                        && args.get("worktree_path").is_some()
+                        && let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str())
+                    {
+                        crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1");
+                    }
                    return serde_json::to_string_pretty(&json!({
                        "passed": passed,
                        "exit_code": exit_code,
@@ -60,6 +60,10 @@ pub struct StoryMetadata {
    /// When `true`, the story is frozen: auto-assign skips it, the pipeline
    /// does not advance it, and no mergemaster is spawned.
    pub frozen: Option<bool>,
+    /// Set to `true` when an agent's `run_tests` call returns `passed=true`.
+    /// Used by the bug-645 salvage path to require real test evidence, not just
+    /// compilation success.
+    pub run_tests_passed: Option<bool>,
 }

 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -94,6 +98,10 @@ struct FrontMatter {
    depends_on: Option<Vec<u32>>,
    /// When `true`, the story is frozen.
    frozen: Option<bool>,
+    /// Set to `true` when an agent's `run_tests` call returns `passed=true`.
+    /// Used by the bug-645 salvage path to distinguish a genuine test-passing
+    /// session from one that merely compiled.
+    run_tests_passed: Option<bool>,
 }

 pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> {
@@ -135,6 +143,7 @@ fn build_metadata(front: FrontMatter) -> StoryMetadata {
        blocked: front.blocked,
        depends_on: front.depends_on,
        frozen: front.frozen,
+        run_tests_passed: front.run_tests_passed,
    }
 }