huskies: merge 668_bug_pipeline_advances_coder_work_to_merge_when_gates_passed_false

2026-04-27 11:33:36 +00:00
parent 65d2fb210c
commit 5da29c3d91
4 changed files with 299 additions and 13 deletions
@@ -47,8 +47,9 @@ pub(crate) fn worktree_has_committed_work(wt_path: &Path) -> bool {
 /// is evaluated, then restores them afterward.  Uncommitted work in worktrees is
 /// never junk — it may be the next agent session's starting point (bug 651).
 ///
-/// Used as part of the "work survived" check when an agent crashes mid-output
+/// No longer called from main pipeline code (bug 668 replaced cargo-check with
-/// (bug 645).
+/// run_tests evidence), but retained for the bug-651 stash/restore regression test.
 #[cfg(test)]
 pub(crate) fn cargo_check_in_worktree(wt_path: &Path) -> bool {
    // Stash uncommitted changes (including untracked files) so cargo check
    // evaluates only committed code.  We restore them afterward.
@@ -116,20 +116,28 @@ impl AgentPool {
                        }
                    }
                } else {
-                    // Bug 645: Before retry/block, check if the agent left committed
+                    // Bug 645 / 668: Before retry/block, check if the agent left committed
-                    // work that compiles.  An agent may crash mid-output (e.g. Claude
+                    // work AND the agent had a passing run_tests result captured during its
-                    // Code CLI PTY write assertion) after having already committed valid
+                    // session.  An agent may crash mid-output (e.g. Claude Code CLI PTY write
-                    // code.  When committed work survives and `cargo check` passes,
+                    // assertion) after having already committed valid code and run tests.
-                    // advance to QA instead of wasting retries.
+                    // We require positive test evidence (not just cargo check) so that only
-                    let work_survived = worktree_path.as_ref().is_some_and(|wt_path| {
+                    // stories with genuinely passing test suites are salvaged.
                    //
                    // The `run_tests` MCP tool writes `{story_id}:run_tests_ok` to the DB
                    // whenever script/test exits 0 inside a story worktree.  Consume the
                    // evidence here so it does not persist to the next agent session.
                    let has_test_evidence =
                        crate::db::read_content(&format!("{story_id}:run_tests_ok")).is_some();
                    crate::db::delete_content(&format!("{story_id}:run_tests_ok"));
                    let work_survived = has_test_evidence
                        && worktree_path.as_ref().is_some_and(|wt_path| {
                            crate::agents::gates::worktree_has_committed_work(wt_path)
                            && crate::agents::gates::cargo_check_in_worktree(wt_path)
                        });
                    if work_survived {
                        slog!(
                            "[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
-                             committed work survives and compiles. Advancing to QA instead of \
+                             committed work survives with captured passing tests. Advancing to QA \
-                             retrying (bug 645)."
+                             instead of retrying (bug 645)."
                        );
                        let qa_mode = {
                            let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
@@ -1111,6 +1119,10 @@ stage = "qa"
            "---\nname: Survived Test\n---\n",
        );
        // Simulate a passing run_tests call during the agent's session (bug 668):
        // the agent ran script/test, it passed, and the server captured the evidence.
        crate::db::write_content("9945_story_survived:run_tests_ok", "1");
        let pool = AgentPool::new_test(3001);
        // Simulate coder failing gates (e.g. agent crashed, dirty worktree).
@@ -1241,4 +1253,257 @@ stage = "qa"
            "Story with no committed work should be blocked after exceeding retry limit"
        );
    }
    // ── bug 668: pipeline must NOT advance when gates_passed=false and no test evidence ──
    /// Path (a): gates_passed=false with committed work but NO captured run_tests
    /// evidence → story stays in coding (retries), does NOT advance to QA/merge.
    #[tokio::test]
    async fn gates_failed_no_test_evidence_does_not_advance() {
        use std::fs;
        use std::process::Command;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Init a git repo with committed work on a feature branch.
        Command::new("git")
            .args(["init"])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["config", "user.email", "test@test.com"])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["config", "user.name", "Test"])
            .current_dir(root)
            .output()
            .unwrap();
        fs::write(
            root.join("Cargo.toml"),
            "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
        )
        .unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
        Command::new("git")
            .args(["add", "."])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(root)
            .output()
            .unwrap();
        // Create a worktree with committed work on feature branch.
        let wt_path = tmp.path().join("wt");
        Command::new("git")
            .args([
                "worktree",
                "add",
                &wt_path.to_string_lossy(),
                "-b",
                "feature/story-9947_story_no_evidence",
            ])
            .current_dir(root)
            .output()
            .unwrap();
        fs::write(wt_path.join("src/lib.rs"), "pub fn added() {}\n").unwrap();
        Command::new("git")
            .args(["add", "."])
            .current_dir(&wt_path)
            .output()
            .unwrap();
        Command::new("git")
            .args(["commit", "-m", "add fn"])
            .current_dir(&wt_path)
            .output()
            .unwrap();
        // Set up the story with max_retries=1 so we can observe the retry/block.
        crate::db::ensure_content_store();
        crate::db::write_content(
            "9947_story_no_evidence",
            "---\nname: No Evidence Test\n---\n",
        );
        crate::db::write_item_with_content(
            "9947_story_no_evidence",
            "2_current",
            "---\nname: No Evidence Test\n---\n",
        );
        // Explicitly ensure no test evidence exists for this story.
        crate::db::delete_content("9947_story_no_evidence:run_tests_ok");
        fs::create_dir_all(root.join(".huskies")).unwrap();
        fs::write(
            root.join(".huskies/project.toml"),
            "max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
        )
        .unwrap();
        let pool = AgentPool::new_test(3001);
        let mut rx = pool.watcher_tx.subscribe();
        // gates_passed=false, no run_tests evidence, but committed work exists.
        pool.run_pipeline_advance(
            "9947_story_no_evidence",
            "coder-1",
            CompletionReport {
                summary: "Gates failed".to_string(),
                gates_passed: false,
                gate_output: "Tests failed".to_string(),
            },
            Some(root.to_path_buf()),
            Some(wt_path),
            false,
            None,
        )
        .await;
        // Story must NOT advance — it should be blocked (max_retries=1 means
        // first failure triggers block) rather than moving to QA/merge.
        let mut got_blocked = false;
        while let Ok(evt) = rx.try_recv() {
            if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
                && story_id == "9947_story_no_evidence"
            {
                got_blocked = true;
                break;
            }
        }
        assert!(
            got_blocked,
            "gates_passed=false without run_tests evidence must NOT advance to QA/merge — \
             story should stay in coding (bug 668)"
        );
    }
    /// Path (b): gates_passed=false WITH captured run_tests evidence AND committed
    /// work → advances to QA/merge (the legitimate bug-645 salvage case).
    /// This is the case where the agent ran passing tests then crashed before server
    /// gates could confirm results.
    #[tokio::test]
    async fn gates_failed_with_test_evidence_and_committed_work_advances() {
        use std::fs;
        use std::process::Command;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Init a git repo with committed work.
        Command::new("git")
            .args(["init"])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["config", "user.email", "test@test.com"])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["config", "user.name", "Test"])
            .current_dir(root)
            .output()
            .unwrap();
        fs::write(
            root.join("Cargo.toml"),
            "[package]\nname=\"t\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
        )
        .unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
        Command::new("git")
            .args(["add", "."])
            .current_dir(root)
            .output()
            .unwrap();
        Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(root)
            .output()
            .unwrap();
        let wt_path = tmp.path().join("wt");
        Command::new("git")
            .args([
                "worktree",
                "add",
                &wt_path.to_string_lossy(),
                "-b",
                "feature/story-9948_story_with_evidence",
            ])
            .current_dir(root)
            .output()
            .unwrap();
        fs::write(wt_path.join("src/lib.rs"), "pub fn salvaged() {}\n").unwrap();
        Command::new("git")
            .args(["add", "."])
            .current_dir(&wt_path)
            .output()
            .unwrap();
        Command::new("git")
            .args(["commit", "-m", "add salvaged fn"])
            .current_dir(&wt_path)
            .output()
            .unwrap();
        crate::db::ensure_content_store();
        crate::db::write_content(
            "9948_story_with_evidence",
            "---\nname: With Evidence Test\n---\n",
        );
        crate::db::write_item_with_content(
            "9948_story_with_evidence",
            "2_current",
            "---\nname: With Evidence Test\n---\n",
        );
        // Write the run_tests evidence — simulates the agent having called run_tests
        // MCP and getting a passing result before it crashed.
        crate::db::write_content("9948_story_with_evidence:run_tests_ok", "1");
        let pool = AgentPool::new_test(3001);
        // gates_passed=false (agent crashed), but test evidence exists.
        pool.run_pipeline_advance(
            "9948_story_with_evidence",
            "coder-1",
            CompletionReport {
                summary: "Agent crashed".to_string(),
                gates_passed: false,
                gate_output: "PTY write assertion failed".to_string(),
            },
            Some(root.to_path_buf()),
            Some(wt_path),
            false,
            None,
        )
        .await;
        // Story should advance (not blocked, no retry_count).
        let content = crate::db::read_content("9948_story_with_evidence")
            .expect("story must exist in content store");
        assert!(
            !content.contains("blocked"),
            "story must NOT be blocked when test evidence exists and work committed: {content}"
        );
        assert!(
            !content.contains("retry_count"),
            "story must NOT have retry_count when salvaged via test evidence: {content}"
        );
        // Evidence must be consumed (cleared) after use.
        assert!(
            crate::db::read_content("9948_story_with_evidence:run_tests_ok").is_none(),
            "run_tests evidence must be cleared after pipeline advance consumes it"
        );
    }
 }
@@ -119,6 +119,17 @@ pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
                        pid,
                        passed
                    );
                    // Capture positive test evidence in the DB so the pipeline
                    // advance salvage path (bug 645/668) can confirm the agent
                    // ran passing tests before it died.  Only written when running
                    // in a story worktree (worktree_path arg provided); extract
                    // the story ID from the last path component.
                    if passed
                        && args.get("worktree_path").is_some()
                        && let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str())
                    {
                        crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1");
                    }
                    return serde_json::to_string_pretty(&json!({
                        "passed": passed,
                        "exit_code": exit_code,
@@ -60,6 +60,10 @@ pub struct StoryMetadata {
    /// When `true`, the story is frozen: auto-assign skips it, the pipeline
    /// does not advance it, and no mergemaster is spawned.
    pub frozen: Option<bool>,
    /// Set to `true` when an agent's `run_tests` call returns `passed=true`.
    /// Used by the bug-645 salvage path to require real test evidence, not just
    /// compilation success.
    pub run_tests_passed: Option<bool>,
 }
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -94,6 +98,10 @@ struct FrontMatter {
    depends_on: Option<Vec<u32>>,
    /// When `true`, the story is frozen.
    frozen: Option<bool>,
    /// Set to `true` when an agent's `run_tests` call returns `passed=true`.
    /// Used by the bug-645 salvage path to distinguish a genuine test-passing
    /// session from one that merely compiled.
    run_tests_passed: Option<bool>,
 }
 pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> {
@@ -135,6 +143,7 @@ fn build_metadata(front: FrontMatter) -> StoryMetadata {
        blocked: front.blocked,
        depends_on: front.depends_on,
        frozen: front.frozen,
        run_tests_passed: front.run_tests_passed,
    }
 }