story-kit: merge 69_story_test_coverage_qa_gate

2026-02-23 13:40:12 +00:00
parent 61383caa01
commit 16989a12fc
2 changed files with 276 additions and 15 deletions
--- a/script/test_coverage
+++ b/script/test_coverage
@@ -0,0 +1,120 @@
 #!/usr/bin/env bash
 # Test coverage collection and threshold enforcement.
 #
 # Runs Rust tests with llvm-cov and frontend tests with vitest --coverage.
 # Reports line coverage percentages for each.
 #
 # Threshold: reads from COVERAGE_THRESHOLD env var, or .coverage_baseline file.
 # Default: 0% (any coverage passes; baseline is written on first run).
 #
 # Coverage can only go up: if current coverage is above the stored baseline,
 # the baseline is updated automatically.
 #
 # Exit codes:
 #   0 — all coverage at or above threshold
 #   1 — coverage below threshold
 # -u: expanding an unset variable is an error.
 # pipefail: a pipeline's status reflects any failing stage.
 # -e is intentionally not set; failures of the coverage tools are handled inline.
 set -uo pipefail
 # Locate the repository root relative to this script, independent of the caller's cwd.
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 BASELINE_FILE="$PROJECT_ROOT/.coverage_baseline"
 # ── Load threshold ────────────────────────────────────────────────────────────
 # Precedence: non-empty COVERAGE_THRESHOLD, then the baseline file, then 0.
 THRESHOLD="${COVERAGE_THRESHOLD:-}"
 if [ -z "$THRESHOLD" ]; then
    if [ -f "$BASELINE_FILE" ]; then
        THRESHOLD=$(cat "$BASELINE_FILE")
    else
        THRESHOLD=0
    fi
 fi
 echo "=== Coverage threshold: ${THRESHOLD}% ==="
 echo ""
 # Result state; a coverage value of 0 means "no measurement available".
 PASS=true
 RUST_LINE_COV=0
 FRONTEND_LINE_COV=0
 # ── Rust coverage ─────────────────────────────────────────────────────────────
 echo "=== Running Rust tests with coverage ==="
 RUST_REPORT=""
 if ! cargo llvm-cov --version >/dev/null 2>&1; then
    echo "cargo-llvm-cov not available; skipping Rust coverage"
 else
    # stderr is merged into the report; a failing run is tolerated here and
    # simply leaves RUST_LINE_COV at its previous value.
    RUST_REPORT=$(
        cargo llvm-cov --manifest-path "$PROJECT_ROOT/Cargo.toml" --summary-only 2>&1
    ) || true
    echo "$RUST_REPORT"
    # The summary's TOTAL row is whitespace-separated:
    #   TOTAL <regions> <missed> <cover%> <funcs> <missed> <exec%> <lines> <missed> <cover%> ...
    # Field 10 is the line-coverage percentage; the '%' sign is stripped.
    RUST_RAW=$(echo "$RUST_REPORT" | awk '/^TOTAL/ { gsub(/%/, "", $10); print $10 }')
    # Keep the previous value when no TOTAL row was found.
    [ -z "$RUST_RAW" ] || RUST_LINE_COV="$RUST_RAW"
 fi
 echo "Rust line coverage: ${RUST_LINE_COV}%"
 echo ""
 # ── Frontend coverage ─────────────────────────────────────────────────────────
 echo "=== Running frontend tests with coverage ==="
 FRONTEND_DIR="$PROJECT_ROOT/frontend"
 FRONTEND_LINE_COV=0
 if [ ! -d "$FRONTEND_DIR" ]; then
    echo "No frontend/ directory found; skipping frontend coverage"
 else
    # Run in a subshell so the cd does not leak; a failing run is tolerated.
    FRONTEND_REPORT=$(cd "$FRONTEND_DIR" && pnpm run test:coverage 2>&1) || true
    echo "$FRONTEND_REPORT"
    # The coverage text table has a row of the form
    #   All files | % Stmts | % Branch | % Funcs | % Lines | ...
    # Split on '|' and take column 5 (% Lines) of the first such row, spaces removed.
    FRONTEND_RAW=$(echo "$FRONTEND_REPORT" | awk -F'|' '/All files/ { gsub(/ /, "", $5); print $5; exit }')
    # Keep the default when the row was not found.
    [ -z "$FRONTEND_RAW" ] || FRONTEND_LINE_COV="$FRONTEND_RAW"
 fi
 echo "Frontend line coverage: ${FRONTEND_LINE_COV}%"
 echo ""
 # ── Overall (average of available measurements) ───────────────────────────────
 # A value of exactly "0" marks a measurement as unavailable. With both present
 # the overall figure is their mean (one decimal); with one present it is that
 # value unchanged; with none it is 0.
 OVERALL=0
 if [ "$RUST_LINE_COV" = "0" ]; then
    if [ "$FRONTEND_LINE_COV" != "0" ]; then
        OVERALL="$FRONTEND_LINE_COV"
    fi
 elif [ "$FRONTEND_LINE_COV" = "0" ]; then
    OVERALL="$RUST_LINE_COV"
 else
    OVERALL=$(awk "BEGIN { printf \"%.1f\", ($RUST_LINE_COV + $FRONTEND_LINE_COV) / 2 }")
 fi
 # ── Summary ───────────────────────────────────────────────────────────────────
 echo "=== Coverage Summary ==="
 echo "  Rust:     ${RUST_LINE_COV}%"
 echo "  Frontend: ${FRONTEND_LINE_COV}%"
 echo "  Overall:  ${OVERALL}%"
 echo "  Threshold: ${THRESHOLD}%"
 echo ""
 # ── Threshold check ───────────────────────────────────────────────────────────
 # The values are handed to awk as data (-v) rather than spliced into the
 # program text, so a threshold such as "80%" or one with stray whitespace is
 # coerced to a number instead of producing an awk error.
 # awk exits 0 only when coverage meets the threshold; any other status,
 # including awk itself failing, is treated as a gate failure (fail closed).
 if awk -v cov="$OVERALL" -v thr="$THRESHOLD" \
        'BEGIN { exit (((cov + 0) >= (thr + 0)) ? 0 : 1) }'; then
    echo "PASS: Coverage ${OVERALL}% meets threshold ${THRESHOLD}%"
 else
    echo "FAIL: Coverage ${OVERALL}% is below threshold ${THRESHOLD}%"
    PASS=false
 fi
 # ── Update baseline when coverage improves ────────────────────────────────────
 # Compare against the value actually stored in the baseline file, not against
 # THRESHOLD: THRESHOLD may come from the COVERAGE_THRESHOLD override, and using
 # it here would let a low override overwrite a higher stored baseline.
 if [ "$PASS" = "true" ]; then
    STORED_BASELINE=0
    if [ -f "$BASELINE_FILE" ]; then
        STORED_BASELINE=$(cat "$BASELINE_FILE")
        # An empty baseline file counts as 0.
        STORED_BASELINE="${STORED_BASELINE:-0}"
    fi
    # Values are passed with -v so they are treated as data, not awk source.
    if awk -v cov="$OVERALL" -v base="$STORED_BASELINE" \
           'BEGIN { exit (((cov + 0) > (base + 0)) ? 0 : 1) }'; then
        echo "${OVERALL}" > "$BASELINE_FILE"
        echo "Baseline updated: ${STORED_BASELINE}% → ${OVERALL}%"
    fi
 fi
 # Exit status: 1 when the threshold check failed, otherwise fall through (0).
 if [ "$PASS" = "false" ]; then
    exit 1
 fi
--- a/server/src/agents.rs
+++ b/server/src/agents.rs
@@ -537,7 +537,8 @@ impl AgentPool {
    ///
    /// - **Coder** + gates passed → move story to `work/3_qa/`, start `qa` agent.
    /// - **Coder** + gates failed → restart the same coder agent with failure context.
-    /// - **QA** + gates passed → move story to `work/4_merge/`, start `mergemaster` agent.
+    /// - **QA** + gates passed + coverage passed → move story to `work/4_merge/`, start `mergemaster` agent.
    /// - **QA** + gates passed + coverage failed → restart `qa` with coverage failure context.
    /// - **QA** + gates failed → restart `qa` with failure context.
    /// - **Mergemaster** → run `script/test` on master; if pass: archive + cleanup worktree;
    ///   if fail: restart `mergemaster` with failure context.
@@ -545,7 +546,7 @@ impl AgentPool {
    async fn run_pipeline_advance_for_completed_agent(&self, story_id: &str, agent_name: &str) {
        let key = composite_key(story_id, agent_name);
-        let (completion, project_root) = {
+        let (completion, project_root, worktree_path) = {
            let agents = match self.agents.lock() {
                Ok(a) => a,
                Err(e) => {
@@ -557,7 +558,11 @@ impl AgentPool {
                Some(a) => a,
                None => return,
            };
-            (agent.completion.clone(), agent.project_root.clone())
+            let wt_path = agent
                .worktree_info
                .as_ref()
                .map(|wt| wt.path.clone());
            (agent.completion.clone(), agent.project_root.clone(), wt_path)
        };
        let completion = match completion {
@@ -618,8 +623,24 @@ impl AgentPool {
            }
            PipelineStage::Qa => {
                if completion.gates_passed {
                    // Run coverage gate in the QA worktree before advancing to merge.
                    let coverage_path = worktree_path.clone().unwrap_or_else(|| project_root.clone());
                    let cp = coverage_path.clone();
                    let coverage_result =
                        tokio::task::spawn_blocking(move || run_coverage_gate(&cp))
                            .await
                            .unwrap_or_else(|e| {
                                eprintln!("[pipeline] Coverage gate task panicked: {e}");
                                Ok((false, format!("Coverage gate task panicked: {e}")))
                            });
                    let (coverage_passed, coverage_output) = match coverage_result {
                        Ok(pair) => pair,
                        Err(e) => (false, e),
                    };
                    if coverage_passed {
                        eprintln!(
-                        "[pipeline] QA passed gates for '{story_id}'. Moving to merge."
+                            "[pipeline] QA passed gates and coverage for '{story_id}'. Moving to merge."
                        );
                        if let Err(e) = move_story_to_merge(&project_root, story_id) {
                            eprintln!("[pipeline] Failed to move '{story_id}' to 4_merge/: {e}");
@@ -631,6 +652,23 @@ impl AgentPool {
                        {
                            eprintln!("[pipeline] Failed to start mergemaster for '{story_id}': {e}");
                        }
                    } else {
                        eprintln!(
                            "[pipeline] QA coverage gate failed for '{story_id}'. Restarting QA."
                        );
                        let context = format!(
                            "\n\n---\n## Coverage Gate Failed\n\
                             The coverage gate (script/test_coverage) failed with the following output:\n{}\n\n\
                             Please improve test coverage until the coverage gate passes.",
                            coverage_output
                        );
                        if let Err(e) = self
                            .start_agent(&project_root, story_id, Some("qa"), Some(&context))
                            .await
                        {
                            eprintln!("[pipeline] Failed to restart qa for '{story_id}': {e}");
                        }
                    }
                } else {
                    eprintln!(
                        "[pipeline] QA failed gates for '{story_id}'. Restarting."
@@ -1353,6 +1391,36 @@ fn run_acceptance_gates(path: &Path) -> Result<(bool, String), String> {
    Ok((all_passed, all_output))
 }
 /// Execute `script/test_coverage` from `path`, when that script is present.
 ///
 /// This is the QA-stage coverage gate that runs before a story advances from
 /// `3_qa/` to `4_merge/`.
 ///
 /// Returns `Ok((passed, output))`:
 /// - script missing: `passed` is `true` and `output` says the gate was skipped;
 /// - script ran: `passed` is the script's exit success, and `output` is a
 ///   header line followed by the script's stdout, then its stderr.
 ///
 /// # Errors
 ///
 /// Returns `Err` with a description when the script exists but cannot be run.
 fn run_coverage_gate(path: &Path) -> Result<(bool, String), String> {
    let script_path = path.join("script").join("test_coverage");

    if script_path.exists() {
        // Run with the checked directory as cwd and capture both streams.
        let run = Command::new(&script_path)
            .current_dir(path)
            .output()
            .map_err(|e| format!("Failed to run script/test_coverage: {e}"))?;

        let mut report = String::from("=== script/test_coverage ===\n");
        report.push_str(&String::from_utf8_lossy(&run.stdout));
        report.push_str(&String::from_utf8_lossy(&run.stderr));
        report.push('\n');

        Ok((run.status.success(), report))
    } else {
        // No script means no gate: pass, and say why.
        Ok((
            true,
            "script/test_coverage not found; coverage gate skipped.\n".to_string(),
        ))
    }
 }
 // ── Mergemaster helpers ───────────────────────────────────────────────────────
 /// Squash-merge a feature branch into the current branch in the project root.
@@ -2351,4 +2419,77 @@ mod tests {
        assert!(!passed, "script/test exiting 1 should fail");
        assert!(output.contains("script/test"), "output should mention script/test");
    }
    // ── run_coverage_gate tests ───────────────────────────────────────────────
    #[cfg(unix)]
    #[test]
    fn coverage_gate_passes_when_script_absent() {
        // A fresh temp directory contains no script/test_coverage, so the gate
        // should report success and explain that it was skipped.
        let workdir = tempfile::tempdir().unwrap();
        let outcome = run_coverage_gate(workdir.path());
        let (passed, output) = outcome.unwrap();
        assert!(passed, "coverage gate should pass when script is absent");
        assert!(
            output.contains("not found"),
            "output should mention script not found"
        );
    }
    #[cfg(unix)]
    #[test]
    fn coverage_gate_passes_when_script_exits_zero() {
        use std::os::unix::fs::PermissionsExt;

        // Lay out <tmp>/script/test_coverage as an executable that exits 0.
        let workdir = tempfile::tempdir().unwrap();
        let root = workdir.path();
        std::fs::create_dir_all(root.join("script")).unwrap();
        let script_file = root.join("script").join("test_coverage");
        std::fs::write(
            &script_file,
            "#!/usr/bin/env bash\necho 'Rust line coverage: 85%'\necho 'PASS: Coverage 85% meets threshold 0%'\nexit 0\n",
        )
        .unwrap();
        std::fs::set_permissions(&script_file, std::fs::Permissions::from_mode(0o755)).unwrap();

        let (passed, output) = run_coverage_gate(root).unwrap();
        assert!(passed, "coverage gate should pass when script exits 0");
        assert!(
            output.contains("script/test_coverage"),
            "output should mention script/test_coverage"
        );
    }
    #[cfg(unix)]
    #[test]
    fn coverage_gate_fails_when_script_exits_nonzero() {
        use std::os::unix::fs::PermissionsExt;

        // Lay out <tmp>/script/test_coverage as an executable that exits 1.
        let workdir = tempfile::tempdir().unwrap();
        let root = workdir.path();
        std::fs::create_dir_all(root.join("script")).unwrap();
        let script_file = root.join("script").join("test_coverage");
        std::fs::write(
            &script_file,
            "#!/usr/bin/env bash\necho 'FAIL: Coverage 40% is below threshold 80%'\nexit 1\n",
        )
        .unwrap();
        std::fs::set_permissions(&script_file, std::fs::Permissions::from_mode(0o755)).unwrap();

        let (passed, output) = run_coverage_gate(root).unwrap();
        assert!(!passed, "coverage gate should fail when script exits 1");
        assert!(
            output.contains("script/test_coverage"),
            "output should mention script/test_coverage"
        );
    }
 }