From df135e937327b6361697fcf6ed60c90a37b2a007 Mon Sep 17 00:00:00 2001
From: dave <futurechimp@users.noreply.github.com>
Date: Sat, 4 Apr 2026 15:07:37 +0000
Subject: [PATCH] huskies: merge
 474_story_per_file_test_coverage_report_with_improvement_targets

---
 .gitignore                           |   4 +
 .huskies/README.md                   |  41 ++++++
 script/test_coverage                 | 120 ++++++++++++---
 server/src/chat/commands/coverage.rs | 211 ++++++++++++++++++++++++---
 4 files changed, 338 insertions(+), 38 deletions(-)
diff --git a/.gitignore b/.gitignore
index 9ea28b85..ee6ea21b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,10 @@ store.json
 .huskies_port
 .huskies/bot.toml.bak
 
+# Coverage report (generated by script/test_coverage, not tracked in git)
+.coverage_report.json
+.coverage_baseline
+
 # Rust stuff
 target
 
diff --git a/.huskies/README.md b/.huskies/README.md
index b8cd255a..d6f4f3da 100644
--- a/.huskies/README.md
+++ b/.huskies/README.md
@@ -266,3 +266,44 @@ The `bot.toml` file is gitignored (it contains secrets). The example files are c
 **ALWAYS FIX DIAGNOSTICS:** At every stage, you must proactively fix all errors and warnings without waiting for user confirmation. Do not pause to ask whether to fix diagnostics—fix them immediately as part of the workflow.
 
 **Consult `specs/tech/STACK.md`** for the specific tools, commands, linter configurations, and quality gates for this project. The STACK file is the single source of truth for what must pass before a story can be accepted.
+
+---
+
+## 8. Coverage Report Format
+
+Huskies reads a language-agnostic `.coverage_report.json` file at the project root. Any project can produce this file from its own coverage tooling — the huskies server has no language-specific coverage logic.
+
+### Format
+
+```json
+{
+  "overall": 66.25,
+  "threshold": 60.0,
+  "files": [
+    { "path": "server/src/agents/pty.rs", "coverage": 12.5 },
+    { "path": "frontend/src/components/Chat.tsx", "coverage": 31.2 }
+  ]
+}
+```
+
+*   **`overall`** — overall line coverage percentage (float, 0–100).
+*   **`threshold`** — the minimum acceptable coverage (float, 0–100).
+*   **`files`** — per-file coverage array, sorted ascending (lowest coverage first). Each entry has:
+    *   **`path`** — file path relative to the project root (string).
+    *   **`coverage`** — line coverage percentage for this file (float, 0–100).
+
+### How to produce this file
+
+The project's `script/test_coverage` is responsible for generating `.coverage_report.json`. Other projects can adapt this approach for any language:
+
+*   **Rust:** Use `cargo llvm-cov --json` for per-file data; parse `data[0].files[*].summary.lines.percent`.
+*   **TypeScript/Vitest:** Use `--coverage.reporter=json-summary`; parse `coverage/coverage-summary.json` (Istanbul format).
+*   **Any other language:** Run your coverage tool, extract per-file line percentages, and write the JSON in the format above.
+
+### How the bot uses it
+
+The `coverage` bot command reads `.coverage_report.json` and displays:
+1.  Overall coverage and threshold.
+2.  The **top 5 lowest-covered files** as improvement targets.
+
+Run `coverage run` in the bot to regenerate the file via `script/test_coverage`.
diff --git a/script/test_coverage b/script/test_coverage
index 611c46c9..7ecb4e05 100755
--- a/script/test_coverage
+++ b/script/test_coverage
@@ -4,6 +4,15 @@
 # Runs Rust tests with llvm-cov and frontend tests with vitest --coverage.
 # Reports line coverage percentages for each.
 #
+# After collecting coverage, writes a language-agnostic .coverage_report.json
+# at the project root in the standard format:
+#
+#   {
+#     "overall": <float>,
+#     "threshold": <float>,
+#     "files": [{ "path": <string>, "coverage": <float> }]
+#   }
+#
 # Threshold: reads from COVERAGE_THRESHOLD env var, or .coverage_baseline file.
 # Default: 0% (any coverage passes; baseline is written on first run).
 #
@@ -19,6 +28,7 @@ set -uo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 BASELINE_FILE="$PROJECT_ROOT/.coverage_baseline"
+RUST_COV_JSON="$PROJECT_ROOT/.coverage_rust_raw.json"
 
 # ── Load threshold ────────────────────────────────────────────────────────────
 if [ -n "${COVERAGE_THRESHOLD:-}" ]; then
@@ -38,20 +48,29 @@ FRONTEND_LINE_COV=0
 
 # ── Rust coverage ─────────────────────────────────────────────────────────────
 echo "=== Running Rust tests with coverage ==="
-RUST_REPORT=""
 if cargo llvm-cov --version >/dev/null 2>&1; then
-    RUST_REPORT=$(cargo llvm-cov \
+    # Run tests and generate both a text summary and a JSON report for per-file data.
+    cargo llvm-cov \
         --manifest-path "$PROJECT_ROOT/Cargo.toml" \
         --summary-only \
-        2>&1) || true
-    echo "$RUST_REPORT"
+        2>&1
 
-    # Parse the TOTAL line: columns are space-separated with % on coverage cols.
-    # Format: TOTAL <regions> <missed> <cover%> <funcs> <missed> <exec%> <lines> <missed> <cover%> ...
-    # We want field 10 (lines cover %).
-    RUST_RAW=$(echo "$RUST_REPORT" | awk '/^TOTAL/ { print $10 }' | tr -d '%')
-    if [ -n "$RUST_RAW" ]; then
-        RUST_LINE_COV="$RUST_RAW"
+    # Generate JSON report from the already-collected profdata (no re-run).
+    cargo llvm-cov report \
+        --manifest-path "$PROJECT_ROOT/Cargo.toml" \
+        --json \
+        --output-path "$RUST_COV_JSON" \
+        >/dev/null 2>&1 || true
+
+    # Parse overall Rust line coverage from the JSON (more reliable than awk on text).
+    if [ -f "$RUST_COV_JSON" ]; then
+        RUST_LINE_COV=$(python3 -c "
+import json, sys
+with open('$RUST_COV_JSON') as f:
+    data = json.load(f)
+pct = data['data'][0]['totals']['lines']['percent']
+print(f'{pct:.1f}')
+" 2>/dev/null) || RUST_LINE_COV=0
     fi
 else
     echo "cargo-llvm-cov not available; skipping Rust coverage"
@@ -64,14 +83,18 @@ echo "=== Running frontend tests with coverage ==="
 FRONTEND_DIR="$PROJECT_ROOT/frontend"
 FRONTEND_LINE_COV=0
 if [ -d "$FRONTEND_DIR" ]; then
-    FRONTEND_REPORT=$(cd "$FRONTEND_DIR" && npm run test:coverage 2>&1) || true
-    echo "$FRONTEND_REPORT"
+    (cd "$FRONTEND_DIR" && npm run test:coverage 2>&1) || true
 
-    # Parse "All files" line from vitest coverage text table.
-    # Format: All files | % Stmts | % Branch | % Funcs | % Lines | ...
-    FRONTEND_RAW=$(echo "$FRONTEND_REPORT" | awk -F'|' '/All files/ { gsub(/ /, "", $5); print $5 }' | head -1)
-    if [ -n "$FRONTEND_RAW" ]; then
-        FRONTEND_LINE_COV="$FRONTEND_RAW"
+    # Parse overall from vitest's json-summary report (more reliable than text table).
+    FRONTEND_SUMMARY="$FRONTEND_DIR/coverage/coverage-summary.json"
+    if [ -f "$FRONTEND_SUMMARY" ]; then
+        FRONTEND_LINE_COV=$(python3 -c "
+import json
+with open('$FRONTEND_SUMMARY') as f:
+    data = json.load(f)
+pct = data['total']['lines']['pct']
+print(f'{pct:.1f}')
+" 2>/dev/null) || FRONTEND_LINE_COV=0
     fi
 else
     echo "No frontend/ directory found; skipping frontend coverage"
@@ -115,6 +138,69 @@ if [ "$PASS" = "true" ]; then
     fi
 fi
 
+# ── Write .coverage_report.json ───────────────────────────────────────────────
+# This language-agnostic JSON file is consumed by the huskies `coverage` bot
+# command to show per-file improvement targets without any language-specific
+# logic in the server.
+FRONTEND_SUMMARY="${FRONTEND_DIR}/coverage/coverage-summary.json"
+
+OVERALL_VAL="$OVERALL" \
+THRESHOLD_VAL="$THRESHOLD" \
+RUST_COV_JSON="$RUST_COV_JSON" \
+FRONTEND_SUMMARY="$FRONTEND_SUMMARY" \
+PROJECT_ROOT="$PROJECT_ROOT" \
+python3 - << 'PYEOF'
+import json, os, sys
+
+overall = float(os.environ["OVERALL_VAL"])
+threshold = float(os.environ["THRESHOLD_VAL"])
+rust_json = os.environ["RUST_COV_JSON"]
+frontend_summary = os.environ["FRONTEND_SUMMARY"]
+project_root = os.environ["PROJECT_ROOT"] + "/"
+
+files = []
+
+# Collect per-file Rust coverage from cargo llvm-cov JSON output.
+if os.path.exists(rust_json):
+    with open(rust_json) as f:
+        data = json.load(f)
+    for file_entry in data["data"][0]["files"]:
+        path = file_entry["filename"]
+        if path.startswith(project_root):
+            path = path[len(project_root):]
+        pct = file_entry["summary"]["lines"]["percent"]
+        files.append({"path": path, "coverage": round(pct, 2)})
+
+# Collect per-file frontend coverage from vitest json-summary output.
+if os.path.exists(frontend_summary):
+    with open(frontend_summary) as f:
+        data = json.load(f)
+    for key, value in data.items():
+        if key == "total":
+            continue
+        path = key
+        if path.startswith(project_root):
+            path = path[len(project_root):]
+        pct = value["lines"]["pct"]
+        files.append({"path": path, "coverage": round(pct, 2)})
+
+report = {
+    "overall": overall,
+    "threshold": threshold,
+    "files": sorted(files, key=lambda x: x["coverage"]),
+}
+
+output_path = os.path.join(project_root, ".coverage_report.json")
+with open(output_path, "w") as f:
+    json.dump(report, f, indent=2)
+    f.write("\n")
+
+print(f"Coverage report written: .coverage_report.json ({len(files)} files)")
+PYEOF
+
+# ── Cleanup temp files ────────────────────────────────────────────────────────
+rm -f "$RUST_COV_JSON"
+
 if [ "$PASS" = "false" ]; then
     exit 1
 fi
diff --git a/server/src/chat/commands/coverage.rs b/server/src/chat/commands/coverage.rs
index 7d098b2f..8cb57130 100644
--- a/server/src/chat/commands/coverage.rs
+++ b/server/src/chat/commands/coverage.rs
@@ -1,16 +1,35 @@
 //! Handler for the `coverage` command — show or refresh test coverage.
 //!
-//! Default (no args): reads the cached `.coverage_baseline` file and reports
-//! the stored value instantly without running the test suite.
+//! Default (no args): reads the cached `.coverage_report.json` file (written by
+//! `script/test_coverage`) and reports the overall percentage plus the top 5
+//! lowest-covered files as improvement targets.
 //!
 //! `coverage run`: executes `script/test_coverage` to collect fresh coverage
-//! data, parses the summary, and reports the result.
+//! data (which writes `.coverage_report.json`), then reports the result.
+
+use serde::Deserialize;
 
 use super::CommandContext;
 
+const COVERAGE_REPORT_FILE: &str = ".coverage_report.json";
 const BASELINE_FILE: &str = ".coverage_baseline";
 const COVERAGE_SCRIPT: &str = "script/test_coverage";
 
+/// Top-level structure of `.coverage_report.json`.
+#[derive(Deserialize)]
+struct CoverageReport {
+    overall: f64,
+    threshold: f64,
+    files: Vec<FileCoverage>,
+}
+
+/// Per-file coverage entry.
+#[derive(Deserialize)]
+struct FileCoverage {
+    path: String,
+    coverage: f64,
+}
+
 pub(super) fn handle_coverage(ctx: &CommandContext) -> Option<String> {
     let args = ctx.args.trim();
 
@@ -23,8 +42,61 @@ pub(super) fn handle_coverage(ctx: &CommandContext) -> Option<String> {
     }
 }
 
-/// Read coverage from `.coverage_baseline` and return a formatted report.
+/// Read coverage from `.coverage_report.json` (preferred) or `.coverage_baseline`
+/// (legacy fallback) and return a formatted report.
 fn read_cached_coverage(project_root: &std::path::Path) -> String {
+    let report_path = project_root.join(COVERAGE_REPORT_FILE);
+
+    if report_path.exists() {
+        return read_coverage_report(&report_path);
+    }
+
+    // Legacy fallback: read the plain-text baseline file.
+    read_legacy_baseline(project_root)
+}
+
+/// Parse and format `.coverage_report.json`.
+fn read_coverage_report(path: &std::path::Path) -> String {
+    let content = match std::fs::read_to_string(path) {
+        Ok(c) => c,
+        Err(e) => return format!("**Coverage (cached)**\n\nError reading `.coverage_report.json`: {e}"),
+    };
+
+    let report: CoverageReport = match serde_json::from_str(&content) {
+        Ok(r) => r,
+        Err(e) => return format!("**Coverage (cached)**\n\nFailed to parse `.coverage_report.json`: {e}"),
+    };
+
+    format_coverage_report(&report)
+}
+
+/// Format a `CoverageReport` as a human-readable message.
+fn format_coverage_report(report: &CoverageReport) -> String {
+    let mut out = "**Coverage (cached)**\n\n".to_string();
+    out.push_str(&format!(
+        "Overall: {:.1}% (threshold: {:.1}%)\n",
+        report.overall, report.threshold
+    ));
+
+    // Top 5 lowest-covered files (already sorted ascending in the JSON, but sort
+    // defensively here so the display is correct even if the file was hand-edited).
+    let mut sorted: Vec<&FileCoverage> = report.files.iter().collect();
+    sorted.sort_by(|a, b| a.coverage.partial_cmp(&b.coverage).unwrap_or(std::cmp::Ordering::Equal));
+
+    let targets: Vec<&FileCoverage> = sorted.into_iter().take(5).collect();
+    if !targets.is_empty() {
+        out.push_str("\n**Top 5 files needing coverage:**\n");
+        for (i, file) in targets.iter().enumerate() {
+            out.push_str(&format!("{}. {} — {:.1}%\n", i + 1, file.path, file.coverage));
+        }
+    }
+
+    out.push_str("\n*Run `coverage run` for fresh results.*");
+    out
+}
+
+/// Legacy fallback: read `.coverage_baseline` (plain text, one float per line).
+fn read_legacy_baseline(project_root: &std::path::Path) -> String {
     let baseline_path = project_root.join(BASELINE_FILE);
 
     match std::fs::read_to_string(&baseline_path) {
@@ -33,21 +105,17 @@ fn read_cached_coverage(project_root: &std::path::Path) -> String {
             if lines.is_empty() {
                 return "**Coverage (cached)**\n\nNo baseline data found.\n\n*Run `coverage run` to collect coverage.*".to_string();
             }
-            format_cached_coverage(&lines)
+            format_legacy_baseline(&lines)
         }
         Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
-            "**Coverage (cached)**\n\nNo `.coverage_baseline` file found.\n\n*Run `coverage run` to collect coverage.*".to_string()
+            "**Coverage (cached)**\n\nNo `.coverage_report.json` or `.coverage_baseline` found.\n\n*Run `coverage run` to collect coverage.*".to_string()
         }
         Err(e) => format!("**Coverage (cached)**\n\nError reading `.coverage_baseline`: {e}"),
     }
 }
 
-/// Format cached coverage lines for display.
-///
-/// The baseline file stores the last overall coverage percentage written by
-/// `script/test_coverage`.  It always contains one line (the overall percentage)
-/// but may have been seeded manually with two lines (Rust + Frontend baselines).
-fn format_cached_coverage(lines: &[&str]) -> String {
+/// Format legacy baseline lines for display.
+fn format_legacy_baseline(lines: &[&str]) -> String {
     let mut out = "**Coverage (cached)**\n\n".to_string();
     if lines.len() == 2 {
         out.push_str(&format!("Rust:     {}%\n", lines[0].trim()));
@@ -83,17 +151,30 @@ fn run_coverage(project_root: &std::path::Path) -> String {
 
             // Check if llvm-cov is missing
             if combined.contains("cargo-llvm-cov not available") && !combined.contains("TOTAL") {
-                return format!(
-                    "**Coverage**\n\n`cargo-llvm-cov` is not installed.\n\nInstall it with:\n```\ncargo install cargo-llvm-cov\n```"
-                );
+                return "**Coverage**\n\n`cargo-llvm-cov` is not installed.\n\nInstall it with:\n```\ncargo install cargo-llvm-cov\n```".to_string();
             }
 
+            // After the script runs, the .coverage_report.json is updated.
+            // Read it to show per-file targets in the response.
+            let report_path = project_root.join(COVERAGE_REPORT_FILE);
+            if report_path.exists() {
+                let mut result = read_coverage_report(&report_path);
+                // Replace the "cached" label with "fresh".
+                result = result.replacen("Coverage (cached)", "Coverage (fresh)", 1);
+                // Replace the cached hint with a pass/fail indicator.
+                let pass_indicator = if out.status.success() { "PASS" } else { "FAIL: coverage below threshold" };
+                result = result.replacen("*Run `coverage run` for fresh results.*", pass_indicator, 1);
+                return result;
+            }
+
+            // Fallback: parse the text output as before.
             parse_coverage_output(&combined, out.status.success())
         }
     }
 }
 
 /// Parse the summary block from `script/test_coverage` output and format it.
+/// Used as a fallback when `.coverage_report.json` is not available after a run.
 fn parse_coverage_output(output: &str, passed: bool) -> String {
     let rust_cov = extract_line_value(output, "Rust line coverage:");
     let frontend_cov = extract_line_value(output, "Frontend line coverage:");
@@ -117,7 +198,15 @@ fn parse_coverage_output(output: &str, passed: bool) -> String {
 
     if rust_cov.is_none() && frontend_cov.is_none() && overall.is_none() {
         // Could not parse — show raw output excerpt
-        let excerpt: String = output.lines().rev().take(10).collect::<Vec<_>>().into_iter().rev().collect::<Vec<_>>().join("\n");
+        let excerpt: String = output
+            .lines()
+            .rev()
+            .take(10)
+            .collect::<Vec<_>>()
+            .into_iter()
+            .rev()
+            .collect::<Vec<_>>()
+            .join("\n");
         return format!("**Coverage (fresh)**\n\n```\n{excerpt}\n```");
     }
 
@@ -181,6 +270,17 @@ mod tests {
         Arc::new(Mutex::new(HashSet::new()))
     }
 
+    fn sample_coverage_report(overall: f64, threshold: f64, files: Vec<(&str, f64)>) -> String {
+        let files_json: Vec<String> = files
+            .iter()
+            .map(|(path, cov)| format!(r#"{{"path":"{path}","coverage":{cov}}}"#))
+            .collect();
+        format!(
+            r#"{{"overall":{overall},"threshold":{threshold},"files":[{}]}}"#,
+            files_json.join(",")
+        )
+    }
+
     #[test]
     fn coverage_command_is_registered() {
         use super::super::commands;
@@ -203,7 +303,58 @@ mod tests {
     }
 
     #[test]
-    fn coverage_no_args_reads_baseline() {
+    fn coverage_reads_coverage_report_json() {
+        let dir = tempfile::tempdir().expect("tempdir");
+        let report = sample_coverage_report(
+            72.5,
+            60.0,
+            vec![
+                ("server/src/low.rs", 15.0),
+                ("server/src/medium.rs", 55.0),
+                ("server/src/high.rs", 90.0),
+            ],
+        );
+        std::fs::write(dir.path().join(".coverage_report.json"), &report).unwrap();
+
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
+        let output = handle_coverage(&ctx).unwrap();
+
+        assert!(output.contains("72.5"), "should include overall: {output}");
+        assert!(output.contains("60.0"), "should include threshold: {output}");
+        assert!(output.contains("15.0"), "should include lowest-covered file pct: {output}");
+        assert!(output.contains("server/src/low.rs"), "should include lowest-covered file path: {output}");
+    }
+
+    #[test]
+    fn coverage_shows_top_5_lowest_files() {
+        let dir = tempfile::tempdir().expect("tempdir");
+        let files: Vec<(&str, f64)> = vec![
+            ("a.rs", 10.0),
+            ("b.rs", 20.0),
+            ("c.rs", 30.0),
+            ("d.rs", 40.0),
+            ("e.rs", 50.0),
+            ("f.rs", 60.0),
+            ("g.rs", 70.0),
+        ];
+        let report = sample_coverage_report(40.0, 30.0, files);
+        std::fs::write(dir.path().join(".coverage_report.json"), &report).unwrap();
+
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
+        let output = handle_coverage(&ctx).unwrap();
+
+        assert!(output.contains("a.rs"), "should show lowest file: {output}");
+        assert!(output.contains("e.rs"), "should show 5th lowest file: {output}");
+        assert!(!output.contains("f.rs"), "should not show 6th file: {output}");
+        assert!(!output.contains("g.rs"), "should not show 7th file: {output}");
+    }
+
+    #[test]
+    fn coverage_falls_back_to_baseline_when_no_report_json() {
         let dir = tempfile::tempdir().expect("tempdir");
         std::fs::write(dir.path().join(".coverage_baseline"), "72.5\n").unwrap();
 
@@ -243,7 +394,7 @@ mod tests {
     }
 
     #[test]
-    fn coverage_missing_baseline_reports_clearly() {
+    fn coverage_missing_both_files_reports_clearly() {
         let dir = tempfile::tempdir().expect("tempdir");
 
         let agents = test_agents();
@@ -252,8 +403,8 @@ mod tests {
         let output = handle_coverage(&ctx).unwrap();
 
         assert!(
-            output.contains("No `.coverage_baseline`") || output.contains("coverage_baseline"),
-            "missing baseline should mention the file name: {output}"
+            output.contains("coverage_report.json") || output.contains("coverage_baseline"),
+            "missing files should mention a file name: {output}"
         );
     }
 
@@ -288,7 +439,8 @@ mod tests {
     #[test]
     fn coverage_command_works_via_dispatch() {
         let dir = tempfile::tempdir().expect("tempdir");
-        std::fs::write(dir.path().join(".coverage_baseline"), "55.0\n").unwrap();
+        let report = sample_coverage_report(55.0, 50.0, vec![]);
+        std::fs::write(dir.path().join(".coverage_report.json"), &report).unwrap();
 
         let agents = test_agents();
         let ambient = test_ambient();
@@ -308,6 +460,23 @@ mod tests {
         );
     }
 
+    #[test]
+    fn format_coverage_report_shows_overall_and_threshold() {
+        let report = CoverageReport {
+            overall: 66.25,
+            threshold: 60.0,
+            files: vec![
+                FileCoverage { path: "a.rs".to_string(), coverage: 10.0 },
+                FileCoverage { path: "b.rs".to_string(), coverage: 80.0 },
+            ],
+        };
+        let result = format_coverage_report(&report);
+        assert!(result.contains("66.2"), "should show overall: {result}");
+        assert!(result.contains("60.0"), "should show threshold: {result}");
+        assert!(result.contains("a.rs"), "should show lowest file: {result}");
+        assert!(result.contains("10.0"), "should show lowest file pct: {result}");
+    }
+
     #[test]
     fn parse_coverage_output_extracts_fields() {
         let sample = "\