From 7a0c186d9484a59e8a6fda768e86528b9191a59b Mon Sep 17 00:00:00 2001
From: dave <futurechimp@users.noreply.github.com>
Date: Thu, 30 Apr 2026 15:06:02 +0000
Subject: [PATCH] fix(886): parse cargo diagnostics in
 run_check/run_build/run_lint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before: tool_run_check (and run_build/run_lint via run_script_tool)
returned the entire cargo log verbatim in `output`. For runs with many
errors the response routinely exceeded the MCP token cap, was dumped
to a tool-results file, and the agent had to scrape it with python3
just to see the error list — burning many turns on file archaeology
for what should be a one-look operation. Real example: 864's coder
hit `result (143,708 characters) exceeds maximum allowed tokens` and
spent ~8 turns extracting 3 errors.

Now:
- New `service::shell::parse_diagnostics` parses `error[CODE]:` /
  `warning[CODE]:` headers + their `--> file:line` markers into
  structured `Diagnostic { kind, code, message, file, line }`.
- `tool_run_check` (and the run_build/run_lint shared body) returns
  `{ passed, exit_code, errors: [...], warnings: [...], summary }`.
  Raw `output` is dropped from the default response.
- New `verbose: bool` argument (default false) restores the raw
  output for callers who actually need it.
- Updated the existing tool_run_check test to assert the new
  contract (150 errors → 150 structured entries, response < 50KB).

Skipped run_tests in this pass — its parser would need to recognise
test-runner output (different format from cargo); will land separately.

Closes 886.
---
 server/src/http/mcp/shell_tools/script.rs     | 122 ++++++++---
 server/src/service/shell/mod.rs               |   4 +
 server/src/service/shell/parse_diagnostics.rs | 203 ++++++++++++++++++
 3 files changed, 305 insertions(+), 24 deletions(-)
 create mode 100644 server/src/service/shell/parse_diagnostics.rs

diff --git a/server/src/http/mcp/shell_tools/script.rs b/server/src/http/mcp/shell_tools/script.rs
index 0ee50394..c59b126d 100644
--- a/server/src/http/mcp/shell_tools/script.rs
+++ b/server/src/http/mcp/shell_tools/script.rs
@@ -358,15 +358,24 @@ async fn run_script_tool(
     let stdout = String::from_utf8_lossy(&result.stdout);
     let stderr = String::from_utf8_lossy(&result.stderr);
     let combined = format!("{stdout}{stderr}");
-    let output = truncate_output(&combined, MAX_OUTPUT_LINES);
     let exit_code = result.status.code().unwrap_or(-1);
+    let verbose = args
+        .get("verbose")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);
 
-    serde_json::to_string_pretty(&json!({
-        "passed": result.status.success(),
-        "exit_code": exit_code,
-        "output": output,
-    }))
-    .map_err(|e| format!("Serialization error: {e}"))
+    // When verbose, fall back to the legacy truncated output so callers
+    // who actually want raw text still get a bounded payload.
+    let mut payload = build_diagnostic_response(
+        result.status.success(),
+        exit_code,
+        &combined,
+        verbose,
+    );
+    if verbose {
+        payload["output"] = serde_json::json!(truncate_output(&combined, MAX_OUTPUT_LINES));
+    }
+    serde_json::to_string_pretty(&payload).map_err(|e| format!("Serialization error: {e}"))
 }
 
 pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result<String, String> {
@@ -420,18 +429,54 @@ pub(crate) async fn tool_run_check(args: &Value, ctx: &AppContext) -> Result<Str
 
     let stdout = String::from_utf8_lossy(&result.stdout);
     let stderr = String::from_utf8_lossy(&result.stderr);
-    // No truncation: agents need the full compiler output to diagnose errors.
-    let output = format!("{stdout}{stderr}");
+    let combined = format!("{stdout}{stderr}");
     let exit_code = result.status.code().unwrap_or(-1);
+    let verbose = args
+        .get("verbose")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);
 
-    serde_json::to_string_pretty(&json!({
-        "passed": result.status.success(),
-        "exit_code": exit_code,
-        "output": output,
-    }))
+    serde_json::to_string_pretty(&build_diagnostic_response(
+        result.status.success(),
+        exit_code,
+        &combined,
+        verbose,
+    ))
     .map_err(|e| format!("Serialization error: {e}"))
 }
 
+/// Shared response builder for tools that wrap cargo / rustc output. By
+/// default returns parsed `errors` + `warnings` arrays plus a one-line
+/// summary; the raw `output` is only included when `verbose` is true. This
+/// keeps the MCP response under the token cap for runs with many errors
+/// (bug 886).
+fn build_diagnostic_response(
+    passed: bool,
+    exit_code: i32,
+    raw_output: &str,
+    verbose: bool,
+) -> serde_json::Value {
+    use crate::service::shell::parse_diagnostics::{parse_diagnostics, summarise};
+    let diags = parse_diagnostics(raw_output);
+    let summary = summarise(&diags);
+    let errors: Vec<&_> = diags.iter().filter(|d| d.kind == "error").collect();
+    let warnings: Vec<&_> = diags.iter().filter(|d| d.kind == "warning").collect();
+    let mut payload = json!({
+        "passed": passed,
+        "exit_code": exit_code,
+        "errors": errors,
+        "warnings": warnings,
+        "summary": format!(
+            "{} error(s), {} warning(s)",
+            summary.error_count, summary.warning_count
+        ),
+    });
+    if verbose {
+        payload["output"] = json!(raw_output);
+    }
+    payload
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -626,16 +671,18 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn tool_run_check_returns_full_output_on_nonzero_exit() {
+    async fn tool_run_check_returns_parsed_errors_on_nonzero_exit() {
+        // Bug 886: rather than dumping the entire cargo log into `output`
+        // (which routinely exceeds the MCP token cap), tool_run_check now
+        // parses errors / warnings into structured arrays. Raw output is
+        // available behind `verbose: true`.
         let tmp = tempfile::tempdir().unwrap();
         let script_dir = tmp.path().join("script");
         std::fs::create_dir_all(&script_dir).unwrap();
         let script_path = script_dir.join("check");
-        // Script that emits 150 lines (exceeds the 100-line truncation limit used
-        // by run_build/run_lint) and exits non-zero to verify no truncation occurs.
         std::fs::write(
             &script_path,
-            "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[$i]: compile error on line $i\"; done\nexit 1\n",
+            "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[E$i]: compile error on line $i\"; done\nexit 1\n",
         )
         .unwrap();
         #[cfg(unix)]
@@ -645,16 +692,43 @@ mod tests {
         }
 
         let ctx = test_ctx(tmp.path());
+
+        // Default mode: no `output` field, structured `errors` array, summary.
         let result = tool_run_check(&json!({}), &ctx).await.unwrap();
         let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
-
         assert_eq!(parsed["passed"], false);
         assert_eq!(parsed["exit_code"], 1);
-        let output = parsed["output"].as_str().unwrap();
-        // All 150 lines must be present — no truncation.
-        assert!(output.contains("error[1]"), "should contain first line");
-        assert!(output.contains("error[150]"), "should contain last line");
-        assert!(!output.contains("omitted"), "must not truncate output");
+        assert!(
+            parsed.get("output").map(|v| v.is_null()).unwrap_or(true),
+            "default mode must not include raw `output`"
+        );
+        let errors = parsed["errors"].as_array().expect("errors array");
+        assert_eq!(errors.len(), 150, "all 150 errors should be parsed");
+        assert_eq!(
+            errors[0]["code"].as_str(),
+            Some("E1"),
+            "first error code should be parsed"
+        );
+        assert_eq!(
+            errors[149]["code"].as_str(),
+            Some("E150"),
+            "last error code should be parsed"
+        );
+        let summary = parsed["summary"].as_str().expect("summary string");
+        assert!(summary.contains("150 error"), "summary mentions error count: {summary}");
+        // Default response should be small even with 150 errors.
+        assert!(
+            result.len() < 50_000,
+            "default response should be compact (was {} bytes)",
+            result.len()
+        );
+
+        // Verbose mode: raw output is included.
+        let result_v = tool_run_check(&json!({"verbose": true}), &ctx).await.unwrap();
+        let parsed_v: serde_json::Value = serde_json::from_str(&result_v).unwrap();
+        let output = parsed_v["output"].as_str().expect("verbose includes output");
+        assert!(output.contains("error[E1]"), "verbose contains first line");
+        assert!(output.contains("error[E150]"), "verbose contains last line");
     }
 
     #[tokio::test]
diff --git a/server/src/service/shell/mod.rs b/server/src/service/shell/mod.rs
index 49a8ed3d..37e6f332 100644
--- a/server/src/service/shell/mod.rs
+++ b/server/src/service/shell/mod.rs
@@ -10,6 +10,10 @@
 pub mod io;
 /// Pure command-safety checks, blocked-binary lists, and output truncation.
 pub mod path_guard;
+/// Cargo / rustc diagnostic parser — extracts structured errors and warnings
+/// from raw cargo output. Used by run_check / run_build / run_lint MCP tools
+/// (bug 886).
+pub mod parse_diagnostics;
 
 #[allow(unused_imports)]
 pub use path_guard::{
diff --git a/server/src/service/shell/parse_diagnostics.rs b/server/src/service/shell/parse_diagnostics.rs
new file mode 100644
index 00000000..b873b875
--- /dev/null
+++ b/server/src/service/shell/parse_diagnostics.rs
@@ -0,0 +1,203 @@
+//! Parser for cargo compiler output (errors + warnings).
+//!
+//! Cargo emits diagnostics in a recognisable two-line shape:
+//!
+//! ```text
+//! error[E0061]: this function takes 4 arguments but 3 arguments were supplied
+//!   --> server/src/agents/pool/auto_assign/merge.rs:86:29
+//! ```
+//!
+//! or, without a code:
+//!
+//! ```text
+//! warning: unused import: `Foo`
+//!   --> server/src/lib.rs:3:5
+//! ```
+//!
+//! This module extracts those headers into structured [`Diagnostic`] entries so
+//! that MCP shell tools (`run_check`, `run_build`, `run_lint`) can return a
+//! compact error list instead of the full multi-hundred-KB cargo log.
+//! See bug 886.
+use regex::Regex;
+use serde::Serialize;
+use std::sync::OnceLock;
+
+/// One compiler diagnostic extracted from cargo output.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct Diagnostic {
+    /// Either `"error"` or `"warning"`.
+    pub kind: String,
+    /// Rust diagnostic code if present, e.g. `"E0061"`. `None` for plain
+    /// `error: ...` / `warning: ...` headers.
+    pub code: Option<String>,
+    /// The message text on the same line as the header.
+    pub message: String,
+    /// Source file path from the `-->` line, when present.
+    pub file: Option<String>,
+    /// 1-based line number from the `-->` line, when present.
+    pub line: Option<u32>,
+}
+
+/// Compact summary of a diagnostics scan.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct DiagnosticSummary {
+    pub error_count: usize,
+    pub warning_count: usize,
+}
+
+/// Parse cargo / rustc style output into structured diagnostics.
+///
+/// Recognises lines starting with `error[CODE]:`, `error:`, `warning[CODE]:`,
+/// `warning:`. If the next non-blank line starts with ` --> file:line(:col)?`
+/// the location is attached to the diagnostic. Lines that do not match either
+/// pattern are ignored, so the parser tolerates arbitrary surrounding output
+/// (Compiling/Checking progress, source snippets, etc.).
+pub fn parse_diagnostics(output: &str) -> Vec<Diagnostic> {
+    static HEADER_RE: OnceLock<Regex> = OnceLock::new();
+    static LOC_RE: OnceLock<Regex> = OnceLock::new();
+    let header_re = HEADER_RE.get_or_init(|| {
+        // Anchored to start-of-line: "error" or "warning", optional [CODE], colon, message.
+        Regex::new(r"^(error|warning)(?:\[([A-Za-z0-9]+)\])?: (.+)$").unwrap()
+    });
+    let loc_re = LOC_RE
+        .get_or_init(|| Regex::new(r"^\s*-->\s+([^:\s][^:]*):(\d+)(?::\d+)?\s*$").unwrap());
+
+    let lines: Vec<&str> = output.lines().collect();
+    let mut diagnostics = Vec::new();
+
+    for (i, line) in lines.iter().enumerate() {
+        let Some(caps) = header_re.captures(line) else {
+            continue;
+        };
+        let kind = caps.get(1).unwrap().as_str().to_string();
+        let code = caps.get(2).map(|m| m.as_str().to_string());
+        let message = caps.get(3).unwrap().as_str().to_string();
+
+        // Look ahead a few lines for the `-->` location marker. Cargo
+        // typically puts it 1 line after the header, but allow up to 3 in
+        // case of formatting quirks.
+        let mut file = None;
+        let mut line_num = None;
+        for look in 1..=3 {
+            if let Some(next) = lines.get(i + look)
+                && let Some(loc) = loc_re.captures(next)
+            {
+                file = Some(loc.get(1).unwrap().as_str().to_string());
+                line_num = loc.get(2).unwrap().as_str().parse::<u32>().ok();
+                break;
+            }
+        }
+
+        diagnostics.push(Diagnostic {
+            kind,
+            code,
+            message,
+            file,
+            line: line_num,
+        });
+    }
+
+    diagnostics
+}
+
+/// Count errors vs warnings in a parsed diagnostics list.
+pub fn summarise(diagnostics: &[Diagnostic]) -> DiagnosticSummary {
+    let error_count = diagnostics.iter().filter(|d| d.kind == "error").count();
+    let warning_count = diagnostics.iter().filter(|d| d.kind == "warning").count();
+    DiagnosticSummary {
+        error_count,
+        warning_count,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_error_with_code_and_location() {
+        let input = "error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n  --> server/src/agents/pool/auto_assign/merge.rs:86:29\n   |\n86 | foo();\n";
+        let diags = parse_diagnostics(input);
+        assert_eq!(diags.len(), 1);
+        assert_eq!(diags[0].kind, "error");
+        assert_eq!(diags[0].code.as_deref(), Some("E0061"));
+        assert!(diags[0].message.contains("4 arguments"));
+        assert_eq!(
+            diags[0].file.as_deref(),
+            Some("server/src/agents/pool/auto_assign/merge.rs")
+        );
+        assert_eq!(diags[0].line, Some(86));
+    }
+
+    #[test]
+    fn parses_warning_without_code() {
+        let input = "warning: unused import: `Foo`\n  --> server/src/lib.rs:3:5\n";
+        let diags = parse_diagnostics(input);
+        assert_eq!(diags.len(), 1);
+        assert_eq!(diags[0].kind, "warning");
+        assert!(diags[0].code.is_none());
+        assert_eq!(diags[0].message, "unused import: `Foo`");
+        assert_eq!(diags[0].file.as_deref(), Some("server/src/lib.rs"));
+        assert_eq!(diags[0].line, Some(3));
+    }
+
+    #[test]
+    fn parses_header_without_location() {
+        // Plain `error:` / `warning:` headers without a `-->` marker still
+        // produce a diagnostic — just with file/line as None.
+        let input = "error: could not compile `huskies` (bin \"huskies\") due to 3 previous errors";
+        let diags = parse_diagnostics(input);
+        assert_eq!(diags.len(), 1);
+        assert_eq!(diags[0].kind, "error");
+        assert!(diags[0].file.is_none());
+        assert!(diags[0].line.is_none());
+    }
+
+    #[test]
+    fn ignores_non_diagnostic_lines() {
+        let input = "    Checking huskies v0.10.4\n   Compiling foo v0.1.0\n    Finished dev profile\n";
+        assert!(parse_diagnostics(input).is_empty());
+    }
+
+    #[test]
+    fn parses_three_compile_errors_from_real_cargo_output() {
+        // Regression for bug 886: real-world output from 864's failing run.
+        let input = "    Checking huskies v0.10.4 (/workspace/server)\n\
+warning: unused imports: `ItemMetadata` and `OwnedItemMetadata`\n  \
+--> server/src/db/mod.rs:25:5\n\
+   |\n\
+25 |     ItemMetadata, OwnedItemMetadata, delete_item, move_item_stage, next_item_number,\n   \
+|     ^^^^^^^^^^^^  ^^^^^^^^^^^^^^^^^\n\n\
+error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n  \
+--> server/src/agents/pool/auto_assign/merge.rs:86:29\n\
+   |\n\
+86 | crate::db::write_item_with_content(story_id, \"4_merge\", &updated);\n\n\
+error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n  \
+--> server/src/agents/pool/pipeline/advance/helpers.rs:91:9\n\
+   |\n\
+91 | crate::db::write_item_with_content(story_id, &stage, &updated);\n\n\
+error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n  \
+--> server/src/chat/commands/unblock.rs:103:13\n\
+   |\n\
+103| crate::db::write_item_with_content(story_id, &stage, &updated);\n\n\
+For more information about this error, try `rustc --explain E0061`.\n\
+warning: `huskies` (bin \"huskies\") generated 1 warning\n\
+error: could not compile `huskies` (bin \"huskies\") due to 3 previous errors; 1 warning emitted\n";
+
+        let diags = parse_diagnostics(input);
+        let summary = summarise(&diags);
+
+        // 3 compile errors + 1 unused-import warning + 1 generated-warning summary line + 1 final "could not compile" error
+        assert_eq!(summary.error_count, 4);
+        assert_eq!(summary.warning_count, 2);
+
+        // The 3 main compile errors must be present with file + line.
+        let e0061: Vec<&Diagnostic> =
+            diags.iter().filter(|d| d.code.as_deref() == Some("E0061")).collect();
+        assert_eq!(e0061.len(), 3);
+        let files: Vec<Option<&str>> = e0061.iter().map(|d| d.file.as_deref()).collect();
+        assert!(files.contains(&Some("server/src/agents/pool/auto_assign/merge.rs")));
+        assert!(files.contains(&Some("server/src/agents/pool/pipeline/advance/helpers.rs")));
+        assert!(files.contains(&Some("server/src/chat/commands/unblock.rs")));
+    }
+}