fix(886): parse cargo diagnostics in run_check/run_build/run_lint

Before: tool_run_check (and run_build/run_lint via run_script_tool) returned the entire cargo log verbatim in `output`. For runs with many errors the response routinely exceeded the MCP token cap, was dumped to a tool-results file, and the agent had to scrape it with python3 just to see the error list — burning many turns on file archaeology for what should be a one-look operation.

Real example: 864's coder hit `result (143,708 characters) exceeds maximum allowed tokens` and spent ~8 turns extracting 3 errors.

Now:

- New `service::shell::parse_diagnostics` parses `error[CODE]:` / `warning[CODE]:` headers + their `--> file:line` markers into structured `Diagnostic { kind, code, message, file, line }`.
- `tool_run_check` (and the run_build/run_lint shared body) returns `{ passed, exit_code, errors: [...], warnings: [...], summary }`. Raw `output` is dropped from the default response.
- New `verbose: bool` argument (default false) restores the raw output for callers who actually need it (the run_build/run_lint shared body still truncates it to `MAX_OUTPUT_LINES`).
- Updated the existing tool_run_check test to assert the new contract (150 errors → 150 structured entries, response < 50KB).

Skipped run_tests in this pass — its parser would need to recognise test-runner output (different format from cargo); will land separately.

Closes 886.
2026-04-30 15:06:02 +00:00
parent 7ac3fc2e3e
commit 7a0c186d94
3 changed files with 305 additions and 24 deletions
@@ -358,15 +358,24 @@ async fn run_script_tool(
    let stdout = String::from_utf8_lossy(&result.stdout);
    let stderr = String::from_utf8_lossy(&result.stderr);
    let combined = format!("{stdout}{stderr}");
-    let output = truncate_output(&combined, MAX_OUTPUT_LINES);
    let exit_code = result.status.code().unwrap_or(-1);
+    let verbose = args
+        .get("verbose")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);

-    serde_json::to_string_pretty(&json!({
-        "passed": result.status.success(),
-        "exit_code": exit_code,
-        "output": output,
-    }))
-    .map_err(|e| format!("Serialization error: {e}"))
+    // When verbose, fall back to the legacy truncated output so callers
+    // who actually want raw text still get a bounded payload.
+    let mut payload = build_diagnostic_response(
+        result.status.success(),
+        exit_code,
+        &combined,
+        verbose,
+    );
+    if verbose {
+        payload["output"] = serde_json::json!(truncate_output(&combined, MAX_OUTPUT_LINES));
+    }
+    serde_json::to_string_pretty(&payload).map_err(|e| format!("Serialization error: {e}"))
 }

 pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result<String, String> {
@@ -420,18 +429,54 @@ pub(crate) async fn tool_run_check(args: &Value, ctx: &AppContext) -> Result<Str

    let stdout = String::from_utf8_lossy(&result.stdout);
    let stderr = String::from_utf8_lossy(&result.stderr);
-    // No truncation: agents need the full compiler output to diagnose errors.
-    let output = format!("{stdout}{stderr}");
+    let combined = format!("{stdout}{stderr}");
    let exit_code = result.status.code().unwrap_or(-1);
+    let verbose = args
+        .get("verbose")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);

-    serde_json::to_string_pretty(&json!({
-        "passed": result.status.success(),
-        "exit_code": exit_code,
-        "output": output,
-    }))
+    serde_json::to_string_pretty(&build_diagnostic_response(
+        result.status.success(),
+        exit_code,
+        &combined,
+        verbose,
+    ))
    .map_err(|e| format!("Serialization error: {e}"))
 }

+/// Builds the JSON response shared by the tools that wrap cargo / rustc output (bug 886):
+/// `passed`, `exit_code`, the parsed `errors` and `warnings` arrays, and a one-line
+/// `summary` of the form "N error(s), M warning(s)". Diagnostics whose `kind` is neither
+/// "error" nor "warning" appear in neither array. The raw text is attached as `output` only
+/// when `verbose` is true; this is intended to keep the default response under the MCP token cap.
+fn build_diagnostic_response(
+    passed: bool,
+    exit_code: i32,
+    raw_output: &str,
+    verbose: bool,
+) -> serde_json::Value {
+    use crate::service::shell::parse_diagnostics::{parse_diagnostics, summarise};
+    let diags = parse_diagnostics(raw_output);
+    let summary = summarise(&diags); // supplies the counts used in "summary" below
+    let errors: Vec<&_> = diags.iter().filter(|d| d.kind == "error").collect();
+    let warnings: Vec<&_> = diags.iter().filter(|d| d.kind == "warning").collect();
+    let mut payload = json!({
+        "passed": passed,
+        "exit_code": exit_code,
+        "errors": errors,
+        "warnings": warnings,
+        "summary": format!(
+            "{} error(s), {} warning(s)",
+            summary.error_count, summary.warning_count
+        ),
+    });
+    if verbose {
+        payload["output"] = json!(raw_output); // full text; this function does not truncate
+    }
+    payload
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -626,16 +671,18 @@ mod tests {
    }

    #[tokio::test]
-    async fn tool_run_check_returns_full_output_on_nonzero_exit() {
+    async fn tool_run_check_returns_parsed_errors_on_nonzero_exit() {
+        // Bug 886: rather than dumping the entire cargo log into `output`
+        // (which routinely exceeds the MCP token cap), tool_run_check now
+        // parses errors / warnings into structured arrays. Raw output is
+        // available behind `verbose: true`.
        let tmp = tempfile::tempdir().unwrap();
        let script_dir = tmp.path().join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let script_path = script_dir.join("check");
-        // Script that emits 150 lines (exceeds the 100-line truncation limit used
-        // by run_build/run_lint) and exits non-zero to verify no truncation occurs.
        std::fs::write(
            &script_path,
-            "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[$i]: compile error on line $i\"; done\nexit 1\n",
+            "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[E$i]: compile error on line $i\"; done\nexit 1\n",
        )
        .unwrap();
        #[cfg(unix)]
@@ -645,16 +692,43 @@ mod tests {
        }

        let ctx = test_ctx(tmp.path());
+
+        // Default mode: no `output` field, structured `errors` array, summary.
        let result = tool_run_check(&json!({}), &ctx).await.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
-
        assert_eq!(parsed["passed"], false);
        assert_eq!(parsed["exit_code"], 1);
-        let output = parsed["output"].as_str().unwrap();
-        // All 150 lines must be present — no truncation.
-        assert!(output.contains("error[1]"), "should contain first line");
-        assert!(output.contains("error[150]"), "should contain last line");
-        assert!(!output.contains("omitted"), "must not truncate output");
+        assert!(
+            parsed.get("output").map(|v| v.is_null()).unwrap_or(true),
+            "default mode must not include raw `output`"
+        );
+        let errors = parsed["errors"].as_array().expect("errors array");
+        assert_eq!(errors.len(), 150, "all 150 errors should be parsed");
+        assert_eq!(
+            errors[0]["code"].as_str(),
+            Some("E1"),
+            "first error code should be parsed"
+        );
+        assert_eq!(
+            errors[149]["code"].as_str(),
+            Some("E150"),
+            "last error code should be parsed"
+        );
+        let summary = parsed["summary"].as_str().expect("summary string");
+        assert!(summary.contains("150 error"), "summary mentions error count: {summary}");
+        // Default response should be small even with 150 errors.
+        assert!(
+            result.len() < 50_000,
+            "default response should be compact (was {} bytes)",
+            result.len()
+        );
+
+        // Verbose mode: raw output is included.
+        let result_v = tool_run_check(&json!({"verbose": true}), &ctx).await.unwrap();
+        let parsed_v: serde_json::Value = serde_json::from_str(&result_v).unwrap();
+        let output = parsed_v["output"].as_str().expect("verbose includes output");
+        assert!(output.contains("error[E1]"), "verbose contains first line");
+        assert!(output.contains("error[E150]"), "verbose contains last line");
    }

    #[tokio::test]