From 7a0c186d9484a59e8a6fda768e86528b9191a59b Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 30 Apr 2026 15:06:02 +0000 Subject: [PATCH] fix(886): parse cargo diagnostics in run_check/run_build/run_lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: tool_run_check (and run_build/run_lint via run_script_tool) returned the entire cargo log verbatim in `output`. For runs with many errors the response routinely exceeded the MCP token cap, was dumped to a tool-results file, and the agent had to scrape it with python3 just to see the error list — burning many turns on file archaeology for what should be a one-look operation. Real example: 864's coder hit `result (143,708 characters) exceeds maximum allowed tokens` and spent ~8 turns extracting 3 errors. Now: - New `service::shell::parse_diagnostics` parses `error[CODE]:` / `warning[CODE]:` headers + their `--> file:line` markers into structured `Diagnostic { kind, code, message, file, line }`. - `tool_run_check` (and the run_build/run_lint shared body) returns `{ passed, exit_code, errors: [...], warnings: [...], summary }`. Raw `output` is dropped from the default response. - New `verbose: bool` argument (default false) restores the raw output for callers who actually need it. - Updated the existing tool_run_check test to assert the new contract (150 errors → 150 structured entries, response < 50KB). Skipped run_tests in this pass — its parser would need to recognise test-runner output (different format from cargo); will land separately. Closes 886. --- server/src/http/mcp/shell_tools/script.rs | 122 ++++++++--- server/src/service/shell/mod.rs | 4 + server/src/service/shell/parse_diagnostics.rs | 203 ++++++++++++++++++ 3 files changed, 305 insertions(+), 24 deletions(-) create mode 100644 server/src/service/shell/parse_diagnostics.rs diff --git a/server/src/http/mcp/shell_tools/script.rs b/server/src/http/mcp/shell_tools/script.rs index 0ee50394..c59b126d 100644 --- a/server/src/http/mcp/shell_tools/script.rs +++ b/server/src/http/mcp/shell_tools/script.rs @@ -358,15 +358,24 @@ async fn run_script_tool( let stdout = String::from_utf8_lossy(&result.stdout); let stderr = String::from_utf8_lossy(&result.stderr); let combined = format!("{stdout}{stderr}"); - let output = truncate_output(&combined, MAX_OUTPUT_LINES); let exit_code = result.status.code().unwrap_or(-1); + let verbose = args + .get("verbose") + .and_then(|v| v.as_bool()) + .unwrap_or(false); - serde_json::to_string_pretty(&json!({ - "passed": result.status.success(), - "exit_code": exit_code, - "output": output, - })) - .map_err(|e| format!("Serialization error: {e}")) + // When verbose, fall back to the legacy truncated output so callers + // who actually want raw text still get a bounded payload. + let mut payload = build_diagnostic_response( + result.status.success(), + exit_code, + &combined, + verbose, + ); + if verbose { + payload["output"] = serde_json::json!(truncate_output(&combined, MAX_OUTPUT_LINES)); + } + serde_json::to_string_pretty(&payload).map_err(|e| format!("Serialization error: {e}")) } pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result { @@ -420,18 +429,54 @@ pub(crate) async fn tool_run_check(args: &Value, ctx: &AppContext) -> Result serde_json::Value { + use crate::service::shell::parse_diagnostics::{parse_diagnostics, summarise}; + let diags = parse_diagnostics(raw_output); + let summary = summarise(&diags); + let errors: Vec<&_> = diags.iter().filter(|d| d.kind == "error").collect(); + let warnings: Vec<&_> = diags.iter().filter(|d| d.kind == "warning").collect(); + let mut payload = json!({ + "passed": passed, + "exit_code": exit_code, + "errors": errors, + "warnings": warnings, + "summary": format!( + "{} error(s), {} warning(s)", + summary.error_count, summary.warning_count + ), + }); + if verbose { + payload["output"] = json!(raw_output); + } + payload +} + #[cfg(test)] mod tests { use super::*; @@ -626,16 +671,18 @@ mod tests { } #[tokio::test] - async fn tool_run_check_returns_full_output_on_nonzero_exit() { + async fn tool_run_check_returns_parsed_errors_on_nonzero_exit() { + // Bug 886: rather than dumping the entire cargo log into `output` + // (which routinely exceeds the MCP token cap), tool_run_check now + // parses errors / warnings into structured arrays. Raw output is + // available behind `verbose: true`. let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("check"); - // Script that emits 150 lines (exceeds the 100-line truncation limit used - // by run_build/run_lint) and exits non-zero to verify no truncation occurs. std::fs::write( &script_path, - "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[$i]: compile error on line $i\"; done\nexit 1\n", + "#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[E$i]: compile error on line $i\"; done\nexit 1\n", ) .unwrap(); #[cfg(unix)] @@ -645,16 +692,43 @@ mod tests { } let ctx = test_ctx(tmp.path()); + + // Default mode: no `output` field, structured `errors` array, summary. let result = tool_run_check(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); - assert_eq!(parsed["passed"], false); assert_eq!(parsed["exit_code"], 1); - let output = parsed["output"].as_str().unwrap(); - // All 150 lines must be present — no truncation. - assert!(output.contains("error[1]"), "should contain first line"); - assert!(output.contains("error[150]"), "should contain last line"); - assert!(!output.contains("omitted"), "must not truncate output"); + assert!( + parsed.get("output").map(|v| v.is_null()).unwrap_or(true), + "default mode must not include raw `output`" + ); + let errors = parsed["errors"].as_array().expect("errors array"); + assert_eq!(errors.len(), 150, "all 150 errors should be parsed"); + assert_eq!( + errors[0]["code"].as_str(), + Some("E1"), + "first error code should be parsed" + ); + assert_eq!( + errors[149]["code"].as_str(), + Some("E150"), + "last error code should be parsed" + ); + let summary = parsed["summary"].as_str().expect("summary string"); + assert!(summary.contains("150 error"), "summary mentions error count: {summary}"); + // Default response should be small even with 150 errors. + assert!( + result.len() < 50_000, + "default response should be compact (was {} bytes)", + result.len() + ); + + // Verbose mode: raw output is included. + let result_v = tool_run_check(&json!({"verbose": true}), &ctx).await.unwrap(); + let parsed_v: serde_json::Value = serde_json::from_str(&result_v).unwrap(); + let output = parsed_v["output"].as_str().expect("verbose includes output"); + assert!(output.contains("error[E1]"), "verbose contains first line"); + assert!(output.contains("error[E150]"), "verbose contains last line"); } #[tokio::test] diff --git a/server/src/service/shell/mod.rs b/server/src/service/shell/mod.rs index 49a8ed3d..37e6f332 100644 --- a/server/src/service/shell/mod.rs +++ b/server/src/service/shell/mod.rs @@ -10,6 +10,10 @@ pub mod io; /// Pure command-safety checks, blocked-binary lists, and output truncation. pub mod path_guard; +/// Cargo / rustc diagnostic parser — extracts structured errors and warnings +/// from raw cargo output. Used by run_check / run_build / run_lint MCP tools +/// (bug 886). +pub mod parse_diagnostics; #[allow(unused_imports)] pub use path_guard::{ diff --git a/server/src/service/shell/parse_diagnostics.rs b/server/src/service/shell/parse_diagnostics.rs new file mode 100644 index 00000000..b873b875 --- /dev/null +++ b/server/src/service/shell/parse_diagnostics.rs @@ -0,0 +1,203 @@ +//! Parser for cargo compiler output (errors + warnings). +//! +//! Cargo emits diagnostics in a recognisable two-line shape: +//! +//! ```text +//! error[E0061]: this function takes 4 arguments but 3 arguments were supplied +//! --> server/src/agents/pool/auto_assign/merge.rs:86:29 +//! ``` +//! +//! or, without a code: +//! +//! ```text +//! warning: unused import: `Foo` +//! --> server/src/lib.rs:3:5 +//! ``` +//! +//! This module extracts those headers into structured [`Diagnostic`] entries so +//! that MCP shell tools (`run_check`, `run_build`, `run_lint`) can return a +//! compact error list instead of the full multi-hundred-KB cargo log. +//! See bug 886. +use regex::Regex; +use serde::Serialize; +use std::sync::OnceLock; + +/// One compiler diagnostic extracted from cargo output. +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct Diagnostic { + /// Either `"error"` or `"warning"`. + pub kind: String, + /// Rust diagnostic code if present, e.g. `"E0061"`. `None` for plain + /// `error: ...` / `warning: ...` headers. + pub code: Option, + /// The message text on the same line as the header. + pub message: String, + /// Source file path from the `-->` line, when present. + pub file: Option, + /// 1-based line number from the `-->` line, when present. + pub line: Option, +} + +/// Compact summary of a diagnostics scan. +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct DiagnosticSummary { + pub error_count: usize, + pub warning_count: usize, +} + +/// Parse cargo / rustc style output into structured diagnostics. +/// +/// Recognises lines starting with `error[CODE]:`, `error:`, `warning[CODE]:`, +/// `warning:`. If the next non-blank line starts with ` --> file:line(:col)?` +/// the location is attached to the diagnostic. Lines that do not match either +/// pattern are ignored, so the parser tolerates arbitrary surrounding output +/// (Compiling/Checking progress, source snippets, etc.). +pub fn parse_diagnostics(output: &str) -> Vec { + static HEADER_RE: OnceLock = OnceLock::new(); + static LOC_RE: OnceLock = OnceLock::new(); + let header_re = HEADER_RE.get_or_init(|| { + // Anchored to start-of-line: "error" or "warning", optional [CODE], colon, message. + Regex::new(r"^(error|warning)(?:\[([A-Za-z0-9]+)\])?: (.+)$").unwrap() + }); + let loc_re = LOC_RE + .get_or_init(|| Regex::new(r"^\s*-->\s+([^:\s][^:]*):(\d+)(?::\d+)?\s*$").unwrap()); + + let lines: Vec<&str> = output.lines().collect(); + let mut diagnostics = Vec::new(); + + for (i, line) in lines.iter().enumerate() { + let Some(caps) = header_re.captures(line) else { + continue; + }; + let kind = caps.get(1).unwrap().as_str().to_string(); + let code = caps.get(2).map(|m| m.as_str().to_string()); + let message = caps.get(3).unwrap().as_str().to_string(); + + // Look ahead a few lines for the `-->` location marker. Cargo + // typically puts it 1 line after the header, but allow up to 3 in + // case of formatting quirks. + let mut file = None; + let mut line_num = None; + for look in 1..=3 { + if let Some(next) = lines.get(i + look) + && let Some(loc) = loc_re.captures(next) + { + file = Some(loc.get(1).unwrap().as_str().to_string()); + line_num = loc.get(2).unwrap().as_str().parse::().ok(); + break; + } + } + + diagnostics.push(Diagnostic { + kind, + code, + message, + file, + line: line_num, + }); + } + + diagnostics +} + +/// Count errors vs warnings in a parsed diagnostics list. +pub fn summarise(diagnostics: &[Diagnostic]) -> DiagnosticSummary { + let error_count = diagnostics.iter().filter(|d| d.kind == "error").count(); + let warning_count = diagnostics.iter().filter(|d| d.kind == "warning").count(); + DiagnosticSummary { + error_count, + warning_count, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_error_with_code_and_location() { + let input = "error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n --> server/src/agents/pool/auto_assign/merge.rs:86:29\n |\n86 | foo();\n"; + let diags = parse_diagnostics(input); + assert_eq!(diags.len(), 1); + assert_eq!(diags[0].kind, "error"); + assert_eq!(diags[0].code.as_deref(), Some("E0061")); + assert!(diags[0].message.contains("4 arguments")); + assert_eq!( + diags[0].file.as_deref(), + Some("server/src/agents/pool/auto_assign/merge.rs") + ); + assert_eq!(diags[0].line, Some(86)); + } + + #[test] + fn parses_warning_without_code() { + let input = "warning: unused import: `Foo`\n --> server/src/lib.rs:3:5\n"; + let diags = parse_diagnostics(input); + assert_eq!(diags.len(), 1); + assert_eq!(diags[0].kind, "warning"); + assert!(diags[0].code.is_none()); + assert_eq!(diags[0].message, "unused import: `Foo`"); + assert_eq!(diags[0].file.as_deref(), Some("server/src/lib.rs")); + assert_eq!(diags[0].line, Some(3)); + } + + #[test] + fn parses_header_without_location() { + // Plain `error:` / `warning:` headers without a `-->` marker still + // produce a diagnostic — just with file/line as None. + let input = "error: could not compile `huskies` (bin \"huskies\") due to 3 previous errors"; + let diags = parse_diagnostics(input); + assert_eq!(diags.len(), 1); + assert_eq!(diags[0].kind, "error"); + assert!(diags[0].file.is_none()); + assert!(diags[0].line.is_none()); + } + + #[test] + fn ignores_non_diagnostic_lines() { + let input = " Checking huskies v0.10.4\n Compiling foo v0.1.0\n Finished dev profile\n"; + assert!(parse_diagnostics(input).is_empty()); + } + + #[test] + fn parses_three_compile_errors_from_real_cargo_output() { + // Regression for bug 886: real-world output from 864's failing run. + let input = " Checking huskies v0.10.4 (/workspace/server)\n\ +warning: unused imports: `ItemMetadata` and `OwnedItemMetadata`\n \ +--> server/src/db/mod.rs:25:5\n\ + |\n\ +25 | ItemMetadata, OwnedItemMetadata, delete_item, move_item_stage, next_item_number,\n \ +| ^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^\n\n\ +error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n \ +--> server/src/agents/pool/auto_assign/merge.rs:86:29\n\ + |\n\ +86 | crate::db::write_item_with_content(story_id, \"4_merge\", &updated);\n\n\ +error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n \ +--> server/src/agents/pool/pipeline/advance/helpers.rs:91:9\n\ + |\n\ +91 | crate::db::write_item_with_content(story_id, &stage, &updated);\n\n\ +error[E0061]: this function takes 4 arguments but 3 arguments were supplied\n \ +--> server/src/chat/commands/unblock.rs:103:13\n\ + |\n\ +103| crate::db::write_item_with_content(story_id, &stage, &updated);\n\n\ +For more information about this error, try `rustc --explain E0061`.\n\ +warning: `huskies` (bin \"huskies\") generated 1 warning\n\ +error: could not compile `huskies` (bin \"huskies\") due to 3 previous errors; 1 warning emitted\n"; + + let diags = parse_diagnostics(input); + let summary = summarise(&diags); + + // 3 compile errors + 1 unused-import warning + 1 generated-warning summary line + 1 final "could not compile" error + assert_eq!(summary.error_count, 4); + assert_eq!(summary.warning_count, 2); + + // The 3 main compile errors must be present with file + line. + let e0061: Vec<&Diagnostic> = + diags.iter().filter(|d| d.code.as_deref() == Some("E0061")).collect(); + assert_eq!(e0061.len(), 3); + let files: Vec> = e0061.iter().map(|d| d.file.as_deref()).collect(); + assert!(files.contains(&Some("server/src/agents/pool/auto_assign/merge.rs"))); + assert!(files.contains(&Some("server/src/agents/pool/pipeline/advance/helpers.rs"))); + assert!(files.contains(&Some("server/src/chat/commands/unblock.rs"))); + } +}