2026-04-27 02:04:04 +00:00
|
|
|
//! MCP shell script tools: run_tests / get_test_result / run_build / run_lint.
|
|
|
|
|
|
|
|
|
|
use serde_json::{Value, json};
|
2026-04-28 10:45:23 +00:00
|
|
|
use std::collections::HashMap;
|
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
use std::sync::{Mutex, OnceLock};
|
2026-04-27 02:04:04 +00:00
|
|
|
|
2026-03-22 19:07:07 +00:00
|
|
|
use crate::http::context::AppContext;
|
2026-04-24 21:12:03 +00:00
|
|
|
#[allow(unused_imports)]
|
2026-04-27 02:04:04 +00:00
|
|
|
use crate::service::shell::{extract_count, parse_test_counts, truncate_output};
|
|
|
|
|
|
|
|
|
|
use super::exec::validate_working_dir;
|
2026-03-22 19:07:07 +00:00
|
|
|
|
2026-04-11 22:43:31 +00:00
|
|
|
/// Hard ceiling on a test job's wall-clock runtime, in seconds (20 min);
/// `tool_run_tests` kills the child process once this is exceeded.
const TEST_TIMEOUT_SECS: u64 = 1200;

/// Maximum number of captured output lines returned to MCP callers; longer
/// output is passed through `truncate_output`.
const MAX_OUTPUT_LINES: usize = 100;
|
2026-03-22 19:07:07 +00:00
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
// ── In-flight process registry ───────────────────────────────────────────────
//
// Child process handles are ephemeral and cannot survive a server restart.
// Persistent state (status, output, timestamps) lives in the CRDT `test_jobs`
// collection. This module-level static tracks only the OS-level child process
// so we can kill it on restart or poll it during `get_test_result`.

/// OS-level handle for a test job spawned by `tool_run_tests`.
struct InFlightJob {
    // The spawned `bash script/test` child. Polled via `try_wait` in the
    // run_tests loop and killed when TEST_TIMEOUT_SECS elapses.
    child: std::process::Child,
}
|
|
|
|
|
|
|
|
|
|
static ACTIVE_JOBS: OnceLock<Mutex<HashMap<PathBuf, InFlightJob>>> = OnceLock::new();
|
|
|
|
|
|
|
|
|
|
fn active_jobs() -> &'static Mutex<HashMap<PathBuf, InFlightJob>> {
|
|
|
|
|
ACTIVE_JOBS.get_or_init(|| Mutex::new(HashMap::new()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Derive the CRDT key for a test job from its working directory.
///
/// The key is the final path component (the story ID or project directory
/// name), falling back to `"root"` when the path has no usable final
/// component, so CRDT entries stay human-readable and stable across path
/// changes.
fn story_key(working_dir: &Path) -> String {
    match working_dir.file_name().and_then(|n| n.to_str()) {
        Some(name) => name.to_string(),
        None => "root".to_string(),
    }
}
|
|
|
|
|
|
|
|
|
|
/// Current time as a Unix timestamp (seconds, f64) for CRDT fields.
///
/// Returns `0.0` in the (practically impossible) case where the system clock
/// reads before the Unix epoch, matching `Duration::default()` semantics.
fn unix_now() -> f64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs_f64(),
        Err(_) => 0.0,
    }
}
|
|
|
|
|
|
|
|
|
|
// ── run_tests ────────────────────────────────────────────────────────────────

/// MCP tool `run_tests`: run `script/test` in the target worktree and block
/// until it finishes, times out, or the caller attaches to an existing run.
///
/// Behaviour:
/// - Resolves the working directory from the optional `worktree_path` arg
///   (validated against the worktree sandbox) or the canonicalized project
///   root.
/// - If a job is already registered for this worktree, ATTACHES to it via
///   [`attach_to_in_flight_test_job`] instead of spawning a second process.
/// - Otherwise spawns `bash script/test` with piped stdout/stderr, writes a
///   "running" entry into the CRDT `test_jobs` collection, registers the
///   child in `active_jobs()`, and polls `try_wait` once per second until
///   exit or [`TEST_TIMEOUT_SECS`].
///
/// Returns a pretty-printed JSON object with `passed`, `exit_code`,
/// `timed_out`, `tests_passed`, `tests_failed`, and truncated `output`.
pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.services.agents.get_project_root(&ctx.state)?;

    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };

    let script_path = working_dir.join("script").join("test");
    if !script_path.exists() {
        return Err(format!("Test script not found: {}", script_path.display()));
    }

    let sid = story_key(&working_dir);

    // If a test job is already in flight for this worktree, ATTACH to it
    // rather than kill+respawn. This makes the agent system_prompt advice
    // ("if run_tests appears to time out, call run_tests again — it
    // attaches to the in-flight test job") actually true, and eliminates
    // the respawn-loop bug where MCP client-side timeouts (~60s) cause
    // agents to retry, killing the still-running cargo build each time
    // and never making progress. The original job's poll loop below
    // updates the CRDT on completion; attached callers just poll the CRDT.
    let already_running = {
        // Lock held only for the containment check — released before any await.
        let jobs = active_jobs().lock().map_err(|e| e.to_string())?;
        jobs.contains_key(&working_dir)
    };
    if already_running {
        crate::slog!(
            "[run_tests] Attaching to in-flight test job for {}",
            working_dir.display()
        );
        return attach_to_in_flight_test_job(&sid).await;
    }

    // Worktrees are isolated and may run cargo tests concurrently — no
    // cross-worktree serialisation. The only invariant enforced here is
    // "at most one test job per worktree at a time", which the
    // already_running check above gives us.

    // Spawn the test process with piped stdout/stderr so we can capture output.
    // Pipes are drained in background threads to prevent deadlock when the
    // child fills the 64KB OS pipe buffer.
    let mut child = std::process::Command::new("bash")
        .arg(&script_path)
        .current_dir(&working_dir)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| format!("Failed to spawn test script: {e}"))?;

    let pid = child.id();
    crate::slog!(
        "[run_tests] Started test job for {} (pid {})",
        working_dir.display(),
        pid
    );

    // Drain stdout/stderr in background threads so pipe buffers never fill.
    let mut stdout_handle = child.stdout.take().map(|mut r| {
        std::thread::spawn(move || {
            let mut s = String::new();
            std::io::Read::read_to_string(&mut r, &mut s).ok();
            s
        })
    });
    let mut stderr_handle = child.stderr.take().map(|mut r| {
        std::thread::spawn(move || {
            let mut s = String::new();
            std::io::Read::read_to_string(&mut r, &mut s).ok();
            s
        })
    });

    let started_at_unix = unix_now();

    // Persist "running" state in the CRDT so status survives a server restart.
    crate::crdt_state::write_test_job(&sid, "running", started_at_unix, None, None);

    // Register the child process in the in-flight map.
    {
        let mut jobs = active_jobs().lock().map_err(|e| e.to_string())?;
        jobs.insert(working_dir.clone(), InFlightJob { child });
    }

    // Block server-side, checking every second until done or timeout.
    let start = std::time::Instant::now();
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;

        // std::sync::Mutex is acquired AFTER the sleep and dropped before the
        // next iteration's sleep, so the guard is never held across an await.
        let mut jobs = active_jobs().lock().map_err(|e| e.to_string())?;
        let job = match jobs.get_mut(&working_dir) {
            Some(j) => j,
            None => return Err("Test job disappeared unexpectedly".to_string()),
        };

        match job.child.try_wait() {
            Ok(Some(status)) => {
                // Child exited — collect output and write final CRDT state.
                jobs.remove(&working_dir);
                let stdout = stdout_handle
                    .take()
                    .and_then(|h| h.join().ok())
                    .unwrap_or_default();
                let stderr = stderr_handle
                    .take()
                    .and_then(|h| h.join().ok())
                    .unwrap_or_default();
                let combined = format!("{stdout}{stderr}");
                let (tests_passed, tests_failed) = parse_test_counts(&combined);
                let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
                let passed = status.success();
                let exit_code = status.code().unwrap_or(-1);
                let crdt_status = if passed { "pass" } else { "fail" };
                crate::slog!(
                    "[run_tests] Test job for {} finished (pid {}, passed={})",
                    working_dir.display(),
                    pid,
                    passed
                );

                // Persist result in CRDT for post-restart visibility.
                crate::crdt_state::write_test_job(
                    &sid,
                    crdt_status,
                    started_at_unix,
                    Some(unix_now()),
                    Some(&truncated),
                );

                // Capture positive test evidence in the DB so the pipeline
                // advance salvage path (bug 645/668) can confirm the agent
                // ran passing tests before it died. Only written when running
                // in a story worktree (worktree_path arg provided).
                if passed && args.get("worktree_path").is_some() {
                    crate::db::write_content(&format!("{sid}:run_tests_ok"), "1");
                }

                return serde_json::to_string_pretty(&json!({
                    "passed": passed,
                    "exit_code": exit_code,
                    "timed_out": false,
                    "tests_passed": tests_passed,
                    "tests_failed": tests_failed,
                    "output": truncated,
                }))
                .map_err(|e| format!("Serialization error: {e}"));
            }
            Ok(None) => {
                // Still running — check timeout.
                if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
                    let _ = job.child.kill();
                    let _ = job.child.wait();
                    crate::slog!(
                        "[run_tests] Killed test job for {} (pid {}) after {}s timeout",
                        working_dir.display(),
                        pid,
                        TEST_TIMEOUT_SECS
                    );
                    jobs.remove(&working_dir);
                    // NOTE(review): the drain threads are not joined here, so
                    // partial output is dropped on timeout — appears deliberate
                    // (the timeout message replaces it in the CRDT).
                    let timeout_msg = format!("Test suite timed out after {TEST_TIMEOUT_SECS}s");
                    crate::crdt_state::write_test_job(
                        &sid,
                        "fail",
                        started_at_unix,
                        Some(unix_now()),
                        Some(&timeout_msg),
                    );
                    return serde_json::to_string_pretty(&json!({
                        "passed": false,
                        "exit_code": -1,
                        "timed_out": true,
                        "tests_passed": 0,
                        "tests_failed": 0,
                        "output": timeout_msg,
                    }))
                    .map_err(|e| format!("Serialization error: {e}"));
                }
            }
            Err(e) => {
                // try_wait itself failed — record the failure and bail out.
                jobs.remove(&working_dir);
                let msg = e.to_string();
                crate::crdt_state::write_test_job(
                    &sid,
                    "fail",
                    started_at_unix,
                    Some(unix_now()),
                    Some(&msg),
                );
                return Err(format!("Failed to check child status: {e}"));
            }
        }
    }
}
|
|
|
|
|
|
2026-05-12 14:22:35 +01:00
|
|
|
/// Poll the CRDT `test_jobs` collection for `sid` until the entry transitions
|
|
|
|
|
/// out of "running" or we hit [`TEST_TIMEOUT_SECS`].
|
|
|
|
|
///
|
|
|
|
|
/// Used by `run_tests` to attach to a job that another caller already spawned
|
|
|
|
|
/// for the same worktree, so concurrent callers all observe the same single
|
|
|
|
|
/// `cargo test` run rather than racing to kill+respawn.
|
|
|
|
|
async fn attach_to_in_flight_test_job(sid: &str) -> Result<String, String> {
|
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
|
loop {
|
|
|
|
|
match crate::crdt_state::read_test_job(sid) {
|
|
|
|
|
None => {
|
|
|
|
|
// The job entry disappeared from the CRDT — most likely the
|
|
|
|
|
// spawning task lost the race between insert/read. Treat as a
|
|
|
|
|
// transient error so the agent can retry.
|
|
|
|
|
return Err("In-flight test job vanished from CRDT before completing".to_string());
|
|
|
|
|
}
|
|
|
|
|
Some(view) if view.status == "pass" || view.status == "fail" => {
|
|
|
|
|
return format_crdt_result(&view);
|
|
|
|
|
}
|
|
|
|
|
Some(_) => {
|
|
|
|
|
// Still "running" — wait and re-poll.
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
|
|
|
|
|
return Err(format!(
|
|
|
|
|
"Attached test job for '{sid}' did not complete within {TEST_TIMEOUT_SECS}s"
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
// ── get_test_result ──────────────────────────────────────────────────────────

/// How long `get_test_result` blocks server-side before returning "running".
/// This prevents agents from burning turns polling every 2 seconds. Also the
/// number of 1-second poll iterations performed in `tool_get_test_result`.
const TEST_POLL_BLOCK_SECS: u64 = 20;
|
|
|
|
|
|
2026-04-11 22:00:05 +00:00
|
|
|
/// Check on a running test job and return results if complete.
|
|
|
|
|
///
|
2026-04-28 10:45:23 +00:00
|
|
|
/// Reads persistent state from the CRDT `test_jobs` collection. Blocks for up
|
|
|
|
|
/// to [`TEST_POLL_BLOCK_SECS`], checking the CRDT every second. Returns
|
|
|
|
|
/// immediately when the test finishes, or after the blocking period with
|
|
|
|
|
/// `{"status": "running"}`.
|
|
|
|
|
///
|
|
|
|
|
/// After a server restart the CRDT entry written by `run_tests` remains
|
|
|
|
|
/// visible, so callers can observe the "running" status even when the original
|
|
|
|
|
/// child process is gone.
|
2026-04-27 02:04:04 +00:00
|
|
|
pub(crate) async fn tool_get_test_result(args: &Value, ctx: &AppContext) -> Result<String, String> {
|
2026-04-25 15:04:37 +00:00
|
|
|
let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
|
2026-04-11 22:00:05 +00:00
|
|
|
|
|
|
|
|
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
|
|
|
|
|
Some(wt) => validate_working_dir(wt, ctx)?,
|
|
|
|
|
None => project_root
|
|
|
|
|
.canonicalize()
|
|
|
|
|
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
|
|
|
|
|
};
|
|
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
let sid = story_key(&working_dir);
|
|
|
|
|
|
|
|
|
|
// Poll CRDT for up to TEST_POLL_BLOCK_SECS, returning as soon as the job
|
|
|
|
|
// transitions from "running" to a terminal state.
|
2026-04-11 22:29:38 +00:00
|
|
|
for _ in 0..TEST_POLL_BLOCK_SECS {
|
2026-04-28 10:45:23 +00:00
|
|
|
match crate::crdt_state::read_test_job(&sid) {
|
|
|
|
|
None => {
|
2026-04-11 22:29:38 +00:00
|
|
|
return Err(
|
|
|
|
|
"No test job running for this worktree. Call run_tests first.".to_string(),
|
|
|
|
|
);
|
|
|
|
|
}
|
2026-04-28 10:45:23 +00:00
|
|
|
Some(view) if view.status == "pass" || view.status == "fail" => {
|
|
|
|
|
return format_crdt_result(&view);
|
2026-04-11 22:00:05 +00:00
|
|
|
}
|
2026-04-28 10:45:23 +00:00
|
|
|
Some(_) => {
|
|
|
|
|
// Still "running" — wait one second and re-check.
|
2026-04-11 22:00:05 +00:00
|
|
|
}
|
2026-04-07 14:39:47 +00:00
|
|
|
}
|
2026-04-28 10:45:23 +00:00
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
2026-04-07 14:39:47 +00:00
|
|
|
}
|
2026-04-11 22:00:05 +00:00
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
// Still running after the blocking window — return status so the caller
|
|
|
|
|
// can decide whether to poll again or give up.
|
|
|
|
|
match crate::crdt_state::read_test_job(&sid) {
|
|
|
|
|
None => Err("No test job running for this worktree. Call run_tests first.".to_string()),
|
|
|
|
|
Some(view) if view.status == "pass" || view.status == "fail" => format_crdt_result(&view),
|
|
|
|
|
Some(view) => {
|
|
|
|
|
let elapsed = unix_now() - view.started_at;
|
|
|
|
|
serde_json::to_string_pretty(&json!({
|
|
|
|
|
"status": "running",
|
|
|
|
|
"elapsed_secs": elapsed.max(0.0) as u64,
|
|
|
|
|
}))
|
|
|
|
|
.map_err(|e| format!("Serialization error: {e}"))
|
|
|
|
|
}
|
2026-04-11 22:00:05 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
/// Build a JSON result object from a completed CRDT test-job view.
|
|
|
|
|
///
|
|
|
|
|
/// `exit_code` is approximated from status (0 = pass, 1 = fail) because the
|
|
|
|
|
/// CRDT schema does not store raw exit codes.
|
|
|
|
|
fn format_crdt_result(view: &crate::crdt_state::TestJobView) -> Result<String, String> {
|
|
|
|
|
let passed = view.status == "pass";
|
|
|
|
|
let output = view.output.clone().unwrap_or_default();
|
|
|
|
|
let (tests_passed, tests_failed) = parse_test_counts(&output);
|
|
|
|
|
serde_json::to_string_pretty(&json!({
|
|
|
|
|
"passed": passed,
|
|
|
|
|
"exit_code": if passed { 0 } else { 1 },
|
|
|
|
|
"timed_out": false,
|
|
|
|
|
"tests_passed": tests_passed,
|
|
|
|
|
"tests_failed": tests_failed,
|
|
|
|
|
"output": output,
|
|
|
|
|
}))
|
|
|
|
|
.map_err(|e| format!("Serialization error: {e}"))
|
2026-04-11 22:00:05 +00:00
|
|
|
}
|
|
|
|
|
|
2026-04-28 10:45:23 +00:00
|
|
|
// ── run_build / run_lint ─────────────────────────────────────────────────────
|
|
|
|
|
|
2026-04-12 13:16:45 +00:00
|
|
|
/// Shared implementation for run_build and run_lint: runs a named script
|
|
|
|
|
/// (`script/<name>`) in the working directory, captures output, and returns
|
|
|
|
|
async fn run_script_tool(
|
|
|
|
|
script_name: &str,
|
|
|
|
|
args: &Value,
|
|
|
|
|
ctx: &AppContext,
|
|
|
|
|
) -> Result<String, String> {
|
2026-04-25 15:04:37 +00:00
|
|
|
let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
|
2026-04-12 13:16:45 +00:00
|
|
|
|
|
|
|
|
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
|
|
|
|
|
Some(wt) => validate_working_dir(wt, ctx)?,
|
|
|
|
|
None => project_root
|
|
|
|
|
.canonicalize()
|
|
|
|
|
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let script_path = working_dir.join("script").join(script_name);
|
|
|
|
|
if !script_path.exists() {
|
|
|
|
|
return Err(format!(
|
|
|
|
|
"{script_name} script not found: {}",
|
|
|
|
|
script_path.display()
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result = tokio::task::spawn_blocking({
|
|
|
|
|
let script = script_path.clone();
|
|
|
|
|
let dir = working_dir.clone();
|
|
|
|
|
move || {
|
|
|
|
|
std::process::Command::new("bash")
|
|
|
|
|
.arg(&script)
|
|
|
|
|
.current_dir(&dir)
|
|
|
|
|
.output()
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("Task join error: {e}"))?
|
|
|
|
|
.map_err(|e| format!("Failed to spawn {script_name} script: {e}"))?;
|
|
|
|
|
|
|
|
|
|
let stdout = String::from_utf8_lossy(&result.stdout);
|
|
|
|
|
let stderr = String::from_utf8_lossy(&result.stderr);
|
|
|
|
|
let combined = format!("{stdout}{stderr}");
|
|
|
|
|
let exit_code = result.status.code().unwrap_or(-1);
|
2026-04-30 15:06:02 +00:00
|
|
|
let verbose = args
|
|
|
|
|
.get("verbose")
|
|
|
|
|
.and_then(|v| v.as_bool())
|
|
|
|
|
.unwrap_or(false);
|
|
|
|
|
|
|
|
|
|
// When verbose, fall back to the legacy truncated output so callers
|
|
|
|
|
// who actually want raw text still get a bounded payload.
|
2026-04-30 22:15:37 +00:00
|
|
|
let mut payload =
|
|
|
|
|
build_diagnostic_response(result.status.success(), exit_code, &combined, verbose);
|
2026-04-30 15:06:02 +00:00
|
|
|
if verbose {
|
|
|
|
|
payload["output"] = serde_json::json!(truncate_output(&combined, MAX_OUTPUT_LINES));
|
|
|
|
|
}
|
|
|
|
|
serde_json::to_string_pretty(&payload).map_err(|e| format!("Serialization error: {e}"))
|
2026-04-12 13:16:45 +00:00
|
|
|
}
|
|
|
|
|
|
2026-04-27 02:04:04 +00:00
|
|
|
/// MCP tool `run_build`: execute `script/build` in the target worktree (or
/// the project root) and return the parsed diagnostic payload as JSON.
pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result<String, String> {
    run_script_tool("build", args, ctx).await
}
|
|
|
|
|
|
2026-04-27 02:04:04 +00:00
|
|
|
/// MCP tool `run_lint`: execute `script/lint` in the target worktree (or
/// the project root) and return the parsed diagnostic payload as JSON.
pub(crate) async fn tool_run_lint(args: &Value, ctx: &AppContext) -> Result<String, String> {
    run_script_tool("lint", args, ctx).await
}
|
|
|
|
|
|
2026-04-29 13:24:10 +00:00
|
|
|
// ── run_check ────────────────────────────────────────────────────────────────

/// Fast compile-only check (`script/check`).
///
/// Runs `script/check` (expected to be `cargo check --tests --workspace`) in
/// the agent's worktree and returns parsed `errors` / `warnings` arrays plus
/// a one-line summary via [`build_diagnostic_response`]. The raw combined
/// stdout + stderr is included — untruncated — only when the caller passes
/// `verbose: true`; the default response carries structured diagnostics only,
/// keeping it within the MCP token cap (bug 886).
pub(crate) async fn tool_run_check(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.services.agents.get_project_root(&ctx.state)?;

    // Resolve working directory: sandboxed worktree arg, else project root.
    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };

    let script_path = working_dir.join("script").join("check");
    if !script_path.exists() {
        return Err(format!(
            "script/check not found: {}. Create script/check (e.g. `cargo check --tests --workspace`) to enable fast compile feedback.",
            script_path.display()
        ));
    }

    // Run the script on a blocking task; `output()` blocks until exit.
    let result = tokio::task::spawn_blocking({
        let script = script_path.clone();
        let dir = working_dir.clone();
        move || {
            std::process::Command::new("bash")
                .arg(&script)
                .current_dir(&dir)
                .output()
        }
    })
    .await
    .map_err(|e| format!("Task join error: {e}"))?
    .map_err(|e| format!("Failed to spawn script/check: {e}"))?;

    let stdout = String::from_utf8_lossy(&result.stdout);
    let stderr = String::from_utf8_lossy(&result.stderr);
    let combined = format!("{stdout}{stderr}");
    let exit_code = result.status.code().unwrap_or(-1);
    let verbose = args
        .get("verbose")
        .and_then(|v| v.as_bool())
        .unwrap_or(false);

    serde_json::to_string_pretty(&build_diagnostic_response(
        result.status.success(),
        exit_code,
        &combined,
        verbose,
    ))
    .map_err(|e| format!("Serialization error: {e}"))
}
|
2026-04-29 13:24:10 +00:00
|
|
|
|
2026-04-30 15:06:02 +00:00
|
|
|
/// Shared response builder for tools that wrap cargo / rustc output. By
|
|
|
|
|
/// default returns parsed `errors` + `warnings` arrays plus a one-line
|
|
|
|
|
/// summary; the raw `output` is only included when `verbose` is true. This
|
|
|
|
|
/// keeps the MCP response under the token cap for runs with many errors
|
|
|
|
|
/// (bug 886).
|
|
|
|
|
fn build_diagnostic_response(
|
|
|
|
|
passed: bool,
|
|
|
|
|
exit_code: i32,
|
|
|
|
|
raw_output: &str,
|
|
|
|
|
verbose: bool,
|
|
|
|
|
) -> serde_json::Value {
|
|
|
|
|
use crate::service::shell::parse_diagnostics::{parse_diagnostics, summarise};
|
|
|
|
|
let diags = parse_diagnostics(raw_output);
|
|
|
|
|
let summary = summarise(&diags);
|
|
|
|
|
let errors: Vec<&_> = diags.iter().filter(|d| d.kind == "error").collect();
|
|
|
|
|
let warnings: Vec<&_> = diags.iter().filter(|d| d.kind == "warning").collect();
|
|
|
|
|
let mut payload = json!({
|
|
|
|
|
"passed": passed,
|
2026-04-29 13:24:10 +00:00
|
|
|
"exit_code": exit_code,
|
2026-04-30 15:06:02 +00:00
|
|
|
"errors": errors,
|
|
|
|
|
"warnings": warnings,
|
|
|
|
|
"summary": format!(
|
|
|
|
|
"{} error(s), {} warning(s)",
|
|
|
|
|
summary.error_count, summary.warning_count
|
|
|
|
|
),
|
|
|
|
|
});
|
|
|
|
|
if verbose {
|
|
|
|
|
payload["output"] = json!(raw_output);
|
|
|
|
|
}
|
|
|
|
|
payload
|
2026-04-29 13:24:10 +00:00
|
|
|
}
|
|
|
|
|
|
2026-03-22 19:07:07 +00:00
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
2026-03-28 19:47:59 +00:00
|
|
|
use crate::http::test_helpers::test_ctx;
|
2026-04-07 14:39:47 +00:00
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_tests_missing_script_returns_error() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
// No script/test in tmp — should return Err
|
|
|
|
|
let result = tool_run_tests(&json!({}), &ctx).await;
|
2026-04-13 14:07:08 +00:00
|
|
|
assert!(
|
|
|
|
|
result.is_err(),
|
|
|
|
|
"expected error for missing script: {result:?}"
|
|
|
|
|
);
|
2026-04-07 14:39:47 +00:00
|
|
|
assert!(
|
|
|
|
|
result.unwrap_err().contains("not found"),
|
|
|
|
|
"error should mention 'not found'"
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_tests_passes_when_script_exits_zero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("test");
|
2026-04-12 12:30:10 +00:00
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap();
|
2026-04-07 14:39:47 +00:00
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
|
|
|
|
|
assert_eq!(parsed["passed"], true);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_tests_fails_when_script_exits_nonzero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("test");
|
2026-04-12 12:30:10 +00:00
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap();
|
2026-04-07 14:39:47 +00:00
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
|
|
|
|
|
assert_eq!(parsed["passed"], false);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 1);
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-12 14:22:35 +01:00
|
|
|
    #[tokio::test]
    async fn tool_run_tests_concurrent_calls_attach_to_single_job() {
        // Bug 903 regression: a second `run_tests` call for the same worktree
        // while the first is still in flight must ATTACH to that job (i.e.
        // observe its result) rather than kill+respawn. Pre-fix, every call
        // killed the prior `cargo test` child and spawned a fresh one,
        // creating a respawn loop driven by the agent's MCP-timeout retries.
        let tmp = tempfile::tempdir().unwrap();
        let script_dir = tmp.path().join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let script_path = script_dir.join("test");
        // Slow enough that the second call definitely overlaps the first.
        std::fs::write(
            &script_path,
            "#!/usr/bin/env bash\nsleep 2\necho 'test result: ok. 0 passed'\nexit 0\n",
        )
        .unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
        }

        let ctx = test_ctx(tmp.path());
        crate::crdt_state::init_for_test();
        crate::db::ensure_content_store();

        let start = std::time::Instant::now();
        let ctx_a = ctx.clone();
        let ctx_b = ctx.clone();
        let t1 = tokio::spawn(async move { tool_run_tests(&json!({}), &ctx_a).await });
        // Give T1 time to spawn its child + insert into active_jobs before T2
        // arrives, so T2 deterministically takes the attach path.
        tokio::time::sleep(std::time::Duration::from_millis(200)).await;
        let t2 = tokio::spawn(async move { tool_run_tests(&json!({}), &ctx_b).await });

        let r1 = t1.await.unwrap().unwrap();
        let r2 = t2.await.unwrap().unwrap();
        let elapsed = start.elapsed().as_secs_f64();

        let p1: serde_json::Value = serde_json::from_str(&r1).unwrap();
        let p2: serde_json::Value = serde_json::from_str(&r2).unwrap();
        assert_eq!(p1["passed"], true, "first call must succeed: {p1}");
        assert_eq!(p2["passed"], true, "second call must succeed: {p2}");

        // If the second call had killed + respawned, total elapsed would be
        // ~4s (two 2s runs serially). With attach, both calls observe the
        // SAME single 2s run, so elapsed stays close to 2s.
        // NOTE(review): wall-clock threshold — may need extra headroom on a
        // heavily loaded CI host.
        assert!(
            elapsed < 3.5,
            "concurrent calls must share one job (~2s), not respawn (~4s); elapsed={elapsed:.2}s"
        );
    }
|
|
|
|
|
|
2026-04-07 14:39:47 +00:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let wt_dir = tmp.path().join(".huskies").join("worktrees");
|
|
|
|
|
std::fs::create_dir_all(&wt_dir).unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
// tmp.path() itself is outside worktrees → should fail validation
|
2026-04-13 14:07:08 +00:00
|
|
|
let result = tool_run_tests(
|
|
|
|
|
&json!({"worktree_path": tmp.path().to_str().unwrap()}),
|
|
|
|
|
&ctx,
|
|
|
|
|
)
|
|
|
|
|
.await;
|
2026-04-07 14:39:47 +00:00
|
|
|
assert!(result.is_err());
|
|
|
|
|
assert!(
|
|
|
|
|
result.unwrap_err().contains("worktrees"),
|
|
|
|
|
"expected sandbox error"
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-12 13:16:45 +00:00
|
|
|
// ── tool_run_build / tool_run_lint ────────────────────────────────
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_build_missing_script_returns_error() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_build(&json!({}), &ctx).await;
|
|
|
|
|
assert!(result.is_err());
|
|
|
|
|
assert!(result.unwrap_err().contains("not found"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_build_passes_when_script_exits_zero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("build");
|
|
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap();
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_build(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
assert_eq!(parsed["passed"], true);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_build_worktree_path_must_be_inside_worktrees() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let wt_dir = tmp.path().join(".huskies").join("worktrees");
|
|
|
|
|
std::fs::create_dir_all(&wt_dir).unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
2026-04-13 14:07:08 +00:00
|
|
|
let result = tool_run_build(
|
|
|
|
|
&json!({"worktree_path": tmp.path().to_str().unwrap()}),
|
|
|
|
|
&ctx,
|
|
|
|
|
)
|
|
|
|
|
.await;
|
2026-04-12 13:16:45 +00:00
|
|
|
assert!(result.is_err());
|
|
|
|
|
assert!(result.unwrap_err().contains("worktrees"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_lint_missing_script_returns_error() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_lint(&json!({}), &ctx).await;
|
|
|
|
|
assert!(result.is_err());
|
|
|
|
|
assert!(result.unwrap_err().contains("not found"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_lint_passes_when_script_exits_zero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("lint");
|
|
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap();
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_lint(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
assert_eq!(parsed["passed"], true);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_lint_fails_when_script_exits_nonzero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("lint");
|
|
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap();
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_lint(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
assert_eq!(parsed["passed"], false);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// ── tool_run_check ────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_check_missing_script_returns_error() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_check(&json!({}), &ctx).await;
|
|
|
|
|
assert!(result.is_err());
|
|
|
|
|
let err = result.unwrap_err();
|
|
|
|
|
assert!(
|
|
|
|
|
err.contains("script/check"),
|
|
|
|
|
"error should name script/check: {err}"
|
|
|
|
|
);
|
|
|
|
|
assert!(
|
|
|
|
|
err.contains("not found"),
|
|
|
|
|
"error should say not found: {err}"
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2026-04-30 15:06:02 +00:00
|
|
|
async fn tool_run_check_returns_parsed_errors_on_nonzero_exit() {
|
|
|
|
|
// Bug 886: rather than dumping the entire cargo log into `output`
|
|
|
|
|
// (which routinely exceeds the MCP token cap), tool_run_check now
|
|
|
|
|
// parses errors / warnings into structured arrays. Raw output is
|
|
|
|
|
// available behind `verbose: true`.
|
2026-04-29 13:24:10 +00:00
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("check");
|
|
|
|
|
std::fs::write(
|
|
|
|
|
&script_path,
|
2026-04-30 15:06:02 +00:00
|
|
|
"#!/usr/bin/env bash\nfor i in $(seq 1 150); do echo \"error[E$i]: compile error on line $i\"; done\nexit 1\n",
|
2026-04-29 13:24:10 +00:00
|
|
|
)
|
|
|
|
|
.unwrap();
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
2026-04-30 15:06:02 +00:00
|
|
|
|
|
|
|
|
// Default mode: no `output` field, structured `errors` array, summary.
|
2026-04-29 13:24:10 +00:00
|
|
|
let result = tool_run_check(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
assert_eq!(parsed["passed"], false);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 1);
|
2026-04-30 15:06:02 +00:00
|
|
|
assert!(
|
|
|
|
|
parsed.get("output").map(|v| v.is_null()).unwrap_or(true),
|
|
|
|
|
"default mode must not include raw `output`"
|
|
|
|
|
);
|
|
|
|
|
let errors = parsed["errors"].as_array().expect("errors array");
|
|
|
|
|
assert_eq!(errors.len(), 150, "all 150 errors should be parsed");
|
|
|
|
|
assert_eq!(
|
|
|
|
|
errors[0]["code"].as_str(),
|
|
|
|
|
Some("E1"),
|
|
|
|
|
"first error code should be parsed"
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
errors[149]["code"].as_str(),
|
|
|
|
|
Some("E150"),
|
|
|
|
|
"last error code should be parsed"
|
|
|
|
|
);
|
|
|
|
|
let summary = parsed["summary"].as_str().expect("summary string");
|
2026-04-30 22:15:37 +00:00
|
|
|
assert!(
|
|
|
|
|
summary.contains("150 error"),
|
|
|
|
|
"summary mentions error count: {summary}"
|
|
|
|
|
);
|
2026-04-30 15:06:02 +00:00
|
|
|
// Default response should be small even with 150 errors.
|
|
|
|
|
assert!(
|
|
|
|
|
result.len() < 50_000,
|
|
|
|
|
"default response should be compact (was {} bytes)",
|
|
|
|
|
result.len()
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Verbose mode: raw output is included.
|
2026-04-30 22:15:37 +00:00
|
|
|
let result_v = tool_run_check(&json!({"verbose": true}), &ctx)
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
2026-04-30 15:06:02 +00:00
|
|
|
let parsed_v: serde_json::Value = serde_json::from_str(&result_v).unwrap();
|
2026-04-30 22:15:37 +00:00
|
|
|
let output = parsed_v["output"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.expect("verbose includes output");
|
2026-04-30 15:06:02 +00:00
|
|
|
assert!(output.contains("error[E1]"), "verbose contains first line");
|
|
|
|
|
assert!(output.contains("error[E150]"), "verbose contains last line");
|
2026-04-29 13:24:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn tool_run_check_passes_when_script_exits_zero() {
|
|
|
|
|
let tmp = tempfile::tempdir().unwrap();
|
|
|
|
|
let script_dir = tmp.path().join("script");
|
|
|
|
|
std::fs::create_dir_all(&script_dir).unwrap();
|
|
|
|
|
let script_path = script_dir.join("check");
|
|
|
|
|
std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap();
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
|
{
|
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
|
|
|
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
|
|
|
|
}
|
|
|
|
|
let ctx = test_ctx(tmp.path());
|
|
|
|
|
let result = tool_run_check(&json!({}), &ctx).await.unwrap();
|
|
|
|
|
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
|
|
|
|
assert_eq!(parsed["passed"], true);
|
|
|
|
|
assert_eq!(parsed["exit_code"], 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// ── truncate_output ───────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
#[test]
fn truncate_output_short_text_unchanged() {
    // Input with fewer lines than the limit passes through untouched.
    let input = "line1\nline2\nline3";
    assert_eq!(truncate_output(input, 10), input);
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn truncate_output_long_text_keeps_tail() {
    // Truncation keeps the most recent lines (the tail) and annotates
    // how many earlier lines were dropped.
    let all_lines: Vec<String> = (1..=200).map(|i| format!("line {i}")).collect();
    let joined = all_lines.join("\n");

    let truncated = truncate_output(&joined, 50);

    assert!(
        truncated.contains("line 200"),
        "should keep last line: {truncated}"
    );
    assert!(
        truncated.contains("omitted"),
        "should note omitted lines: {truncated}"
    );
    assert!(
        !truncated.contains("line 1\n"),
        "should not keep first line: {truncated}"
    );
}
|
|
|
|
|
|
|
|
|
|
// ── parse_test_counts ─────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
#[test]
fn parse_test_counts_extracts_passed_and_failed() {
    // Counts are summed across multiple `test result:` lines.
    let log = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
    let (passed, failed) = parse_test_counts(log);
    assert_eq!(passed, 7);
    assert_eq!(failed, 3);
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn parse_test_counts_no_results_returns_zeros() {
    // Output with no `test result:` lines yields (0, 0) rather than an error.
    let (passed, failed) = parse_test_counts("no test output here");
    assert_eq!((passed, failed), (0, 0));
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn extract_count_finds_number_before_label() {
    // The number immediately preceding the label is extracted; a label
    // with no leading number yields None.
    assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5));
    assert_eq!(extract_count("0 failed", "failed"), Some(0));
    assert_eq!(extract_count("no number here passed", "passed"), None);
}
|
|
|
|
}
|