server/src/http/mcp/shell_tools.rs

use crate::http::context::AppContext;
use bytes::Bytes;
use futures::StreamExt;
use poem::{Body, Response};
use serde_json::{json, Value};
use std::path::PathBuf;

/// Timeout, in seconds, used by `run_command` when the caller passes no `timeout` argument.
const DEFAULT_TIMEOUT_SECS: u64 = 120;
/// Upper bound, in seconds, that a `run_command` timeout is clamped to.
const MAX_TIMEOUT_SECS: u64 = 600;
/// Seconds a test job may run before it is killed and reported as timed out.
const TEST_TIMEOUT_SECS: u64 = 1200;
/// Maximum number of trailing output lines kept in a test job result.
const MAX_OUTPUT_LINES: usize = 100;

/// Patterns that are unconditionally blocked regardless of context.
///
/// Each entry is matched as a plain substring of the whitespace-normalised
/// command, so `"rm -rf /"` also rejects `rm -rf` on any absolute path.
static BLOCKED_PATTERNS: &[&str] = &[
    "rm -rf /",
    "rm -fr /",
    "rm -rf /*",
    "rm -fr /*",
    "rm --no-preserve-root",
    ":(){ :|:& };:",
    "> /dev/sda",
    "dd if=/dev",
];

/// Binaries that are unconditionally blocked.
///
/// Compared against the file name of the first token of every command
/// segment, so both `sudo` and `/usr/bin/sudo` are rejected.
static BLOCKED_BINARIES: &[&str] = &[
    "sudo",
    "su",
    "shutdown",
    "reboot",
    "halt",
    "poweroff",
    "mkfs",
];

/// Characters that start a new command inside a shell line
/// (`;`, `|`, `||`, `&`, `&&`, and newlines).
const COMMAND_SEPARATORS: &[char] = &[';', '|', '&', '\n'];

/// Returns an error message if the command matches a blocked pattern or binary.
///
/// Two checks are applied, in order:
///
/// 1. Whitespace runs are collapsed to single spaces and the result is
///    searched for every entry of [`BLOCKED_PATTERNS`], so `rm   -rf   /`
///    is caught just like `rm -rf /`.
/// 2. The command is split on [`COMMAND_SEPARATORS`] and the first token of
///    each segment is compared (by file name) against [`BLOCKED_BINARIES`],
///    so `cd x && sudo ls` is caught just like `sudo ls`.
///
/// This is a best-effort textual filter, not a shell parser: quoting is not
/// understood, so a separator inside a quoted string still starts a new
/// segment. Returns `None` when nothing matched.
fn is_dangerous(command: &str) -> Option<String> {
    // Collapse whitespace so extra spaces/tabs cannot slip past the substring patterns.
    let normalized = command.split_whitespace().collect::<Vec<_>>().join(" ");

    if let Some(pattern) = BLOCKED_PATTERNS
        .iter()
        .copied()
        .find(|pattern| normalized.contains(*pattern))
    {
        return Some(format!(
            "Command blocked: dangerous pattern '{pattern}' detected"
        ));
    }

    // Check the leading token of every chained/piped command, not just the first one.
    command
        .split(COMMAND_SEPARATORS)
        .filter_map(|segment| segment.split_whitespace().next())
        .map(|token| {
            std::path::Path::new(token)
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or(token)
        })
        .find(|binary| BLOCKED_BINARIES.contains(binary))
        .map(|binary| format!("Command blocked: '{binary}' is not permitted"))
}

/// Checks that `working_dir` is an absolute, existing path located under the
/// project's `.huskies/worktrees/` directory.
///
/// Both the requested directory and the worktrees root are canonicalized
/// before the prefix comparison. On success the canonical form of
/// `working_dir` is returned; otherwise the error string describes the first
/// check that failed.
fn validate_working_dir(working_dir: &str, ctx: &AppContext) -> Result<PathBuf, String> {
    let requested = std::path::Path::new(working_dir);

    if requested.is_relative() {
        return Err("working_dir must be an absolute path".to_string());
    }
    if !requested.exists() {
        return Err(format!("working_dir does not exist: {working_dir}"));
    }

    let sandbox_root = ctx
        .agents
        .get_project_root(&ctx.state)?
        .join(".huskies")
        .join("worktrees");

    let resolved = requested
        .canonicalize()
        .map_err(|e| format!("Cannot canonicalize working_dir: {e}"))?;

    // Without a worktrees directory there is no sandbox to be inside of.
    if !sandbox_root.exists() {
        return Err("No worktrees directory found in project".to_string());
    }
    let resolved_root = sandbox_root
        .canonicalize()
        .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))?;

    if resolved.starts_with(&resolved_root) {
        Ok(resolved)
    } else {
        Err(format!(
            "working_dir must be inside .huskies/worktrees/. Got: {working_dir}"
        ))
    }
}

/// Regular (non-SSE) run_command: runs the bash command to completion and
/// returns stdout, stderr, exit_code, and whether it timed out.
///
/// Arguments read from `args`:
/// - `command` (required string): passed to `bash -c`.
/// - `working_dir` (required string): must pass [`validate_working_dir`].
/// - `timeout` (optional integer, seconds): defaults to
///   `DEFAULT_TIMEOUT_SECS` and is clamped to `MAX_TIMEOUT_SECS`.
///
/// Returns a pretty-printed JSON object with `stdout`, `stderr`, `exit_code`
/// and `timed_out`. On timeout the object has `exit_code: -1` and
/// `timed_out: true`.
///
/// # Errors
/// Returns `Err` when an argument is missing, the command is rejected by
/// [`is_dangerous`], the working directory is not allowed, or the process
/// cannot be started.
pub(super) async fn tool_run_command(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let command = args
        .get("command")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: command")?;

    let working_dir = args
        .get("working_dir")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: working_dir")?;

    let timeout_secs = args
        .get("timeout")
        .and_then(|v| v.as_u64())
        .unwrap_or(DEFAULT_TIMEOUT_SECS)
        .min(MAX_TIMEOUT_SECS);

    if let Some(reason) = is_dangerous(command) {
        return Err(reason);
    }

    let canonical_dir = validate_working_dir(working_dir, ctx)?;

    // `kill_on_drop` makes the timeout effective: when the timeout fires the
    // output future is dropped and the child is killed, instead of running on
    // unattended in the background.
    let result = tokio::time::timeout(
        std::time::Duration::from_secs(timeout_secs),
        tokio::process::Command::new("bash")
            .arg("-c")
            .arg(command)
            .current_dir(&canonical_dir)
            // Never let the command read from the server's own stdin.
            .stdin(std::process::Stdio::null())
            .kill_on_drop(true)
            .output(),
    )
    .await;

    match result {
        Err(_) => {
            // timed out
            serde_json::to_string_pretty(&json!({
                "stdout": "",
                "stderr": format!("Command timed out after {timeout_secs}s"),
                "exit_code": -1,
                "timed_out": true,
            }))
            .map_err(|e| format!("Serialization error: {e}"))
        }
        Ok(Err(e)) => Err(format!("Failed to execute command: {e}")),
        Ok(Ok(output)) => {
            serde_json::to_string_pretty(&json!({
                "stdout": String::from_utf8_lossy(&output.stdout),
                "stderr": String::from_utf8_lossy(&output.stderr),
                // A process terminated by a signal has no exit code; report -1.
                "exit_code": output.status.code().unwrap_or(-1),
                "timed_out": false,
            }))
            .map_err(|e| format!("Serialization error: {e}"))
        }
    }
}

/// SSE streaming run_command: spawns the process and emits stdout/stderr lines
/// as JSON-RPC notifications, then a final response with exit_code.
///
/// Arguments are read from `params["arguments"]` (`command`, `working_dir`,
/// optional `timeout` in seconds, defaulting to `DEFAULT_TIMEOUT_SECS` and
/// clamped to `MAX_TIMEOUT_SECS`). A missing argument, a command rejected by
/// [`is_dangerous`], or a rejected working directory produces a single
/// JSON-RPC error response with code `-32602`.
///
/// Otherwise the response body is an event stream of `data: <json>\n\n`
/// frames:
/// - one `notifications/tools/progress` notification per output line, tagged
///   with `"stream": "stdout"` or `"stream": "stderr"`;
/// - a final JSON-RPC success response whose text content is a JSON summary
///   with `exit_code` and `timed_out`.
///
/// If the process cannot be spawned, the stream consists of a single success
/// response with `isError: true`.
pub(super) fn handle_run_command_sse(
    id: Option<Value>,
    params: &Value,
    ctx: &AppContext,
) -> Response {
    use super::{to_sse_response, JsonRpcResponse};

    let args = params.get("arguments").cloned().unwrap_or(json!({}));

    let command = match args.get("command").and_then(|v| v.as_str()) {
        Some(c) => c.to_string(),
        None => {
            return to_sse_response(JsonRpcResponse::error(
                id,
                -32602,
                "Missing required argument: command".into(),
            ))
        }
    };

    let working_dir = match args.get("working_dir").and_then(|v| v.as_str()) {
        Some(d) => d.to_string(),
        None => {
            return to_sse_response(JsonRpcResponse::error(
                id,
                -32602,
                "Missing required argument: working_dir".into(),
            ))
        }
    };

    let timeout_secs = args
        .get("timeout")
        .and_then(|v| v.as_u64())
        .unwrap_or(DEFAULT_TIMEOUT_SECS)
        .min(MAX_TIMEOUT_SECS);

    if let Some(reason) = is_dangerous(&command) {
        return to_sse_response(JsonRpcResponse::error(id, -32602, reason));
    }

    let canonical_dir = match validate_working_dir(&working_dir, ctx) {
        Ok(d) => d,
        Err(e) => return to_sse_response(JsonRpcResponse::error(id, -32602, e)),
    };

    let final_id = id;

    let stream = async_stream::stream! {
        use tokio::io::AsyncBufReadExt;

        let mut child = match tokio::process::Command::new("bash")
            .arg("-c")
            .arg(&command)
            .current_dir(&canonical_dir)
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
            // If the stream is dropped before completion (e.g. the client
            // disconnects), kill the process instead of leaving it orphaned.
            .kill_on_drop(true)
            .spawn()
        {
            Ok(c) => c,
            Err(e) => {
                let resp = JsonRpcResponse::success(
                    final_id,
                    json!({
                        "content": [{"type": "text", "text": format!("Failed to spawn process: {e}")}],
                        "isError": true
                    }),
                );
                if let Ok(s) = serde_json::to_string(&resp) {
                    yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
                }
                return;
            }
        };

        // Both pipes were configured above, so `take()` always yields `Some`.
        let stdout = child.stdout.take().expect("stdout piped");
        let stderr = child.stderr.take().expect("stderr piped");
        let mut stdout_lines = tokio::io::BufReader::new(stdout).lines();
        let mut stderr_lines = tokio::io::BufReader::new(stderr).lines();

        let deadline = tokio::time::Instant::now()
            + std::time::Duration::from_secs(timeout_secs);
        let mut stdout_done = false;
        let mut stderr_done = false;
        let mut timed_out = false;

        loop {
            if stdout_done && stderr_done {
                break;
            }

            // Recomputed every iteration so the overall deadline is honoured
            // no matter how many lines have been forwarded.
            let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
            if remaining.is_zero() {
                timed_out = true;
                let _ = child.kill().await;
                break;
            }

            tokio::select! {
                line = stdout_lines.next_line(), if !stdout_done => {
                    match line {
                        Ok(Some(l)) => {
                            let notif = json!({
                                "jsonrpc": "2.0",
                                "method": "notifications/tools/progress",
                                "params": { "stream": "stdout", "line": l }
                            });
                            if let Ok(s) = serde_json::to_string(&notif) {
                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
                            }
                        }
                        // EOF or read error: stop polling this pipe.
                        _ => { stdout_done = true; }
                    }
                }
                line = stderr_lines.next_line(), if !stderr_done => {
                    match line {
                        Ok(Some(l)) => {
                            let notif = json!({
                                "jsonrpc": "2.0",
                                "method": "notifications/tools/progress",
                                "params": { "stream": "stderr", "line": l }
                            });
                            if let Ok(s) = serde_json::to_string(&notif) {
                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
                            }
                        }
                        // EOF or read error: stop polling this pipe.
                        _ => { stderr_done = true; }
                    }
                }
                _ = tokio::time::sleep(remaining) => {
                    timed_out = true;
                    let _ = child.kill().await;
                    break;
                }
            }
        }

        // -1 stands in for "no exit code" (wait failed or the process was killed by a signal).
        let exit_code = child.wait().await.ok().and_then(|s| s.code()).unwrap_or(-1);

        let summary = json!({
            "exit_code": exit_code,
            "timed_out": timed_out,
        });

        let final_resp = JsonRpcResponse::success(
            final_id,
            json!({
                "content": [{"type": "text", "text": summary.to_string()}]
            }),
        );
        if let Ok(s) = serde_json::to_string(&final_resp) {
            yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
        }
    };

    Response::builder()
        .status(poem::http::StatusCode::OK)
        .header("Content-Type", "text/event-stream")
        .header("Cache-Control", "no-cache")
        .body(Body::from_bytes_stream(stream.map(|r| {
            r.map(Bytes::from)
        })))
}

/// Keep only the last `max_lines` lines of `output`.
///
/// Input with at most `max_lines` lines is returned unchanged. Otherwise the
/// result starts with a `[... N lines omitted ...]` marker followed by the
/// retained tail, joined with `\n`.
fn truncate_output(output: &str, max_lines: usize) -> String {
    let total = output.lines().count();
    match total.checked_sub(max_lines) {
        // Fits within the limit: hand back the input untouched.
        None | Some(0) => output.to_string(),
        Some(omitted) => {
            let tail = output
                .lines()
                .skip(omitted)
                .collect::<Vec<_>>()
                .join("\n");
            format!("[... {omitted} lines omitted ...]\n{tail}")
        }
    }
}

/// Sum the passed/failed counts over every `cargo test` summary line, e.g.
/// `"test result: ok. 5 passed; 0 failed; ..."`.
///
/// Only lines containing `test result:` are considered; a count that cannot
/// be extracted from such a line contributes nothing. Returns
/// `(total_passed, total_failed)`.
fn parse_test_counts(output: &str) -> (u64, u64) {
    output
        .lines()
        .filter(|line| line.contains("test result:"))
        .fold((0u64, 0u64), |(passed, failed), line| {
            (
                passed + extract_count(line, "passed").unwrap_or(0),
                failed + extract_count(line, "failed").unwrap_or(0),
            )
        })
}

/// Read the number written directly before the first occurrence of `label`
/// in `line` (e.g. `"5 passed"` → 5).
///
/// Whitespace between the digits and the label is ignored. Returns `None`
/// when the label is absent, when no digits precede it, or when the digits
/// do not fit in a `u64`.
fn extract_count(line: &str, label: &str) -> Option<u64> {
    let label_at = line.find(label)?;
    let prefix = line[..label_at].trim_end();
    // Whatever remains after stripping the trailing digits marks where the number starts.
    let number_start = prefix
        .trim_end_matches(|c: char| c.is_ascii_digit())
        .len();
    let digits = &prefix[number_start..];
    if digits.is_empty() {
        None
    } else {
        digits.parse().ok()
    }
}

/// Run the project's test suite (`script/test`) and block until complete.
///
/// Spawns the test process, then polls every second server-side until the
/// child exits or the timeout is reached. Returns the full test result in
/// a single MCP call — no polling needed from the agent.
///
/// The child process is properly killed on timeout (no zombies).
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.agents.get_project_root(&ctx.state)?;

    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };

    let script_path = working_dir.join("script").join("test");
    if !script_path.exists() {
        return Err(format!(
            "Test script not found: {}",
            script_path.display()
        ));
    }

    // Kill any existing test job for this worktree.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        if let Some(mut old_job) = jobs.remove(&working_dir) {
            if let Some(ref mut child) = old_job.child {
                let _ = child.kill();
                let _ = child.wait();
            }
        }
    }

    // Spawn the test process.
    let child = std::process::Command::new("bash")
        .arg(&script_path)
        .current_dir(&working_dir)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| format!("Failed to spawn test script: {e}"))?;

    let pid = child.id();
    crate::slog!(
        "[run_tests] Started test job for {} (pid {})",
        working_dir.display(),
        pid
    );

    // Store the child so it can be cleaned up if the server restarts.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        jobs.insert(
            working_dir.clone(),
            crate::http::context::TestJob {
                child: Some(child),
                result: None,
                started_at: std::time::Instant::now(),
            },
        );
    }

    // Block server-side, checking every second until done or timeout.
    let start = std::time::Instant::now();
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;

        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        let job = match jobs.get_mut(&working_dir) {
            Some(j) => j,
            None => return Err("Test job disappeared unexpectedly".to_string()),
        };

        if let Some(child) = job.child.as_mut() {
            match child.try_wait() {
                Ok(Some(status)) => {
                    // Done — collect results.
                    let result = collect_child_result(child, status);
                    crate::slog!(
                        "[run_tests] Test job for {} finished (pid {}, passed={})",
                        working_dir.display(),
                        pid,
                        result.passed
                    );
                    jobs.remove(&working_dir);
                    return format_test_result(&result);
                }
                Ok(None) => {
                    // Still running — check timeout.
                    if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
                        let _ = child.kill();
                        let _ = child.wait();
                        crate::slog!(
                            "[run_tests] Killed test job for {} (pid {}) after {}s timeout",
                            working_dir.display(),
                            pid,
                            TEST_TIMEOUT_SECS
                        );
                        jobs.remove(&working_dir);
                        return serde_json::to_string_pretty(&json!({
                            "passed": false,
                            "exit_code": -1,
                            "timed_out": true,
                            "tests_passed": 0,
                            "tests_failed": 0,
                            "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
                        }))
                        .map_err(|e| format!("Serialization error: {e}"));
                    }
                }
                Err(e) => {
                    jobs.remove(&working_dir);
                    return Err(format!("Failed to check child status: {e}"));
                }
            }
        }
    }
}

/// How long `get_test_result` blocks server-side before returning "running".
/// This prevents agents from burning turns polling every 2 seconds.
///
/// Used as the number of polling iterations, with a one-second sleep after
/// each, so the value is effectively in seconds.
const TEST_POLL_BLOCK_SECS: u64 = 20;

/// Check on a running test job and return results if complete.
///
/// Blocks for up to `TEST_POLL_BLOCK_SECS` seconds, checking every second.
/// Returns immediately when the test finishes, or after the blocking period
/// with `{"status": "running", "elapsed_secs": ...}`.
/// This server-side blocking prevents agents from wasting turns polling.
///
/// The job is looked up by `worktree_path` when given (validated with
/// `validate_working_dir`), otherwise by the canonical project root. A
/// finished or failed job is removed from `ctx.test_jobs` before returning.
/// A job that has been running longer than `TEST_TIMEOUT_SECS` is killed and
/// reported with `timed_out: true`; this is only checked once the blocking
/// period has elapsed.
///
/// # Errors
/// Returns `Err` when the working directory is invalid, no job is registered
/// for it, the child's status cannot be queried, or the job has neither a
/// child nor a cached result.
pub(super) async fn tool_get_test_result(
    args: &Value,
    ctx: &AppContext,
) -> Result<String, String> {
    let project_root = ctx.agents.get_project_root(&ctx.state)?;

    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };

    // Block for up to TEST_POLL_BLOCK_SECS, checking once per second.
    let test_jobs = ctx.test_jobs.clone();
    let wd = working_dir.clone();
    for _ in 0..TEST_POLL_BLOCK_SECS {
        // Inner scope: the lock guard is released before the sleep below.
        {
            let mut jobs = test_jobs.lock().map_err(|e| e.to_string())?;
            if let Some(job) = jobs.get_mut(&wd) {
                if let Some(child) = job.child.as_mut() {
                    match child.try_wait() {
                        Ok(Some(status)) => {
                            let result = collect_child_result(child, status);
                            job.child = None;
                            job.result = Some(result.clone());
                            jobs.remove(&wd);
                            return format_test_result(&result);
                        }
                        Ok(None) => {} // still running, keep waiting
                        Err(e) => {
                            jobs.remove(&wd);
                            return Err(format!("Failed to check child status: {e}"));
                        }
                    }
                } else if let Some(result) = job.result.clone() {
                    // No child but a cached result: hand it out and drop the job.
                    jobs.remove(&wd);
                    return format_test_result(&result);
                }
            } else {
                return Err(
                    "No test job running for this worktree. Call run_tests first.".to_string(),
                );
            }
        }
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }

    // Still running after blocking period — return status.
    let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;

    let job = jobs.get_mut(&working_dir).ok_or_else(|| {
        "No test job running for this worktree. Call run_tests first.".to_string()
    })?;

    // Check if child has finished.
    if let Some(child) = job.child.as_mut() {
        match child.try_wait() {
            Ok(Some(status)) => {
                let result = collect_child_result(child, status);
                job.child = None;
                job.result = Some(result.clone());
                let resp = format_test_result(&result);
                jobs.remove(&working_dir);
                return resp;
            }
            Ok(None) => {
                // Measured from the job's own start time, not from this call.
                let elapsed = job.started_at.elapsed().as_secs();
                // If exceeded our max timeout, kill it.
                if elapsed > TEST_TIMEOUT_SECS {
                    let _ = child.kill();
                    let _ = child.wait();
                    crate::slog!(
                        "[run_tests] Killed test job for {} after {elapsed}s timeout",
                        working_dir.display()
                    );
                    jobs.remove(&working_dir);
                    return serde_json::to_string_pretty(&json!({
                        "passed": false,
                        "exit_code": -1,
                        "timed_out": true,
                        "tests_passed": 0,
                        "tests_failed": 0,
                        "output": format!("Test suite timed out after {elapsed}s"),
                    }))
                    .map_err(|e| format!("Serialization error: {e}"));
                }
                return serde_json::to_string_pretty(&json!({
                    "status": "running",
                    "elapsed_secs": elapsed,
                }))
                .map_err(|e| format!("Serialization error: {e}"));
            }
            Err(e) => {
                jobs.remove(&working_dir);
                return Err(format!("Failed to check child status: {e}"));
            }
        }
    }

    // Job exists with cached result.
    if let Some(result) = job.result.clone() {
        jobs.remove(&working_dir);
        return format_test_result(&result);
    }

    Err("Test job in unexpected state".to_string())
}

/// Collect stdout/stderr from a finished child and build a `TestJobResult`.
///
/// Whatever is still readable from the child's stdout and stderr pipes is
/// read to EOF and decoded lossily, so invalid UTF-8 bytes are replaced
/// rather than causing the whole stream to be discarded. A pipe that is
/// absent contributes an empty string. stdout and stderr are concatenated in
/// that order; test counts are parsed from the full text, and only the last
/// `MAX_OUTPUT_LINES` lines are kept in the result's `output`.
///
/// `exit_code` is `-1` when `status` carries no exit code.
fn collect_child_result(
    child: &mut std::process::Child,
    status: std::process::ExitStatus,
) -> crate::http::context::TestJobResult {
    use std::io::Read;

    let mut stdout_bytes = Vec::new();
    let mut stderr_bytes = Vec::new();
    // Read errors are ignored on purpose: partial output is better than none.
    if let Some(out) = child.stdout.as_mut() {
        let _ = out.read_to_end(&mut stdout_bytes);
    }
    if let Some(err) = child.stderr.as_mut() {
        let _ = err.read_to_end(&mut stderr_bytes);
    }

    let combined = format!(
        "{}{}",
        String::from_utf8_lossy(&stdout_bytes),
        String::from_utf8_lossy(&stderr_bytes)
    );
    let (tests_passed, tests_failed) = parse_test_counts(&combined);
    let exit_code = status.code().unwrap_or(-1);
    crate::http::context::TestJobResult {
        passed: status.success(),
        exit_code,
        tests_passed,
        tests_failed,
        output: truncate_output(&combined, MAX_OUTPUT_LINES),
    }
}

/// Serialize a finished `TestJobResult` into the pretty-printed JSON payload
/// handed back to the agent.
///
/// The payload mirrors the result's fields and always carries
/// `"timed_out": false`. Fails only if JSON serialization fails.
fn format_test_result(
    result: &crate::http::context::TestJobResult,
) -> Result<String, String> {
    let payload = json!({
        "passed": result.passed,
        "exit_code": result.exit_code,
        "timed_out": false,
        "tests_passed": result.tests_passed,
        "tests_failed": result.tests_failed,
        "output": result.output,
    });

    match serde_json::to_string_pretty(&payload) {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("Serialization error: {e}")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::http::test_helpers::test_ctx;
    use serde_json::json;

    // ── is_dangerous ─────────────────────────────────────────────────

    #[test]
    fn is_dangerous_blocks_rm_rf_root() {
        assert!(is_dangerous("rm -rf /").is_some());
        assert!(is_dangerous("  rm -rf /  ").is_some());
    }

    #[test]
    fn is_dangerous_blocks_rm_fr_root() {
        assert!(is_dangerous("rm -fr /").is_some());
    }

    #[test]
    fn is_dangerous_blocks_rm_rf_star() {
        assert!(is_dangerous("rm -rf /*").is_some());
        assert!(is_dangerous("rm -fr /*").is_some());
    }

    #[test]
    fn is_dangerous_blocks_sudo() {
        assert!(is_dangerous("sudo ls").is_some());
    }

    #[test]
    fn is_dangerous_blocks_shutdown() {
        assert!(is_dangerous("shutdown -h now").is_some());
    }

    #[test]
    fn is_dangerous_blocks_mkfs() {
        assert!(is_dangerous("mkfs /dev/sda1").is_some());
    }

    #[test]
    fn is_dangerous_blocks_fork_bomb() {
        assert!(is_dangerous(":(){ :|:& };:").is_some());
    }

    #[test]
    fn is_dangerous_allows_safe_commands() {
        assert!(is_dangerous("cargo build").is_none());
        assert!(is_dangerous("npm test").is_none());
        assert!(is_dangerous("git status").is_none());
        assert!(is_dangerous("ls -la").is_none());
        assert!(is_dangerous("rm -rf target/").is_none());
    }

    // ── validate_working_dir ──────────────────────────────────────────

    #[test]
    fn validate_working_dir_rejects_relative_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_working_dir("relative/path", &ctx);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("absolute"));
    }

    #[test]
    fn validate_working_dir_rejects_nonexistent_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_working_dir("/nonexistent_path_xyz_abc", &ctx);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("does not exist"));
    }

    #[test]
    fn validate_working_dir_rejects_path_outside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        // Create the worktrees dir so it exists
        let wt_dir = tmp.path().join(".huskies").join("worktrees");
        std::fs::create_dir_all(&wt_dir).unwrap();
        let ctx = test_ctx(tmp.path());
        // Try to use /tmp (outside worktrees)
        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
        assert!(result.is_err());
        assert!(
            result.unwrap_err().contains("inside .huskies/worktrees"),
            "expected sandbox error"
        );
    }

    #[test]
    fn validate_working_dir_accepts_path_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".huskies")
            .join("worktrees")
            .join("42_test_story");
        std::fs::create_dir_all(&story_wt).unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_working_dir(story_wt.to_str().unwrap(), &ctx);
        assert!(result.is_ok(), "expected Ok, got: {:?}", result);
    }

    #[test]
    fn validate_working_dir_rejects_no_worktrees_dir() {
        let tmp = tempfile::tempdir().unwrap();
        // Do NOT create worktrees dir
        let ctx = test_ctx(tmp.path());
        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
        assert!(result.is_err());
    }

    // ── tool_run_command ───────────────────────────────────────────────

    #[tokio::test]
    async fn tool_run_command_missing_command() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(&json!({"working_dir": "/tmp"}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("command"));
    }

    #[tokio::test]
    async fn tool_run_command_missing_working_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(&json!({"command": "ls"}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("working_dir"));
    }

    #[tokio::test]
    async fn tool_run_command_blocks_dangerous_command() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(
            &json!({"command": "rm -rf /", "working_dir": "/tmp"}),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("blocked"));
    }

    #[tokio::test]
    async fn tool_run_command_rejects_path_outside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let wt_dir = tmp.path().join(".huskies").join("worktrees");
        std::fs::create_dir_all(&wt_dir).unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(
            &json!({
                "command": "ls",
                "working_dir": tmp.path().to_str().unwrap()
            }),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(
            result.unwrap_err().contains("worktrees"),
            "expected sandbox error"
        );
    }

    #[tokio::test]
    async fn tool_run_command_runs_in_worktree_and_returns_output() {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".huskies")
            .join("worktrees")
            .join("42_test");
        std::fs::create_dir_all(&story_wt).unwrap();
        std::fs::write(story_wt.join("canary.txt"), "hello").unwrap();

        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(
            &json!({
                "command": "ls",
                "working_dir": story_wt.to_str().unwrap()
            }),
            &ctx,
        )
        .await
        .unwrap();

        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["exit_code"], 0);
        assert!(parsed["stdout"].as_str().unwrap().contains("canary.txt"));
        assert_eq!(parsed["timed_out"], false);
    }

    #[tokio::test]
    async fn tool_run_command_captures_nonzero_exit_code() {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".huskies")
            .join("worktrees")
            .join("43_test");
        std::fs::create_dir_all(&story_wt).unwrap();

        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(
            &json!({
                "command": "exit 42",
                "working_dir": story_wt.to_str().unwrap()
            }),
            &ctx,
        )
        .await
        .unwrap();

        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["exit_code"], 42);
        assert_eq!(parsed["timed_out"], false);
    }

    #[tokio::test]
    async fn tool_run_command_timeout_returns_timed_out_true() {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".huskies")
            .join("worktrees")
            .join("44_test");
        std::fs::create_dir_all(&story_wt).unwrap();

        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(
            &json!({
                "command": "sleep 10",
                "working_dir": story_wt.to_str().unwrap(),
                "timeout": 1
            }),
            &ctx,
        )
        .await
        .unwrap();

        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["timed_out"], true);
    }

    /// Text a command writes to standard error must show up in the `stderr`
    /// field of the JSON returned by `tool_run_command`.
    #[tokio::test]
    async fn tool_run_command_captures_stderr() {
        let tmp = tempfile::tempdir().unwrap();

        // Working directory: `<tmp>/.huskies/worktrees/45_test`.
        let mut story_wt = tmp.path().to_path_buf();
        for segment in [".huskies", "worktrees", "45_test"] {
            story_wt.push(segment);
        }
        std::fs::create_dir_all(&story_wt).unwrap();

        // The `>&2` redirect sends the echoed text to stderr instead of stdout.
        let args = json!({
            "command": "echo 'error msg' >&2",
            "working_dir": story_wt.to_str().unwrap()
        });
        let ctx = test_ctx(tmp.path());
        let result = tool_run_command(&args, &ctx).await.unwrap();

        let parsed: Value = serde_json::from_str(&result).unwrap();
        let captured = parsed["stderr"].as_str().unwrap();
        assert!(captured.contains("error msg"), "expected stderr: {parsed}");
    }

    /// Smoke test for an oversized `timeout` argument (9999, above the 600
    /// of `MAX_TIMEOUT_SECS`).
    ///
    /// Running a command for the full clamped duration would be far too slow,
    /// so this only checks that the call returns an `Err` instead of panicking.
    /// NOTE(review): the clamped value itself is never observed here; the `Err`
    /// is presumably produced by working-directory validation, since `/tmp` is
    /// not under the project's worktrees directory — confirm.
    #[tokio::test]
    async fn tool_run_command_clamps_timeout_to_max() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());

        // `working_dir` deliberately points outside the temporary project.
        let args = json!({"command": "ls", "working_dir": "/tmp", "timeout": 9999});
        let result = tool_run_command(&args, &ctx).await;

        // Reaching this line at all proves the oversized timeout did not panic.
        assert!(result.is_err());
    }

    // ── tool_run_tests ────────────────────────────────────────────────

    /// With no `script/test` file under the temporary project directory,
    /// `tool_run_tests` must return an error whose text contains "not found".
    #[tokio::test]
    async fn tool_run_tests_missing_script_returns_error() {
        // The temp dir is left empty on purpose: no `script/test` is created.
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let args = json!({});

        let result = tool_run_tests(&args, &ctx).await;
        assert!(result.is_err(), "expected error for missing script: {result:?}");

        let message = result.unwrap_err();
        assert!(
            message.contains("not found"),
            "error should mention 'not found'"
        );
    }

    /// A `script/test` that prints a passing summary line and exits 0 must be
    /// reported as passed, with the counts taken from that summary line.
    #[tokio::test]
    async fn tool_run_tests_passes_when_script_exits_zero() {
        let tmp = tempfile::tempdir().unwrap();

        // Stub test runner: one cargo-style summary line, then a clean exit.
        let script_body = concat!(
            "#!/usr/bin/env bash\n",
            "echo 'test result: ok. 3 passed; 0 failed'\n",
            "exit 0\n",
        );
        let script_dir = tmp.path().join("script");
        let script_path = script_dir.join("test");
        std::fs::create_dir_all(&script_dir).unwrap();
        std::fs::write(&script_path, script_body).unwrap();

        // Mark the stub executable (rwxr-xr-x); only meaningful on Unix.
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let executable = std::fs::Permissions::from_mode(0o755);
            std::fs::set_permissions(&script_path, executable).unwrap();
        }

        let ctx = test_ctx(tmp.path());
        let args = json!({});
        let result = tool_run_tests(&args, &ctx).await.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();

        assert_eq!(parsed["passed"], true);
        assert_eq!(parsed["exit_code"], 0);
        assert_eq!(parsed["timed_out"], false);
        assert_eq!(parsed["tests_passed"], 3);
        assert_eq!(parsed["tests_failed"], 0);
    }

    /// A `script/test` that prints a failing summary line and exits 1 must be
    /// reported as not passed, with the exit code and counts carried through.
    #[tokio::test]
    async fn tool_run_tests_fails_when_script_exits_nonzero() {
        let tmp = tempfile::tempdir().unwrap();

        // Stub test runner: one cargo-style failure summary, then exit status 1.
        let script_body = concat!(
            "#!/usr/bin/env bash\n",
            "echo 'test result: FAILED. 1 passed; 2 failed'\n",
            "exit 1\n",
        );
        let script_dir = tmp.path().join("script");
        let script_path = script_dir.join("test");
        std::fs::create_dir_all(&script_dir).unwrap();
        std::fs::write(&script_path, script_body).unwrap();

        // Mark the stub executable (rwxr-xr-x); only meaningful on Unix.
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let executable = std::fs::Permissions::from_mode(0o755);
            std::fs::set_permissions(&script_path, executable).unwrap();
        }

        let ctx = test_ctx(tmp.path());
        let args = json!({});
        let result = tool_run_tests(&args, &ctx).await.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();

        assert_eq!(parsed["passed"], false);
        assert_eq!(parsed["exit_code"], 1);
        assert_eq!(parsed["tests_passed"], 1);
        assert_eq!(parsed["tests_failed"], 2);
    }

    /// A `worktree_path` that is not under `.huskies/worktrees` must be
    /// rejected with an error whose text mentions "worktrees".
    #[tokio::test]
    async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();

        // The worktrees directory exists, so the rejection below cannot be
        // blamed on it being missing.
        let mut wt_dir = tmp.path().to_path_buf();
        wt_dir.extend([".huskies", "worktrees"]);
        std::fs::create_dir_all(&wt_dir).unwrap();

        let ctx = test_ctx(tmp.path());

        // The project root is an ancestor of the worktrees directory, not a
        // descendant of it, so it should fail validation.
        let args = json!({"worktree_path": tmp.path().to_str().unwrap()});
        let result = tool_run_tests(&args, &ctx).await;

        assert!(result.is_err());
        let message = result.unwrap_err();
        assert!(message.contains("worktrees"), "expected sandbox error");
    }

    // ── truncate_output ───────────────────────────────────────────────

    /// Three lines with a limit of 10 must come back from `truncate_output`
    /// exactly as they went in.
    #[test]
    fn truncate_output_short_text_unchanged() {
        let original = "line1\nline2\nline3";
        let truncated = truncate_output(original, 10);
        assert_eq!(truncated, original);
    }

    /// With 200 input lines and a limit of 50, `truncate_output` must keep the
    /// final line, mention that lines were "omitted", and drop the first line.
    #[test]
    fn truncate_output_long_text_keeps_tail() {
        // "line 1" through "line 200", newline-separated.
        let text = (1..=200)
            .map(|i| format!("line {i}"))
            .collect::<Vec<String>>()
            .join("\n");

        let result = truncate_output(&text, 50);

        assert!(result.contains("line 200"), "should keep last line: {result}");
        assert!(result.contains("omitted"), "should note omitted lines: {result}");
        // The trailing newline in the needle keeps "line 1" from matching
        // inside longer entries such as "line 151".
        assert!(!result.contains("line 1\n"), "should not keep first line: {result}");
    }

    // ── parse_test_counts ─────────────────────────────────────────────

    /// Counts from several `test result:` lines are added together:
    /// 5 + 2 passed and 0 + 3 failed.
    #[test]
    fn parse_test_counts_extracts_passed_and_failed() {
        let output = concat!(
            "test result: ok. 5 passed; 0 failed; 0 ignored\n",
            "test result: FAILED. 2 passed; 3 failed;",
        );
        assert_eq!(parse_test_counts(output), (7, 3));
    }

    /// Output with no recognisable result summary yields zero for both counts.
    #[test]
    fn parse_test_counts_no_results_returns_zeros() {
        let counts = parse_test_counts("no test output here");
        assert_eq!(counts, (0, 0));
    }

    /// `extract_count` returns the number written just before the label, or
    /// `None` when the word before the label is not a number.
    #[test]
    fn extract_count_finds_number_before_label() {
        // (input text, label to look for, expected result)
        let cases = [
            ("5 passed; 0 failed", "passed", Some(5)),
            ("0 failed", "failed", Some(0)),
            ("no number here passed", "passed", None),
        ];
        for (text, label, expected) in cases {
            assert_eq!(extract_count(text, label), expected);
        }
    }
}