//! MCP shell script tools: run_tests / get_test_result / run_build / run_lint. use serde_json::{Value, json}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::{Mutex, OnceLock}; use crate::http::context::AppContext; #[allow(unused_imports)] use crate::service::shell::{extract_count, parse_test_counts, truncate_output}; use super::exec::validate_working_dir; const TEST_TIMEOUT_SECS: u64 = 1200; const MAX_OUTPUT_LINES: usize = 100; // ── In-flight process registry ─────────────────────────────────────────────── // // Child process handles are ephemeral and cannot survive a server restart. // Persistent state (status, output, timestamps) lives in the CRDT `test_jobs` // collection. This module-level static tracks only the OS-level child process // so we can kill it on restart or poll it during `get_test_result`. struct InFlightJob { child: std::process::Child, } static ACTIVE_JOBS: OnceLock>> = OnceLock::new(); fn active_jobs() -> &'static Mutex> { ACTIVE_JOBS.get_or_init(|| Mutex::new(HashMap::new())) } /// Derive the CRDT key for a test job from its working directory. /// /// Uses the last path component (the story ID or project directory name) so /// that CRDT entries are human-readable and stable across path changes. fn story_key(working_dir: &Path) -> String { working_dir .file_name() .and_then(|n| n.to_str()) .unwrap_or("root") .to_string() } /// Current time as a Unix timestamp (seconds, f64) for CRDT fields. fn unix_now() -> f64 { std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs_f64() } // ── run_tests ──────────────────────────────────────────────────────────────── pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; let script_path = working_dir.join("script").join("test"); if !script_path.exists() { return Err(format!("Test script not found: {}", script_path.display())); } let sid = story_key(&working_dir); // Kill any existing in-flight job for this worktree before starting a new one. { let mut jobs = active_jobs().lock().map_err(|e| e.to_string())?; if let Some(mut old_job) = jobs.remove(&working_dir) { let _ = old_job.child.kill(); let _ = old_job.child.wait(); } } // Spawn the test process with piped stdout/stderr so we can capture output. // Pipes are drained in background threads to prevent deadlock when the // child fills the 64KB OS pipe buffer. let mut child = std::process::Command::new("bash") .arg(&script_path) .current_dir(&working_dir) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .spawn() .map_err(|e| format!("Failed to spawn test script: {e}"))?; let pid = child.id(); crate::slog!( "[run_tests] Started test job for {} (pid {})", working_dir.display(), pid ); // Drain stdout/stderr in background threads so pipe buffers never fill. let mut stdout_handle = child.stdout.take().map(|mut r| { std::thread::spawn(move || { let mut s = String::new(); std::io::Read::read_to_string(&mut r, &mut s).ok(); s }) }); let mut stderr_handle = child.stderr.take().map(|mut r| { std::thread::spawn(move || { let mut s = String::new(); std::io::Read::read_to_string(&mut r, &mut s).ok(); s }) }); let started_at_unix = unix_now(); // Persist "running" state in the CRDT so status survives a server restart. crate::crdt_state::write_test_job(&sid, "running", started_at_unix, None, None); // Register the child process in the in-flight map. { let mut jobs = active_jobs().lock().map_err(|e| e.to_string())?; jobs.insert(working_dir.clone(), InFlightJob { child }); } // Block server-side, checking every second until done or timeout. let start = std::time::Instant::now(); loop { tokio::time::sleep(std::time::Duration::from_secs(1)).await; let mut jobs = active_jobs().lock().map_err(|e| e.to_string())?; let job = match jobs.get_mut(&working_dir) { Some(j) => j, None => return Err("Test job disappeared unexpectedly".to_string()), }; match job.child.try_wait() { Ok(Some(status)) => { // Child exited — collect output and write final CRDT state. jobs.remove(&working_dir); let stdout = stdout_handle .take() .and_then(|h| h.join().ok()) .unwrap_or_default(); let stderr = stderr_handle .take() .and_then(|h| h.join().ok()) .unwrap_or_default(); let combined = format!("{stdout}{stderr}"); let (tests_passed, tests_failed) = parse_test_counts(&combined); let truncated = truncate_output(&combined, MAX_OUTPUT_LINES); let passed = status.success(); let exit_code = status.code().unwrap_or(-1); let crdt_status = if passed { "pass" } else { "fail" }; crate::slog!( "[run_tests] Test job for {} finished (pid {}, passed={})", working_dir.display(), pid, passed ); // Persist result in CRDT for post-restart visibility. crate::crdt_state::write_test_job( &sid, crdt_status, started_at_unix, Some(unix_now()), Some(&truncated), ); // Capture positive test evidence in the DB so the pipeline // advance salvage path (bug 645/668) can confirm the agent // ran passing tests before it died. Only written when running // in a story worktree (worktree_path arg provided). if passed && args.get("worktree_path").is_some() { crate::db::write_content(&format!("{sid}:run_tests_ok"), "1"); } return serde_json::to_string_pretty(&json!({ "passed": passed, "exit_code": exit_code, "timed_out": false, "tests_passed": tests_passed, "tests_failed": tests_failed, "output": truncated, })) .map_err(|e| format!("Serialization error: {e}")); } Ok(None) => { // Still running — check timeout. if start.elapsed().as_secs() > TEST_TIMEOUT_SECS { let _ = job.child.kill(); let _ = job.child.wait(); crate::slog!( "[run_tests] Killed test job for {} (pid {}) after {}s timeout", working_dir.display(), pid, TEST_TIMEOUT_SECS ); jobs.remove(&working_dir); let timeout_msg = format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"); crate::crdt_state::write_test_job( &sid, "fail", started_at_unix, Some(unix_now()), Some(&timeout_msg), ); return serde_json::to_string_pretty(&json!({ "passed": false, "exit_code": -1, "timed_out": true, "tests_passed": 0, "tests_failed": 0, "output": timeout_msg, })) .map_err(|e| format!("Serialization error: {e}")); } } Err(e) => { jobs.remove(&working_dir); let msg = e.to_string(); crate::crdt_state::write_test_job( &sid, "fail", started_at_unix, Some(unix_now()), Some(&msg), ); return Err(format!("Failed to check child status: {e}")); } } } } // ── get_test_result ────────────────────────────────────────────────────────── /// How long `get_test_result` blocks server-side before returning "running". /// This prevents agents from burning turns polling every 2 seconds. const TEST_POLL_BLOCK_SECS: u64 = 20; /// Check on a running test job and return results if complete. /// /// Reads persistent state from the CRDT `test_jobs` collection. Blocks for up /// to [`TEST_POLL_BLOCK_SECS`], checking the CRDT every second. Returns /// immediately when the test finishes, or after the blocking period with /// `{"status": "running"}`. /// /// After a server restart the CRDT entry written by `run_tests` remains /// visible, so callers can observe the "running" status even when the original /// child process is gone. pub(crate) async fn tool_get_test_result(args: &Value, ctx: &AppContext) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; let sid = story_key(&working_dir); // Poll CRDT for up to TEST_POLL_BLOCK_SECS, returning as soon as the job // transitions from "running" to a terminal state. for _ in 0..TEST_POLL_BLOCK_SECS { match crate::crdt_state::read_test_job(&sid) { None => { return Err( "No test job running for this worktree. Call run_tests first.".to_string(), ); } Some(view) if view.status == "pass" || view.status == "fail" => { return format_crdt_result(&view); } Some(_) => { // Still "running" — wait one second and re-check. } } tokio::time::sleep(std::time::Duration::from_secs(1)).await; } // Still running after the blocking window — return status so the caller // can decide whether to poll again or give up. match crate::crdt_state::read_test_job(&sid) { None => Err("No test job running for this worktree. Call run_tests first.".to_string()), Some(view) if view.status == "pass" || view.status == "fail" => format_crdt_result(&view), Some(view) => { let elapsed = unix_now() - view.started_at; serde_json::to_string_pretty(&json!({ "status": "running", "elapsed_secs": elapsed.max(0.0) as u64, })) .map_err(|e| format!("Serialization error: {e}")) } } } /// Build a JSON result object from a completed CRDT test-job view. /// /// `exit_code` is approximated from status (0 = pass, 1 = fail) because the /// CRDT schema does not store raw exit codes. fn format_crdt_result(view: &crate::crdt_state::TestJobView) -> Result { let passed = view.status == "pass"; let output = view.output.clone().unwrap_or_default(); let (tests_passed, tests_failed) = parse_test_counts(&output); serde_json::to_string_pretty(&json!({ "passed": passed, "exit_code": if passed { 0 } else { 1 }, "timed_out": false, "tests_passed": tests_passed, "tests_failed": tests_failed, "output": output, })) .map_err(|e| format!("Serialization error: {e}")) } // ── run_build / run_lint ───────────────────────────────────────────────────── /// Shared implementation for run_build and run_lint: runs a named script /// (`script/`) in the working directory, captures output, and returns async fn run_script_tool( script_name: &str, args: &Value, ctx: &AppContext, ) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; let script_path = working_dir.join("script").join(script_name); if !script_path.exists() { return Err(format!( "{script_name} script not found: {}", script_path.display() )); } let result = tokio::task::spawn_blocking({ let script = script_path.clone(); let dir = working_dir.clone(); move || { std::process::Command::new("bash") .arg(&script) .current_dir(&dir) .output() } }) .await .map_err(|e| format!("Task join error: {e}"))? .map_err(|e| format!("Failed to spawn {script_name} script: {e}"))?; let stdout = String::from_utf8_lossy(&result.stdout); let stderr = String::from_utf8_lossy(&result.stderr); let combined = format!("{stdout}{stderr}"); let output = truncate_output(&combined, MAX_OUTPUT_LINES); let exit_code = result.status.code().unwrap_or(-1); serde_json::to_string_pretty(&json!({ "passed": result.status.success(), "exit_code": exit_code, "output": output, })) .map_err(|e| format!("Serialization error: {e}")) } pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result { run_script_tool("build", args, ctx).await } pub(crate) async fn tool_run_lint(args: &Value, ctx: &AppContext) -> Result { run_script_tool("lint", args, ctx).await } #[cfg(test)] mod tests { use super::*; use crate::http::test_helpers::test_ctx; #[tokio::test] async fn tool_run_tests_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); // No script/test in tmp — should return Err let result = tool_run_tests(&json!({}), &ctx).await; assert!( result.is_err(), "expected error for missing script: {result:?}" ); assert!( result.unwrap_err().contains("not found"), "error should mention 'not found'" ); } #[tokio::test] async fn tool_run_tests_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("test"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_tests_fails_when_script_exits_nonzero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("test"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], false); assert_eq!(parsed["exit_code"], 1); } #[tokio::test] async fn tool_run_tests_worktree_path_must_be_inside_worktrees() { let tmp = tempfile::tempdir().unwrap(); let wt_dir = tmp.path().join(".huskies").join("worktrees"); std::fs::create_dir_all(&wt_dir).unwrap(); let ctx = test_ctx(tmp.path()); // tmp.path() itself is outside worktrees → should fail validation let result = tool_run_tests( &json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx, ) .await; assert!(result.is_err()); assert!( result.unwrap_err().contains("worktrees"), "expected sandbox error" ); } // ── tool_run_build / tool_run_lint ──────────────────────────────── #[tokio::test] async fn tool_run_build_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_build(&json!({}), &ctx).await; assert!(result.is_err()); assert!(result.unwrap_err().contains("not found")); } #[tokio::test] async fn tool_run_build_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("build"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_build(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_build_worktree_path_must_be_inside_worktrees() { let tmp = tempfile::tempdir().unwrap(); let wt_dir = tmp.path().join(".huskies").join("worktrees"); std::fs::create_dir_all(&wt_dir).unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_build( &json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx, ) .await; assert!(result.is_err()); assert!(result.unwrap_err().contains("worktrees")); } #[tokio::test] async fn tool_run_lint_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await; assert!(result.is_err()); assert!(result.unwrap_err().contains("not found")); } #[tokio::test] async fn tool_run_lint_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("lint"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_lint_fails_when_script_exits_nonzero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("lint"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], false); assert_eq!(parsed["exit_code"], 1); } // ── truncate_output ─────────────────────────────────────────────── #[test] fn truncate_output_short_text_unchanged() { let text = "line1\nline2\nline3"; assert_eq!(truncate_output(text, 10), text); } #[test] fn truncate_output_long_text_keeps_tail() { let lines: Vec = (1..=200).map(|i| format!("line {i}")).collect(); let text = lines.join("\n"); let result = truncate_output(&text, 50); assert!( result.contains("line 200"), "should keep last line: {result}" ); assert!( result.contains("omitted"), "should note omitted lines: {result}" ); assert!( !result.contains("line 1\n"), "should not keep first line: {result}" ); } // ── parse_test_counts ───────────────────────────────────────────── #[test] fn parse_test_counts_extracts_passed_and_failed() { let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;"; let (passed, failed) = parse_test_counts(output); assert_eq!(passed, 7); assert_eq!(failed, 3); } #[test] fn parse_test_counts_no_results_returns_zeros() { let (passed, failed) = parse_test_counts("no test output here"); assert_eq!(passed, 0); assert_eq!(failed, 0); } #[test] fn extract_count_finds_number_before_label() { assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5)); assert_eq!(extract_count("0 failed", "failed"), Some(0)); assert_eq!(extract_count("no number here passed", "passed"), None); } }