//! MCP shell script tools: run_tests / get_test_result / run_build / run_lint. use serde_json::{Value, json}; use crate::http::context::AppContext; #[allow(unused_imports)] use crate::service::shell::{extract_count, parse_test_counts, truncate_output}; use super::exec::validate_working_dir; const TEST_TIMEOUT_SECS: u64 = 1200; const MAX_OUTPUT_LINES: usize = 100; pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; let script_path = working_dir.join("script").join("test"); if !script_path.exists() { return Err(format!("Test script not found: {}", script_path.display())); } // Kill any existing test job for this worktree. { let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; if let Some(mut old_job) = jobs.remove(&working_dir) && let Some(ref mut child) = old_job.child { let _ = child.kill(); let _ = child.wait(); } } // Spawn the test process with piped stdout/stderr so we can capture output. // Pipes are drained in background threads to prevent deadlock when the // child fills the 64KB OS pipe buffer. let mut child = std::process::Command::new("bash") .arg(&script_path) .current_dir(&working_dir) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .spawn() .map_err(|e| format!("Failed to spawn test script: {e}"))?; let pid = child.id(); crate::slog!( "[run_tests] Started test job for {} (pid {})", working_dir.display(), pid ); // Drain stdout/stderr in background threads so pipe buffers never fill. let mut stdout_handle = child.stdout.take().map(|mut r| { std::thread::spawn(move || { let mut s = String::new(); std::io::Read::read_to_string(&mut r, &mut s).ok(); s }) }); let mut stderr_handle = child.stderr.take().map(|mut r| { std::thread::spawn(move || { let mut s = String::new(); std::io::Read::read_to_string(&mut r, &mut s).ok(); s }) }); // Store the child so it can be cleaned up if the server restarts. { let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; jobs.insert( working_dir.clone(), crate::http::context::TestJob { child: Some(child), result: None, started_at: std::time::Instant::now(), }, ); } // Block server-side, checking every second until done or timeout. let start = std::time::Instant::now(); loop { tokio::time::sleep(std::time::Duration::from_secs(1)).await; let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; let job = match jobs.get_mut(&working_dir) { Some(j) => j, None => return Err("Test job disappeared unexpectedly".to_string()), }; if let Some(child) = job.child.as_mut() { match child.try_wait() { Ok(Some(status)) => { // Done — join drain threads and collect output. jobs.remove(&working_dir); let stdout = stdout_handle .take() .and_then(|h| h.join().ok()) .unwrap_or_default(); let stderr = stderr_handle .take() .and_then(|h| h.join().ok()) .unwrap_or_default(); let combined = format!("{stdout}{stderr}"); let (tests_passed, tests_failed) = parse_test_counts(&combined); let truncated = truncate_output(&combined, MAX_OUTPUT_LINES); let passed = status.success(); let exit_code = status.code().unwrap_or(-1); crate::slog!( "[run_tests] Test job for {} finished (pid {}, passed={})", working_dir.display(), pid, passed ); // Capture positive test evidence in the DB so the pipeline // advance salvage path (bug 645/668) can confirm the agent // ran passing tests before it died. Only written when running // in a story worktree (worktree_path arg provided); extract // the story ID from the last path component. if passed && args.get("worktree_path").is_some() && let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str()) { crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1"); } return serde_json::to_string_pretty(&json!({ "passed": passed, "exit_code": exit_code, "timed_out": false, "tests_passed": tests_passed, "tests_failed": tests_failed, "output": truncated, })) .map_err(|e| format!("Serialization error: {e}")); } Ok(None) => { // Still running — check timeout. if start.elapsed().as_secs() > TEST_TIMEOUT_SECS { let _ = child.kill(); let _ = child.wait(); crate::slog!( "[run_tests] Killed test job for {} (pid {}) after {}s timeout", working_dir.display(), pid, TEST_TIMEOUT_SECS ); jobs.remove(&working_dir); return serde_json::to_string_pretty(&json!({ "passed": false, "exit_code": -1, "timed_out": true, "tests_passed": 0, "tests_failed": 0, "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS), })) .map_err(|e| format!("Serialization error: {e}")); } } Err(e) => { jobs.remove(&working_dir); return Err(format!("Failed to check child status: {e}")); } } } } } /// How long `get_test_result` blocks server-side before returning "running". /// This prevents agents from burning turns polling every 2 seconds. const TEST_POLL_BLOCK_SECS: u64 = 20; /// Check on a running test job and return results if complete. /// /// Blocks for up to 15 seconds, checking every second. Returns immediately /// when the test finishes, or after 15s with `{"status": "running"}`. pub(crate) async fn tool_get_test_result(args: &Value, ctx: &AppContext) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; // Block for up to TEST_POLL_BLOCK_SECS, checking once per second. let test_jobs = ctx.test_jobs.clone(); let wd = working_dir.clone(); for _ in 0..TEST_POLL_BLOCK_SECS { { let mut jobs = test_jobs.lock().map_err(|e| e.to_string())?; if let Some(job) = jobs.get_mut(&wd) { if let Some(child) = job.child.as_mut() { match child.try_wait() { Ok(Some(status)) => { let result = collect_child_result(child, status); job.child = None; job.result = Some(result.clone()); jobs.remove(&wd); return format_test_result(&result); } Ok(None) => {} // still running, keep waiting Err(e) => { jobs.remove(&wd); return Err(format!("Failed to check child status: {e}")); } } } else if let Some(result) = job.result.clone() { jobs.remove(&wd); return format_test_result(&result); } } else { return Err( "No test job running for this worktree. Call run_tests first.".to_string(), ); } } tokio::time::sleep(std::time::Duration::from_secs(1)).await; } // Still running after blocking period — return status. let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; let job = jobs.get_mut(&working_dir).ok_or_else(|| { "No test job running for this worktree. Call run_tests first.".to_string() })?; // Check if child has finished. if let Some(child) = job.child.as_mut() { match child.try_wait() { Ok(Some(status)) => { let result = collect_child_result(child, status); job.child = None; job.result = Some(result.clone()); let resp = format_test_result(&result); jobs.remove(&working_dir); return resp; } Ok(None) => { let elapsed = job.started_at.elapsed().as_secs(); // If exceeded our max timeout, kill it. if elapsed > TEST_TIMEOUT_SECS { let _ = child.kill(); let _ = child.wait(); crate::slog!( "[run_tests] Killed test job for {} after {elapsed}s timeout", working_dir.display() ); jobs.remove(&working_dir); return serde_json::to_string_pretty(&json!({ "passed": false, "exit_code": -1, "timed_out": true, "tests_passed": 0, "tests_failed": 0, "output": format!("Test suite timed out after {elapsed}s"), })) .map_err(|e| format!("Serialization error: {e}")); } return serde_json::to_string_pretty(&json!({ "status": "running", "elapsed_secs": elapsed, })) .map_err(|e| format!("Serialization error: {e}")); } Err(e) => { jobs.remove(&working_dir); return Err(format!("Failed to check child status: {e}")); } } } // Job exists with cached result. if let Some(result) = job.result.clone() { jobs.remove(&working_dir); return format_test_result(&result); } Err("Test job in unexpected state".to_string()) } fn collect_child_result( child: &mut std::process::Child, status: std::process::ExitStatus, ) -> crate::http::context::TestJobResult { let mut stdout = String::new(); let mut stderr = String::new(); if let Some(ref mut out) = child.stdout { use std::io::Read; let _ = out.read_to_string(&mut stdout); } if let Some(ref mut err) = child.stderr { use std::io::Read; let _ = err.read_to_string(&mut stderr); } let combined = format!("{stdout}{stderr}"); let (tests_passed, tests_failed) = parse_test_counts(&combined); let exit_code = status.code().unwrap_or(-1); crate::http::context::TestJobResult { passed: status.success(), exit_code, tests_passed, tests_failed, output: truncate_output(&combined, MAX_OUTPUT_LINES), } } /// Shared implementation for run_build and run_lint: runs a named script /// (`script/`) in the working directory, captures output, and returns async fn run_script_tool( script_name: &str, args: &Value, ctx: &AppContext, ) -> Result { let project_root = ctx.services.agents.get_project_root(&ctx.state)?; let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { Some(wt) => validate_working_dir(wt, ctx)?, None => project_root .canonicalize() .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, }; let script_path = working_dir.join("script").join(script_name); if !script_path.exists() { return Err(format!( "{script_name} script not found: {}", script_path.display() )); } let result = tokio::task::spawn_blocking({ let script = script_path.clone(); let dir = working_dir.clone(); move || { std::process::Command::new("bash") .arg(&script) .current_dir(&dir) .output() } }) .await .map_err(|e| format!("Task join error: {e}"))? .map_err(|e| format!("Failed to spawn {script_name} script: {e}"))?; let stdout = String::from_utf8_lossy(&result.stdout); let stderr = String::from_utf8_lossy(&result.stderr); let combined = format!("{stdout}{stderr}"); let output = truncate_output(&combined, MAX_OUTPUT_LINES); let exit_code = result.status.code().unwrap_or(-1); serde_json::to_string_pretty(&json!({ "passed": result.status.success(), "exit_code": exit_code, "output": output, })) .map_err(|e| format!("Serialization error: {e}")) } pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result { run_script_tool("build", args, ctx).await } pub(crate) async fn tool_run_lint(args: &Value, ctx: &AppContext) -> Result { run_script_tool("lint", args, ctx).await } fn format_test_result(result: &crate::http::context::TestJobResult) -> Result { serde_json::to_string_pretty(&json!({ "passed": result.passed, "exit_code": result.exit_code, "timed_out": false, "tests_passed": result.tests_passed, "tests_failed": result.tests_failed, "output": result.output, })) .map_err(|e| format!("Serialization error: {e}")) } #[cfg(test)] mod tests { use super::*; use crate::http::test_helpers::test_ctx; #[tokio::test] async fn tool_run_tests_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); // No script/test in tmp — should return Err let result = tool_run_tests(&json!({}), &ctx).await; assert!( result.is_err(), "expected error for missing script: {result:?}" ); assert!( result.unwrap_err().contains("not found"), "error should mention 'not found'" ); } #[tokio::test] async fn tool_run_tests_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("test"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_tests_fails_when_script_exits_nonzero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("test"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], false); assert_eq!(parsed["exit_code"], 1); } #[tokio::test] async fn tool_run_tests_worktree_path_must_be_inside_worktrees() { let tmp = tempfile::tempdir().unwrap(); let wt_dir = tmp.path().join(".huskies").join("worktrees"); std::fs::create_dir_all(&wt_dir).unwrap(); let ctx = test_ctx(tmp.path()); // tmp.path() itself is outside worktrees → should fail validation let result = tool_run_tests( &json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx, ) .await; assert!(result.is_err()); assert!( result.unwrap_err().contains("worktrees"), "expected sandbox error" ); } // ── tool_run_build / tool_run_lint ──────────────────────────────── #[tokio::test] async fn tool_run_build_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_build(&json!({}), &ctx).await; assert!(result.is_err()); assert!(result.unwrap_err().contains("not found")); } #[tokio::test] async fn tool_run_build_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("build"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_build(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_build_worktree_path_must_be_inside_worktrees() { let tmp = tempfile::tempdir().unwrap(); let wt_dir = tmp.path().join(".huskies").join("worktrees"); std::fs::create_dir_all(&wt_dir).unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_build( &json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx, ) .await; assert!(result.is_err()); assert!(result.unwrap_err().contains("worktrees")); } #[tokio::test] async fn tool_run_lint_missing_script_returns_error() { let tmp = tempfile::tempdir().unwrap(); let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await; assert!(result.is_err()); assert!(result.unwrap_err().contains("not found")); } #[tokio::test] async fn tool_run_lint_passes_when_script_exits_zero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("lint"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 0\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], true); assert_eq!(parsed["exit_code"], 0); } #[tokio::test] async fn tool_run_lint_fails_when_script_exits_nonzero() { let tmp = tempfile::tempdir().unwrap(); let script_dir = tmp.path().join("script"); std::fs::create_dir_all(&script_dir).unwrap(); let script_path = script_dir.join("lint"); std::fs::write(&script_path, "#!/usr/bin/env bash\nexit 1\n").unwrap(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); } let ctx = test_ctx(tmp.path()); let result = tool_run_lint(&json!({}), &ctx).await.unwrap(); let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); assert_eq!(parsed["passed"], false); assert_eq!(parsed["exit_code"], 1); } // ── truncate_output ─────────────────────────────────────────────── #[test] fn truncate_output_short_text_unchanged() { let text = "line1\nline2\nline3"; assert_eq!(truncate_output(text, 10), text); } #[test] fn truncate_output_long_text_keeps_tail() { let lines: Vec = (1..=200).map(|i| format!("line {i}")).collect(); let text = lines.join("\n"); let result = truncate_output(&text, 50); assert!( result.contains("line 200"), "should keep last line: {result}" ); assert!( result.contains("omitted"), "should note omitted lines: {result}" ); assert!( !result.contains("line 1\n"), "should not keep first line: {result}" ); } // ── parse_test_counts ───────────────────────────────────────────── #[test] fn parse_test_counts_extracts_passed_and_failed() { let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;"; let (passed, failed) = parse_test_counts(output); assert_eq!(passed, 7); assert_eq!(failed, 3); } #[test] fn parse_test_counts_no_results_returns_zeros() { let (passed, failed) = parse_test_counts("no test output here"); assert_eq!(passed, 0); assert_eq!(failed, 0); } #[test] fn extract_count_finds_number_before_label() { assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5)); assert_eq!(extract_count("0 failed", "failed"), Some(0)); assert_eq!(extract_count("no number here passed", "passed"), None); } }