diff --git a/server/src/http/mcp/shell_tools.rs b/server/src/http/mcp/shell_tools.rs index d5d75e25..16164bad 100644 --- a/server/src/http/mcp/shell_tools.rs +++ b/server/src/http/mcp/shell_tools.rs @@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option { num_str.parse().ok() } -/// Start the project's test suite (`script/test`) as a background process. +/// Run the project's test suite (`script/test`) and block until complete. /// -/// Returns immediately with `{"status": "started"}`. The agent should poll -/// `get_test_result` with the same `worktree_path` to retrieve results once -/// the tests complete. +/// Spawns the test process, then polls every second server-side until the +/// child exits or the timeout is reached. Returns the full test result in +/// a single MCP call — no polling needed from the agent. /// -/// If a test job is already running for the same worktree, returns -/// `{"status": "already_running"}`. If a previous job completed and results -/// haven't been consumed yet, they are returned inline and the job is cleared. +/// The child process is properly killed on timeout (no zombies). pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result { let project_root = ctx.agents.get_project_root(&ctx.state)?; @@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result { - // Child finished — collect results now. - let result = collect_child_result(child, status); - job.child = None; - job.result = Some(result.clone()); - // Return the completed result inline. - let resp = format_test_result(&result); - jobs.remove(&working_dir); - return resp; - } - Ok(None) => { - // Still running. - let elapsed = job.started_at.elapsed().as_secs(); - return serde_json::to_string_pretty(&json!({ - "status": "running", - "elapsed_secs": elapsed, - })) - .map_err(|e| format!("Serialization error: {e}")); - } - Err(e) => { - jobs.remove(&working_dir); - return Err(format!("Failed to check child status: {e}")); - } - } - } - // Job exists with result but no child — return cached result. - if let Some(result) = job.result.clone() { - jobs.remove(&working_dir); - return format_test_result(&result); + if let Some(mut old_job) = jobs.remove(&working_dir) { + if let Some(ref mut child) = old_job.child { + let _ = child.kill(); + let _ = child.wait(); } } } @@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result Result j, + None => return Err("Test job disappeared unexpectedly".to_string()), + }; + + if let Some(child) = job.child.as_mut() { + match child.try_wait() { + Ok(Some(status)) => { + // Done — collect results. + let result = collect_child_result(child, status); + crate::slog!( + "[run_tests] Test job for {} finished (pid {}, passed={})", + working_dir.display(), + pid, + result.passed + ); + jobs.remove(&working_dir); + return format_test_result(&result); + } + Ok(None) => { + // Still running — check timeout. + if start.elapsed().as_secs() > TEST_TIMEOUT_SECS { + let _ = child.kill(); + let _ = child.wait(); + crate::slog!( + "[run_tests] Killed test job for {} (pid {}) after {}s timeout", + working_dir.display(), + pid, + TEST_TIMEOUT_SECS + ); + jobs.remove(&working_dir); + return serde_json::to_string_pretty(&json!({ + "passed": false, + "exit_code": -1, + "timed_out": true, + "tests_passed": 0, + "tests_failed": 0, + "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS), + })) + .map_err(|e| format!("Serialization error: {e}")); + } + } + Err(e) => { + jobs.remove(&working_dir); + return Err(format!("Failed to check child status: {e}")); + } + } + } + } } /// How long `get_test_result` blocks server-side before returning "running".