fix: make run_tests block server-side instead of requiring agent polling

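run_tests now spawns the child and blocks in a 1-second poll loop until
the tests complete or the 20-minute timeout fires. It returns the full
result in a single MCP call, so agents use 1 turn instead of 50+. Any
test job already running for the same worktree is killed before the new
one starts, instead of being reported as still running. On timeout the
child process is killed and then waited on, so no zombie is left behind.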

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
dave
2026-04-11 23:07:02 +00:00
parent bac07d28a7
commit 8ae6ca3eb8
+68 -46
View File
@@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
num_str.parse().ok()
}
/// Start the project's test suite (`script/test`) as a background process.
/// Run the project's test suite (`script/test`) and block until complete.
///
/// Returns immediately with `{"status": "started"}`. The agent should poll
/// `get_test_result` with the same `worktree_path` to retrieve results once
/// the tests complete.
/// Spawns the test process, then polls every second server-side until the
/// child exits or the timeout is reached. Returns the full test result in
/// a single MCP call — no polling needed from the agent.
///
/// If a test job is already running for the same worktree, returns
/// `{"status": "already_running"}`. If a previous job completed and results
/// haven't been consumed yet, they are returned inline and the job is cleared.
/// The child process is properly killed on timeout (no zombies).
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
let project_root = ctx.agents.get_project_root(&ctx.state)?;
@@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
));
}
// Check for an existing job on this worktree.
// Kill any existing test job for this worktree.
{
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
if let Some(job) = jobs.get_mut(&working_dir) {
// Check if the child has finished.
if let Some(child) = job.child.as_mut() {
match child.try_wait() {
Ok(Some(status)) => {
// Child finished — collect results now.
let result = collect_child_result(child, status);
job.child = None;
job.result = Some(result.clone());
// Return the completed result inline.
let resp = format_test_result(&result);
jobs.remove(&working_dir);
return resp;
}
Ok(None) => {
// Still running.
let elapsed = job.started_at.elapsed().as_secs();
return serde_json::to_string_pretty(&json!({
"status": "running",
"elapsed_secs": elapsed,
}))
.map_err(|e| format!("Serialization error: {e}"));
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
// Job exists with result but no child — return cached result.
if let Some(result) = job.result.clone() {
jobs.remove(&working_dir);
return format_test_result(&result);
if let Some(mut old_job) = jobs.remove(&working_dir) {
if let Some(ref mut child) = old_job.child {
let _ = child.kill();
let _ = child.wait();
}
}
}
@@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
.spawn()
.map_err(|e| format!("Failed to spawn test script: {e}"))?;
let pid = child.id();
crate::slog!(
"[run_tests] Started test job for {} (pid {})",
working_dir.display(),
child.id()
pid
);
// Store the child so it can be cleaned up if the server restarts.
{
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
jobs.insert(
working_dir,
working_dir.clone(),
crate::http::context::TestJob {
child: Some(child),
result: None,
@@ -465,10 +436,61 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
);
}
serde_json::to_string_pretty(&json!({
"status": "started",
// Block server-side, checking every second until done or timeout.
let start = std::time::Instant::now();
loop {
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
let job = match jobs.get_mut(&working_dir) {
Some(j) => j,
None => return Err("Test job disappeared unexpectedly".to_string()),
};
if let Some(child) = job.child.as_mut() {
match child.try_wait() {
Ok(Some(status)) => {
// Done — collect results.
let result = collect_child_result(child, status);
crate::slog!(
"[run_tests] Test job for {} finished (pid {}, passed={})",
working_dir.display(),
pid,
result.passed
);
jobs.remove(&working_dir);
return format_test_result(&result);
}
Ok(None) => {
// Still running — check timeout.
if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
let _ = child.kill();
let _ = child.wait();
crate::slog!(
"[run_tests] Killed test job for {} (pid {}) after {}s timeout",
working_dir.display(),
pid,
TEST_TIMEOUT_SECS
);
jobs.remove(&working_dir);
return serde_json::to_string_pretty(&json!({
"passed": false,
"exit_code": -1,
"timed_out": true,
"tests_passed": 0,
"tests_failed": 0,
"output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
}))
.map_err(|e| format!("Serialization error: {e}"))
.map_err(|e| format!("Serialization error: {e}"));
}
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
}
}
/// How long `get_test_result` blocks server-side before returning "running".