fix: make run_tests block server-side instead of requiring agent polling

run_tests now spawns the child and blocks server-side, polling once per
second until the tests complete or the 20-minute timeout fires. It returns
the full result in a single MCP call, so agents use 1 turn instead of 50+.
Any test job already running for the same worktree is killed before the new
one starts, and the child process is killed and reaped on timeout (no
zombies).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
dave
2026-04-11 23:07:02 +00:00
parent bac07d28a7
commit 8ae6ca3eb8
+68 -46
View File
@@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
num_str.parse().ok() num_str.parse().ok()
} }
/// Start the project's test suite (`script/test`) as a background process. /// Run the project's test suite (`script/test`) and block until complete.
/// ///
/// Returns immediately with `{"status": "started"}`. The agent should poll /// Spawns the test process, then polls every second server-side until the
/// `get_test_result` with the same `worktree_path` to retrieve results once /// child exits or the timeout is reached. Returns the full test result in
/// the tests complete. /// a single MCP call — no polling needed from the agent.
/// ///
/// If a test job is already running for the same worktree, returns /// The child process is properly killed on timeout (no zombies).
/// `{"status": "already_running"}`. If a previous job completed and results
/// haven't been consumed yet, they are returned inline and the job is cleared.
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> { pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
let project_root = ctx.agents.get_project_root(&ctx.state)?; let project_root = ctx.agents.get_project_root(&ctx.state)?;
@@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
)); ));
} }
// Check for an existing job on this worktree. // Kill any existing test job for this worktree.
{ {
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
if let Some(job) = jobs.get_mut(&working_dir) { if let Some(mut old_job) = jobs.remove(&working_dir) {
// Check if the child has finished. if let Some(ref mut child) = old_job.child {
if let Some(child) = job.child.as_mut() { let _ = child.kill();
match child.try_wait() { let _ = child.wait();
Ok(Some(status)) => {
// Child finished — collect results now.
let result = collect_child_result(child, status);
job.child = None;
job.result = Some(result.clone());
// Return the completed result inline.
let resp = format_test_result(&result);
jobs.remove(&working_dir);
return resp;
}
Ok(None) => {
// Still running.
let elapsed = job.started_at.elapsed().as_secs();
return serde_json::to_string_pretty(&json!({
"status": "running",
"elapsed_secs": elapsed,
}))
.map_err(|e| format!("Serialization error: {e}"));
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
// Job exists with result but no child — return cached result.
if let Some(result) = job.result.clone() {
jobs.remove(&working_dir);
return format_test_result(&result);
} }
} }
} }
@@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
.spawn() .spawn()
.map_err(|e| format!("Failed to spawn test script: {e}"))?; .map_err(|e| format!("Failed to spawn test script: {e}"))?;
let pid = child.id();
crate::slog!( crate::slog!(
"[run_tests] Started test job for {} (pid {})", "[run_tests] Started test job for {} (pid {})",
working_dir.display(), working_dir.display(),
child.id() pid
); );
// Store the child so it can be cleaned up if the server restarts.
{ {
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?; let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
jobs.insert( jobs.insert(
working_dir, working_dir.clone(),
crate::http::context::TestJob { crate::http::context::TestJob {
child: Some(child), child: Some(child),
result: None, result: None,
@@ -465,10 +436,61 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
); );
} }
serde_json::to_string_pretty(&json!({ // Block server-side, checking every second until done or timeout.
"status": "started", let start = std::time::Instant::now();
loop {
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
let job = match jobs.get_mut(&working_dir) {
Some(j) => j,
None => return Err("Test job disappeared unexpectedly".to_string()),
};
if let Some(child) = job.child.as_mut() {
match child.try_wait() {
Ok(Some(status)) => {
// Done — collect results.
let result = collect_child_result(child, status);
crate::slog!(
"[run_tests] Test job for {} finished (pid {}, passed={})",
working_dir.display(),
pid,
result.passed
);
jobs.remove(&working_dir);
return format_test_result(&result);
}
Ok(None) => {
// Still running — check timeout.
if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
let _ = child.kill();
let _ = child.wait();
crate::slog!(
"[run_tests] Killed test job for {} (pid {}) after {}s timeout",
working_dir.display(),
pid,
TEST_TIMEOUT_SECS
);
jobs.remove(&working_dir);
return serde_json::to_string_pretty(&json!({
"passed": false,
"exit_code": -1,
"timed_out": true,
"tests_passed": 0,
"tests_failed": 0,
"output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
})) }))
.map_err(|e| format!("Serialization error: {e}")) .map_err(|e| format!("Serialization error: {e}"));
}
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
}
} }
/// How long `get_test_result` blocks server-side before returning "running". /// How long `get_test_result` blocks server-side before returning "running".