fix: make run_tests block server-side instead of requiring agent polling
run_tests now spawns the child and blocks in a 1-second poll loop until the tests complete or the 20-minute timeout fires. It returns the full result in a single MCP call, so agents use 1 turn instead of 50+. The child process is properly killed on timeout (no zombies).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
|
||||
num_str.parse().ok()
|
||||
}
|
||||
|
||||
/// Start the project's test suite (`script/test`) as a background process.
|
||||
/// Run the project's test suite (`script/test`) and block until complete.
|
||||
///
|
||||
/// Returns immediately with `{"status": "started"}`. The agent should poll
|
||||
/// `get_test_result` with the same `worktree_path` to retrieve results once
|
||||
/// the tests complete.
|
||||
/// Spawns the test process, then polls every second server-side until the
|
||||
/// child exits or the timeout is reached. Returns the full test result in
|
||||
/// a single MCP call — no polling needed from the agent.
|
||||
///
|
||||
/// If a test job is already running for the same worktree, returns
|
||||
/// `{"status": "already_running"}`. If a previous job completed and results
|
||||
/// haven't been consumed yet, they are returned inline and the job is cleared.
|
||||
/// The child process is properly killed on timeout (no zombies).
|
||||
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
|
||||
@@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
));
|
||||
}
|
||||
|
||||
// Check for an existing job on this worktree.
|
||||
// Kill any existing test job for this worktree.
|
||||
{
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
if let Some(job) = jobs.get_mut(&working_dir) {
|
||||
// Check if the child has finished.
|
||||
if let Some(child) = job.child.as_mut() {
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => {
|
||||
// Child finished — collect results now.
|
||||
let result = collect_child_result(child, status);
|
||||
job.child = None;
|
||||
job.result = Some(result.clone());
|
||||
// Return the completed result inline.
|
||||
let resp = format_test_result(&result);
|
||||
jobs.remove(&working_dir);
|
||||
return resp;
|
||||
}
|
||||
Ok(None) => {
|
||||
// Still running.
|
||||
let elapsed = job.started_at.elapsed().as_secs();
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"status": "running",
|
||||
"elapsed_secs": elapsed,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"));
|
||||
}
|
||||
Err(e) => {
|
||||
jobs.remove(&working_dir);
|
||||
return Err(format!("Failed to check child status: {e}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Job exists with result but no child — return cached result.
|
||||
if let Some(result) = job.result.clone() {
|
||||
jobs.remove(&working_dir);
|
||||
return format_test_result(&result);
|
||||
if let Some(mut old_job) = jobs.remove(&working_dir) {
|
||||
if let Some(ref mut child) = old_job.child {
|
||||
let _ = child.kill();
|
||||
let _ = child.wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
.spawn()
|
||||
.map_err(|e| format!("Failed to spawn test script: {e}"))?;
|
||||
|
||||
let pid = child.id();
|
||||
crate::slog!(
|
||||
"[run_tests] Started test job for {} (pid {})",
|
||||
working_dir.display(),
|
||||
child.id()
|
||||
pid
|
||||
);
|
||||
|
||||
// Store the child so it can be cleaned up if the server restarts.
|
||||
{
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
jobs.insert(
|
||||
working_dir,
|
||||
working_dir.clone(),
|
||||
crate::http::context::TestJob {
|
||||
child: Some(child),
|
||||
result: None,
|
||||
@@ -465,10 +436,61 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
);
|
||||
}
|
||||
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"status": "started",
|
||||
// Block server-side, checking every second until done or timeout.
|
||||
let start = std::time::Instant::now();
|
||||
loop {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
let job = match jobs.get_mut(&working_dir) {
|
||||
Some(j) => j,
|
||||
None => return Err("Test job disappeared unexpectedly".to_string()),
|
||||
};
|
||||
|
||||
if let Some(child) = job.child.as_mut() {
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => {
|
||||
// Done — collect results.
|
||||
let result = collect_child_result(child, status);
|
||||
crate::slog!(
|
||||
"[run_tests] Test job for {} finished (pid {}, passed={})",
|
||||
working_dir.display(),
|
||||
pid,
|
||||
result.passed
|
||||
);
|
||||
jobs.remove(&working_dir);
|
||||
return format_test_result(&result);
|
||||
}
|
||||
Ok(None) => {
|
||||
// Still running — check timeout.
|
||||
if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
|
||||
let _ = child.kill();
|
||||
let _ = child.wait();
|
||||
crate::slog!(
|
||||
"[run_tests] Killed test job for {} (pid {}) after {}s timeout",
|
||||
working_dir.display(),
|
||||
pid,
|
||||
TEST_TIMEOUT_SECS
|
||||
);
|
||||
jobs.remove(&working_dir);
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"passed": false,
|
||||
"exit_code": -1,
|
||||
"timed_out": true,
|
||||
"tests_passed": 0,
|
||||
"tests_failed": 0,
|
||||
"output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
.map_err(|e| format!("Serialization error: {e}"));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
jobs.remove(&working_dir);
|
||||
return Err(format!("Failed to check child status: {e}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// How long `get_test_result` blocks server-side before returning "running".
|
||||
|
||||
Reference in New Issue
Block a user