fix: make run_tests block server-side instead of requiring agent polling

run_tests now spawns the child and blocks in a 1-second poll loop until tests complete or the 20-minute timeout fires. It returns the full result in a single MCP call, so agents use 1 turn instead of 50+. The child process is killed and reaped on timeout (no zombies).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 23:07:02 +00:00
parent bac07d28a7
commit 8ae6ca3eb8
1 changed file with 69 additions and 47 deletions
@@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
    num_str.parse().ok()
 }
-/// Start the project's test suite (`script/test`) as a background process.
+/// Run the project's test suite (`script/test`) and block until complete.
 ///
-/// Returns immediately with `{"status": "started"}`. The agent should poll
+/// Spawns the test process, then polls every second server-side until the
-/// `get_test_result` with the same `worktree_path` to retrieve results once
+/// child exits or the timeout is reached. Returns the full test result in
-/// the tests complete.
+/// a single MCP call — no polling needed from the agent.
 ///
-/// If a test job is already running for the same worktree, returns
+/// The child process is properly killed on timeout (no zombies).
 /// `{"status": "already_running"}`. If a previous job completed and results
 /// haven't been consumed yet, they are returned inline and the job is cleared.
 pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.agents.get_project_root(&ctx.state)?;
@@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
        ));
    }
-    // Check for an existing job on this worktree.
+    // Kill any existing test job for this worktree.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
-        if let Some(job) = jobs.get_mut(&working_dir) {
+        if let Some(mut old_job) = jobs.remove(&working_dir) {
-            // Check if the child has finished.
+            if let Some(ref mut child) = old_job.child {
-            if let Some(child) = job.child.as_mut() {
+                let _ = child.kill();
-                match child.try_wait() {
+                let _ = child.wait();
                    Ok(Some(status)) => {
                        // Child finished — collect results now.
                        let result = collect_child_result(child, status);
                        job.child = None;
                        job.result = Some(result.clone());
                        // Return the completed result inline.
                        let resp = format_test_result(&result);
                        jobs.remove(&working_dir);
                        return resp;
                    }
                    Ok(None) => {
                        // Still running.
                        let elapsed = job.started_at.elapsed().as_secs();
                        return serde_json::to_string_pretty(&json!({
                            "status": "running",
                            "elapsed_secs": elapsed,
                        }))
                        .map_err(|e| format!("Serialization error: {e}"));
                    }
                    Err(e) => {
                        jobs.remove(&working_dir);
                        return Err(format!("Failed to check child status: {e}"));
                    }
                }
            }
            // Job exists with result but no child — return cached result.
            if let Some(result) = job.result.clone() {
                jobs.remove(&working_dir);
                return format_test_result(&result);
            }
        }
    }
@@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
        .spawn()
        .map_err(|e| format!("Failed to spawn test script: {e}"))?;
    let pid = child.id();
    crate::slog!(
        "[run_tests] Started test job for {} (pid {})",
        working_dir.display(),
-        child.id()
+        pid
    );
    // Store the child so it can be cleaned up if the server restarts.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        jobs.insert(
-            working_dir,
+            working_dir.clone(),
            crate::http::context::TestJob {
                child: Some(child),
                result: None,
@@ -465,10 +436,61 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
        );
    }
-    serde_json::to_string_pretty(&json!({
+    // Block server-side, checking every second until done or timeout.
-        "status": "started",
+    let start = std::time::Instant::now();
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        let job = match jobs.get_mut(&working_dir) {
            Some(j) => j,
            None => return Err("Test job disappeared unexpectedly".to_string()),
        };
        if let Some(child) = job.child.as_mut() {
            match child.try_wait() {
                Ok(Some(status)) => {
                    // Done — collect results.
                    let result = collect_child_result(child, status);
                    crate::slog!(
                        "[run_tests] Test job for {} finished (pid {}, passed={})",
                        working_dir.display(),
                        pid,
                        result.passed
                    );
                    jobs.remove(&working_dir);
                    return format_test_result(&result);
                }
                Ok(None) => {
                    // Still running — check timeout.
                    if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
                        let _ = child.kill();
                        let _ = child.wait();
                        crate::slog!(
                            "[run_tests] Killed test job for {} (pid {}) after {}s timeout",
                            working_dir.display(),
                            pid,
                            TEST_TIMEOUT_SECS
                        );
                        jobs.remove(&working_dir);
                        return serde_json::to_string_pretty(&json!({
                            "passed": false,
                            "exit_code": -1,
                            "timed_out": true,
                            "tests_passed": 0,
                            "tests_failed": 0,
                            "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
                        }))
-    .map_err(|e| format!("Serialization error: {e}"))
+                        .map_err(|e| format!("Serialization error: {e}"));
                    }
                }
                Err(e) => {
                    jobs.remove(&working_dir);
                    return Err(format!("Failed to check child status: {e}"));
                }
            }
        }
    }
 }
 /// How long `get_test_result` blocks server-side before returning "running".