diff --git a/server/src/http/mcp/shell_tools.rs b/server/src/http/mcp/shell_tools.rs
index d5d75e25..16164bad 100644
--- a/server/src/http/mcp/shell_tools.rs
+++ b/server/src/http/mcp/shell_tools.rs
@@ -371,15 +371,13 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
     num_str.parse().ok()
 }
 
-/// Start the project's test suite (`script/test`) as a background process.
+/// Run the project's test suite (`script/test`) and block until complete.
 ///
-/// Returns immediately with `{"status": "started"}`. The agent should poll
-/// `get_test_result` with the same `worktree_path` to retrieve results once
-/// the tests complete.
+/// Spawns the test process, then polls every second server-side until the
+/// child exits or the timeout is reached. Returns the full test result in
+/// a single MCP call — no polling needed from the agent.
 ///
-/// If a test job is already running for the same worktree, returns
-/// `{"status": "already_running"}`. If a previous job completed and results
-/// haven't been consumed yet, they are returned inline and the job is cleared.
+/// The child process is properly killed on timeout (no zombies).
 pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
     let project_root = ctx.agents.get_project_root(&ctx.state)?;
 
@@ -398,42 +396,13 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
         ));
     }
 
-    // Check for an existing job on this worktree.
+    // Kill any existing test job for this worktree.
     {
         let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
-        if let Some(job) = jobs.get_mut(&working_dir) {
-            // Check if the child has finished.
-            if let Some(child) = job.child.as_mut() {
-                match child.try_wait() {
-                    Ok(Some(status)) => {
-                        // Child finished — collect results now.
-                        let result = collect_child_result(child, status);
-                        job.child = None;
-                        job.result = Some(result.clone());
-                        // Return the completed result inline.
-                        let resp = format_test_result(&result);
-                        jobs.remove(&working_dir);
-                        return resp;
-                    }
-                    Ok(None) => {
-                        // Still running.
-                        let elapsed = job.started_at.elapsed().as_secs();
-                        return serde_json::to_string_pretty(&json!({
-                            "status": "running",
-                            "elapsed_secs": elapsed,
-                        }))
-                        .map_err(|e| format!("Serialization error: {e}"));
-                    }
-                    Err(e) => {
-                        jobs.remove(&working_dir);
-                        return Err(format!("Failed to check child status: {e}"));
-                    }
-                }
-            }
-            // Job exists with result but no child — return cached result.
-            if let Some(result) = job.result.clone() {
-                jobs.remove(&working_dir);
-                return format_test_result(&result);
+        if let Some(mut old_job) = jobs.remove(&working_dir) {
+            if let Some(ref mut child) = old_job.child {
+                let _ = child.kill();
+                let _ = child.wait();
             }
         }
     }
@@ -447,16 +416,18 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
         .spawn()
         .map_err(|e| format!("Failed to spawn test script: {e}"))?;
 
+    let pid = child.id();
     crate::slog!(
         "[run_tests] Started test job for {} (pid {})",
         working_dir.display(),
-        child.id()
+        pid
     );
 
+    // Store the child so it can be cleaned up if the server restarts.
     {
         let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
         jobs.insert(
-            working_dir,
+            working_dir.clone(),
             crate::http::context::TestJob {
                 child: Some(child),
                 result: None,
@@ -465,10 +436,61 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
         );
     }
 
-    serde_json::to_string_pretty(&json!({
-        "status": "started",
-    }))
-    .map_err(|e| format!("Serialization error: {e}"))
+    // Block server-side, checking every second until done or timeout.
+    let start = std::time::Instant::now();
+    loop {
+        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+
+        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
+        let job = match jobs.get_mut(&working_dir) {
+            Some(j) => j,
+            None => return Err("Test job disappeared unexpectedly".to_string()),
+        };
+
+        if let Some(child) = job.child.as_mut() {
+            match child.try_wait() {
+                Ok(Some(status)) => {
+                    // Done — collect results.
+                    let result = collect_child_result(child, status);
+                    crate::slog!(
+                        "[run_tests] Test job for {} finished (pid {}, passed={})",
+                        working_dir.display(),
+                        pid,
+                        result.passed
+                    );
+                    jobs.remove(&working_dir);
+                    return format_test_result(&result);
+                }
+                Ok(None) => {
+                    // Still running — check timeout.
+                    if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
+                        let _ = child.kill();
+                        let _ = child.wait();
+                        crate::slog!(
+                            "[run_tests] Killed test job for {} (pid {}) after {}s timeout",
+                            working_dir.display(),
+                            pid,
+                            TEST_TIMEOUT_SECS
+                        );
+                        jobs.remove(&working_dir);
+                        return serde_json::to_string_pretty(&json!({
+                            "passed": false,
+                            "exit_code": -1,
+                            "timed_out": true,
+                            "tests_passed": 0,
+                            "tests_failed": 0,
+                            "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
+                        }))
+                        .map_err(|e| format!("Serialization error: {e}"));
+                    }
+                }
+                Err(e) => {
+                    jobs.remove(&working_dir);
+                    return Err(format!("Failed to check child status: {e}"));
+                }
+            }
+        }
+    }
 }
 
 /// How long `get_test_result` blocks server-side before returning "running".