fix: async run_tests to prevent zombie cargo processes blocking gates

run_tests MCP tool now spawns tests in the background and returns immediately. Agents poll get_test_result to check completion. This prevents zombie cargo processes from holding the build lock when the CLI times out the MCP call before tests finish. Also fixes agent permission mode: acceptEdits replaces invalid allowFullAutoEdit that was causing agents to crash-loop on spawn. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 22:00:05 +00:00
parent 8393a67c89
commit f958f57e56
6 changed files with 262 additions and 63 deletions
@@ -6,9 +6,36 @@ use crate::state::SessionState;
 use crate::store::JsonFileStore;
 use crate::workflow::WorkflowState;
 use poem::http::StatusCode;
+use std::collections::HashMap;
+use std::path::PathBuf;
 use std::sync::Arc;
 use tokio::sync::{broadcast, mpsc, oneshot};

+/// A running or completed test job spawned by the `run_tests` MCP tool.
+pub struct TestJob {
+    /// The child process handle. `None` once the process has exited and results
+    /// have been collected.
+    pub child: Option<std::process::Child>,
+    /// Populated once the child exits.
+    pub result: Option<TestJobResult>,
+    /// When the job was started.
+    pub started_at: std::time::Instant,
+}
+
+/// The result of a completed test job.
+#[derive(Clone)]
+pub struct TestJobResult {
+    pub passed: bool,
+    pub exit_code: i32,
+    pub tests_passed: u64,
+    pub tests_failed: u64,
+    pub output: String,
+}
+
+/// Shared registry of in-flight and recently completed test jobs, keyed by
+/// worktree path.
+pub type TestJobRegistry = Arc<std::sync::Mutex<HashMap<PathBuf, TestJob>>>;
+
 /// The user's decision when responding to a permission dialog.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum PermissionDecision {
@@ -75,6 +102,9 @@ pub struct AppContext {
    /// spawned by the bot so that cancellations take effect in-memory rather
    /// than only on disk.
    pub timer_store: Arc<TimerStore>,
+    /// Registry of running/completed test jobs spawned by the `run_tests` MCP
+    /// tool. Keyed by worktree path so each worktree has at most one active job.
+    pub test_jobs: TestJobRegistry,
 }

 #[cfg(test)]
@@ -102,6 +132,7 @@ impl AppContext {
            bot_shutdown: None,
            matrix_shutdown_tx: None,
            timer_store,
+            test_jobs: Arc::new(std::sync::Mutex::new(HashMap::new())),
        }
    }
 }
@@ -1134,7 +1134,7 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
                },
                {
                    "name": "run_tests",
-                    "description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.",
+                    "description": "Start the project's test suite (script/test) as a background job. Returns immediately with {\"status\": \"started\"}. Poll get_test_result with the same worktree_path to check for completion. If the previous run already finished, returns the result inline.",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
@@ -1146,6 +1146,20 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
                        "required": []
                    }
                },
+                {
+                    "name": "get_test_result",
+                    "description": "Check on a running test job started by run_tests. Returns {\"status\": \"running\", \"elapsed_secs\": N} if still in progress, or the full test result (passed, exit_code, test counts, output) if finished.",
+                    "inputSchema": {
+                        "type": "object",
+                        "properties": {
+                            "worktree_path": {
+                                "type": "string",
+                                "description": "Optional absolute path to the worktree. Must match the worktree_path used in run_tests."
+                            }
+                        },
+                        "required": []
+                    }
+                },
                {
                    "name": "git_status",
                    "description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.",
@@ -1402,6 +1416,7 @@ async fn handle_tools_call(
        // Shell command execution
        "run_command" => shell_tools::tool_run_command(&args, ctx).await,
        "run_tests" => shell_tools::tool_run_tests(&args, ctx).await,
+        "get_test_result" => shell_tools::tool_get_test_result(&args, ctx).await,
        // Git operations
        "git_status" => git_tools::tool_git_status(&args, ctx).await,
        "git_diff" => git_tools::tool_git_diff(&args, ctx).await,
@@ -1526,6 +1541,7 @@ mod tests {
        assert!(names.contains(&"delete_story"));
        assert!(names.contains(&"run_command"));
        assert!(names.contains(&"run_tests"));
+        assert!(names.contains(&"get_test_result"));
        assert!(names.contains(&"git_status"));
        assert!(names.contains(&"git_diff"));
        assert!(names.contains(&"git_add"));
@@ -371,10 +371,15 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
    num_str.parse().ok()
 }

-/// Run the project's `script/test` and return a structured result.
+/// Start the project's test suite (`script/test`) as a background process.
 ///
-/// If `worktree_path` is provided the script is run from that worktree
-/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used.
+/// Returns immediately with `{"status": "started"}`. The agent should poll
+/// `get_test_result` with the same `worktree_path` to retrieve results once
+/// the tests complete.
+///
+/// If a test job is already running for the same worktree, returns
+/// `{"status": "already_running"}`. If a previous job completed and results
+/// haven't been consumed yet, they are returned inline and the job is cleared.
 pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.agents.get_project_root(&ctx.state)?;

@@ -393,52 +398,197 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
        ));
    }

-    let result = tokio::time::timeout(
-        std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
-        tokio::task::spawn_blocking({
-            let dir = working_dir.clone();
-            let script = script_path.clone();
-            move || {
-                std::process::Command::new("bash")
-                    .arg(&script)
-                    .current_dir(&dir)
-                    .output()
+    // Check for an existing job on this worktree.
+    {
+        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
+        if let Some(job) = jobs.get_mut(&working_dir) {
+            // Check if the child has finished.
+            if let Some(child) = job.child.as_mut() {
+                match child.try_wait() {
+                    Ok(Some(status)) => {
+                        // Child finished — collect results now.
+                        let result = collect_child_result(child, status);
+                        job.child = None;
+                        job.result = Some(result.clone());
+                        // Return the completed result inline.
+                        let resp = format_test_result(&result);
+                        jobs.remove(&working_dir);
+                        return resp;
+                    }
+                    Ok(None) => {
+                        // Still running.
+                        let elapsed = job.started_at.elapsed().as_secs();
+                        return serde_json::to_string_pretty(&json!({
+                            "status": "running",
+                            "elapsed_secs": elapsed,
+                        }))
+                        .map_err(|e| format!("Serialization error: {e}"));
+                    }
+                    Err(e) => {
+                        jobs.remove(&working_dir);
+                        return Err(format!("Failed to check child status: {e}"));
+                    }
+                }
+            }
+            // Job exists with result but no child — return cached result.
+            if let Some(result) = job.result.clone() {
+                jobs.remove(&working_dir);
+                return format_test_result(&result);
            }
-        }),
-    )
-    .await;
-
-    match result {
-        Err(_) => serde_json::to_string_pretty(&json!({
-            "passed": false,
-            "exit_code": -1,
-            "timed_out": true,
-            "tests_passed": 0,
-            "tests_failed": 0,
-            "output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
-        }))
-        .map_err(|e| format!("Serialization error: {e}")),
-        Ok(Err(e)) => Err(format!("Task join error: {e}")),
-        Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")),
-        Ok(Ok(Ok(output))) => {
-            let passed = output.status.success();
-            let exit_code = output.status.code().unwrap_or(-1);
-            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
-            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
-            let combined = format!("{stdout}{stderr}");
-            let (tests_passed, tests_failed) = parse_test_counts(&combined);
-            let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
-            serde_json::to_string_pretty(&json!({
-                "passed": passed,
-                "exit_code": exit_code,
-                "timed_out": false,
-                "tests_passed": tests_passed,
-                "tests_failed": tests_failed,
-                "output": truncated,
-            }))
-            .map_err(|e| format!("Serialization error: {e}"))
        }
    }
+
+    // Spawn the test process.
+    let child = std::process::Command::new("bash")
+        .arg(&script_path)
+        .current_dir(&working_dir)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        .spawn()
+        .map_err(|e| format!("Failed to spawn test script: {e}"))?;
+
+    crate::slog!(
+        "[run_tests] Started test job for {} (pid {})",
+        working_dir.display(),
+        child.id()
+    );
+
+    {
+        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
+        jobs.insert(
+            working_dir,
+            crate::http::context::TestJob {
+                child: Some(child),
+                result: None,
+                started_at: std::time::Instant::now(),
+            },
+        );
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "status": "started",
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// Check on a running test job and return results if complete.
+///
+/// Returns `{"status": "running", "elapsed_secs": N}` if still in progress,
+/// or the full test result if finished. If no job exists for the worktree,
+/// returns an error.
+pub(super) async fn tool_get_test_result(
+    args: &Value,
+    ctx: &AppContext,
+) -> Result<String, String> {
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
+        Some(wt) => validate_working_dir(wt, ctx)?,
+        None => project_root
+            .canonicalize()
+            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
+    };
+
+    let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
+
+    let job = jobs.get_mut(&working_dir).ok_or_else(|| {
+        "No test job running for this worktree. Call run_tests first.".to_string()
+    })?;
+
+    // Check if child has finished.
+    if let Some(child) = job.child.as_mut() {
+        match child.try_wait() {
+            Ok(Some(status)) => {
+                let result = collect_child_result(child, status);
+                job.child = None;
+                job.result = Some(result.clone());
+                let resp = format_test_result(&result);
+                jobs.remove(&working_dir);
+                return resp;
+            }
+            Ok(None) => {
+                let elapsed = job.started_at.elapsed().as_secs();
+                // If exceeded our max timeout, kill it.
+                if elapsed > TEST_TIMEOUT_SECS {
+                    let _ = child.kill();
+                    let _ = child.wait();
+                    crate::slog!(
+                        "[run_tests] Killed test job for {} after {elapsed}s timeout",
+                        working_dir.display()
+                    );
+                    jobs.remove(&working_dir);
+                    return serde_json::to_string_pretty(&json!({
+                        "passed": false,
+                        "exit_code": -1,
+                        "timed_out": true,
+                        "tests_passed": 0,
+                        "tests_failed": 0,
+                        "output": format!("Test suite timed out after {elapsed}s"),
+                    }))
+                    .map_err(|e| format!("Serialization error: {e}"));
+                }
+                return serde_json::to_string_pretty(&json!({
+                    "status": "running",
+                    "elapsed_secs": elapsed,
+                }))
+                .map_err(|e| format!("Serialization error: {e}"));
+            }
+            Err(e) => {
+                jobs.remove(&working_dir);
+                return Err(format!("Failed to check child status: {e}"));
+            }
+        }
+    }
+
+    // Job exists with cached result.
+    if let Some(result) = job.result.clone() {
+        jobs.remove(&working_dir);
+        return format_test_result(&result);
+    }
+
+    Err("Test job in unexpected state".to_string())
+}
+
+/// Collect stdout/stderr from a finished child and build a `TestJobResult`.
+fn collect_child_result(
+    child: &mut std::process::Child,
+    status: std::process::ExitStatus,
+) -> crate::http::context::TestJobResult {
+    let mut stdout = String::new();
+    let mut stderr = String::new();
+    if let Some(ref mut out) = child.stdout {
+        use std::io::Read;
+        let _ = out.read_to_string(&mut stdout);
+    }
+    if let Some(ref mut err) = child.stderr {
+        use std::io::Read;
+        let _ = err.read_to_string(&mut stderr);
+    }
+    let combined = format!("{stdout}{stderr}");
+    let (tests_passed, tests_failed) = parse_test_counts(&combined);
+    let exit_code = status.code().unwrap_or(-1);
+    crate::http::context::TestJobResult {
+        passed: status.success(),
+        exit_code,
+        tests_passed,
+        tests_failed,
+        output: truncate_output(&combined, MAX_OUTPUT_LINES),
+    }
+}
+
+/// Format a `TestJobResult` as the JSON string returned to the agent.
+fn format_test_result(
+    result: &crate::http::context::TestJobResult,
+) -> Result<String, String> {
+    serde_json::to_string_pretty(&json!({
+        "passed": result.passed,
+        "exit_code": result.exit_code,
+        "timed_out": false,
+        "tests_passed": result.tests_passed,
+        "tests_failed": result.tests_failed,
+        "output": result.output,
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
 }

 #[cfg(test)]