fix: async run_tests to prevent zombie cargo processes blocking gates

run_tests MCP tool now spawns tests in the background and returns
immediately. Agents poll get_test_result to check completion. This
prevents zombie cargo processes from holding the build lock when the
CLI times out the MCP call before tests finish.

Also fixes agent permission mode: acceptEdits replaces invalid
allowFullAutoEdit that was causing agents to crash-loop on spawn.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
dave
2026-04-11 22:00:05 +00:00
parent 8393a67c89
commit f958f57e56
6 changed files with 262 additions and 63 deletions
+196 -46
View File
@@ -371,10 +371,15 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
num_str.parse().ok()
}
/// Run the project's `script/test` and return a structured result.
/// Start the project's test suite (`script/test`) as a background process.
///
/// If `worktree_path` is provided the script is run from that worktree
/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used.
/// Returns immediately with `{"status": "started"}`. The agent should poll
/// `get_test_result` with the same `worktree_path` to retrieve results once
/// the tests complete.
///
/// If a test job is already running for the same worktree, returns
/// `{"status": "running", "elapsed_secs": N}`. If a previous job completed and results
/// haven't been consumed yet, they are returned inline and the job is cleared.
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
let project_root = ctx.agents.get_project_root(&ctx.state)?;
@@ -393,52 +398,197 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
));
}
let result = tokio::time::timeout(
std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
tokio::task::spawn_blocking({
let dir = working_dir.clone();
let script = script_path.clone();
move || {
std::process::Command::new("bash")
.arg(&script)
.current_dir(&dir)
.output()
// Check for an existing job on this worktree.
{
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
if let Some(job) = jobs.get_mut(&working_dir) {
// Check if the child has finished.
if let Some(child) = job.child.as_mut() {
match child.try_wait() {
Ok(Some(status)) => {
// Child finished — collect results now.
let result = collect_child_result(child, status);
job.child = None;
job.result = Some(result.clone());
// Return the completed result inline.
let resp = format_test_result(&result);
jobs.remove(&working_dir);
return resp;
}
Ok(None) => {
// Still running.
let elapsed = job.started_at.elapsed().as_secs();
return serde_json::to_string_pretty(&json!({
"status": "running",
"elapsed_secs": elapsed,
}))
.map_err(|e| format!("Serialization error: {e}"));
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
// Job exists with result but no child — return cached result.
if let Some(result) = job.result.clone() {
jobs.remove(&working_dir);
return format_test_result(&result);
}
}),
)
.await;
match result {
Err(_) => serde_json::to_string_pretty(&json!({
"passed": false,
"exit_code": -1,
"timed_out": true,
"tests_passed": 0,
"tests_failed": 0,
"output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
}))
.map_err(|e| format!("Serialization error: {e}")),
Ok(Err(e)) => Err(format!("Task join error: {e}")),
Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")),
Ok(Ok(Ok(output))) => {
let passed = output.status.success();
let exit_code = output.status.code().unwrap_or(-1);
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
let combined = format!("{stdout}{stderr}");
let (tests_passed, tests_failed) = parse_test_counts(&combined);
let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
serde_json::to_string_pretty(&json!({
"passed": passed,
"exit_code": exit_code,
"timed_out": false,
"tests_passed": tests_passed,
"tests_failed": tests_failed,
"output": truncated,
}))
.map_err(|e| format!("Serialization error: {e}"))
}
}
// Spawn the test process.
let child = std::process::Command::new("bash")
.arg(&script_path)
.current_dir(&working_dir)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| format!("Failed to spawn test script: {e}"))?;
crate::slog!(
"[run_tests] Started test job for {} (pid {})",
working_dir.display(),
child.id()
);
{
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
jobs.insert(
working_dir,
crate::http::context::TestJob {
child: Some(child),
result: None,
started_at: std::time::Instant::now(),
},
);
}
serde_json::to_string_pretty(&json!({
"status": "started",
}))
.map_err(|e| format!("Serialization error: {e}"))
}
/// Check on a running test job and return results if complete.
///
/// Returns `{"status": "running", "elapsed_secs": N}` if still in progress,
/// or the full test result if finished. If no job exists for the worktree,
/// returns an error.
pub(super) async fn tool_get_test_result(
args: &Value,
ctx: &AppContext,
) -> Result<String, String> {
let project_root = ctx.agents.get_project_root(&ctx.state)?;
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
Some(wt) => validate_working_dir(wt, ctx)?,
None => project_root
.canonicalize()
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
};
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
let job = jobs.get_mut(&working_dir).ok_or_else(|| {
"No test job running for this worktree. Call run_tests first.".to_string()
})?;
// Check if child has finished.
if let Some(child) = job.child.as_mut() {
match child.try_wait() {
Ok(Some(status)) => {
let result = collect_child_result(child, status);
job.child = None;
job.result = Some(result.clone());
let resp = format_test_result(&result);
jobs.remove(&working_dir);
return resp;
}
Ok(None) => {
let elapsed = job.started_at.elapsed().as_secs();
// If exceeded our max timeout, kill it.
if elapsed > TEST_TIMEOUT_SECS {
let _ = child.kill();
let _ = child.wait();
crate::slog!(
"[run_tests] Killed test job for {} after {elapsed}s timeout",
working_dir.display()
);
jobs.remove(&working_dir);
return serde_json::to_string_pretty(&json!({
"passed": false,
"exit_code": -1,
"timed_out": true,
"tests_passed": 0,
"tests_failed": 0,
"output": format!("Test suite timed out after {elapsed}s"),
}))
.map_err(|e| format!("Serialization error: {e}"));
}
return serde_json::to_string_pretty(&json!({
"status": "running",
"elapsed_secs": elapsed,
}))
.map_err(|e| format!("Serialization error: {e}"));
}
Err(e) => {
jobs.remove(&working_dir);
return Err(format!("Failed to check child status: {e}"));
}
}
}
// Job exists with cached result.
if let Some(result) = job.result.clone() {
jobs.remove(&working_dir);
return format_test_result(&result);
}
Err("Test job in unexpected state".to_string())
}
/// Collect stdout/stderr from a finished child and build a `TestJobResult`.
///
/// Both pipes are drained to EOF and decoded lossily, so invalid UTF-8 bytes
/// become U+FFFD instead of causing the whole stream to be discarded. The
/// result's `output` is stdout followed by stderr, truncated to
/// `MAX_OUTPUT_LINES`; the pass/fail counts are parsed from the untruncated
/// text. `exit_code` is `-1` when the exit status carries no code.
///
/// Read errors are ignored on purpose (best effort): whatever bytes were
/// read before the error are kept.
///
/// NOTE(review): the pipes are only read here, and the callers visible in
/// this file call this only after `try_wait` reports that the child exited.
/// A child that writes more than the OS pipe buffer holds would block on
/// write before exiting — consider draining the pipes while it runs.
fn collect_child_result(
    child: &mut std::process::Child,
    status: std::process::ExitStatus,
) -> crate::http::context::TestJobResult {
    /// Drain an optional pipe to EOF and decode it as lossy UTF-8.
    fn drain_lossy(pipe: Option<&mut impl std::io::Read>) -> String {
        let mut raw = Vec::new();
        if let Some(reader) = pipe {
            // `read_to_end` keeps already-read bytes when it fails part-way.
            let _ = reader.read_to_end(&mut raw);
        }
        String::from_utf8_lossy(&raw).into_owned()
    }

    let stdout = drain_lossy(child.stdout.as_mut());
    let stderr = drain_lossy(child.stderr.as_mut());
    let combined = format!("{stdout}{stderr}");
    let (tests_passed, tests_failed) = parse_test_counts(&combined);

    crate::http::context::TestJobResult {
        passed: status.success(),
        exit_code: status.code().unwrap_or(-1),
        tests_passed,
        tests_failed,
        output: truncate_output(&combined, MAX_OUTPUT_LINES),
    }
}
/// Render a `TestJobResult` as the pretty-printed JSON document handed back
/// to the agent.
///
/// `timed_out` is always `false` in the output of this function.
///
/// # Errors
///
/// Returns `"Serialization error: …"` if the payload cannot be serialized.
fn format_test_result(
    result: &crate::http::context::TestJobResult,
) -> Result<String, String> {
    let payload = json!({
        "passed": result.passed,
        "exit_code": result.exit_code,
        "timed_out": false,
        "tests_passed": result.tests_passed,
        "tests_failed": result.tests_failed,
        "output": result.output,
    });

    match serde_json::to_string_pretty(&payload) {
        Ok(rendered) => Ok(rendered),
        Err(e) => Err(format!("Serialization error: {e}")),
    }
}
#[cfg(test)]