fix: async run_tests to prevent zombie cargo processes blocking gates
run_tests MCP tool now spawns tests in the background and returns immediately. Agents poll get_test_result to check completion. This prevents zombie cargo processes from holding the build lock when the CLI times out the MCP call before tests finish. Also fixes agent permission mode: acceptEdits replaces invalid allowFullAutoEdit that was causing agents to crash-loop on spawn. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,9 +6,36 @@ use crate::state::SessionState;
|
||||
use crate::store::JsonFileStore;
|
||||
use crate::workflow::WorkflowState;
|
||||
use poem::http::StatusCode;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{broadcast, mpsc, oneshot};
|
||||
|
||||
/// A running or completed test job spawned by the `run_tests` MCP tool.
|
||||
pub struct TestJob {
|
||||
/// The child process handle. `None` once the process has exited and results
|
||||
/// have been collected.
|
||||
pub child: Option<std::process::Child>,
|
||||
/// Populated once the child exits.
|
||||
pub result: Option<TestJobResult>,
|
||||
/// When the job was started.
|
||||
pub started_at: std::time::Instant,
|
||||
}
|
||||
|
||||
/// The result of a completed test job.
|
||||
#[derive(Clone)]
|
||||
pub struct TestJobResult {
|
||||
pub passed: bool,
|
||||
pub exit_code: i32,
|
||||
pub tests_passed: u64,
|
||||
pub tests_failed: u64,
|
||||
pub output: String,
|
||||
}
|
||||
|
||||
/// Shared registry of in-flight and recently completed test jobs, keyed by
|
||||
/// worktree path.
|
||||
pub type TestJobRegistry = Arc<std::sync::Mutex<HashMap<PathBuf, TestJob>>>;
|
||||
|
||||
/// The user's decision when responding to a permission dialog.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PermissionDecision {
|
||||
@@ -75,6 +102,9 @@ pub struct AppContext {
|
||||
/// spawned by the bot so that cancellations take effect in-memory rather
|
||||
/// than only on disk.
|
||||
pub timer_store: Arc<TimerStore>,
|
||||
/// Registry of running/completed test jobs spawned by the `run_tests` MCP
|
||||
/// tool. Keyed by worktree path so each worktree has at most one active job.
|
||||
pub test_jobs: TestJobRegistry,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -102,6 +132,7 @@ impl AppContext {
|
||||
bot_shutdown: None,
|
||||
matrix_shutdown_tx: None,
|
||||
timer_store,
|
||||
test_jobs: Arc::new(std::sync::Mutex::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1134,7 +1134,7 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
|
||||
},
|
||||
{
|
||||
"name": "run_tests",
|
||||
"description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.",
|
||||
"description": "Start the project's test suite (script/test) as a background job. Returns immediately with {\"status\": \"started\"}. Poll get_test_result with the same worktree_path to check for completion. If the previous run already finished, returns the result inline.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1146,6 +1146,20 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_test_result",
|
||||
"description": "Check on a running test job started by run_tests. Returns {\"status\": \"running\", \"elapsed_secs\": N} if still in progress, or the full test result (passed, exit_code, test counts, output) if finished.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"worktree_path": {
|
||||
"type": "string",
|
||||
"description": "Optional absolute path to the worktree. Must match the worktree_path used in run_tests."
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "git_status",
|
||||
"description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.",
|
||||
@@ -1402,6 +1416,7 @@ async fn handle_tools_call(
|
||||
// Shell command execution
|
||||
"run_command" => shell_tools::tool_run_command(&args, ctx).await,
|
||||
"run_tests" => shell_tools::tool_run_tests(&args, ctx).await,
|
||||
"get_test_result" => shell_tools::tool_get_test_result(&args, ctx).await,
|
||||
// Git operations
|
||||
"git_status" => git_tools::tool_git_status(&args, ctx).await,
|
||||
"git_diff" => git_tools::tool_git_diff(&args, ctx).await,
|
||||
@@ -1526,6 +1541,7 @@ mod tests {
|
||||
assert!(names.contains(&"delete_story"));
|
||||
assert!(names.contains(&"run_command"));
|
||||
assert!(names.contains(&"run_tests"));
|
||||
assert!(names.contains(&"get_test_result"));
|
||||
assert!(names.contains(&"git_status"));
|
||||
assert!(names.contains(&"git_diff"));
|
||||
assert!(names.contains(&"git_add"));
|
||||
|
||||
@@ -371,10 +371,15 @@ fn extract_count(line: &str, label: &str) -> Option<u64> {
|
||||
num_str.parse().ok()
|
||||
}
|
||||
|
||||
/// Run the project's `script/test` and return a structured result.
|
||||
/// Start the project's test suite (`script/test`) as a background process.
|
||||
///
|
||||
/// If `worktree_path` is provided the script is run from that worktree
|
||||
/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used.
|
||||
/// Returns immediately with `{"status": "started"}`. The agent should poll
|
||||
/// `get_test_result` with the same `worktree_path` to retrieve results once
|
||||
/// the tests complete.
|
||||
///
|
||||
/// If a test job is already running for the same worktree, returns
|
||||
/// `{"status": "already_running"}`. If a previous job completed and results
|
||||
/// haven't been consumed yet, they are returned inline and the job is cleared.
|
||||
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
|
||||
@@ -393,52 +398,197 @@ pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
));
|
||||
}
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
|
||||
tokio::task::spawn_blocking({
|
||||
let dir = working_dir.clone();
|
||||
let script = script_path.clone();
|
||||
move || {
|
||||
std::process::Command::new("bash")
|
||||
.arg(&script)
|
||||
.current_dir(&dir)
|
||||
.output()
|
||||
// Check for an existing job on this worktree.
|
||||
{
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
if let Some(job) = jobs.get_mut(&working_dir) {
|
||||
// Check if the child has finished.
|
||||
if let Some(child) = job.child.as_mut() {
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => {
|
||||
// Child finished — collect results now.
|
||||
let result = collect_child_result(child, status);
|
||||
job.child = None;
|
||||
job.result = Some(result.clone());
|
||||
// Return the completed result inline.
|
||||
let resp = format_test_result(&result);
|
||||
jobs.remove(&working_dir);
|
||||
return resp;
|
||||
}
|
||||
Ok(None) => {
|
||||
// Still running.
|
||||
let elapsed = job.started_at.elapsed().as_secs();
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"status": "running",
|
||||
"elapsed_secs": elapsed,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"));
|
||||
}
|
||||
Err(e) => {
|
||||
jobs.remove(&working_dir);
|
||||
return Err(format!("Failed to check child status: {e}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Job exists with result but no child — return cached result.
|
||||
if let Some(result) = job.result.clone() {
|
||||
jobs.remove(&working_dir);
|
||||
return format_test_result(&result);
|
||||
}
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(_) => serde_json::to_string_pretty(&json!({
|
||||
"passed": false,
|
||||
"exit_code": -1,
|
||||
"timed_out": true,
|
||||
"tests_passed": 0,
|
||||
"tests_failed": 0,
|
||||
"output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}")),
|
||||
Ok(Err(e)) => Err(format!("Task join error: {e}")),
|
||||
Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")),
|
||||
Ok(Ok(Ok(output))) => {
|
||||
let passed = output.status.success();
|
||||
let exit_code = output.status.code().unwrap_or(-1);
|
||||
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
||||
let combined = format!("{stdout}{stderr}");
|
||||
let (tests_passed, tests_failed) = parse_test_counts(&combined);
|
||||
let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"passed": passed,
|
||||
"exit_code": exit_code,
|
||||
"timed_out": false,
|
||||
"tests_passed": tests_passed,
|
||||
"tests_failed": tests_failed,
|
||||
"output": truncated,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Spawn the test process.
|
||||
let child = std::process::Command::new("bash")
|
||||
.arg(&script_path)
|
||||
.current_dir(&working_dir)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| format!("Failed to spawn test script: {e}"))?;
|
||||
|
||||
crate::slog!(
|
||||
"[run_tests] Started test job for {} (pid {})",
|
||||
working_dir.display(),
|
||||
child.id()
|
||||
);
|
||||
|
||||
{
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
jobs.insert(
|
||||
working_dir,
|
||||
crate::http::context::TestJob {
|
||||
child: Some(child),
|
||||
result: None,
|
||||
started_at: std::time::Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"status": "started",
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
}
|
||||
|
||||
/// Check on a running test job and return results if complete.
|
||||
///
|
||||
/// Returns `{"status": "running", "elapsed_secs": N}` if still in progress,
|
||||
/// or the full test result if finished. If no job exists for the worktree,
|
||||
/// returns an error.
|
||||
pub(super) async fn tool_get_test_result(
|
||||
args: &Value,
|
||||
ctx: &AppContext,
|
||||
) -> Result<String, String> {
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
|
||||
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
|
||||
Some(wt) => validate_working_dir(wt, ctx)?,
|
||||
None => project_root
|
||||
.canonicalize()
|
||||
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
|
||||
};
|
||||
|
||||
let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
|
||||
|
||||
let job = jobs.get_mut(&working_dir).ok_or_else(|| {
|
||||
"No test job running for this worktree. Call run_tests first.".to_string()
|
||||
})?;
|
||||
|
||||
// Check if child has finished.
|
||||
if let Some(child) = job.child.as_mut() {
|
||||
match child.try_wait() {
|
||||
Ok(Some(status)) => {
|
||||
let result = collect_child_result(child, status);
|
||||
job.child = None;
|
||||
job.result = Some(result.clone());
|
||||
let resp = format_test_result(&result);
|
||||
jobs.remove(&working_dir);
|
||||
return resp;
|
||||
}
|
||||
Ok(None) => {
|
||||
let elapsed = job.started_at.elapsed().as_secs();
|
||||
// If exceeded our max timeout, kill it.
|
||||
if elapsed > TEST_TIMEOUT_SECS {
|
||||
let _ = child.kill();
|
||||
let _ = child.wait();
|
||||
crate::slog!(
|
||||
"[run_tests] Killed test job for {} after {elapsed}s timeout",
|
||||
working_dir.display()
|
||||
);
|
||||
jobs.remove(&working_dir);
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"passed": false,
|
||||
"exit_code": -1,
|
||||
"timed_out": true,
|
||||
"tests_passed": 0,
|
||||
"tests_failed": 0,
|
||||
"output": format!("Test suite timed out after {elapsed}s"),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"));
|
||||
}
|
||||
return serde_json::to_string_pretty(&json!({
|
||||
"status": "running",
|
||||
"elapsed_secs": elapsed,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"));
|
||||
}
|
||||
Err(e) => {
|
||||
jobs.remove(&working_dir);
|
||||
return Err(format!("Failed to check child status: {e}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Job exists with cached result.
|
||||
if let Some(result) = job.result.clone() {
|
||||
jobs.remove(&working_dir);
|
||||
return format_test_result(&result);
|
||||
}
|
||||
|
||||
Err("Test job in unexpected state".to_string())
|
||||
}
|
||||
|
||||
/// Collect stdout/stderr from a finished child and build a `TestJobResult`.
|
||||
fn collect_child_result(
|
||||
child: &mut std::process::Child,
|
||||
status: std::process::ExitStatus,
|
||||
) -> crate::http::context::TestJobResult {
|
||||
let mut stdout = String::new();
|
||||
let mut stderr = String::new();
|
||||
if let Some(ref mut out) = child.stdout {
|
||||
use std::io::Read;
|
||||
let _ = out.read_to_string(&mut stdout);
|
||||
}
|
||||
if let Some(ref mut err) = child.stderr {
|
||||
use std::io::Read;
|
||||
let _ = err.read_to_string(&mut stderr);
|
||||
}
|
||||
let combined = format!("{stdout}{stderr}");
|
||||
let (tests_passed, tests_failed) = parse_test_counts(&combined);
|
||||
let exit_code = status.code().unwrap_or(-1);
|
||||
crate::http::context::TestJobResult {
|
||||
passed: status.success(),
|
||||
exit_code,
|
||||
tests_passed,
|
||||
tests_failed,
|
||||
output: truncate_output(&combined, MAX_OUTPUT_LINES),
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a `TestJobResult` as the JSON string returned to the agent.
|
||||
fn format_test_result(
|
||||
result: &crate::http::context::TestJobResult,
|
||||
) -> Result<String, String> {
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"passed": result.passed,
|
||||
"exit_code": result.exit_code,
|
||||
"timed_out": false,
|
||||
"tests_passed": result.tests_passed,
|
||||
"tests_failed": result.tests_failed,
|
||||
"output": result.output,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user