Files
huskies/server/src/http/mcp/shell_tools/script.rs
T

612 lines
23 KiB
Rust
Raw Normal View History

//! MCP shell script tools: run_tests / get_test_result / run_build / run_lint.
use serde_json::{Value, json};
use crate::http::context::AppContext;
#[allow(unused_imports)]
use crate::service::shell::{extract_count, parse_test_counts, truncate_output};
use super::exec::validate_working_dir;
/// Maximum number of seconds a test job may run before it is killed and
/// reported with `"timed_out": true`.
const TEST_TIMEOUT_SECS: u64 = 1200;
/// Line limit handed to `truncate_output` when trimming captured script
/// output for tool responses.
const MAX_OUTPUT_LINES: usize = 100;
/// Run `script/test` in a worktree (or the project root) and wait for it to finish.
///
/// # Arguments
/// * `args` – tool arguments. An optional string `worktree_path` selects the
///   directory to run in (checked by `validate_working_dir`); without it the
///   canonicalized project root is used.
/// * `ctx` – application context providing the project root and the shared
///   `test_jobs` registry.
///
/// # Returns
/// Pretty-printed JSON with the keys `passed`, `exit_code`, `timed_out`,
/// `tests_passed`, `tests_failed` and `output`. On timeout the job is killed
/// and `timed_out` is `true`.
///
/// # Errors
/// Returns `Err` when the working directory cannot be resolved, `script/test`
/// does not exist, the process cannot be spawned or polled, the job registry
/// lock is poisoned, or the job entry vanishes while waiting.
pub(crate) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    /// Reads a child pipe to EOF on a background thread and returns the text.
    ///
    /// Bytes are decoded lossily so that a single invalid UTF-8 sequence does
    /// not discard the whole stream.
    fn spawn_drain<R>(pipe: R) -> std::thread::JoinHandle<String>
    where
        R: std::io::Read + Send + 'static,
    {
        std::thread::spawn(move || {
            let mut pipe = pipe;
            let mut bytes = Vec::new();
            let _ = std::io::Read::read_to_end(&mut pipe, &mut bytes);
            String::from_utf8_lossy(&bytes).into_owned()
        })
    }

    let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
    // Resolve the argument once so that the working-directory choice and the
    // evidence gate further down can never disagree about whether a worktree
    // was actually supplied (a non-string value counts as "not supplied").
    let worktree_arg = args.get("worktree_path").and_then(|v| v.as_str());
    let ran_in_worktree = worktree_arg.is_some();
    let working_dir = match worktree_arg {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };
    let script_path = working_dir.join("script").join("test");
    if !script_path.exists() {
        return Err(format!("Test script not found: {}", script_path.display()));
    }

    // Kill any existing test job for this worktree.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        if let Some(mut old_job) = jobs.remove(&working_dir)
            && let Some(ref mut child) = old_job.child
        {
            let _ = child.kill();
            let _ = child.wait();
        }
    }

    // Spawn the test process with piped stdout/stderr so we can capture output.
    let mut child = std::process::Command::new("bash")
        .arg(&script_path)
        .current_dir(&working_dir)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| format!("Failed to spawn test script: {e}"))?;
    let pid = child.id();
    crate::slog!(
        "[run_tests] Started test job for {} (pid {})",
        working_dir.display(),
        pid
    );

    // Drain stdout/stderr in background threads so the OS pipe buffers never
    // fill up and stall the child.
    let stdout_handle = child.stdout.take().map(spawn_drain);
    let stderr_handle = child.stderr.take().map(spawn_drain);

    // Store the child so it can be cleaned up if the server restarts.
    {
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        jobs.insert(
            working_dir.clone(),
            crate::http::context::TestJob {
                child: Some(child),
                result: None,
                started_at: std::time::Instant::now(),
            },
        );
    }

    // Block server-side, checking every second until done or timeout. The
    // registry lock is only held while inspecting the child; it is released
    // before the (potentially slow) thread joins and DB write below.
    let start = std::time::Instant::now();
    let status = loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
        let Some(job) = jobs.get_mut(&working_dir) else {
            return Err("Test job disappeared unexpectedly".to_string());
        };
        let Some(child) = job.child.as_mut() else {
            // Entry exists without a child handle: nothing to poll yet.
            continue;
        };
        match child.try_wait() {
            Ok(Some(status)) => {
                jobs.remove(&working_dir);
                break status;
            }
            Ok(None) => {
                // Still running — check timeout.
                if start.elapsed().as_secs() > TEST_TIMEOUT_SECS {
                    let _ = child.kill();
                    let _ = child.wait();
                    crate::slog!(
                        "[run_tests] Killed test job for {} (pid {}) after {}s timeout",
                        working_dir.display(),
                        pid,
                        TEST_TIMEOUT_SECS
                    );
                    jobs.remove(&working_dir);
                    return serde_json::to_string_pretty(&json!({
                        "passed": false,
                        "exit_code": -1,
                        "timed_out": true,
                        "tests_passed": 0,
                        "tests_failed": 0,
                        "output": format!("Test suite timed out after {}s", TEST_TIMEOUT_SECS),
                    }))
                    .map_err(|e| format!("Serialization error: {e}"));
                }
            }
            Err(e) => {
                jobs.remove(&working_dir);
                return Err(format!("Failed to check child status: {e}"));
            }
        }
    };

    // Done — join drain threads and collect output (lock already released).
    let stdout = stdout_handle
        .and_then(|h| h.join().ok())
        .unwrap_or_default();
    let stderr = stderr_handle
        .and_then(|h| h.join().ok())
        .unwrap_or_default();
    let combined = format!("{stdout}{stderr}");
    let (tests_passed, tests_failed) = parse_test_counts(&combined);
    let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
    let passed = status.success();
    let exit_code = status.code().unwrap_or(-1);
    crate::slog!(
        "[run_tests] Test job for {} finished (pid {}, passed={})",
        working_dir.display(),
        pid,
        passed
    );

    // Capture positive test evidence in the DB so the pipeline advance salvage
    // path (bug 645/668) can confirm the agent ran passing tests before it
    // died. Only written when running in a story worktree (a string
    // `worktree_path` was provided); the story ID is the last path component.
    if passed
        && ran_in_worktree
        && let Some(story_id) = working_dir.file_name().and_then(|n| n.to_str())
    {
        crate::db::write_content(&format!("{story_id}:run_tests_ok"), "1");
    }

    serde_json::to_string_pretty(&json!({
        "passed": passed,
        "exit_code": exit_code,
        "timed_out": false,
        "tests_passed": tests_passed,
        "tests_failed": tests_failed,
        "output": truncated,
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
/// How long, in seconds, `get_test_result` blocks server-side before
/// returning "running". The job is checked once per second during this window.
/// This prevents agents from burning turns polling every 2 seconds.
const TEST_POLL_BLOCK_SECS: u64 = 20;
/// Check on a running test job and return results if complete.
///
/// Blocks for up to `TEST_POLL_BLOCK_SECS` seconds, checking once per second.
/// Returns as soon as the job has a result; otherwise, after the blocking
/// window, returns `{"status": "running", "elapsed_secs": …}`. If at that
/// final check the job has been running longer than `TEST_TIMEOUT_SECS`, it is
/// killed and a `"timed_out": true` result is returned instead.
///
/// # Arguments
/// * `args` – tool arguments; an optional string `worktree_path` selects the
///   job to look up, otherwise the canonicalized project root is used.
/// * `ctx` – application context holding the shared `test_jobs` registry.
///
/// # Errors
/// Returns `Err` when the working directory cannot be resolved, no job is
/// registered for it, the child's status cannot be queried, the registry lock
/// is poisoned, or the job has neither a child handle nor a cached result
/// once the blocking window has elapsed.
pub(crate) async fn tool_get_test_result(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };

    // One check per second for TEST_POLL_BLOCK_SECS seconds, followed by one
    // final check (polls_left == 0) that always returns instead of sleeping.
    for polls_left in (0..=TEST_POLL_BLOCK_SECS).rev() {
        let is_final_poll = polls_left == 0;
        {
            // The guard lives only inside this block so it is never held
            // across the `.await` below.
            let mut jobs = ctx.test_jobs.lock().map_err(|e| e.to_string())?;
            let Some(job) = jobs.get_mut(&working_dir) else {
                return Err(
                    "No test job running for this worktree. Call run_tests first.".to_string(),
                );
            };
            if let Some(child) = job.child.as_mut() {
                match child.try_wait() {
                    Ok(Some(status)) => {
                        let result = collect_child_result(child, status);
                        jobs.remove(&working_dir);
                        return format_test_result(&result);
                    }
                    Ok(None) if is_final_poll => {
                        let elapsed = job.started_at.elapsed().as_secs();
                        // If exceeded our max timeout, kill it.
                        if elapsed > TEST_TIMEOUT_SECS {
                            let _ = child.kill();
                            let _ = child.wait();
                            crate::slog!(
                                "[run_tests] Killed test job for {} after {elapsed}s timeout",
                                working_dir.display()
                            );
                            jobs.remove(&working_dir);
                            return serde_json::to_string_pretty(&json!({
                                "passed": false,
                                "exit_code": -1,
                                "timed_out": true,
                                "tests_passed": 0,
                                "tests_failed": 0,
                                "output": format!("Test suite timed out after {elapsed}s"),
                            }))
                            .map_err(|e| format!("Serialization error: {e}"));
                        }
                        return serde_json::to_string_pretty(&json!({
                            "status": "running",
                            "elapsed_secs": elapsed,
                        }))
                        .map_err(|e| format!("Serialization error: {e}"));
                    }
                    Ok(None) => {} // still running, keep waiting
                    Err(e) => {
                        jobs.remove(&working_dir);
                        return Err(format!("Failed to check child status: {e}"));
                    }
                }
            } else if let Some(result) = job.result.clone() {
                // Job exists with cached result.
                jobs.remove(&working_dir);
                return format_test_result(&result);
            } else if is_final_poll {
                return Err("Test job in unexpected state".to_string());
            }
        }
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
    // Not reached: the final poll above returns on every path. Kept as a
    // typed fallback rather than a panic.
    Err("Test job in unexpected state".to_string())
}
/// Build a `TestJobResult` from an exited child process.
///
/// Reads whatever remains on the child's stdout and stderr pipes, concatenates
/// them (stdout first), parses pass/fail counts from the combined text and
/// truncates it to `MAX_OUTPUT_LINES`.
///
/// Output is decoded lossily: invalid UTF-8 bytes are replaced rather than
/// causing the whole stream to be dropped. Read errors are ignored on purpose
/// (best effort); any bytes read before the error are kept.
///
/// If the pipe handles were already taken out of `child` (as `tool_run_tests`
/// does for its drain threads), nothing is read and `output` is empty.
///
/// # Arguments
/// * `child` – the process whose pipes are read; expected to have exited.
/// * `status` – the exit status already obtained for `child`.
fn collect_child_result(
    child: &mut std::process::Child,
    status: std::process::ExitStatus,
) -> crate::http::context::TestJobResult {
    /// Reads an optional pipe to EOF and decodes it lossily as UTF-8.
    fn drain_lossy<R: std::io::Read>(pipe: Option<&mut R>) -> String {
        let mut bytes = Vec::new();
        if let Some(reader) = pipe {
            let _ = reader.read_to_end(&mut bytes);
        }
        String::from_utf8_lossy(&bytes).into_owned()
    }

    let stdout = drain_lossy(child.stdout.as_mut());
    let stderr = drain_lossy(child.stderr.as_mut());
    let combined = format!("{stdout}{stderr}");
    let (tests_passed, tests_failed) = parse_test_counts(&combined);
    // A missing exit code (e.g. killed by a signal) is reported as -1.
    let exit_code = status.code().unwrap_or(-1);
    crate::http::context::TestJobResult {
        passed: status.success(),
        exit_code,
        tests_passed,
        tests_failed,
        output: truncate_output(&combined, MAX_OUTPUT_LINES),
    }
}
/// Shared implementation for run_build and run_lint: runs a named script
/// (`script/<name>`) in the working directory, captures output, and returns
async fn run_script_tool(
script_name: &str,
args: &Value,
ctx: &AppContext,
) -> Result<String, String> {
let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
Some(wt) => validate_working_dir(wt, ctx)?,
None => project_root
.canonicalize()
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
};
let script_path = working_dir.join("script").join(script_name);
if !script_path.exists() {
return Err(format!(
"{script_name} script not found: {}",
script_path.display()
));
}
let result = tokio::task::spawn_blocking({
let script = script_path.clone();
let dir = working_dir.clone();
move || {
std::process::Command::new("bash")
.arg(&script)
.current_dir(&dir)
.output()
}
})
.await
.map_err(|e| format!("Task join error: {e}"))?
.map_err(|e| format!("Failed to spawn {script_name} script: {e}"))?;
let stdout = String::from_utf8_lossy(&result.stdout);
let stderr = String::from_utf8_lossy(&result.stderr);
let combined = format!("{stdout}{stderr}");
let output = truncate_output(&combined, MAX_OUTPUT_LINES);
let exit_code = result.status.code().unwrap_or(-1);
serde_json::to_string_pretty(&json!({
"passed": result.status.success(),
"exit_code": exit_code,
"output": output,
}))
.map_err(|e| format!("Serialization error: {e}"))
}
/// MCP tool entry point that runs `script/build` via `run_script_tool`.
///
/// Arguments, return value and errors are exactly those of `run_script_tool`
/// called with the script name `"build"`.
pub(crate) async fn tool_run_build(args: &Value, ctx: &AppContext) -> Result<String, String> {
run_script_tool("build", args, ctx).await
}
/// MCP tool entry point that runs `script/lint` via `run_script_tool`.
///
/// Arguments, return value and errors are exactly those of `run_script_tool`
/// called with the script name `"lint"`.
pub(crate) async fn tool_run_lint(args: &Value, ctx: &AppContext) -> Result<String, String> {
run_script_tool("lint", args, ctx).await
}
/// Render a finished test job as the pretty-printed JSON tool response.
///
/// The payload carries `passed`, `exit_code`, `tests_passed`, `tests_failed`
/// and `output` from `result`; `timed_out` is always `false` here.
///
/// # Errors
/// Returns `Err` with a "Serialization error" message if the JSON cannot be
/// rendered.
fn format_test_result(result: &crate::http::context::TestJobResult) -> Result<String, String> {
    let payload = json!({
        "passed": result.passed,
        "exit_code": result.exit_code,
        "timed_out": false,
        "tests_passed": result.tests_passed,
        "tests_failed": result.tests_failed,
        "output": result.output,
    });
    match serde_json::to_string_pretty(&payload) {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("Serialization error: {e}")),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::http::test_helpers::test_ctx;

    /// Creates `script/<name>` under `root` with the given body and, on Unix,
    /// marks it executable.
    fn install_script(root: &std::path::Path, name: &str, body: &str) {
        let script_dir = root.join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let script_path = script_dir.join(name);
        std::fs::write(&script_path, body).unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
        }
    }

    /// Creates the `.huskies/worktrees` directory under `root`.
    fn make_worktrees_dir(root: &std::path::Path) {
        std::fs::create_dir_all(root.join(".huskies").join("worktrees")).unwrap();
    }

    /// Parses a tool response string into JSON, panicking on malformed input.
    fn parse_response(raw: &str) -> serde_json::Value {
        serde_json::from_str(raw).unwrap()
    }

    // ── tool_run_tests ────────────────────────────────────────────────

    #[tokio::test]
    async fn tool_run_tests_missing_script_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        // No script/test in tmp — should return Err
        let result = tool_run_tests(&json!({}), &ctx).await;
        assert!(
            result.is_err(),
            "expected error for missing script: {result:?}"
        );
        assert!(
            result.unwrap_err().contains("not found"),
            "error should mention 'not found'"
        );
    }

    #[tokio::test]
    async fn tool_run_tests_passes_when_script_exits_zero() {
        let tmp = tempfile::tempdir().unwrap();
        install_script(tmp.path(), "test", "#!/usr/bin/env bash\nexit 0\n");
        let ctx = test_ctx(tmp.path());
        let raw = tool_run_tests(&json!({}), &ctx).await.unwrap();
        let parsed = parse_response(&raw);
        assert_eq!(parsed["passed"], true);
        assert_eq!(parsed["exit_code"], 0);
    }

    #[tokio::test]
    async fn tool_run_tests_fails_when_script_exits_nonzero() {
        let tmp = tempfile::tempdir().unwrap();
        install_script(tmp.path(), "test", "#!/usr/bin/env bash\nexit 1\n");
        let ctx = test_ctx(tmp.path());
        let raw = tool_run_tests(&json!({}), &ctx).await.unwrap();
        let parsed = parse_response(&raw);
        assert_eq!(parsed["passed"], false);
        assert_eq!(parsed["exit_code"], 1);
    }

    #[tokio::test]
    async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        make_worktrees_dir(tmp.path());
        let ctx = test_ctx(tmp.path());
        // tmp.path() itself is outside worktrees → should fail validation
        let request = json!({"worktree_path": tmp.path().to_str().unwrap()});
        let result = tool_run_tests(&request, &ctx).await;
        assert!(result.is_err());
        assert!(
            result.unwrap_err().contains("worktrees"),
            "expected sandbox error"
        );
    }

    // ── tool_run_build / tool_run_lint ────────────────────────────────

    #[tokio::test]
    async fn tool_run_build_missing_script_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_build(&json!({}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("not found"));
    }

    #[tokio::test]
    async fn tool_run_build_passes_when_script_exits_zero() {
        let tmp = tempfile::tempdir().unwrap();
        install_script(tmp.path(), "build", "#!/usr/bin/env bash\nexit 0\n");
        let ctx = test_ctx(tmp.path());
        let raw = tool_run_build(&json!({}), &ctx).await.unwrap();
        let parsed = parse_response(&raw);
        assert_eq!(parsed["passed"], true);
        assert_eq!(parsed["exit_code"], 0);
    }

    #[tokio::test]
    async fn tool_run_build_worktree_path_must_be_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        make_worktrees_dir(tmp.path());
        let ctx = test_ctx(tmp.path());
        let request = json!({"worktree_path": tmp.path().to_str().unwrap()});
        let result = tool_run_build(&request, &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktrees"));
    }

    #[tokio::test]
    async fn tool_run_lint_missing_script_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_run_lint(&json!({}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("not found"));
    }

    #[tokio::test]
    async fn tool_run_lint_passes_when_script_exits_zero() {
        let tmp = tempfile::tempdir().unwrap();
        install_script(tmp.path(), "lint", "#!/usr/bin/env bash\nexit 0\n");
        let ctx = test_ctx(tmp.path());
        let raw = tool_run_lint(&json!({}), &ctx).await.unwrap();
        let parsed = parse_response(&raw);
        assert_eq!(parsed["passed"], true);
        assert_eq!(parsed["exit_code"], 0);
    }

    #[tokio::test]
    async fn tool_run_lint_fails_when_script_exits_nonzero() {
        let tmp = tempfile::tempdir().unwrap();
        install_script(tmp.path(), "lint", "#!/usr/bin/env bash\nexit 1\n");
        let ctx = test_ctx(tmp.path());
        let raw = tool_run_lint(&json!({}), &ctx).await.unwrap();
        let parsed = parse_response(&raw);
        assert_eq!(parsed["passed"], false);
        assert_eq!(parsed["exit_code"], 1);
    }

    // ── truncate_output ───────────────────────────────────────────────

    #[test]
    fn truncate_output_short_text_unchanged() {
        let text = "line1\nline2\nline3";
        assert_eq!(truncate_output(text, 10), text);
    }

    #[test]
    fn truncate_output_long_text_keeps_tail() {
        let text = (1..=200)
            .map(|i| format!("line {i}"))
            .collect::<Vec<String>>()
            .join("\n");
        let result = truncate_output(&text, 50);
        assert!(
            result.contains("line 200"),
            "should keep last line: {result}"
        );
        assert!(
            result.contains("omitted"),
            "should note omitted lines: {result}"
        );
        assert!(
            !result.contains("line 1\n"),
            "should not keep first line: {result}"
        );
    }

    // ── parse_test_counts ─────────────────────────────────────────────

    #[test]
    fn parse_test_counts_extracts_passed_and_failed() {
        let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
        let (passed, failed) = parse_test_counts(output);
        assert_eq!(passed, 7);
        assert_eq!(failed, 3);
    }

    #[test]
    fn parse_test_counts_no_results_returns_zeros() {
        let (passed, failed) = parse_test_counts("no test output here");
        assert_eq!(passed, 0);
        assert_eq!(failed, 0);
    }

    #[test]
    fn extract_count_finds_number_before_label() {
        assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5));
        assert_eq!(extract_count("0 failed", "failed"), Some(0));
        assert_eq!(extract_count("no number here passed", "passed"), None);
    }
}