huskies: merge 494_story_mcp_tool_to_run_project_test_suite
This commit is contained in:
@@ -1048,6 +1048,20 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
|
||||
"required": ["command", "working_dir"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "run_tests",
|
||||
"description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"worktree_path": {
|
||||
"type": "string",
|
||||
"description": "Optional absolute path to a worktree to run tests in. Must be inside .huskies/worktrees/. Defaults to the project root."
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "git_status",
|
||||
"description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.",
|
||||
@@ -1299,6 +1313,7 @@ async fn handle_tools_call(
|
||||
"unblock_story" => story_tools::tool_unblock_story(&args, ctx),
|
||||
// Shell command execution
|
||||
"run_command" => shell_tools::tool_run_command(&args, ctx).await,
|
||||
"run_tests" => shell_tools::tool_run_tests(&args, ctx).await,
|
||||
// Git operations
|
||||
"git_status" => git_tools::tool_git_status(&args, ctx).await,
|
||||
"git_diff" => git_tools::tool_git_diff(&args, ctx).await,
|
||||
@@ -1422,6 +1437,7 @@ mod tests {
|
||||
assert!(names.contains(&"unblock_story"));
|
||||
assert!(names.contains(&"delete_story"));
|
||||
assert!(names.contains(&"run_command"));
|
||||
assert!(names.contains(&"run_tests"));
|
||||
assert!(names.contains(&"git_status"));
|
||||
assert!(names.contains(&"git_diff"));
|
||||
assert!(names.contains(&"git_add"));
|
||||
@@ -1429,7 +1445,7 @@ mod tests {
|
||||
assert!(names.contains(&"git_log"));
|
||||
assert!(names.contains(&"status"));
|
||||
assert!(names.contains(&"loc_file"));
|
||||
assert_eq!(tools.len(), 56);
|
||||
assert_eq!(tools.len(), 57);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -7,6 +7,8 @@ use std::path::PathBuf;
|
||||
|
||||
const DEFAULT_TIMEOUT_SECS: u64 = 120;
|
||||
const MAX_TIMEOUT_SECS: u64 = 600;
|
||||
/// Wall-clock limit, in seconds, that `tool_run_tests` allows the test script to run.
const TEST_TIMEOUT_SECS: u64 = 600;
|
||||
/// Maximum number of trailing output lines `tool_run_tests` includes in its result.
const MAX_OUTPUT_LINES: usize = 100;
|
||||
|
||||
/// Patterns that are unconditionally blocked regardless of context.
|
||||
static BLOCKED_PATTERNS: &[&str] = &[
|
||||
@@ -328,6 +330,117 @@ pub(super) fn handle_run_command_sse(
|
||||
})))
|
||||
}
|
||||
|
||||
/// Limit `output` to its last `max_lines` lines.
///
/// Text that already fits is returned untouched. Otherwise the result starts
/// with a `[... N lines omitted ...]` marker followed by the surviving tail,
/// joined with `\n`.
fn truncate_output(output: &str, max_lines: usize) -> String {
    let total = output.lines().count();
    if total <= max_lines {
        return output.to_owned();
    }

    // Everything before the last `max_lines` lines is dropped.
    let omitted = total - max_lines;
    let tail = output.lines().skip(omitted).collect::<Vec<_>>().join("\n");
    format!("[... {omitted} lines omitted ...]\n{tail}")
}
|
||||
|
||||
/// Parse cumulative passed/failed counts from `cargo test` output lines like:
|
||||
/// `"test result: ok. 5 passed; 0 failed; ..."`
|
||||
fn parse_test_counts(output: &str) -> (u64, u64) {
|
||||
let mut total_passed = 0u64;
|
||||
let mut total_failed = 0u64;
|
||||
for line in output.lines() {
|
||||
if line.contains("test result:") {
|
||||
if let Some(p) = extract_count(line, "passed") {
|
||||
total_passed += p;
|
||||
}
|
||||
if let Some(f) = extract_count(line, "failed") {
|
||||
total_failed += f;
|
||||
}
|
||||
}
|
||||
}
|
||||
(total_passed, total_failed)
|
||||
}
|
||||
|
||||
/// Read the number that sits directly in front of the first `label` in `line`
/// (e.g. `"5 passed"` → 5).
///
/// Whitespace between the number and the label is ignored. Returns `None` when
/// the label is absent, when no ASCII digits precede it, or when the digits do
/// not fit in a `u64`.
fn extract_count(line: &str, label: &str) -> Option<u64> {
    let label_at = line.find(label)?;
    let prefix = line[..label_at].trim_end();

    // The count is the run of ASCII digits at the very end of the prefix.
    let without_digits = prefix.trim_end_matches(|c: char| c.is_ascii_digit());
    let digits = &prefix[without_digits.len()..];

    // An empty digit run fails to parse, which yields `None` as required.
    digits.parse().ok()
}
|
||||
|
||||
/// Run the project's `script/test` and return a structured result.
|
||||
///
|
||||
/// If `worktree_path` is provided the script is run from that worktree
|
||||
/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used.
|
||||
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
|
||||
let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
|
||||
Some(wt) => validate_working_dir(wt, ctx)?,
|
||||
None => project_root
|
||||
.canonicalize()
|
||||
.map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
|
||||
};
|
||||
|
||||
let script_path = working_dir.join("script").join("test");
|
||||
if !script_path.exists() {
|
||||
return Err(format!(
|
||||
"Test script not found: {}",
|
||||
script_path.display()
|
||||
));
|
||||
}
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
|
||||
tokio::task::spawn_blocking({
|
||||
let dir = working_dir.clone();
|
||||
let script = script_path.clone();
|
||||
move || {
|
||||
std::process::Command::new("bash")
|
||||
.arg(&script)
|
||||
.current_dir(&dir)
|
||||
.output()
|
||||
}
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(_) => serde_json::to_string_pretty(&json!({
|
||||
"passed": false,
|
||||
"exit_code": -1,
|
||||
"timed_out": true,
|
||||
"tests_passed": 0,
|
||||
"tests_failed": 0,
|
||||
"output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}")),
|
||||
Ok(Err(e)) => Err(format!("Task join error: {e}")),
|
||||
Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")),
|
||||
Ok(Ok(Ok(output))) => {
|
||||
let passed = output.status.success();
|
||||
let exit_code = output.status.code().unwrap_or(-1);
|
||||
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
||||
let combined = format!("{stdout}{stderr}");
|
||||
let (tests_passed, tests_failed) = parse_test_counts(&combined);
|
||||
let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"passed": passed,
|
||||
"exit_code": exit_code,
|
||||
"timed_out": false,
|
||||
"tests_passed": tests_passed,
|
||||
"tests_failed": tests_failed,
|
||||
"output": truncated,
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -619,4 +732,124 @@ mod tests {
|
||||
// Just ensure it doesn't panic and returns an Err about sandbox (not timeout)
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
// ── tool_run_tests ────────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn tool_run_tests_missing_script_returns_error() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
// No script/test in tmp — should return Err
|
||||
let result = tool_run_tests(&json!({}), &ctx).await;
|
||||
assert!(result.is_err(), "expected error for missing script: {result:?}");
|
||||
assert!(
|
||||
result.unwrap_err().contains("not found"),
|
||||
"error should mention 'not found'"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tool_run_tests_passes_when_script_exits_zero() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let script_dir = tmp.path().join("script");
|
||||
std::fs::create_dir_all(&script_dir).unwrap();
|
||||
let script_path = script_dir.join("test");
|
||||
std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: ok. 3 passed; 0 failed'\nexit 0\n").unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
||||
}
|
||||
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
|
||||
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
||||
|
||||
assert_eq!(parsed["passed"], true);
|
||||
assert_eq!(parsed["exit_code"], 0);
|
||||
assert_eq!(parsed["timed_out"], false);
|
||||
assert_eq!(parsed["tests_passed"], 3);
|
||||
assert_eq!(parsed["tests_failed"], 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tool_run_tests_fails_when_script_exits_nonzero() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let script_dir = tmp.path().join("script");
|
||||
std::fs::create_dir_all(&script_dir).unwrap();
|
||||
let script_path = script_dir.join("test");
|
||||
std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n").unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
|
||||
}
|
||||
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
|
||||
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
|
||||
|
||||
assert_eq!(parsed["passed"], false);
|
||||
assert_eq!(parsed["exit_code"], 1);
|
||||
assert_eq!(parsed["tests_passed"], 1);
|
||||
assert_eq!(parsed["tests_failed"], 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let wt_dir = tmp.path().join(".huskies").join("worktrees");
|
||||
std::fs::create_dir_all(&wt_dir).unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
// tmp.path() itself is outside worktrees → should fail validation
|
||||
let result =
|
||||
tool_run_tests(&json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx).await;
|
||||
assert!(result.is_err());
|
||||
assert!(
|
||||
result.unwrap_err().contains("worktrees"),
|
||||
"expected sandbox error"
|
||||
);
|
||||
}
|
||||
|
||||
// ── truncate_output ───────────────────────────────────────────────
|
||||
|
||||
#[test]
fn truncate_output_short_text_unchanged() {
    // Three lines fit comfortably under a ten-line cap, so nothing is altered.
    let original = "line1\nline2\nline3";
    let shortened = truncate_output(original, 10);
    assert_eq!(shortened, original);
}
|
||||
|
||||
#[test]
fn truncate_output_long_text_keeps_tail() {
    // 200 numbered lines, of which only the final 50 may survive.
    let text = (1..=200)
        .map(|i| format!("line {i}"))
        .collect::<Vec<String>>()
        .join("\n");

    let result = truncate_output(&text, 50);

    assert!(result.contains("line 200"), "should keep last line: {result}");
    assert!(result.contains("omitted"), "should note omitted lines: {result}");
    assert!(!result.contains("line 1\n"), "should not keep first line: {result}");
}
|
||||
|
||||
// ── parse_test_counts ─────────────────────────────────────────────
|
||||
|
||||
#[test]
fn parse_test_counts_extracts_passed_and_failed() {
    // Two summary lines: counts must be accumulated across both.
    let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
    let totals = parse_test_counts(output);
    assert_eq!(totals.0, 7);
    assert_eq!(totals.1, 3);
}
|
||||
|
||||
#[test]
fn parse_test_counts_no_results_returns_zeros() {
    // Without any `test result:` line both totals stay at zero.
    let totals = parse_test_counts("no test output here");
    assert_eq!(totals.0, 0);
    assert_eq!(totals.1, 0);
}
|
||||
|
||||
#[test]
fn extract_count_finds_number_before_label() {
    // (line, label, expected count) triples covering a hit, a zero and a miss.
    let cases = [
        ("5 passed; 0 failed", "passed", Some(5)),
        ("0 failed", "failed", Some(0)),
        ("no number here passed", "passed", None),
    ];
    for (line, label, expected) in cases {
        assert_eq!(extract_count(line, label), expected);
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user