From 19768c23d5c4fc384af484fd75493f1ccba90136 Mon Sep 17 00:00:00 2001 From: dave Date: Tue, 7 Apr 2026 14:39:47 +0000 Subject: [PATCH] huskies: merge 494_story_mcp_tool_to_run_project_test_suite --- frontend/src/slashCommands.ts | 5 + server/src/chat/commands/mod.rs | 6 + server/src/chat/commands/run_tests.rs | 242 ++++++++++++++++++++++++++ server/src/http/mcp/mod.rs | 18 +- server/src/http/mcp/shell_tools.rs | 233 +++++++++++++++++++++++++ 5 files changed, 503 insertions(+), 1 deletion(-) create mode 100644 server/src/chat/commands/run_tests.rs diff --git a/frontend/src/slashCommands.ts b/frontend/src/slashCommands.ts index f23f35d1..6209dc7c 100644 --- a/frontend/src/slashCommands.ts +++ b/frontend/src/slashCommands.ts @@ -97,6 +97,11 @@ export const SLASH_COMMANDS: SlashCommand[] = [ description: "Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).", }, + { + name: "/test", + description: + "Run the project's test suite (`script/test`) and show pass/fail with output.", + }, { name: "/btw ", description: diff --git a/server/src/chat/commands/mod.rs b/server/src/chat/commands/mod.rs index d902f68b..867ab4f8 100644 --- a/server/src/chat/commands/mod.rs +++ b/server/src/chat/commands/mod.rs @@ -15,6 +15,7 @@ mod help; pub(crate) mod loc; mod move_story; mod overview; +mod run_tests; mod setup; mod show; mod status; @@ -130,6 +131,11 @@ pub fn commands() -> &'static [BotCommand] { description: "Show test coverage: cached baseline by default, or `coverage run` to rerun the full suite", handler: coverage::handle_coverage, }, + BotCommand { + name: "test", + description: "Run the project's test suite (`script/test`) and show pass/fail with output", + handler: run_tests::handle_test, + }, BotCommand { name: "loc", description: "Show top source files by line count: `loc` (top 10), `loc `, or `loc ` for a specific file", diff --git a/server/src/chat/commands/run_tests.rs b/server/src/chat/commands/run_tests.rs new file mode 100644 index 00000000..b9adccd7 --- /dev/null +++ b/server/src/chat/commands/run_tests.rs @@ -0,0 +1,242 @@ +//! Handler for the `test` bot command — run the project's test suite. +//! +//! Executes `script/test` from the project root and returns a formatted +//! pass/fail summary with output (truncated for failures). + +use super::CommandContext; + +const TEST_SCRIPT: &str = "script/test"; +/// Maximum number of output lines to include in the response. +const MAX_OUTPUT_LINES: usize = 80; + +pub(super) fn handle_test(ctx: &CommandContext) -> Option { + let script_path = ctx.project_root.join(TEST_SCRIPT); + + if !script_path.exists() { + return Some(format!( + "**Test**\n\nTest script not found: `{TEST_SCRIPT}`\n\nEnsure `{TEST_SCRIPT}` exists in the project root." + )); + } + + let output = std::process::Command::new("bash") + .arg(&script_path) + .current_dir(ctx.project_root) + .output(); + + match output { + Err(e) => Some(format!("**Test**\n\nFailed to run test script: {e}")), + Ok(out) => { + let passed = out.status.success(); + let stdout = String::from_utf8_lossy(&out.stdout).to_string(); + let stderr = String::from_utf8_lossy(&out.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + let (tests_passed, tests_failed) = parse_test_counts(&combined); + let truncated = truncate_output(&combined, MAX_OUTPUT_LINES); + + let status = if passed { "PASS" } else { "FAIL" }; + let mut result = format!("**Test: {status}**\n\n"); + + if tests_passed > 0 || tests_failed > 0 { + result.push_str(&format!( + "{tests_passed} passed, {tests_failed} failed\n\n" + )); + } + + result.push_str(&format!("```\n{truncated}\n```")); + Some(result) + } + } +} + +/// Truncate output to at most `max_lines` tail lines. +fn truncate_output(output: &str, max_lines: usize) -> String { + let lines: Vec<&str> = output.lines().collect(); + if lines.len() <= max_lines { + return output.to_string(); + } + let omitted = lines.len() - max_lines; + let tail = lines[lines.len() - max_lines..].join("\n"); + format!("[... {omitted} lines omitted ...]\n{tail}") +} + +/// Parse cumulative passed/failed counts from `cargo test` output lines. +fn parse_test_counts(output: &str) -> (u64, u64) { + let mut total_passed = 0u64; + let mut total_failed = 0u64; + for line in output.lines() { + if line.contains("test result:") { + if let Some(p) = extract_count(line, "passed") { + total_passed += p; + } + if let Some(f) = extract_count(line, "failed") { + total_failed += f; + } + } + } + (total_passed, total_failed) +} + +fn extract_count(line: &str, label: &str) -> Option { + let pos = line.find(label)?; + let before = line[..pos].trim_end(); + let num_str: String = before.chars().rev().take_while(|c| c.is_ascii_digit()).collect(); + if num_str.is_empty() { + return None; + } + let num_str: String = num_str.chars().rev().collect(); + num_str.parse().ok() +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::agents::AgentPool; + use std::collections::HashSet; + use std::sync::{Arc, Mutex}; + + fn make_ctx<'a>( + agents: &'a Arc, + ambient_rooms: &'a Arc>>, + project_root: &'a std::path::Path, + args: &'a str, + ) -> super::super::CommandContext<'a> { + super::super::CommandContext { + bot_name: "Timmy", + args, + project_root, + agents, + ambient_rooms, + room_id: "!test:example.com", + } + } + + fn test_agents() -> Arc { + Arc::new(AgentPool::new_test(3000)) + } + + fn test_ambient() -> Arc>> { + Arc::new(Mutex::new(HashSet::new())) + } + + fn write_script(dir: &std::path::Path, content: &str) { + let script_dir = dir.join("script"); + std::fs::create_dir_all(&script_dir).unwrap(); + let path = script_dir.join("test"); + std::fs::write(&path, content).unwrap(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).unwrap(); + } + } + + #[test] + fn test_command_is_registered() { + use super::super::commands; + let found = commands().iter().any(|c| c.name == "test"); + assert!(found, "test command must be in the registry"); + } + + #[test] + fn test_command_appears_in_help() { + let result = super::super::tests::try_cmd_addressed( + "Timmy", + "@timmy:homeserver.local", + "@timmy help", + ); + let output = result.unwrap(); + assert!( + output.contains("test"), + "help should list test command: {output}" + ); + } + + #[test] + fn test_command_missing_script_returns_error() { + let dir = tempfile::tempdir().unwrap(); + let agents = test_agents(); + let ambient = test_ambient(); + let ctx = make_ctx(&agents, &ambient, dir.path(), ""); + let output = handle_test(&ctx).unwrap(); + assert!( + output.contains("not found") || output.contains("script"), + "missing script should produce a clear error: {output}" + ); + } + + #[test] + fn test_command_pass_when_script_exits_zero() { + let dir = tempfile::tempdir().unwrap(); + write_script( + dir.path(), + "#!/usr/bin/env bash\necho 'test result: ok. 4 passed; 0 failed'\nexit 0\n", + ); + let agents = test_agents(); + let ambient = test_ambient(); + let ctx = make_ctx(&agents, &ambient, dir.path(), ""); + let output = handle_test(&ctx).unwrap(); + assert!(output.contains("PASS"), "should show PASS: {output}"); + assert!(output.contains('4'), "should show test count: {output}"); + } + + #[test] + fn test_command_fail_when_script_exits_nonzero() { + let dir = tempfile::tempdir().unwrap(); + write_script( + dir.path(), + "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n", + ); + let agents = test_agents(); + let ambient = test_ambient(); + let ctx = make_ctx(&agents, &ambient, dir.path(), ""); + let output = handle_test(&ctx).unwrap(); + assert!(output.contains("FAIL"), "should show FAIL: {output}"); + assert!(output.contains('2'), "should show failed count: {output}"); + } + + #[test] + fn test_command_works_via_dispatch() { + let dir = tempfile::tempdir().unwrap(); + write_script( + dir.path(), + "#!/usr/bin/env bash\necho 'ok'\nexit 0\n", + ); + let agents = test_agents(); + let ambient = test_ambient(); + let room_id = "!test:example.com".to_string(); + let dispatch = super::super::CommandDispatch { + bot_name: "Timmy", + bot_user_id: "@timmy:homeserver.local", + project_root: dir.path(), + agents: &agents, + ambient_rooms: &ambient, + room_id: &room_id, + }; + let result = super::super::try_handle_command(&dispatch, "@timmy test"); + assert!( + result.is_some(), + "test command must respond via dispatch (not fall through to LLM)" + ); + } + + #[test] + fn truncate_output_keeps_tail() { + let lines: Vec = (1..=150).map(|i| format!("line {i}")).collect(); + let text = lines.join("\n"); + let result = truncate_output(&text, 80); + assert!(result.contains("line 150"), "should keep last line"); + assert!(result.contains("omitted"), "should note omitted lines"); + } + + #[test] + fn parse_test_counts_sums_multiple_results() { + let output = "test result: ok. 5 passed; 0 failed;\ntest result: ok. 3 passed; 1 failed;"; + let (p, f) = parse_test_counts(output); + assert_eq!(p, 8); + assert_eq!(f, 1); + } +} diff --git a/server/src/http/mcp/mod.rs b/server/src/http/mcp/mod.rs index 0e4bd633..74ff3608 100644 --- a/server/src/http/mcp/mod.rs +++ b/server/src/http/mcp/mod.rs @@ -1048,6 +1048,20 @@ fn handle_tools_list(id: Option) -> JsonRpcResponse { "required": ["command", "working_dir"] } }, + { + "name": "run_tests", + "description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.", + "inputSchema": { + "type": "object", + "properties": { + "worktree_path": { + "type": "string", + "description": "Optional absolute path to a worktree to run tests in. Must be inside .huskies/worktrees/. Defaults to the project root." + } + }, + "required": [] + } + }, { "name": "git_status", "description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.", @@ -1299,6 +1313,7 @@ async fn handle_tools_call( "unblock_story" => story_tools::tool_unblock_story(&args, ctx), // Shell command execution "run_command" => shell_tools::tool_run_command(&args, ctx).await, + "run_tests" => shell_tools::tool_run_tests(&args, ctx).await, // Git operations "git_status" => git_tools::tool_git_status(&args, ctx).await, "git_diff" => git_tools::tool_git_diff(&args, ctx).await, @@ -1422,6 +1437,7 @@ mod tests { assert!(names.contains(&"unblock_story")); assert!(names.contains(&"delete_story")); assert!(names.contains(&"run_command")); + assert!(names.contains(&"run_tests")); assert!(names.contains(&"git_status")); assert!(names.contains(&"git_diff")); assert!(names.contains(&"git_add")); @@ -1429,7 +1445,7 @@ mod tests { assert!(names.contains(&"git_log")); assert!(names.contains(&"status")); assert!(names.contains(&"loc_file")); - assert_eq!(tools.len(), 56); + assert_eq!(tools.len(), 57); } #[test] diff --git a/server/src/http/mcp/shell_tools.rs b/server/src/http/mcp/shell_tools.rs index 50f8df23..d4362ecb 100644 --- a/server/src/http/mcp/shell_tools.rs +++ b/server/src/http/mcp/shell_tools.rs @@ -7,6 +7,8 @@ use std::path::PathBuf; const DEFAULT_TIMEOUT_SECS: u64 = 120; const MAX_TIMEOUT_SECS: u64 = 600; +const TEST_TIMEOUT_SECS: u64 = 600; +const MAX_OUTPUT_LINES: usize = 100; /// Patterns that are unconditionally blocked regardless of context. static BLOCKED_PATTERNS: &[&str] = &[ @@ -328,6 +330,117 @@ pub(super) fn handle_run_command_sse( }))) } +/// Truncate output to at most `max_lines` lines, keeping the tail. +fn truncate_output(output: &str, max_lines: usize) -> String { + let lines: Vec<&str> = output.lines().collect(); + if lines.len() <= max_lines { + return output.to_string(); + } + let omitted = lines.len() - max_lines; + let tail = lines[lines.len() - max_lines..].join("\n"); + format!("[... {omitted} lines omitted ...]\n{tail}") +} + +/// Parse cumulative passed/failed counts from `cargo test` output lines like: +/// `"test result: ok. 5 passed; 0 failed; ..."` +fn parse_test_counts(output: &str) -> (u64, u64) { + let mut total_passed = 0u64; + let mut total_failed = 0u64; + for line in output.lines() { + if line.contains("test result:") { + if let Some(p) = extract_count(line, "passed") { + total_passed += p; + } + if let Some(f) = extract_count(line, "failed") { + total_failed += f; + } + } + } + (total_passed, total_failed) +} + +/// Extract a count immediately before `label` in `line` (e.g. `"5 passed"` → 5). +fn extract_count(line: &str, label: &str) -> Option { + let pos = line.find(label)?; + let before = line[..pos].trim_end(); + let num_str: String = before.chars().rev().take_while(|c| c.is_ascii_digit()).collect(); + if num_str.is_empty() { + return None; + } + let num_str: String = num_str.chars().rev().collect(); + num_str.parse().ok() +} + +/// Run the project's `script/test` and return a structured result. +/// +/// If `worktree_path` is provided the script is run from that worktree +/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used. +pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result { + let project_root = ctx.agents.get_project_root(&ctx.state)?; + + let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) { + Some(wt) => validate_working_dir(wt, ctx)?, + None => project_root + .canonicalize() + .map_err(|e| format!("Cannot canonicalize project root: {e}"))?, + }; + + let script_path = working_dir.join("script").join("test"); + if !script_path.exists() { + return Err(format!( + "Test script not found: {}", + script_path.display() + )); + } + + let result = tokio::time::timeout( + std::time::Duration::from_secs(TEST_TIMEOUT_SECS), + tokio::task::spawn_blocking({ + let dir = working_dir.clone(); + let script = script_path.clone(); + move || { + std::process::Command::new("bash") + .arg(&script) + .current_dir(&dir) + .output() + } + }), + ) + .await; + + match result { + Err(_) => serde_json::to_string_pretty(&json!({ + "passed": false, + "exit_code": -1, + "timed_out": true, + "tests_passed": 0, + "tests_failed": 0, + "output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"), + })) + .map_err(|e| format!("Serialization error: {e}")), + Ok(Err(e)) => Err(format!("Task join error: {e}")), + Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")), + Ok(Ok(Ok(output))) => { + let passed = output.status.success(); + let exit_code = output.status.code().unwrap_or(-1); + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + let (tests_passed, tests_failed) = parse_test_counts(&combined); + let truncated = truncate_output(&combined, MAX_OUTPUT_LINES); + serde_json::to_string_pretty(&json!({ + "passed": passed, + "exit_code": exit_code, + "timed_out": false, + "tests_passed": tests_passed, + "tests_failed": tests_failed, + "output": truncated, + })) + .map_err(|e| format!("Serialization error: {e}")) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -619,4 +732,124 @@ mod tests { // Just ensure it doesn't panic and returns an Err about sandbox (not timeout) assert!(result.is_err()); } + + // ── tool_run_tests ──────────────────────────────────────────────── + + #[tokio::test] + async fn tool_run_tests_missing_script_returns_error() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + // No script/test in tmp — should return Err + let result = tool_run_tests(&json!({}), &ctx).await; + assert!(result.is_err(), "expected error for missing script: {result:?}"); + assert!( + result.unwrap_err().contains("not found"), + "error should mention 'not found'" + ); + } + + #[tokio::test] + async fn tool_run_tests_passes_when_script_exits_zero() { + let tmp = tempfile::tempdir().unwrap(); + let script_dir = tmp.path().join("script"); + std::fs::create_dir_all(&script_dir).unwrap(); + let script_path = script_dir.join("test"); + std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: ok. 3 passed; 0 failed'\nexit 0\n").unwrap(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); + } + + let ctx = test_ctx(tmp.path()); + let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + assert_eq!(parsed["passed"], true); + assert_eq!(parsed["exit_code"], 0); + assert_eq!(parsed["timed_out"], false); + assert_eq!(parsed["tests_passed"], 3); + assert_eq!(parsed["tests_failed"], 0); + } + + #[tokio::test] + async fn tool_run_tests_fails_when_script_exits_nonzero() { + let tmp = tempfile::tempdir().unwrap(); + let script_dir = tmp.path().join("script"); + std::fs::create_dir_all(&script_dir).unwrap(); + let script_path = script_dir.join("test"); + std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n").unwrap(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap(); + } + + let ctx = test_ctx(tmp.path()); + let result = tool_run_tests(&json!({}), &ctx).await.unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&result).unwrap(); + + assert_eq!(parsed["passed"], false); + assert_eq!(parsed["exit_code"], 1); + assert_eq!(parsed["tests_passed"], 1); + assert_eq!(parsed["tests_failed"], 2); + } + + #[tokio::test] + async fn tool_run_tests_worktree_path_must_be_inside_worktrees() { + let tmp = tempfile::tempdir().unwrap(); + let wt_dir = tmp.path().join(".huskies").join("worktrees"); + std::fs::create_dir_all(&wt_dir).unwrap(); + let ctx = test_ctx(tmp.path()); + // tmp.path() itself is outside worktrees → should fail validation + let result = + tool_run_tests(&json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx).await; + assert!(result.is_err()); + assert!( + result.unwrap_err().contains("worktrees"), + "expected sandbox error" + ); + } + + // ── truncate_output ─────────────────────────────────────────────── + + #[test] + fn truncate_output_short_text_unchanged() { + let text = "line1\nline2\nline3"; + assert_eq!(truncate_output(text, 10), text); + } + + #[test] + fn truncate_output_long_text_keeps_tail() { + let lines: Vec = (1..=200).map(|i| format!("line {i}")).collect(); + let text = lines.join("\n"); + let result = truncate_output(&text, 50); + assert!(result.contains("line 200"), "should keep last line: {result}"); + assert!(result.contains("omitted"), "should note omitted lines: {result}"); + assert!(!result.contains("line 1\n"), "should not keep first line: {result}"); + } + + // ── parse_test_counts ───────────────────────────────────────────── + + #[test] + fn parse_test_counts_extracts_passed_and_failed() { + let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;"; + let (passed, failed) = parse_test_counts(output); + assert_eq!(passed, 7); + assert_eq!(failed, 3); + } + + #[test] + fn parse_test_counts_no_results_returns_zeros() { + let (passed, failed) = parse_test_counts("no test output here"); + assert_eq!(passed, 0); + assert_eq!(failed, 0); + } + + #[test] + fn extract_count_finds_number_before_label() { + assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5)); + assert_eq!(extract_count("0 failed", "failed"), Some(0)); + assert_eq!(extract_count("no number here passed", "passed"), None); + } }