From 19768c23d5c4fc384af484fd75493f1ccba90136 Mon Sep 17 00:00:00 2001
From: dave <futurechimp@users.noreply.github.com>
Date: Tue, 7 Apr 2026 14:39:47 +0000
Subject: [PATCH] huskies: merge 494_story_mcp_tool_to_run_project_test_suite

---
 frontend/src/slashCommands.ts         |   5 +
 server/src/chat/commands/mod.rs       |   6 +
 server/src/chat/commands/run_tests.rs | 242 ++++++++++++++++++++++++++
 server/src/http/mcp/mod.rs            |  18 +-
 server/src/http/mcp/shell_tools.rs    | 233 +++++++++++++++++++++++++
 5 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 server/src/chat/commands/run_tests.rs
diff --git a/frontend/src/slashCommands.ts b/frontend/src/slashCommands.ts
index f23f35d1..6209dc7c 100644
--- a/frontend/src/slashCommands.ts
+++ b/frontend/src/slashCommands.ts
@@ -97,6 +97,11 @@ export const SLASH_COMMANDS: SlashCommand[] = [
 		description:
 			"Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).",
 	},
+	{
+		name: "/test",
+		description:
+			"Run the project's test suite (`script/test`) and show pass/fail with output.",
+	},
 	{
 		name: "/btw <question>",
 		description:
diff --git a/server/src/chat/commands/mod.rs b/server/src/chat/commands/mod.rs
index d902f68b..867ab4f8 100644
--- a/server/src/chat/commands/mod.rs
+++ b/server/src/chat/commands/mod.rs
@@ -15,6 +15,7 @@ mod help;
 pub(crate) mod loc;
 mod move_story;
 mod overview;
+mod run_tests;
 mod setup;
 mod show;
 mod status;
@@ -130,6 +131,11 @@ pub fn commands() -> &'static [BotCommand] {
             description: "Show test coverage: cached baseline by default, or `coverage run` to rerun the full suite",
             handler: coverage::handle_coverage,
         },
+        BotCommand {
+            name: "test",
+            description: "Run the project's test suite (`script/test`) and show pass/fail with output",
+            handler: run_tests::handle_test,
+        },
         BotCommand {
             name: "loc",
             description: "Show top source files by line count: `loc` (top 10), `loc <N>`, or `loc <filepath>` for a specific file",
diff --git a/server/src/chat/commands/run_tests.rs b/server/src/chat/commands/run_tests.rs
new file mode 100644
index 00000000..b9adccd7
--- /dev/null
+++ b/server/src/chat/commands/run_tests.rs
@@ -0,0 +1,242 @@
+//! Handler for the `test` bot command — run the project's test suite.
+//!
+//! Executes `script/test` from the project root and returns a formatted
+//! pass/fail summary followed by the tail of the combined stdout/stderr output.
+
+use super::CommandContext;
+
+const TEST_SCRIPT: &str = "script/test";
+/// Maximum number of output lines to include in the response.
+const MAX_OUTPUT_LINES: usize = 80;
+
+pub(super) fn handle_test(ctx: &CommandContext) -> Option<String> {
+    let script_path = ctx.project_root.join(TEST_SCRIPT); // <project_root>/script/test
+
+    if !script_path.exists() {
+        return Some(format!(
+            "**Test**\n\nTest script not found: `{TEST_SCRIPT}`\n\nEnsure `{TEST_SCRIPT}` exists in the project root."
+        ));
+    }
+
+    let output = std::process::Command::new("bash")
+        .arg(&script_path)
+        .current_dir(ctx.project_root)
+        .output(); // blocks the caller until the script exits; no timeout is applied here
+
+    match output {
+        Err(e) => Some(format!("**Test**\n\nFailed to run test script: {e}")), // bash could not be started
+        Ok(out) => {
+            let passed = out.status.success(); // PASS/FAIL is decided by the exit status only
+            let stdout = String::from_utf8_lossy(&out.stdout).to_string();
+            let stderr = String::from_utf8_lossy(&out.stderr).to_string();
+            let combined = format!("{stdout}{stderr}"); // all of stdout, then all of stderr (not interleaved)
+            let (tests_passed, tests_failed) = parse_test_counts(&combined); // counted on the full output, before truncation
+            let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
+
+            let status = if passed { "PASS" } else { "FAIL" };
+            let mut result = format!("**Test: {status}**\n\n");
+
+            if tests_passed > 0 || tests_failed > 0 {
+                result.push_str(&format!(
+                    "{tests_passed} passed, {tests_failed} failed\n\n"
+                ));
+            }
+
+            result.push_str(&format!("```\n{truncated}\n```"));
+            Some(result) // every path through this handler returns Some
+        }
+    }
+}
+
+/// Return `output` unchanged if it has at most `max_lines` lines, else only its last `max_lines` lines after an omitted-lines marker.
+fn truncate_output(output: &str, max_lines: usize) -> String {
+    let lines: Vec<&str> = output.lines().collect();
+    if lines.len() <= max_lines {
+        return output.to_string(); // short enough: returned as-is, including any trailing newline
+    }
+    let omitted = lines.len() - max_lines; // cannot underflow: lines.len() > max_lines here
+    let tail = lines[lines.len() - max_lines..].join("\n"); // joined tail has no trailing newline
+    format!("[... {omitted} lines omitted ...]\n{tail}")
+}
+
+/// Sum the `passed` and `failed` counts over every line of `output` containing `test result:` (the `cargo test` summary format).
+fn parse_test_counts(output: &str) -> (u64, u64) {
+    let mut total_passed = 0u64;
+    let mut total_failed = 0u64;
+    for line in output.lines() {
+        if line.contains("test result:") {
+            if let Some(p) = extract_count(line, "passed") {
+                total_passed += p; // a summary line without a parsable count contributes nothing
+            }
+            if let Some(f) = extract_count(line, "failed") {
+                total_failed += f; // the search is case-sensitive, so an upper-case FAILED marker is not matched
+            }
+        }
+    }
+    (total_passed, total_failed) // (0, 0) when no summary line was found
+}
+
+fn extract_count(line: &str, label: &str) -> Option<u64> {
+    let pos = line.find(label)?; // first occurrence of the label; None if it is absent
+    let before = line[..pos].trim_end(); // text left of the label, minus the separating whitespace
+    let num_str: String = before.chars().rev().take_while(|c| c.is_ascii_digit()).collect(); // trailing digits, in reverse order
+    if num_str.is_empty() {
+        return None; // the label is not directly preceded by a number
+    }
+    let num_str: String = num_str.chars().rev().collect(); // restore left-to-right digit order
+    num_str.parse().ok() // None if the digit run does not fit in a u64
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    fn make_ctx<'a>(
+        agents: &'a Arc<AgentPool>,
+        ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
+        project_root: &'a std::path::Path,
+        args: &'a str,
+    ) -> super::super::CommandContext<'a> {
+        super::super::CommandContext {
+            bot_name: "Timmy",
+            args,
+            project_root,
+            agents,
+            ambient_rooms,
+            room_id: "!test:example.com",
+        }
+    }
+
+    fn test_agents() -> Arc<AgentPool> {
+        Arc::new(AgentPool::new_test(3000))
+    }
+
+    fn test_ambient() -> Arc<Mutex<HashSet<String>>> {
+        Arc::new(Mutex::new(HashSet::new()))
+    }
+
+    fn write_script(dir: &std::path::Path, content: &str) {
+        let script_dir = dir.join("script");
+        std::fs::create_dir_all(&script_dir).unwrap();
+        let path = script_dir.join("test");
+        std::fs::write(&path, content).unwrap();
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).unwrap();
+        }
+    }
+
+    #[test]
+    fn test_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "test");
+        assert!(found, "test command must be in the registry");
+    }
+
+    #[test]
+    fn test_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("test"), // NOTE(review): the coverage command description also contains this word, so this check may pass vacuously — confirm
+            "help should list test command: {output}"
+        );
+    }
+
+    #[test]
+    fn test_command_missing_script_returns_error() {
+        let dir = tempfile::tempdir().unwrap(); // empty project root: no script/test inside
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
+        let output = handle_test(&ctx).unwrap();
+        assert!(
+            output.contains("not found") && output.contains(TEST_SCRIPT), // require the specific message and the script path
+            "missing script should produce a clear error: {output}"
+        );
+    }
+
+    #[test]
+    fn test_command_pass_when_script_exits_zero() {
+        let dir = tempfile::tempdir().unwrap();
+        write_script(
+            dir.path(),
+            "#!/usr/bin/env bash\necho 'test result: ok. 4 passed; 0 failed'\nexit 0\n",
+        );
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
+        let output = handle_test(&ctx).unwrap(); // the script echoes counts with a semicolon; the comma form below comes only from the handler
+        assert!(output.contains("**Test: PASS**"), "should show PASS: {output}");
+        assert!(output.contains("4 passed, 0 failed"), "should show test count: {output}");
+    }
+
+    #[test]
+    fn test_command_fail_when_script_exits_nonzero() {
+        let dir = tempfile::tempdir().unwrap();
+        write_script(
+            dir.path(),
+            "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n",
+        );
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
+        let output = handle_test(&ctx).unwrap(); // the echoed text already contains FAILED and a 2, so match only handler-generated strings
+        assert!(output.contains("**Test: FAIL**"), "should show FAIL: {output}");
+        assert!(output.contains("1 passed, 2 failed"), "should show failed count: {output}");
+    }
+
+    #[test]
+    fn test_command_works_via_dispatch() {
+        let dir = tempfile::tempdir().unwrap();
+        write_script(
+            dir.path(),
+            "#!/usr/bin/env bash\necho 'ok'\nexit 0\n",
+        );
+        let agents = test_agents();
+        let ambient = test_ambient();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = super::super::CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: dir.path(),
+            agents: &agents,
+            ambient_rooms: &ambient,
+            room_id: &room_id,
+        };
+        let result = super::super::try_handle_command(&dispatch, "@timmy test");
+        assert!(
+            result.is_some(),
+            "test command must respond via dispatch (not fall through to LLM)"
+        );
+    }
+
+    #[test]
+    fn truncate_output_keeps_tail() {
+        let lines: Vec<String> = (1..=150).map(|i| format!("line {i}")).collect();
+        let text = lines.join("\n");
+        let result = truncate_output(&text, 80);
+        assert!(result.contains("line 150"), "should keep last line");
+        assert!(result.contains("omitted"), "should note omitted lines");
+    }
+
+    #[test]
+    fn parse_test_counts_sums_multiple_results() {
+        let output = "test result: ok. 5 passed; 0 failed;\ntest result: ok. 3 passed; 1 failed;";
+        let (p, f) = parse_test_counts(output);
+        assert_eq!(p, 8);
+        assert_eq!(f, 1);
+    }
+}
diff --git a/server/src/http/mcp/mod.rs b/server/src/http/mcp/mod.rs
index 0e4bd633..74ff3608 100644
--- a/server/src/http/mcp/mod.rs
+++ b/server/src/http/mcp/mod.rs
@@ -1048,6 +1048,20 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
                         "required": ["command", "working_dir"]
                     }
                 },
+                {
+                    "name": "run_tests",
+                    "description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.",
+                    "inputSchema": {
+                        "type": "object",
+                        "properties": {
+                            "worktree_path": {
+                                "type": "string",
+                                "description": "Optional absolute path to a worktree to run tests in. Must be inside .huskies/worktrees/. Defaults to the project root."
+                            }
+                        },
+                        "required": []
+                    }
+                },
                 {
                     "name": "git_status",
                     "description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.",
@@ -1299,6 +1313,7 @@ async fn handle_tools_call(
         "unblock_story" => story_tools::tool_unblock_story(&args, ctx),
         // Shell command execution
         "run_command" => shell_tools::tool_run_command(&args, ctx).await,
+        "run_tests" => shell_tools::tool_run_tests(&args, ctx).await,
         // Git operations
         "git_status" => git_tools::tool_git_status(&args, ctx).await,
         "git_diff" => git_tools::tool_git_diff(&args, ctx).await,
@@ -1422,6 +1437,7 @@ mod tests {
         assert!(names.contains(&"unblock_story"));
         assert!(names.contains(&"delete_story"));
         assert!(names.contains(&"run_command"));
+        assert!(names.contains(&"run_tests"));
         assert!(names.contains(&"git_status"));
         assert!(names.contains(&"git_diff"));
         assert!(names.contains(&"git_add"));
@@ -1429,7 +1445,7 @@ mod tests {
         assert!(names.contains(&"git_log"));
         assert!(names.contains(&"status"));
         assert!(names.contains(&"loc_file"));
-        assert_eq!(tools.len(), 56);
+        assert_eq!(tools.len(), 57);
     }
 
     #[test]
diff --git a/server/src/http/mcp/shell_tools.rs b/server/src/http/mcp/shell_tools.rs
index 50f8df23..d4362ecb 100644
--- a/server/src/http/mcp/shell_tools.rs
+++ b/server/src/http/mcp/shell_tools.rs
@@ -7,6 +7,8 @@ use std::path::PathBuf;
 
 const DEFAULT_TIMEOUT_SECS: u64 = 120;
 const MAX_TIMEOUT_SECS: u64 = 600;
+const TEST_TIMEOUT_SECS: u64 = 600;
+const MAX_OUTPUT_LINES: usize = 100;
 
 /// Patterns that are unconditionally blocked regardless of context.
 static BLOCKED_PATTERNS: &[&str] = &[
@@ -328,6 +330,117 @@ pub(super) fn handle_run_command_sse(
         })))
 }
 
+/// Return `output` unchanged if it has at most `max_lines` lines, else only its last `max_lines` lines after an omitted-lines marker.
+fn truncate_output(output: &str, max_lines: usize) -> String {
+    let lines: Vec<&str> = output.lines().collect();
+    if lines.len() <= max_lines {
+        return output.to_string(); // short enough: returned as-is, including any trailing newline
+    }
+    let omitted = lines.len() - max_lines; // cannot underflow: lines.len() > max_lines here
+    let tail = lines[lines.len() - max_lines..].join("\n"); // joined tail has no trailing newline
+    format!("[... {omitted} lines omitted ...]\n{tail}")
+}
+
+/// Sum the `passed` and `failed` counts over every line of `output` containing `test result:`,
+/// i.e. `cargo test` summary lines such as `"test result: ok. 5 passed; 0 failed; ..."`.
+fn parse_test_counts(output: &str) -> (u64, u64) {
+    let mut total_passed = 0u64;
+    let mut total_failed = 0u64;
+    for line in output.lines() {
+        if line.contains("test result:") {
+            if let Some(p) = extract_count(line, "passed") {
+                total_passed += p; // a summary line without a parsable count contributes nothing
+            }
+            if let Some(f) = extract_count(line, "failed") {
+                total_failed += f; // the search is case-sensitive, so an upper-case FAILED marker is not matched
+            }
+        }
+    }
+    (total_passed, total_failed) // (0, 0) when no summary line was found
+}
+
+/// Parse the unsigned integer directly before the first occurrence of `label` in `line` (e.g. `"5 passed"` → 5); `None` if there is none.
+fn extract_count(line: &str, label: &str) -> Option<u64> {
+    let pos = line.find(label)?; // first occurrence of the label; None if it is absent
+    let before = line[..pos].trim_end(); // text left of the label, minus the separating whitespace
+    let num_str: String = before.chars().rev().take_while(|c| c.is_ascii_digit()).collect(); // trailing digits, in reverse order
+    if num_str.is_empty() {
+        return None; // the label is not directly preceded by a number
+    }
+    let num_str: String = num_str.chars().rev().collect(); // restore left-to-right digit order
+    num_str.parse().ok() // None if the digit run does not fit in a u64
+}
+
+/// Run the project's `script/test` with `bash` and return a pretty-printed JSON result.
+///
+/// Runs in `worktree_path` when provided (must be inside `.huskies/worktrees/`), otherwise in
+/// the canonicalized project root. A timeout yields `Ok` JSON with `timed_out: true`, not `Err`.
+pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
+        Some(wt) => validate_working_dir(wt, ctx)?, // a non-string worktree_path value falls through to the None arm
+        None => project_root
+            .canonicalize()
+            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
+    };
+
+    let script_path = working_dir.join("script").join("test");
+    if !script_path.exists() {
+        return Err(format!(
+            "Test script not found: {}",
+            script_path.display()
+        ));
+    }
+
+    let result = tokio::time::timeout(
+        std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
+        tokio::task::spawn_blocking({
+            let dir = working_dir.clone();
+            let script = script_path.clone();
+            move || {
+                std::process::Command::new("bash")
+                    .arg(&script)
+                    .current_dir(&dir)
+                    .output() // blocking wait for the child, hence spawn_blocking
+            }
+        }),
+    )
+    .await;
+
+    match result {
+        Err(_) => serde_json::to_string_pretty(&json!({
+            "passed": false,
+            "exit_code": -1,
+            "timed_out": true, // NOTE(review): the timeout only stops waiting; the blocking task and the script itself appear to keep running — confirm
+            "tests_passed": 0,
+            "tests_failed": 0,
+            "output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
+        }))
+        .map_err(|e| format!("Serialization error: {e}")),
+        Ok(Err(e)) => Err(format!("Task join error: {e}")), // the blocking task panicked or was cancelled
+        Ok(Ok(Err(e))) => Err(format!("Failed to execute test script: {e}")), // bash could not be started
+        Ok(Ok(Ok(output))) => {
+            let passed = output.status.success(); // pass/fail is decided by the exit status only
+            let exit_code = output.status.code().unwrap_or(-1); // code() is None when the process was terminated by a signal (Unix)
+            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+            let combined = format!("{stdout}{stderr}"); // all of stdout, then all of stderr (not interleaved)
+            let (tests_passed, tests_failed) = parse_test_counts(&combined); // counted on the full output, before truncation
+            let truncated = truncate_output(&combined, MAX_OUTPUT_LINES);
+            serde_json::to_string_pretty(&json!({
+                "passed": passed,
+                "exit_code": exit_code,
+                "timed_out": false,
+                "tests_passed": tests_passed,
+                "tests_failed": tests_failed,
+                "output": truncated,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -619,4 +732,124 @@ mod tests {
         // Just ensure it doesn't panic and returns an Err about sandbox (not timeout)
         assert!(result.is_err());
     }
+
+    // ── tool_run_tests ────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn tool_run_tests_missing_script_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // No script/test in tmp — should return Err
+        let result = tool_run_tests(&json!({}), &ctx).await;
+        assert!(result.is_err(), "expected error for missing script: {result:?}");
+        assert!(
+            result.unwrap_err().contains("not found"),
+            "error should mention 'not found'"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_run_tests_passes_when_script_exits_zero() {
+        let tmp = tempfile::tempdir().unwrap();
+        let script_dir = tmp.path().join("script");
+        std::fs::create_dir_all(&script_dir).unwrap();
+        let script_path = script_dir.join("test");
+        std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: ok. 3 passed; 0 failed'\nexit 0\n").unwrap();
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
+        }
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+
+        assert_eq!(parsed["passed"], true);
+        assert_eq!(parsed["exit_code"], 0);
+        assert_eq!(parsed["timed_out"], false);
+        assert_eq!(parsed["tests_passed"], 3);
+        assert_eq!(parsed["tests_failed"], 0);
+    }
+
+    #[tokio::test]
+    async fn tool_run_tests_fails_when_script_exits_nonzero() {
+        let tmp = tempfile::tempdir().unwrap();
+        let script_dir = tmp.path().join("script");
+        std::fs::create_dir_all(&script_dir).unwrap();
+        let script_path = script_dir.join("test");
+        std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n").unwrap();
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
+        }
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+
+        assert_eq!(parsed["passed"], false);
+        assert_eq!(parsed["exit_code"], 1);
+        assert_eq!(parsed["tests_passed"], 1);
+        assert_eq!(parsed["tests_failed"], 2);
+    }
+
+    #[tokio::test]
+    async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let wt_dir = tmp.path().join(".huskies").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        // tmp.path() itself is outside worktrees → should fail validation
+        let result =
+            tool_run_tests(&json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx).await;
+        assert!(result.is_err());
+        assert!(
+            result.unwrap_err().contains("worktrees"),
+            "expected sandbox error"
+        );
+    }
+
+    // ── truncate_output ───────────────────────────────────────────────
+
+    #[test]
+    fn truncate_output_short_text_unchanged() {
+        let text = "line1\nline2\nline3";
+        assert_eq!(truncate_output(text, 10), text);
+    }
+
+    #[test]
+    fn truncate_output_long_text_keeps_tail() {
+        let lines: Vec<String> = (1..=200).map(|i| format!("line {i}")).collect();
+        let text = lines.join("\n");
+        let result = truncate_output(&text, 50);
+        assert!(result.contains("line 200"), "should keep last line: {result}");
+        assert!(result.contains("omitted"), "should note omitted lines: {result}");
+        assert!(!result.contains("line 1\n"), "should not keep first line: {result}");
+    }
+
+    // ── parse_test_counts ─────────────────────────────────────────────
+
+    #[test]
+    fn parse_test_counts_extracts_passed_and_failed() {
+        let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
+        let (passed, failed) = parse_test_counts(output);
+        assert_eq!(passed, 7);
+        assert_eq!(failed, 3);
+    }
+
+    #[test]
+    fn parse_test_counts_no_results_returns_zeros() {
+        let (passed, failed) = parse_test_counts("no test output here");
+        assert_eq!(passed, 0);
+        assert_eq!(failed, 0);
+    }
+
+    #[test]
+    fn extract_count_finds_number_before_label() {
+        assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5));
+        assert_eq!(extract_count("0 failed", "failed"), Some(0));
+        assert_eq!(extract_count("no number here passed", "passed"), None);
+    }
 }