huskies: merge 494_story_mcp_tool_to_run_project_test_suite

This commit is contained in:
dave
2026-04-07 14:39:47 +00:00
parent 1b8c391836
commit 19768c23d5
5 changed files with 503 additions and 1 deletions
+5
View File
@@ -97,6 +97,11 @@ export const SLASH_COMMANDS: SlashCommand[] = [
description: description:
"Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).", "Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).",
}, },
{
name: "/test",
description:
"Run the project's test suite (`script/test`) and show pass/fail with output.",
},
{ {
name: "/btw <question>", name: "/btw <question>",
description: description:
+6
View File
@@ -15,6 +15,7 @@ mod help;
pub(crate) mod loc; pub(crate) mod loc;
mod move_story; mod move_story;
mod overview; mod overview;
mod run_tests;
mod setup; mod setup;
mod show; mod show;
mod status; mod status;
@@ -130,6 +131,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "Show test coverage: cached baseline by default, or `coverage run` to rerun the full suite", description: "Show test coverage: cached baseline by default, or `coverage run` to rerun the full suite",
handler: coverage::handle_coverage, handler: coverage::handle_coverage,
}, },
BotCommand {
name: "test",
description: "Run the project's test suite (`script/test`) and show pass/fail with output",
handler: run_tests::handle_test,
},
BotCommand { BotCommand {
name: "loc", name: "loc",
description: "Show top source files by line count: `loc` (top 10), `loc <N>`, or `loc <filepath>` for a specific file", description: "Show top source files by line count: `loc` (top 10), `loc <N>`, or `loc <filepath>` for a specific file",
+242
View File
@@ -0,0 +1,242 @@
//! Handler for the `test` bot command — run the project's test suite.
//!
//! Executes `script/test` from the project root and returns a formatted
//! pass/fail summary with output (truncated for failures).
use super::CommandContext;
/// Path of the test script, relative to the project root.
const TEST_SCRIPT: &str = "script/test";

/// Maximum number of output lines to include in the response.
const MAX_OUTPUT_LINES: usize = 80;
/// Handle the `test` bot command: run `script/test` from the project root
/// and return a formatted pass/fail summary with (truncated) output.
///
/// Returns `Some(reply)` in every case — missing script, spawn failure,
/// or a completed run.
pub(super) fn handle_test(ctx: &CommandContext) -> Option<String> {
    let script_path = ctx.project_root.join(TEST_SCRIPT);
    if !script_path.exists() {
        return Some(format!(
            "**Test**\n\nTest script not found: `{TEST_SCRIPT}`\n\nEnsure `{TEST_SCRIPT}` exists in the project root."
        ));
    }

    // Run synchronously; the handler itself is a blocking fn.
    let run = std::process::Command::new("bash")
        .arg(&script_path)
        .current_dir(ctx.project_root)
        .output();

    let out = match run {
        Ok(out) => out,
        Err(e) => return Some(format!("**Test**\n\nFailed to run test script: {e}")),
    };

    let status = if out.status.success() { "PASS" } else { "FAIL" };
    // Interleaving between the two streams is lost, but both matter
    // when diagnosing failures.
    let combined = format!(
        "{}{}",
        String::from_utf8_lossy(&out.stdout),
        String::from_utf8_lossy(&out.stderr)
    );
    let (tests_passed, tests_failed) = parse_test_counts(&combined);

    let mut result = format!("**Test: {status}**\n\n");
    // Only show counts when the output actually contained `test result:`
    // lines; otherwise the summary would read "0 passed, 0 failed".
    if tests_passed > 0 || tests_failed > 0 {
        result.push_str(&format!(
            "{tests_passed} passed, {tests_failed} failed\n\n"
        ));
    }
    result.push_str(&format!(
        "```\n{}\n```",
        truncate_output(&combined, MAX_OUTPUT_LINES)
    ));
    Some(result)
}
/// Truncate output to at most `max_lines` tail lines.
///
/// Short output is returned unchanged; otherwise the head is replaced by
/// a single marker noting how many lines were dropped.
fn truncate_output(output: &str, max_lines: usize) -> String {
    let all: Vec<&str> = output.lines().collect();
    match all.len().checked_sub(max_lines) {
        // Already within budget (covers len <= max_lines).
        None | Some(0) => output.to_string(),
        Some(omitted) => {
            let kept = &all[omitted..];
            format!("[... {omitted} lines omitted ...]\n{}", kept.join("\n"))
        }
    }
}
/// Parse cumulative passed/failed counts from `cargo test` output lines.
///
/// Sums the counts across every `test result:` line so multi-target runs
/// report suite-wide totals.
fn parse_test_counts(output: &str) -> (u64, u64) {
    output
        .lines()
        .filter(|line| line.contains("test result:"))
        .fold((0u64, 0u64), |(passed, failed), line| {
            (
                passed + extract_count(line, "passed").unwrap_or(0),
                failed + extract_count(line, "failed").unwrap_or(0),
            )
        })
}

/// Extract the count immediately before `label` in `line`
/// (e.g. `"5 passed"` with label `"passed"` → `Some(5)`).
fn extract_count(line: &str, label: &str) -> Option<u64> {
    // Everything before the first occurrence of the label, sans trailing
    // whitespace; the number (if any) sits at its end.
    let head = line[..line.find(label)?.min(line.len())].trim_end();
    let digit_count = head
        .chars()
        .rev()
        .take_while(|c| c.is_ascii_digit())
        .count();
    if digit_count == 0 {
        return None;
    }
    // ASCII digits are one byte each, so this byte offset is the start of
    // the trailing digit run and a valid char boundary.
    head[head.len() - digit_count..].parse().ok()
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::AgentPool;
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};

    /// Build a `CommandContext` borrowing the given pool, ambient-room set,
    /// and project root; `args` is the raw argument string after the command.
    fn make_ctx<'a>(
        agents: &'a Arc<AgentPool>,
        ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
        project_root: &'a std::path::Path,
        args: &'a str,
    ) -> super::super::CommandContext<'a> {
        super::super::CommandContext {
            bot_name: "Timmy",
            args,
            project_root,
            agents,
            ambient_rooms,
            room_id: "!test:example.com",
        }
    }

    /// Fresh agent pool for tests.
    // NOTE(review): 3000 is presumably a port/base value — see
    // `AgentPool::new_test` for its meaning.
    fn test_agents() -> Arc<AgentPool> {
        Arc::new(AgentPool::new_test(3000))
    }

    /// Empty ambient-room set, as expected by `make_ctx`.
    fn test_ambient() -> Arc<Mutex<HashSet<String>>> {
        Arc::new(Mutex::new(HashSet::new()))
    }

    /// Write `script/test` under `dir` with `content` and, on unix, mark it
    /// executable so the handler can run it.
    fn write_script(dir: &std::path::Path, content: &str) {
        let script_dir = dir.join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let path = script_dir.join("test");
        std::fs::write(&path, content).unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).unwrap();
        }
    }

    /// The `test` command must be present in the command registry.
    #[test]
    fn test_command_is_registered() {
        use super::super::commands;
        let found = commands().iter().any(|c| c.name == "test");
        assert!(found, "test command must be in the registry");
    }

    /// `help` output should mention the `test` command.
    #[test]
    fn test_command_appears_in_help() {
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
        );
        let output = result.unwrap();
        assert!(
            output.contains("test"),
            "help should list test command: {output}"
        );
    }

    /// A missing `script/test` yields a clear error message, not a panic.
    #[test]
    fn test_command_missing_script_returns_error() {
        let dir = tempfile::tempdir().unwrap();
        let agents = test_agents();
        let ambient = test_ambient();
        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
        let output = handle_test(&ctx).unwrap();
        assert!(
            output.contains("not found") || output.contains("script"),
            "missing script should produce a clear error: {output}"
        );
    }

    /// Exit code 0 renders as PASS and surfaces the parsed test count.
    #[test]
    fn test_command_pass_when_script_exits_zero() {
        let dir = tempfile::tempdir().unwrap();
        write_script(
            dir.path(),
            "#!/usr/bin/env bash\necho 'test result: ok. 4 passed; 0 failed'\nexit 0\n",
        );
        let agents = test_agents();
        let ambient = test_ambient();
        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
        let output = handle_test(&ctx).unwrap();
        assert!(output.contains("PASS"), "should show PASS: {output}");
        assert!(output.contains('4'), "should show test count: {output}");
    }

    /// Non-zero exit renders as FAIL and surfaces the failed count.
    #[test]
    fn test_command_fail_when_script_exits_nonzero() {
        let dir = tempfile::tempdir().unwrap();
        write_script(
            dir.path(),
            "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n",
        );
        let agents = test_agents();
        let ambient = test_ambient();
        let ctx = make_ctx(&agents, &ambient, dir.path(), "");
        let output = handle_test(&ctx).unwrap();
        assert!(output.contains("FAIL"), "should show FAIL: {output}");
        assert!(output.contains('2'), "should show failed count: {output}");
    }

    /// End-to-end: the dispatcher must resolve `@timmy test` itself rather
    /// than letting it fall through to the LLM.
    #[test]
    fn test_command_works_via_dispatch() {
        let dir = tempfile::tempdir().unwrap();
        write_script(
            dir.path(),
            "#!/usr/bin/env bash\necho 'ok'\nexit 0\n",
        );
        let agents = test_agents();
        let ambient = test_ambient();
        let room_id = "!test:example.com".to_string();
        let dispatch = super::super::CommandDispatch {
            bot_name: "Timmy",
            bot_user_id: "@timmy:homeserver.local",
            project_root: dir.path(),
            agents: &agents,
            ambient_rooms: &ambient,
            room_id: &room_id,
        };
        let result = super::super::try_handle_command(&dispatch, "@timmy test");
        assert!(
            result.is_some(),
            "test command must respond via dispatch (not fall through to LLM)"
        );
    }

    /// Truncation keeps the tail and notes how many lines were dropped.
    #[test]
    fn truncate_output_keeps_tail() {
        let lines: Vec<String> = (1..=150).map(|i| format!("line {i}")).collect();
        let text = lines.join("\n");
        let result = truncate_output(&text, 80);
        assert!(result.contains("line 150"), "should keep last line");
        assert!(result.contains("omitted"), "should note omitted lines");
    }

    /// Counts are summed across multiple `test result:` lines.
    #[test]
    fn parse_test_counts_sums_multiple_results() {
        let output = "test result: ok. 5 passed; 0 failed;\ntest result: ok. 3 passed; 1 failed;";
        let (p, f) = parse_test_counts(output);
        assert_eq!(p, 8);
        assert_eq!(f, 1);
    }
}
+17 -1
View File
@@ -1048,6 +1048,20 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
"required": ["command", "working_dir"] "required": ["command", "working_dir"]
} }
}, },
{
"name": "run_tests",
"description": "Run the project's test suite (script/test) and return a structured result with pass/fail, test counts, and truncated output. Runs from the project root by default, or from a specific worktree if worktree_path is provided.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Optional absolute path to a worktree to run tests in. Must be inside .huskies/worktrees/. Defaults to the project root."
}
},
"required": []
}
},
{ {
"name": "git_status", "name": "git_status",
"description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.", "description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .huskies/worktrees/. Push and remote operations are not available.",
@@ -1299,6 +1313,7 @@ async fn handle_tools_call(
"unblock_story" => story_tools::tool_unblock_story(&args, ctx), "unblock_story" => story_tools::tool_unblock_story(&args, ctx),
// Shell command execution // Shell command execution
"run_command" => shell_tools::tool_run_command(&args, ctx).await, "run_command" => shell_tools::tool_run_command(&args, ctx).await,
"run_tests" => shell_tools::tool_run_tests(&args, ctx).await,
// Git operations // Git operations
"git_status" => git_tools::tool_git_status(&args, ctx).await, "git_status" => git_tools::tool_git_status(&args, ctx).await,
"git_diff" => git_tools::tool_git_diff(&args, ctx).await, "git_diff" => git_tools::tool_git_diff(&args, ctx).await,
@@ -1422,6 +1437,7 @@ mod tests {
assert!(names.contains(&"unblock_story")); assert!(names.contains(&"unblock_story"));
assert!(names.contains(&"delete_story")); assert!(names.contains(&"delete_story"));
assert!(names.contains(&"run_command")); assert!(names.contains(&"run_command"));
assert!(names.contains(&"run_tests"));
assert!(names.contains(&"git_status")); assert!(names.contains(&"git_status"));
assert!(names.contains(&"git_diff")); assert!(names.contains(&"git_diff"));
assert!(names.contains(&"git_add")); assert!(names.contains(&"git_add"));
@@ -1429,7 +1445,7 @@ mod tests {
assert!(names.contains(&"git_log")); assert!(names.contains(&"git_log"));
assert!(names.contains(&"status")); assert!(names.contains(&"status"));
assert!(names.contains(&"loc_file")); assert!(names.contains(&"loc_file"));
assert_eq!(tools.len(), 56); assert_eq!(tools.len(), 57);
} }
#[test] #[test]
+233
View File
@@ -7,6 +7,8 @@ use std::path::PathBuf;
const DEFAULT_TIMEOUT_SECS: u64 = 120; const DEFAULT_TIMEOUT_SECS: u64 = 120;
const MAX_TIMEOUT_SECS: u64 = 600; const MAX_TIMEOUT_SECS: u64 = 600;
/// Timeout for a full test-suite run (independent of the ad-hoc command timeouts above).
const TEST_TIMEOUT_SECS: u64 = 600;
/// Maximum number of output lines returned to the caller; the tail is kept.
const MAX_OUTPUT_LINES: usize = 100;
/// Patterns that are unconditionally blocked regardless of context. /// Patterns that are unconditionally blocked regardless of context.
static BLOCKED_PATTERNS: &[&str] = &[ static BLOCKED_PATTERNS: &[&str] = &[
@@ -328,6 +330,117 @@ pub(super) fn handle_run_command_sse(
}))) })))
} }
/// Truncate output to at most `max_lines` lines, keeping the tail.
///
/// When lines are dropped, a single marker line records how many.
fn truncate_output(output: &str, max_lines: usize) -> String {
    let total = output.lines().count();
    if total <= max_lines {
        return output.to_string();
    }
    let omitted = total - max_lines;
    // Rebuild from the surviving tail without collecting the head.
    let mut result = format!("[... {omitted} lines omitted ...]");
    for line in output.lines().skip(omitted) {
        result.push('\n');
        result.push_str(line);
    }
    result
}
/// Parse cumulative passed/failed counts from `cargo test` output lines like:
/// `"test result: ok. 5 passed; 0 failed; ..."`
///
/// Counts from every `test result:` line are summed, so workspace runs with
/// multiple test targets report suite-wide totals.
fn parse_test_counts(output: &str) -> (u64, u64) {
    let mut totals = (0u64, 0u64);
    for line in output.lines().filter(|l| l.contains("test result:")) {
        totals.0 += extract_count(line, "passed").unwrap_or(0);
        totals.1 += extract_count(line, "failed").unwrap_or(0);
    }
    totals
}

/// Extract a count immediately before `label` in `line` (e.g. `"5 passed"` → 5).
fn extract_count(line: &str, label: &str) -> Option<u64> {
    let head = line[..line.find(label)?].trim_end();
    // Byte offset where the trailing run of ASCII digits starts; `?` bails
    // out when there is no digit directly before the label.
    let start = head
        .char_indices()
        .rev()
        .take_while(|(_, c)| c.is_ascii_digit())
        .last()
        .map(|(idx, _)| idx)?;
    head[start..].parse().ok()
}
/// Run the project's `script/test` and return a structured result.
///
/// If `worktree_path` is provided the script is run from that worktree
/// (must be inside `.huskies/worktrees/`). Otherwise the project root is used.
///
/// On success returns pretty-printed JSON with `passed`, `exit_code`,
/// `timed_out`, `tests_passed`, `tests_failed`, and (tail-truncated)
/// `output`. Returns `Err` when the script is missing or cannot be spawned.
pub(super) async fn tool_run_tests(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let project_root = ctx.agents.get_project_root(&ctx.state)?;
    let working_dir = match args.get("worktree_path").and_then(|v| v.as_str()) {
        Some(wt) => validate_working_dir(wt, ctx)?,
        None => project_root
            .canonicalize()
            .map_err(|e| format!("Cannot canonicalize project root: {e}"))?,
    };
    let script_path = working_dir.join("script").join("test");
    if !script_path.exists() {
        return Err(format!(
            "Test script not found: {}",
            script_path.display()
        ));
    }
    // Use tokio's async process support instead of spawn_blocking: when the
    // timeout fires the output future is dropped and `kill_on_drop` reaps
    // the child. (A spawn_blocking + std::process::Command approach would
    // leave the test suite running in the background after a timeout.)
    let result = tokio::time::timeout(
        std::time::Duration::from_secs(TEST_TIMEOUT_SECS),
        tokio::process::Command::new("bash")
            .arg(&script_path)
            .current_dir(&working_dir)
            .kill_on_drop(true)
            .output(),
    )
    .await;
    match result {
        // Timeout is reported as a structured failure (not an Err) so the
        // caller can distinguish "suite hung" from "couldn't run it".
        Err(_) => serde_json::to_string_pretty(&json!({
            "passed": false,
            "exit_code": -1,
            "timed_out": true,
            "tests_passed": 0,
            "tests_failed": 0,
            "output": format!("Test suite timed out after {TEST_TIMEOUT_SECS}s"),
        }))
        .map_err(|e| format!("Serialization error: {e}")),
        Ok(Err(e)) => Err(format!("Failed to execute test script: {e}")),
        Ok(Ok(output)) => {
            let passed = output.status.success();
            // `code()` is None when the script was killed by a signal.
            let exit_code = output.status.code().unwrap_or(-1);
            let stdout = String::from_utf8_lossy(&output.stdout);
            let stderr = String::from_utf8_lossy(&output.stderr);
            let combined = format!("{stdout}{stderr}");
            let (tests_passed, tests_failed) = parse_test_counts(&combined);
            serde_json::to_string_pretty(&json!({
                "passed": passed,
                "exit_code": exit_code,
                "timed_out": false,
                "tests_passed": tests_passed,
                "tests_failed": tests_failed,
                "output": truncate_output(&combined, MAX_OUTPUT_LINES),
            }))
            .map_err(|e| format!("Serialization error: {e}"))
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -619,4 +732,124 @@ mod tests {
// Just ensure it doesn't panic and returns an Err about sandbox (not timeout) // Just ensure it doesn't panic and returns an Err about sandbox (not timeout)
assert!(result.is_err()); assert!(result.is_err());
} }
    // ── tool_run_tests ────────────────────────────────────────────────

    /// Missing `script/test` must surface as an `Err`, not a JSON payload.
    #[tokio::test]
    async fn tool_run_tests_missing_script_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        // No script/test in tmp — should return Err
        let result = tool_run_tests(&json!({}), &ctx).await;
        assert!(result.is_err(), "expected error for missing script: {result:?}");
        assert!(
            result.unwrap_err().contains("not found"),
            "error should mention 'not found'"
        );
    }

    /// Exit 0 produces `passed: true` with counts parsed from the output.
    #[tokio::test]
    async fn tool_run_tests_passes_when_script_exits_zero() {
        let tmp = tempfile::tempdir().unwrap();
        let script_dir = tmp.path().join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let script_path = script_dir.join("test");
        std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: ok. 3 passed; 0 failed'\nexit 0\n").unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
        }
        let ctx = test_ctx(tmp.path());
        let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["passed"], true);
        assert_eq!(parsed["exit_code"], 0);
        assert_eq!(parsed["timed_out"], false);
        assert_eq!(parsed["tests_passed"], 3);
        assert_eq!(parsed["tests_failed"], 0);
    }

    /// Non-zero exit produces `passed: false` and the failed count.
    #[tokio::test]
    async fn tool_run_tests_fails_when_script_exits_nonzero() {
        let tmp = tempfile::tempdir().unwrap();
        let script_dir = tmp.path().join("script");
        std::fs::create_dir_all(&script_dir).unwrap();
        let script_path = script_dir.join("test");
        std::fs::write(&script_path, "#!/usr/bin/env bash\necho 'test result: FAILED. 1 passed; 2 failed'\nexit 1\n").unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&script_path, std::fs::Permissions::from_mode(0o755)).unwrap();
        }
        let ctx = test_ctx(tmp.path());
        let result = tool_run_tests(&json!({}), &ctx).await.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["passed"], false);
        assert_eq!(parsed["exit_code"], 1);
        assert_eq!(parsed["tests_passed"], 1);
        assert_eq!(parsed["tests_failed"], 2);
    }

    /// The `worktree_path` argument is sandboxed to `.huskies/worktrees/`.
    #[tokio::test]
    async fn tool_run_tests_worktree_path_must_be_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let wt_dir = tmp.path().join(".huskies").join("worktrees");
        std::fs::create_dir_all(&wt_dir).unwrap();
        let ctx = test_ctx(tmp.path());
        // tmp.path() itself is outside worktrees → should fail validation
        let result =
            tool_run_tests(&json!({"worktree_path": tmp.path().to_str().unwrap()}), &ctx).await;
        assert!(result.is_err());
        assert!(
            result.unwrap_err().contains("worktrees"),
            "expected sandbox error"
        );
    }

    // ── truncate_output ───────────────────────────────────────────────

    /// Output within the line budget is returned unchanged.
    #[test]
    fn truncate_output_short_text_unchanged() {
        let text = "line1\nline2\nline3";
        assert_eq!(truncate_output(text, 10), text);
    }

    /// Over-budget output keeps the tail and notes how many lines dropped.
    #[test]
    fn truncate_output_long_text_keeps_tail() {
        let lines: Vec<String> = (1..=200).map(|i| format!("line {i}")).collect();
        let text = lines.join("\n");
        let result = truncate_output(&text, 50);
        assert!(result.contains("line 200"), "should keep last line: {result}");
        assert!(result.contains("omitted"), "should note omitted lines: {result}");
        assert!(!result.contains("line 1\n"), "should not keep first line: {result}");
    }

    // ── parse_test_counts ─────────────────────────────────────────────

    /// Counts are summed across multiple `test result:` lines.
    #[test]
    fn parse_test_counts_extracts_passed_and_failed() {
        let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
        let (passed, failed) = parse_test_counts(output);
        assert_eq!(passed, 7);
        assert_eq!(failed, 3);
    }

    /// No `test result:` lines at all yields (0, 0).
    #[test]
    fn parse_test_counts_no_results_returns_zeros() {
        let (passed, failed) = parse_test_counts("no test output here");
        assert_eq!(passed, 0);
        assert_eq!(failed, 0);
    }

    /// The number immediately preceding the label is extracted; absent
    /// digits yield None.
    #[test]
    fn extract_count_finds_number_before_label() {
        assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5));
        assert_eq!(extract_count("0 failed", "failed"), Some(0));
        assert_eq!(extract_count("no number here passed", "passed"), None);
    }
} }