From 287c64faf16b6eb58de2d24e4e5726af037d7912 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 20 Mar 2026 09:51:53 +0000 Subject: [PATCH] story-kit: merge 347_story_mcp_tool_for_shell_command_execution --- Cargo.lock | 11 + server/Cargo.toml | 2 +- server/src/http/mcp/mod.rs | 39 +- server/src/http/mcp/shell_tools.rs | 626 +++++++++++++++++++++++++++++ 4 files changed, 672 insertions(+), 6 deletions(-) create mode 100644 server/src/http/mcp/shell_tools.rs diff --git a/Cargo.lock b/Cargo.lock index 009c830..b6d1901 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3909,6 +3909,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + [[package]] name = "signature" version = "2.2.0" @@ -4282,6 +4292,7 @@ dependencies = [ "mio", "parking_lot", "pin-project-lite", + "signal-hook-registry", "socket2", "tokio-macros", "windows-sys 0.61.2", diff --git a/server/Cargo.toml b/server/Cargo.toml index 5f8edfc..0890e68 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -25,7 +25,7 @@ serde_json = { workspace = true } serde_urlencoded = { workspace = true } serde_yaml = { workspace = true } strip-ansi-escapes = { workspace = true } -tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] } +tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync", "process"] } toml = { workspace = true } uuid = { workspace = true, features = ["v4", "serde"] } walkdir = { workspace = true } diff --git a/server/src/http/mcp/mod.rs b/server/src/http/mcp/mod.rs index 1b6cfa2..75927ca 100644 --- a/server/src/http/mcp/mod.rs +++ b/server/src/http/mcp/mod.rs @@ -12,6 +12,7 @@ pub mod agent_tools; pub mod diagnostics; pub mod merge_tools; pub mod qa_tools; +pub mod shell_tools; pub mod story_tools; /// Returns true when the Accept header includes text/event-stream. @@ -33,7 +34,7 @@ struct JsonRpcRequest { } #[derive(Serialize)] -struct JsonRpcResponse { +pub(super) struct JsonRpcResponse { jsonrpc: &'static str, #[serde(skip_serializing_if = "Option::is_none")] id: Option, @@ -52,7 +53,7 @@ struct JsonRpcError { } impl JsonRpcResponse { - fn success(id: Option, result: Value) -> Self { + pub(super) fn success(id: Option, result: Value) -> Self { Self { jsonrpc: "2.0", id, @@ -61,7 +62,7 @@ impl JsonRpcResponse { } } - fn error(id: Option, code: i64, message: String) -> Self { + pub(super) fn error(id: Option, code: i64, message: String) -> Self { Self { jsonrpc: "2.0", id, @@ -132,6 +133,9 @@ pub async fn mcp_post_handler(req: &Request, body: Body, ctx: Data<&Arc Response { .body(Body::from(body)) } -fn to_sse_response(resp: JsonRpcResponse) -> Response { +pub(super) fn to_sse_response(resp: JsonRpcResponse) -> Response { let json = serde_json::to_string(&resp).unwrap_or_default(); let sse_body = format!("data: {json}\n\n"); Response::builder() @@ -999,6 +1003,28 @@ fn handle_tools_list(id: Option) -> JsonRpcResponse { }, "required": ["story_id", "target_stage"] } + }, + { + "name": "run_command", + "description": "Execute a shell command in an agent's worktree directory. The working_dir must be inside .story_kit/worktrees/. Returns stdout, stderr, exit_code, and timed_out. Supports SSE streaming (send Accept: text/event-stream) for long-running commands. Dangerous commands (rm -rf /, sudo, etc.) are blocked.", + "inputSchema": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The bash command to execute (passed to bash -c)" + }, + "working_dir": { + "type": "string", + "description": "Absolute path to the worktree directory to run the command in. Must be inside .story_kit/worktrees/." + }, + "timeout": { + "type": "integer", + "description": "Timeout in seconds (default: 120, max: 600)" + } + }, + "required": ["command", "working_dir"] + } } ] }), @@ -1079,6 +1105,8 @@ async fn handle_tools_call( "delete_story" => story_tools::tool_delete_story(&args, ctx).await, // Arbitrary pipeline movement "move_story" => diagnostics::tool_move_story(&args, ctx), + // Shell command execution + "run_command" => shell_tools::tool_run_command(&args, ctx).await, _ => Err(format!("Unknown tool: {tool_name}")), }; @@ -1188,7 +1216,8 @@ mod tests { assert!(names.contains(&"get_token_usage")); assert!(names.contains(&"move_story")); assert!(names.contains(&"delete_story")); - assert_eq!(tools.len(), 42); + assert!(names.contains(&"run_command")); + assert_eq!(tools.len(), 43); } #[test] diff --git a/server/src/http/mcp/shell_tools.rs b/server/src/http/mcp/shell_tools.rs new file mode 100644 index 0000000..50046f3 --- /dev/null +++ b/server/src/http/mcp/shell_tools.rs @@ -0,0 +1,626 @@ +use crate::http::context::AppContext; +use bytes::Bytes; +use futures::StreamExt; +use poem::{Body, Response}; +use serde_json::{json, Value}; +use std::path::PathBuf; + +const DEFAULT_TIMEOUT_SECS: u64 = 120; +const MAX_TIMEOUT_SECS: u64 = 600; + +/// Patterns that are unconditionally blocked regardless of context. +static BLOCKED_PATTERNS: &[&str] = &[ + "rm -rf /", + "rm -fr /", + "rm -rf /*", + "rm -fr /*", + "rm --no-preserve-root", + ":(){ :|:& };:", + "> /dev/sda", + "dd if=/dev", +]; + +/// Binaries that are unconditionally blocked. +static BLOCKED_BINARIES: &[&str] = &[ + "sudo", + "su", + "shutdown", + "reboot", + "halt", + "poweroff", + "mkfs", +]; + +/// Returns an error message if the command matches a blocked pattern or binary. +fn is_dangerous(command: &str) -> Option { + let trimmed = command.trim(); + + // Check each blocked pattern (substring match) + for &pattern in BLOCKED_PATTERNS { + if trimmed.contains(pattern) { + return Some(format!( + "Command blocked: dangerous pattern '{pattern}' detected" + )); + } + } + + // Check first token of the command against blocked binaries + if let Some(first_token) = trimmed.split_whitespace().next() { + let binary = std::path::Path::new(first_token) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(first_token); + if BLOCKED_BINARIES.contains(&binary) { + return Some(format!("Command blocked: '{binary}' is not permitted")); + } + } + + None +} + +/// Validates that `working_dir` exists and is inside the project's +/// `.story_kit/worktrees/` directory. Returns the canonicalized path. +fn validate_working_dir(working_dir: &str, ctx: &AppContext) -> Result { + let wd = PathBuf::from(working_dir); + + if !wd.is_absolute() { + return Err("working_dir must be an absolute path".to_string()); + } + if !wd.exists() { + return Err(format!("working_dir does not exist: {working_dir}")); + } + + let project_root = ctx.agents.get_project_root(&ctx.state)?; + let worktrees_root = project_root.join(".story_kit").join("worktrees"); + + let canonical_wd = wd + .canonicalize() + .map_err(|e| format!("Cannot canonicalize working_dir: {e}"))?; + + // If worktrees_root doesn't exist yet, we can't allow anything + let canonical_wt = if worktrees_root.exists() { + worktrees_root + .canonicalize() + .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))? + } else { + return Err("No worktrees directory found in project".to_string()); + }; + + if !canonical_wd.starts_with(&canonical_wt) { + return Err(format!( + "working_dir must be inside .story_kit/worktrees/. Got: {working_dir}" + )); + } + + Ok(canonical_wd) +} + +/// Regular (non-SSE) run_command: runs the bash command to completion and +/// returns stdout, stderr, exit_code, and whether it timed out. +pub(super) async fn tool_run_command(args: &Value, ctx: &AppContext) -> Result { + let command = args + .get("command") + .and_then(|v| v.as_str()) + .ok_or("Missing required argument: command")? + .to_string(); + + let working_dir = args + .get("working_dir") + .and_then(|v| v.as_str()) + .ok_or("Missing required argument: working_dir")?; + + let timeout_secs = args + .get("timeout") + .and_then(|v| v.as_u64()) + .unwrap_or(DEFAULT_TIMEOUT_SECS) + .min(MAX_TIMEOUT_SECS); + + if let Some(reason) = is_dangerous(&command) { + return Err(reason); + } + + let canonical_dir = validate_working_dir(working_dir, ctx)?; + + let result = tokio::time::timeout( + std::time::Duration::from_secs(timeout_secs), + tokio::task::spawn_blocking({ + let cmd = command.clone(); + let dir = canonical_dir.clone(); + move || { + std::process::Command::new("bash") + .arg("-c") + .arg(&cmd) + .current_dir(&dir) + .output() + } + }), + ) + .await; + + match result { + Err(_) => { + // timed out + serde_json::to_string_pretty(&json!({ + "stdout": "", + "stderr": format!("Command timed out after {timeout_secs}s"), + "exit_code": -1, + "timed_out": true, + })) + .map_err(|e| format!("Serialization error: {e}")) + } + Ok(Err(e)) => Err(format!("Task join error: {e}")), + Ok(Ok(Err(e))) => Err(format!("Failed to execute command: {e}")), + Ok(Ok(Ok(output))) => { + serde_json::to_string_pretty(&json!({ + "stdout": String::from_utf8_lossy(&output.stdout), + "stderr": String::from_utf8_lossy(&output.stderr), + "exit_code": output.status.code().unwrap_or(-1), + "timed_out": false, + })) + .map_err(|e| format!("Serialization error: {e}")) + } + } +} + +/// SSE streaming run_command: spawns the process and emits stdout/stderr lines +/// as JSON-RPC notifications, then a final response with exit_code. +pub(super) fn handle_run_command_sse( + id: Option, + params: &Value, + ctx: &AppContext, +) -> Response { + use super::{to_sse_response, JsonRpcResponse}; + + let args = params.get("arguments").cloned().unwrap_or(json!({})); + + let command = match args.get("command").and_then(|v| v.as_str()) { + Some(c) => c.to_string(), + None => { + return to_sse_response(JsonRpcResponse::error( + id, + -32602, + "Missing required argument: command".into(), + )) + } + }; + + let working_dir = match args.get("working_dir").and_then(|v| v.as_str()) { + Some(d) => d.to_string(), + None => { + return to_sse_response(JsonRpcResponse::error( + id, + -32602, + "Missing required argument: working_dir".into(), + )) + } + }; + + let timeout_secs = args + .get("timeout") + .and_then(|v| v.as_u64()) + .unwrap_or(DEFAULT_TIMEOUT_SECS) + .min(MAX_TIMEOUT_SECS); + + if let Some(reason) = is_dangerous(&command) { + return to_sse_response(JsonRpcResponse::error(id, -32602, reason)); + } + + let canonical_dir = match validate_working_dir(&working_dir, ctx) { + Ok(d) => d, + Err(e) => return to_sse_response(JsonRpcResponse::error(id, -32602, e)), + }; + + let final_id = id; + + let stream = async_stream::stream! { + use tokio::io::AsyncBufReadExt; + + let mut child = match tokio::process::Command::new("bash") + .arg("-c") + .arg(&command) + .current_dir(&canonical_dir) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + { + Ok(c) => c, + Err(e) => { + let resp = JsonRpcResponse::success( + final_id, + json!({ + "content": [{"type": "text", "text": format!("Failed to spawn process: {e}")}], + "isError": true + }), + ); + if let Ok(s) = serde_json::to_string(&resp) { + yield Ok::<_, std::io::Error>(format!("data: {s}\n\n")); + } + return; + } + }; + + let stdout = child.stdout.take().expect("stdout piped"); + let stderr = child.stderr.take().expect("stderr piped"); + let mut stdout_lines = tokio::io::BufReader::new(stdout).lines(); + let mut stderr_lines = tokio::io::BufReader::new(stderr).lines(); + + let deadline = tokio::time::Instant::now() + + std::time::Duration::from_secs(timeout_secs); + let mut stdout_done = false; + let mut stderr_done = false; + let mut timed_out = false; + + loop { + if stdout_done && stderr_done { + break; + } + + let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); + if remaining.is_zero() { + timed_out = true; + let _ = child.kill().await; + break; + } + + tokio::select! { + line = stdout_lines.next_line(), if !stdout_done => { + match line { + Ok(Some(l)) => { + let notif = json!({ + "jsonrpc": "2.0", + "method": "notifications/tools/progress", + "params": { "stream": "stdout", "line": l } + }); + if let Ok(s) = serde_json::to_string(¬if) { + yield Ok::<_, std::io::Error>(format!("data: {s}\n\n")); + } + } + _ => { stdout_done = true; } + } + } + line = stderr_lines.next_line(), if !stderr_done => { + match line { + Ok(Some(l)) => { + let notif = json!({ + "jsonrpc": "2.0", + "method": "notifications/tools/progress", + "params": { "stream": "stderr", "line": l } + }); + if let Ok(s) = serde_json::to_string(¬if) { + yield Ok::<_, std::io::Error>(format!("data: {s}\n\n")); + } + } + _ => { stderr_done = true; } + } + } + _ = tokio::time::sleep(remaining) => { + timed_out = true; + let _ = child.kill().await; + break; + } + } + } + + let exit_code = child.wait().await.ok().and_then(|s| s.code()).unwrap_or(-1); + + let summary = json!({ + "exit_code": exit_code, + "timed_out": timed_out, + }); + + let final_resp = JsonRpcResponse::success( + final_id, + json!({ + "content": [{"type": "text", "text": summary.to_string()}] + }), + ); + if let Ok(s) = serde_json::to_string(&final_resp) { + yield Ok::<_, std::io::Error>(format!("data: {s}\n\n")); + } + }; + + Response::builder() + .status(poem::http::StatusCode::OK) + .header("Content-Type", "text/event-stream") + .header("Cache-Control", "no-cache") + .body(Body::from_bytes_stream(stream.map(|r| { + r.map(Bytes::from) + }))) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::http::context::AppContext; + use serde_json::json; + + fn test_ctx(dir: &std::path::Path) -> AppContext { + AppContext::new_test(dir.to_path_buf()) + } + + // ── is_dangerous ───────────────────────────────────────────────── + + #[test] + fn is_dangerous_blocks_rm_rf_root() { + assert!(is_dangerous("rm -rf /").is_some()); + assert!(is_dangerous(" rm -rf / ").is_some()); + } + + #[test] + fn is_dangerous_blocks_rm_fr_root() { + assert!(is_dangerous("rm -fr /").is_some()); + } + + #[test] + fn is_dangerous_blocks_rm_rf_star() { + assert!(is_dangerous("rm -rf /*").is_some()); + assert!(is_dangerous("rm -fr /*").is_some()); + } + + #[test] + fn is_dangerous_blocks_sudo() { + assert!(is_dangerous("sudo ls").is_some()); + } + + #[test] + fn is_dangerous_blocks_shutdown() { + assert!(is_dangerous("shutdown -h now").is_some()); + } + + #[test] + fn is_dangerous_blocks_mkfs() { + assert!(is_dangerous("mkfs /dev/sda1").is_some()); + } + + #[test] + fn is_dangerous_blocks_fork_bomb() { + assert!(is_dangerous(":(){ :|:& };:").is_some()); + } + + #[test] + fn is_dangerous_allows_safe_commands() { + assert!(is_dangerous("cargo build").is_none()); + assert!(is_dangerous("npm test").is_none()); + assert!(is_dangerous("git status").is_none()); + assert!(is_dangerous("ls -la").is_none()); + assert!(is_dangerous("rm -rf target/").is_none()); + } + + // ── validate_working_dir ────────────────────────────────────────── + + #[test] + fn validate_working_dir_rejects_relative_path() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = validate_working_dir("relative/path", &ctx); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("absolute")); + } + + #[test] + fn validate_working_dir_rejects_nonexistent_path() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = validate_working_dir("/nonexistent_path_xyz_abc", &ctx); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("does not exist")); + } + + #[test] + fn validate_working_dir_rejects_path_outside_worktrees() { + let tmp = tempfile::tempdir().unwrap(); + // Create the worktrees dir so it exists + let wt_dir = tmp.path().join(".story_kit").join("worktrees"); + std::fs::create_dir_all(&wt_dir).unwrap(); + let ctx = test_ctx(tmp.path()); + // Try to use /tmp (outside worktrees) + let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx); + assert!(result.is_err()); + assert!( + result.unwrap_err().contains("inside .story_kit/worktrees"), + "expected sandbox error" + ); + } + + #[test] + fn validate_working_dir_accepts_path_inside_worktrees() { + let tmp = tempfile::tempdir().unwrap(); + let story_wt = tmp + .path() + .join(".story_kit") + .join("worktrees") + .join("42_test_story"); + std::fs::create_dir_all(&story_wt).unwrap(); + let ctx = test_ctx(tmp.path()); + let result = validate_working_dir(story_wt.to_str().unwrap(), &ctx); + assert!(result.is_ok(), "expected Ok, got: {:?}", result); + } + + #[test] + fn validate_working_dir_rejects_no_worktrees_dir() { + let tmp = tempfile::tempdir().unwrap(); + // Do NOT create worktrees dir + let ctx = test_ctx(tmp.path()); + let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx); + assert!(result.is_err()); + } + + // ── tool_run_command ─────────────────────────────────────────────── + + #[tokio::test] + async fn tool_run_command_missing_command() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_run_command(&json!({"working_dir": "/tmp"}), &ctx).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("command")); + } + + #[tokio::test] + async fn tool_run_command_missing_working_dir() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_run_command(&json!({"command": "ls"}), &ctx).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("working_dir")); + } + + #[tokio::test] + async fn tool_run_command_blocks_dangerous_command() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({"command": "rm -rf /", "working_dir": "/tmp"}), + &ctx, + ) + .await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("blocked")); + } + + #[tokio::test] + async fn tool_run_command_rejects_path_outside_worktrees() { + let tmp = tempfile::tempdir().unwrap(); + let wt_dir = tmp.path().join(".story_kit").join("worktrees"); + std::fs::create_dir_all(&wt_dir).unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({ + "command": "ls", + "working_dir": tmp.path().to_str().unwrap() + }), + &ctx, + ) + .await; + assert!(result.is_err()); + assert!( + result.unwrap_err().contains("worktrees"), + "expected sandbox error" + ); + } + + #[tokio::test] + async fn tool_run_command_runs_in_worktree_and_returns_output() { + let tmp = tempfile::tempdir().unwrap(); + let story_wt = tmp + .path() + .join(".story_kit") + .join("worktrees") + .join("42_test"); + std::fs::create_dir_all(&story_wt).unwrap(); + std::fs::write(story_wt.join("canary.txt"), "hello").unwrap(); + + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({ + "command": "ls", + "working_dir": story_wt.to_str().unwrap() + }), + &ctx, + ) + .await + .unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["exit_code"], 0); + assert!(parsed["stdout"].as_str().unwrap().contains("canary.txt")); + assert_eq!(parsed["timed_out"], false); + } + + #[tokio::test] + async fn tool_run_command_captures_nonzero_exit_code() { + let tmp = tempfile::tempdir().unwrap(); + let story_wt = tmp + .path() + .join(".story_kit") + .join("worktrees") + .join("43_test"); + std::fs::create_dir_all(&story_wt).unwrap(); + + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({ + "command": "exit 42", + "working_dir": story_wt.to_str().unwrap() + }), + &ctx, + ) + .await + .unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["exit_code"], 42); + assert_eq!(parsed["timed_out"], false); + } + + #[tokio::test] + async fn tool_run_command_timeout_returns_timed_out_true() { + let tmp = tempfile::tempdir().unwrap(); + let story_wt = tmp + .path() + .join(".story_kit") + .join("worktrees") + .join("44_test"); + std::fs::create_dir_all(&story_wt).unwrap(); + + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({ + "command": "sleep 10", + "working_dir": story_wt.to_str().unwrap(), + "timeout": 1 + }), + &ctx, + ) + .await + .unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["timed_out"], true); + } + + #[tokio::test] + async fn tool_run_command_captures_stderr() { + let tmp = tempfile::tempdir().unwrap(); + let story_wt = tmp + .path() + .join(".story_kit") + .join("worktrees") + .join("45_test"); + std::fs::create_dir_all(&story_wt).unwrap(); + + let ctx = test_ctx(tmp.path()); + let result = tool_run_command( + &json!({ + "command": "echo 'error msg' >&2", + "working_dir": story_wt.to_str().unwrap() + }), + &ctx, + ) + .await + .unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert!( + parsed["stderr"].as_str().unwrap().contains("error msg"), + "expected stderr: {parsed}" + ); + } + + #[tokio::test] + async fn tool_run_command_clamps_timeout_to_max() { + // Verify timeout > 600 is clamped to 600. We don't run a 600s sleep; + // just confirm the tool accepts the arg without error (sandbox check will + // fail first in a different test, here we test the arg parsing path). + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + // Will fail at working_dir validation, not timeout parsing — that's fine + let result = tool_run_command( + &json!({"command": "ls", "working_dir": "/tmp", "timeout": 9999}), + &ctx, + ) + .await; + // Just ensure it doesn't panic and returns an Err about sandbox (not timeout) + assert!(result.is_err()); + } +}