story-kit: merge 347_story_mcp_tool_for_shell_command_execution

2026-03-20 09:51:53 +00:00
parent 31085e8c9f
commit 287c64faf1
4 changed files with 672 additions and 6 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3909,6 +3909,16 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
 [[package]]
 name = "signature"
 version = "2.2.0"
@@ -4282,6 +4292,7 @@ dependencies = [
 "mio",
 "parking_lot",
 "pin-project-lite",
+ "signal-hook-registry",
 "socket2",
 "tokio-macros",
 "windows-sys 0.61.2",
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -25,7 +25,7 @@ serde_json = { workspace = true }
 serde_urlencoded = { workspace = true }
 serde_yaml = { workspace = true }
 strip-ansi-escapes = { workspace = true }
-tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] }
+tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync", "process"] }
 toml = { workspace = true }
 uuid = { workspace = true, features = ["v4", "serde"] }
 walkdir = { workspace = true }
--- a/server/src/http/mcp/mod.rs
+++ b/server/src/http/mcp/mod.rs
@@ -12,6 +12,7 @@ pub mod agent_tools;
 pub mod diagnostics;
 pub mod merge_tools;
 pub mod qa_tools;
+pub mod shell_tools;
 pub mod story_tools;

 /// Returns true when the Accept header includes text/event-stream.
@@ -33,7 +34,7 @@ struct JsonRpcRequest {
 }

 #[derive(Serialize)]
-struct JsonRpcResponse {
+pub(super) struct JsonRpcResponse {
    jsonrpc: &'static str,
    #[serde(skip_serializing_if = "Option::is_none")]
    id: Option<Value>,
@@ -52,7 +53,7 @@ struct JsonRpcError {
 }

 impl JsonRpcResponse {
-    fn success(id: Option<Value>, result: Value) -> Self {
+    pub(super) fn success(id: Option<Value>, result: Value) -> Self {
        Self {
            jsonrpc: "2.0",
            id,
@@ -61,7 +62,7 @@ impl JsonRpcResponse {
        }
    }

-    fn error(id: Option<Value>, code: i64, message: String) -> Self {
+    pub(super) fn error(id: Option<Value>, code: i64, message: String) -> Self {
        Self {
            jsonrpc: "2.0",
            id,
@@ -132,6 +133,9 @@ pub async fn mcp_post_handler(req: &Request, body: Body, ctx: Data<&Arc<AppConte
        if tool_name == "get_agent_output" {
            return handle_agent_output_sse(rpc.id, &rpc.params, &ctx);
        }
+        if tool_name == "run_command" {
+            return shell_tools::handle_run_command_sse(rpc.id, &rpc.params, &ctx);
+        }
    }

    let resp = match rpc.method.as_str() {
@@ -160,7 +164,7 @@ fn to_json_response(resp: JsonRpcResponse) -> Response {
        .body(Body::from(body))
 }

-fn to_sse_response(resp: JsonRpcResponse) -> Response {
+pub(super) fn to_sse_response(resp: JsonRpcResponse) -> Response {
    let json = serde_json::to_string(&resp).unwrap_or_default();
    let sse_body = format!("data: {json}\n\n");
    Response::builder()
@@ -999,6 +1003,28 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
                        },
                        "required": ["story_id", "target_stage"]
                    }
+                },
+                {
+                    "name": "run_command",
+                    "description": "Execute a shell command in an agent's worktree directory. The working_dir must be inside .story_kit/worktrees/. Returns stdout, stderr, exit_code, and timed_out. Supports SSE streaming (send Accept: text/event-stream) for long-running commands. Dangerous commands (rm -rf /, sudo, etc.) are blocked.",
+                    "inputSchema": {
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "The bash command to execute (passed to bash -c)"
+                            },
+                            "working_dir": {
+                                "type": "string",
+                                "description": "Absolute path to the worktree directory to run the command in. Must be inside .story_kit/worktrees/."
+                            },
+                            "timeout": {
+                                "type": "integer",
+                                "description": "Timeout in seconds (default: 120, max: 600)"
+                            }
+                        },
+                        "required": ["command", "working_dir"]
+                    }
                }
            ]
        }),
@@ -1079,6 +1105,8 @@ async fn handle_tools_call(
        "delete_story" => story_tools::tool_delete_story(&args, ctx).await,
        // Arbitrary pipeline movement
        "move_story" => diagnostics::tool_move_story(&args, ctx),
+        // Shell command execution
+        "run_command" => shell_tools::tool_run_command(&args, ctx).await,
        _ => Err(format!("Unknown tool: {tool_name}")),
    };

@@ -1188,7 +1216,8 @@ mod tests {
        assert!(names.contains(&"get_token_usage"));
        assert!(names.contains(&"move_story"));
        assert!(names.contains(&"delete_story"));
-        assert_eq!(tools.len(), 42);
+        assert!(names.contains(&"run_command"));
+        assert_eq!(tools.len(), 43);
    }

    #[test]
--- a/server/src/http/mcp/shell_tools.rs
+++ b/server/src/http/mcp/shell_tools.rs
@@ -0,0 +1,626 @@
+use crate::http::context::AppContext;
+use bytes::Bytes;
+use futures::StreamExt;
+use poem::{Body, Response};
+use serde_json::{json, Value};
+use std::path::PathBuf;
+
+const DEFAULT_TIMEOUT_SECS: u64 = 120;
+const MAX_TIMEOUT_SECS: u64 = 600;
+
+/// Patterns that are unconditionally blocked regardless of context.
+static BLOCKED_PATTERNS: &[&str] = &[
+    "rm -rf /",
+    "rm -fr /",
+    "rm -rf /*",
+    "rm -fr /*",
+    "rm --no-preserve-root",
+    ":(){ :|:& };:",
+    "> /dev/sda",
+    "dd if=/dev",
+];
+
+/// Binaries that are unconditionally blocked.
+static BLOCKED_BINARIES: &[&str] = &[
+    "sudo",
+    "su",
+    "shutdown",
+    "reboot",
+    "halt",
+    "poweroff",
+    "mkfs",
+];
+
+/// Returns an error message if the command matches a blocked pattern or binary.
+fn is_dangerous(command: &str) -> Option<String> {
+    let trimmed = command.trim();
+
+    // Check each blocked pattern (substring match)
+    for &pattern in BLOCKED_PATTERNS {
+        if trimmed.contains(pattern) {
+            return Some(format!(
+                "Command blocked: dangerous pattern '{pattern}' detected"
+            ));
+        }
+    }
+
+    // Check first token of the command against blocked binaries
+    if let Some(first_token) = trimmed.split_whitespace().next() {
+        let binary = std::path::Path::new(first_token)
+            .file_name()
+            .and_then(|n| n.to_str())
+            .unwrap_or(first_token);
+        if BLOCKED_BINARIES.contains(&binary) {
+            return Some(format!("Command blocked: '{binary}' is not permitted"));
+        }
+    }
+
+    None
+}
+
+/// Validates that `working_dir` exists and is inside the project's
+/// `.story_kit/worktrees/` directory. Returns the canonicalized path.
+fn validate_working_dir(working_dir: &str, ctx: &AppContext) -> Result<PathBuf, String> {
+    let wd = PathBuf::from(working_dir);
+
+    if !wd.is_absolute() {
+        return Err("working_dir must be an absolute path".to_string());
+    }
+    if !wd.exists() {
+        return Err(format!("working_dir does not exist: {working_dir}"));
+    }
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let worktrees_root = project_root.join(".story_kit").join("worktrees");
+
+    let canonical_wd = wd
+        .canonicalize()
+        .map_err(|e| format!("Cannot canonicalize working_dir: {e}"))?;
+
+    // If worktrees_root doesn't exist yet, we can't allow anything
+    let canonical_wt = if worktrees_root.exists() {
+        worktrees_root
+            .canonicalize()
+            .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))?
+    } else {
+        return Err("No worktrees directory found in project".to_string());
+    };
+
+    if !canonical_wd.starts_with(&canonical_wt) {
+        return Err(format!(
+            "working_dir must be inside .story_kit/worktrees/. Got: {working_dir}"
+        ));
+    }
+
+    Ok(canonical_wd)
+}
+
+/// Regular (non-SSE) run_command: runs the bash command to completion and
+/// returns stdout, stderr, exit_code, and whether it timed out.
+pub(super) async fn tool_run_command(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let command = args
+        .get("command")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: command")?
+        .to_string();
+
+    let working_dir = args
+        .get("working_dir")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: working_dir")?;
+
+    let timeout_secs = args
+        .get("timeout")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(DEFAULT_TIMEOUT_SECS)
+        .min(MAX_TIMEOUT_SECS);
+
+    if let Some(reason) = is_dangerous(&command) {
+        return Err(reason);
+    }
+
+    let canonical_dir = validate_working_dir(working_dir, ctx)?;
+
+    let result = tokio::time::timeout(
+        std::time::Duration::from_secs(timeout_secs),
+        tokio::task::spawn_blocking({
+            let cmd = command.clone();
+            let dir = canonical_dir.clone();
+            move || {
+                std::process::Command::new("bash")
+                    .arg("-c")
+                    .arg(&cmd)
+                    .current_dir(&dir)
+                    .output()
+            }
+        }),
+    )
+    .await;
+
+    match result {
+        Err(_) => {
+            // timed out
+            serde_json::to_string_pretty(&json!({
+                "stdout": "",
+                "stderr": format!("Command timed out after {timeout_secs}s"),
+                "exit_code": -1,
+                "timed_out": true,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        Ok(Err(e)) => Err(format!("Task join error: {e}")),
+        Ok(Ok(Err(e))) => Err(format!("Failed to execute command: {e}")),
+        Ok(Ok(Ok(output))) => {
+            serde_json::to_string_pretty(&json!({
+                "stdout": String::from_utf8_lossy(&output.stdout),
+                "stderr": String::from_utf8_lossy(&output.stderr),
+                "exit_code": output.status.code().unwrap_or(-1),
+                "timed_out": false,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+    }
+}
+
+/// SSE streaming run_command: spawns the process and emits stdout/stderr lines
+/// as JSON-RPC notifications, then a final response with exit_code.
+pub(super) fn handle_run_command_sse(
+    id: Option<Value>,
+    params: &Value,
+    ctx: &AppContext,
+) -> Response {
+    use super::{to_sse_response, JsonRpcResponse};
+
+    let args = params.get("arguments").cloned().unwrap_or(json!({}));
+
+    let command = match args.get("command").and_then(|v| v.as_str()) {
+        Some(c) => c.to_string(),
+        None => {
+            return to_sse_response(JsonRpcResponse::error(
+                id,
+                -32602,
+                "Missing required argument: command".into(),
+            ))
+        }
+    };
+
+    let working_dir = match args.get("working_dir").and_then(|v| v.as_str()) {
+        Some(d) => d.to_string(),
+        None => {
+            return to_sse_response(JsonRpcResponse::error(
+                id,
+                -32602,
+                "Missing required argument: working_dir".into(),
+            ))
+        }
+    };
+
+    let timeout_secs = args
+        .get("timeout")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(DEFAULT_TIMEOUT_SECS)
+        .min(MAX_TIMEOUT_SECS);
+
+    if let Some(reason) = is_dangerous(&command) {
+        return to_sse_response(JsonRpcResponse::error(id, -32602, reason));
+    }
+
+    let canonical_dir = match validate_working_dir(&working_dir, ctx) {
+        Ok(d) => d,
+        Err(e) => return to_sse_response(JsonRpcResponse::error(id, -32602, e)),
+    };
+
+    let final_id = id;
+
+    let stream = async_stream::stream! {
+        use tokio::io::AsyncBufReadExt;
+
+        let mut child = match tokio::process::Command::new("bash")
+            .arg("-c")
+            .arg(&command)
+            .current_dir(&canonical_dir)
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+        {
+            Ok(c) => c,
+            Err(e) => {
+                let resp = JsonRpcResponse::success(
+                    final_id,
+                    json!({
+                        "content": [{"type": "text", "text": format!("Failed to spawn process: {e}")}],
+                        "isError": true
+                    }),
+                );
+                if let Ok(s) = serde_json::to_string(&resp) {
+                    yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                }
+                return;
+            }
+        };
+
+        let stdout = child.stdout.take().expect("stdout piped");
+        let stderr = child.stderr.take().expect("stderr piped");
+        let mut stdout_lines = tokio::io::BufReader::new(stdout).lines();
+        let mut stderr_lines = tokio::io::BufReader::new(stderr).lines();
+
+        let deadline = tokio::time::Instant::now()
+            + std::time::Duration::from_secs(timeout_secs);
+        let mut stdout_done = false;
+        let mut stderr_done = false;
+        let mut timed_out = false;
+
+        loop {
+            if stdout_done && stderr_done {
+                break;
+            }
+
+            let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+            if remaining.is_zero() {
+                timed_out = true;
+                let _ = child.kill().await;
+                break;
+            }
+
+            tokio::select! {
+                line = stdout_lines.next_line(), if !stdout_done => {
+                    match line {
+                        Ok(Some(l)) => {
+                            let notif = json!({
+                                "jsonrpc": "2.0",
+                                "method": "notifications/tools/progress",
+                                "params": { "stream": "stdout", "line": l }
+                            });
+                            if let Ok(s) = serde_json::to_string(&notif) {
+                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                            }
+                        }
+                        _ => { stdout_done = true; }
+                    }
+                }
+                line = stderr_lines.next_line(), if !stderr_done => {
+                    match line {
+                        Ok(Some(l)) => {
+                            let notif = json!({
+                                "jsonrpc": "2.0",
+                                "method": "notifications/tools/progress",
+                                "params": { "stream": "stderr", "line": l }
+                            });
+                            if let Ok(s) = serde_json::to_string(&notif) {
+                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                            }
+                        }
+                        _ => { stderr_done = true; }
+                    }
+                }
+                _ = tokio::time::sleep(remaining) => {
+                    timed_out = true;
+                    let _ = child.kill().await;
+                    break;
+                }
+            }
+        }
+
+        let exit_code = child.wait().await.ok().and_then(|s| s.code()).unwrap_or(-1);
+
+        let summary = json!({
+            "exit_code": exit_code,
+            "timed_out": timed_out,
+        });
+
+        let final_resp = JsonRpcResponse::success(
+            final_id,
+            json!({
+                "content": [{"type": "text", "text": summary.to_string()}]
+            }),
+        );
+        if let Ok(s) = serde_json::to_string(&final_resp) {
+            yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+        }
+    };
+
+    Response::builder()
+        .status(poem::http::StatusCode::OK)
+        .header("Content-Type", "text/event-stream")
+        .header("Cache-Control", "no-cache")
+        .body(Body::from_bytes_stream(stream.map(|r| {
+            r.map(Bytes::from)
+        })))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use serde_json::json;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    // ── is_dangerous ─────────────────────────────────────────────────
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_root() {
+        assert!(is_dangerous("rm -rf /").is_some());
+        assert!(is_dangerous("  rm -rf /  ").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_fr_root() {
+        assert!(is_dangerous("rm -fr /").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_star() {
+        assert!(is_dangerous("rm -rf /*").is_some());
+        assert!(is_dangerous("rm -fr /*").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_sudo() {
+        assert!(is_dangerous("sudo ls").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_shutdown() {
+        assert!(is_dangerous("shutdown -h now").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_mkfs() {
+        assert!(is_dangerous("mkfs /dev/sda1").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_fork_bomb() {
+        assert!(is_dangerous(":(){ :|:& };:").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_allows_safe_commands() {
+        assert!(is_dangerous("cargo build").is_none());
+        assert!(is_dangerous("npm test").is_none());
+        assert!(is_dangerous("git status").is_none());
+        assert!(is_dangerous("ls -la").is_none());
+        assert!(is_dangerous("rm -rf target/").is_none());
+    }
+
+    // ── validate_working_dir ──────────────────────────────────────────
+
+    #[test]
+    fn validate_working_dir_rejects_relative_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir("relative/path", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("absolute"));
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_nonexistent_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir("/nonexistent_path_xyz_abc", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("does not exist"));
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_path_outside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Create the worktrees dir so it exists
+        let wt_dir = tmp.path().join(".story_kit").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        // Try to use /tmp (outside worktrees)
+        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
+        assert!(result.is_err());
+        assert!(
+            result.unwrap_err().contains("inside .story_kit/worktrees"),
+            "expected sandbox error"
+        );
+    }
+
+    #[test]
+    fn validate_working_dir_accepts_path_inside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".story_kit")
+            .join("worktrees")
+            .join("42_test_story");
+        std::fs::create_dir_all(&story_wt).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir(story_wt.to_str().unwrap(), &ctx);
+        assert!(result.is_ok(), "expected Ok, got: {:?}", result);
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_no_worktrees_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Do NOT create worktrees dir
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
+        assert!(result.is_err());
+    }
+
+    // ── tool_run_command ───────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn tool_run_command_missing_command() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(&json!({"working_dir": "/tmp"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("command"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_missing_working_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(&json!({"command": "ls"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("working_dir"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_blocks_dangerous_command() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({"command": "rm -rf /", "working_dir": "/tmp"}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("blocked"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_rejects_path_outside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let wt_dir = tmp.path().join(".story_kit").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "ls",
+                "working_dir": tmp.path().to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(
+            result.unwrap_err().contains("worktrees"),
+            "expected sandbox error"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_runs_in_worktree_and_returns_output() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".story_kit")
+            .join("worktrees")
+            .join("42_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+        std::fs::write(story_wt.join("canary.txt"), "hello").unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "ls",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 0);
+        assert!(parsed["stdout"].as_str().unwrap().contains("canary.txt"));
+        assert_eq!(parsed["timed_out"], false);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_captures_nonzero_exit_code() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".story_kit")
+            .join("worktrees")
+            .join("43_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "exit 42",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 42);
+        assert_eq!(parsed["timed_out"], false);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_timeout_returns_timed_out_true() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".story_kit")
+            .join("worktrees")
+            .join("44_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "sleep 10",
+                "working_dir": story_wt.to_str().unwrap(),
+                "timeout": 1
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["timed_out"], true);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_captures_stderr() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".story_kit")
+            .join("worktrees")
+            .join("45_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "echo 'error msg' >&2",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert!(
+            parsed["stderr"].as_str().unwrap().contains("error msg"),
+            "expected stderr: {parsed}"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_clamps_timeout_to_max() {
+        // Verify timeout > 600 is clamped to 600. We don't run a 600s sleep;
+        // just confirm the tool accepts the arg without error (sandbox check will
+        // fail first in a different test, here we test the arg parsing path).
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // Will fail at working_dir validation, not timeout parsing — that's fine
+        let result = tool_run_command(
+            &json!({"command": "ls", "working_dir": "/tmp", "timeout": 9999}),
+            &ctx,
+        )
+        .await;
+        // Just ensure it doesn't panic and returns an Err about sandbox (not timeout)
+        assert!(result.is_err());
+    }
+}