Restore codebase deleted by bad auto-commit e4227cf

Commit e4227cf (a story creation auto-commit) erroneously deleted 175 files from master's tree, likely due to a race condition between concurrent git operations. This commit re-adds all files from the working directory. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 19:07:07 +00:00
parent 89f776b978
commit f610ef6046
174 changed files with 84280 additions and 0 deletions
--- a/server/Cargo.lock
+++ b/server/Cargo.lock
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -0,0 +1,43 @@
+[package]
+name = "storkit"
+version = "0.4.1"
+edition = "2024"
+build = "build.rs"
+
+[dependencies]
+async-stream = { workspace = true }
+async-trait = { workspace = true }
+bytes = { workspace = true }
+chrono = { workspace = true, features = ["serde"] }
+eventsource-stream = { workspace = true }
+futures = { workspace = true }
+homedir = { workspace = true }
+ignore = { workspace = true }
+mime_guess = { workspace = true }
+notify = { workspace = true }
+poem = { workspace = true, features = ["websocket"] }
+poem-openapi = { workspace = true, features = ["swagger-ui"] }
+portable-pty = { workspace = true }
+reqwest = { workspace = true, features = ["json", "stream"] }
+rust-embed = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+serde_urlencoded = { workspace = true }
+serde_yaml = { workspace = true }
+strip-ansi-escapes = { workspace = true }
+tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync", "process"] }
+toml = { workspace = true }
+uuid = { workspace = true, features = ["v4", "serde"] }
+walkdir = { workspace = true }
+matrix-sdk = { workspace = true }
+pulldown-cmark = { workspace = true }
+
+# Force bundled SQLite so static musl builds don't need a system libsqlite3
+libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
+wait-timeout = "0.2.1"
+
+[dev-dependencies]
+tempfile = { workspace = true }
+tokio-tungstenite = { workspace = true }
+mockito = "1"
+filetime = { workspace = true }
--- a/server/build.rs
+++ b/server/build.rs
@@ -0,0 +1,50 @@
+use std::env;
+use std::path::Path;
+use std::process::Command;
+
+fn run(cmd: &str, args: &[&str], dir: &Path) {
+    let status = Command::new(cmd)
+        .args(args)
+        .current_dir(dir)
+        .status()
+        .unwrap_or_else(|e| panic!("Failed to run {} {:?}: {}", cmd, args, e));
+
+    if !status.success() {
+        panic!("Command failed: {} {:?}", cmd, args);
+    }
+}
+
+fn main() {
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-env-changed=PROFILE");
+    println!("cargo:rerun-if-changed=../frontend/package.json");
+    println!("cargo:rerun-if-changed=../frontend/package-lock.json");
+    println!("cargo:rerun-if-changed=../frontend/vite.config.ts");
+    println!("cargo:rerun-if-changed=../frontend/index.html");
+    println!("cargo:rerun-if-changed=../frontend/src");
+    println!("cargo:rerun-if-changed=../frontend/public");
+
+    let profile = env::var("PROFILE").unwrap_or_default();
+    if profile != "release" {
+        return;
+    }
+
+    // When cross-compiling (e.g. musl via `cross`), the Docker container
+    // has no Node/npm. The release script builds macOS first, so
+    // frontend/dist/ already exists. Skip the frontend build in that case.
+    let target = env::var("TARGET").unwrap_or_default();
+    let host = env::var("HOST").unwrap_or_default();
+    if target != host {
+        let dist = Path::new("../frontend/dist");
+        if !dist.exists() {
+            panic!("Cross-compiling but frontend/dist/ is missing. Build macOS first.");
+        }
+        return;
+    }
+
+    let frontend_dir = Path::new("../frontend");
+
+    // Ensure dependencies are installed and build the frontend bundle.
+    run("npm", &["install"], frontend_dir);
+    run("npm", &["run", "build"], frontend_dir);
+}
--- a/server/src/agent_log.rs
+++ b/server/src/agent_log.rs
@@ -0,0 +1,377 @@
+use crate::agents::AgentEvent;
+use chrono::Utc;
+use serde::{Deserialize, Serialize};
+use std::fs::{self, File, OpenOptions};
+use std::io::{BufRead, BufReader, Write};
+use std::path::{Path, PathBuf};
+
+
+/// A single line in the agent log file (JSONL format).
+#[derive(Debug, Serialize, Deserialize)]
+pub struct LogEntry {
+    pub timestamp: String,
+    #[serde(flatten)]
+    pub event: serde_json::Value,
+}
+
+/// Writes agent events to a persistent log file (JSONL format).
+///
+/// Each agent session gets its own log file at:
+///   `.storkit/logs/{story_id}/{agent_name}-{session_id}.log`
+pub struct AgentLogWriter {
+    file: File,
+}
+
+impl AgentLogWriter {
+    /// Create a new log writer, creating the directory structure as needed.
+    ///
+    /// The log file is opened in append mode so that a restart mid-session
+    /// won't overwrite earlier output.
+    pub fn new(
+        project_root: &Path,
+        story_id: &str,
+        agent_name: &str,
+        session_id: &str,
+    ) -> Result<Self, String> {
+        let dir = log_dir(project_root, story_id);
+        fs::create_dir_all(&dir)
+            .map_err(|e| format!("Failed to create log directory {}: {e}", dir.display()))?;
+
+        let path = dir.join(format!("{agent_name}-{session_id}.log"));
+        let file = OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&path)
+            .map_err(|e| format!("Failed to open log file {}: {e}", path.display()))?;
+
+        Ok(Self { file })
+    }
+
+    /// Write an agent event as a JSONL line with an ISO 8601 timestamp.
+    pub fn write_event(&mut self, event: &AgentEvent) -> Result<(), String> {
+        let event_value =
+            serde_json::to_value(event).map_err(|e| format!("Failed to serialize event: {e}"))?;
+
+        let entry = LogEntry {
+            timestamp: Utc::now().to_rfc3339(),
+            event: event_value,
+        };
+
+        let mut line =
+            serde_json::to_string(&entry).map_err(|e| format!("Failed to serialize entry: {e}"))?;
+        line.push('\n');
+
+        self.file
+            .write_all(line.as_bytes())
+            .map_err(|e| format!("Failed to write log entry: {e}"))?;
+
+        Ok(())
+    }
+}
+
+/// Return the log directory for a story.
+fn log_dir(project_root: &Path, story_id: &str) -> PathBuf {
+    project_root
+        .join(".storkit")
+        .join("logs")
+        .join(story_id)
+}
+
+/// Return the path to a specific log file.
+pub fn log_file_path(
+    project_root: &Path,
+    story_id: &str,
+    agent_name: &str,
+    session_id: &str,
+) -> PathBuf {
+    log_dir(project_root, story_id).join(format!("{agent_name}-{session_id}.log"))
+}
+
+/// Read all log entries from a log file.
+pub fn read_log(path: &Path) -> Result<Vec<LogEntry>, String> {
+    let file =
+        File::open(path).map_err(|e| format!("Failed to open log file {}: {e}", path.display()))?;
+    let reader = BufReader::new(file);
+    let mut entries = Vec::new();
+
+    for line in reader.lines() {
+        let line = line.map_err(|e| format!("Failed to read log line: {e}"))?;
+        let trimmed = line.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+        let entry: LogEntry = serde_json::from_str(trimmed)
+            .map_err(|e| format!("Failed to parse log entry: {e}"))?;
+        entries.push(entry);
+    }
+
+    Ok(entries)
+}
+
+/// Find the most recent log file for a given story+agent combination.
+///
+/// Scans `.storkit/logs/{story_id}/` for files matching `{agent_name}-*.log`
+/// and returns the one with the most recent modification time.
+pub fn find_latest_log(
+    project_root: &Path,
+    story_id: &str,
+    agent_name: &str,
+) -> Option<PathBuf> {
+    let dir = log_dir(project_root, story_id);
+    if !dir.is_dir() {
+        return None;
+    }
+
+    let prefix = format!("{agent_name}-");
+    let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
+
+    let entries = fs::read_dir(&dir).ok()?;
+    for entry in entries.flatten() {
+        let path = entry.path();
+        let name = match path.file_name().and_then(|n| n.to_str()) {
+            Some(n) => n.to_string(),
+            None => continue,
+        };
+        if !name.starts_with(&prefix) || !name.ends_with(".log") {
+            continue;
+        }
+        let modified = match entry.metadata().and_then(|m| m.modified()) {
+            Ok(t) => t,
+            Err(_) => continue,
+        };
+        if best.as_ref().is_none_or(|(_, t)| modified > *t) {
+            best = Some((path, modified));
+        }
+    }
+
+    best.map(|(p, _)| p)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentEvent;
+    use tempfile::tempdir;
+
+    #[test]
+    fn test_log_writer_creates_directory_and_file() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        let _writer =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-abc123").unwrap();
+
+        let expected_path = root
+            .join(".storkit")
+            .join("logs")
+            .join("42_story_foo")
+            .join("coder-1-sess-abc123.log");
+        assert!(expected_path.exists(), "Log file should exist");
+    }
+
+    #[test]
+    fn test_log_writer_writes_jsonl_with_timestamps() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        let mut writer =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-001").unwrap();
+
+        let event = AgentEvent::Status {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-1".to_string(),
+            status: "running".to_string(),
+        };
+        writer.write_event(&event).unwrap();
+
+        let event2 = AgentEvent::Output {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-1".to_string(),
+            text: "Hello world".to_string(),
+        };
+        writer.write_event(&event2).unwrap();
+
+        // Read the file and verify
+        let path = log_file_path(root, "42_story_foo", "coder-1", "sess-001");
+        let content = fs::read_to_string(&path).unwrap();
+        let lines: Vec<&str> = content.lines().collect();
+        assert_eq!(lines.len(), 2, "Should have 2 log lines");
+
+        // Parse each line as valid JSON with a timestamp
+        for line in &lines {
+            let entry: LogEntry = serde_json::from_str(line).unwrap();
+            assert!(!entry.timestamp.is_empty(), "Timestamp should be present");
+            // Verify it's a valid ISO 8601 timestamp
+            chrono::DateTime::parse_from_rfc3339(&entry.timestamp)
+                .expect("Timestamp should be valid RFC3339");
+        }
+
+        // Verify the first entry is a status event
+        let entry1: LogEntry = serde_json::from_str(lines[0]).unwrap();
+        assert_eq!(entry1.event["type"], "status");
+        assert_eq!(entry1.event["status"], "running");
+
+        // Verify the second entry is an output event
+        let entry2: LogEntry = serde_json::from_str(lines[1]).unwrap();
+        assert_eq!(entry2.event["type"], "output");
+        assert_eq!(entry2.event["text"], "Hello world");
+    }
+
+    #[test]
+    fn test_read_log_parses_written_events() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        let mut writer =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-002").unwrap();
+
+        let events = vec![
+            AgentEvent::Status {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                status: "running".to_string(),
+            },
+            AgentEvent::Output {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "Processing...".to_string(),
+            },
+            AgentEvent::AgentJson {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                data: serde_json::json!({"type": "tool_use", "name": "read_file"}),
+            },
+            AgentEvent::Done {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                session_id: Some("sess-002".to_string()),
+            },
+        ];
+
+        for event in &events {
+            writer.write_event(event).unwrap();
+        }
+
+        let path = log_file_path(root, "42_story_foo", "coder-1", "sess-002");
+        let entries = read_log(&path).unwrap();
+        assert_eq!(entries.len(), 4, "Should read back all 4 events");
+
+        // Verify event types round-trip correctly
+        assert_eq!(entries[0].event["type"], "status");
+        assert_eq!(entries[1].event["type"], "output");
+        assert_eq!(entries[2].event["type"], "agent_json");
+        assert_eq!(entries[3].event["type"], "done");
+    }
+
+    #[test]
+    fn test_separate_sessions_produce_separate_files() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        let mut writer1 =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-aaa").unwrap();
+        let mut writer2 =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-bbb").unwrap();
+
+        writer1
+            .write_event(&AgentEvent::Output {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "from session aaa".to_string(),
+            })
+            .unwrap();
+
+        writer2
+            .write_event(&AgentEvent::Output {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "from session bbb".to_string(),
+            })
+            .unwrap();
+
+        let path1 = log_file_path(root, "42_story_foo", "coder-1", "sess-aaa");
+        let path2 = log_file_path(root, "42_story_foo", "coder-1", "sess-bbb");
+
+        assert_ne!(path1, path2, "Different sessions should use different files");
+
+        let entries1 = read_log(&path1).unwrap();
+        let entries2 = read_log(&path2).unwrap();
+
+        assert_eq!(entries1.len(), 1);
+        assert_eq!(entries2.len(), 1);
+        assert_eq!(entries1[0].event["text"], "from session aaa");
+        assert_eq!(entries2[0].event["text"], "from session bbb");
+    }
+
+    #[test]
+    fn test_find_latest_log_returns_most_recent() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        // Create two log files with a small delay
+        let mut writer1 =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-old").unwrap();
+        writer1
+            .write_event(&AgentEvent::Output {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "old".to_string(),
+            })
+            .unwrap();
+        drop(writer1);
+
+        // Touch the second file to ensure it's newer
+        std::thread::sleep(std::time::Duration::from_millis(50));
+
+        let mut writer2 =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-new").unwrap();
+        writer2
+            .write_event(&AgentEvent::Output {
+                story_id: "42_story_foo".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "new".to_string(),
+            })
+            .unwrap();
+        drop(writer2);
+
+        let latest = find_latest_log(root, "42_story_foo", "coder-1").unwrap();
+        assert!(
+            latest.to_string_lossy().contains("sess-new"),
+            "Should find the newest log file, got: {}",
+            latest.display()
+        );
+    }
+
+    #[test]
+    fn test_find_latest_log_returns_none_for_missing_dir() {
+        let tmp = tempdir().unwrap();
+        let result = find_latest_log(tmp.path(), "nonexistent", "coder-1");
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_log_files_persist_on_disk() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        let path = {
+            let mut writer =
+                AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-persist").unwrap();
+            writer
+                .write_event(&AgentEvent::Status {
+                    story_id: "42_story_foo".to_string(),
+                    agent_name: "coder-1".to_string(),
+                    status: "running".to_string(),
+                })
+                .unwrap();
+            log_file_path(root, "42_story_foo", "coder-1", "sess-persist")
+            // writer is dropped here
+        };
+
+        // File should still exist and be readable
+        assert!(path.exists(), "Log file should persist after writer is dropped");
+        let entries = read_log(&path).unwrap();
+        assert_eq!(entries.len(), 1);
+        assert_eq!(entries[0].event["type"], "status");
+    }
+}
--- a/server/src/agents/gates.rs
+++ b/server/src/agents/gates.rs
@@ -0,0 +1,426 @@
+use std::path::Path;
+use std::process::Command;
+use std::time::Duration;
+use wait_timeout::ChildExt;
+
+/// Maximum time any single test command is allowed to run before being killed.
+const TEST_TIMEOUT: Duration = Duration::from_secs(600); // 10 minutes
+
+/// Detect whether the base branch in a worktree is `master` or `main`.
+/// Falls back to `"master"` if neither is found.
+pub(crate) fn detect_worktree_base_branch(wt_path: &Path) -> String {
+    for branch in &["master", "main"] {
+        let ok = Command::new("git")
+            .args(["rev-parse", "--verify", branch])
+            .current_dir(wt_path)
+            .output()
+            .map(|o| o.status.success())
+            .unwrap_or(false);
+        if ok {
+            return branch.to_string();
+        }
+    }
+    "master".to_string()
+}
+
+/// Return `true` if the git worktree at `wt_path` has commits on its current
+/// branch that are not present on the base branch (`master` or `main`).
+///
+/// Used during server startup reconciliation to detect stories whose agent work
+/// was committed while the server was offline.
+pub(crate) fn worktree_has_committed_work(wt_path: &Path) -> bool {
+    let base_branch = detect_worktree_base_branch(wt_path);
+    let output = Command::new("git")
+        .args(["log", &format!("{base_branch}..HEAD"), "--oneline"])
+        .current_dir(wt_path)
+        .output();
+    match output {
+        Ok(out) if out.status.success() => {
+            !String::from_utf8_lossy(&out.stdout).trim().is_empty()
+        }
+        _ => false,
+    }
+}
+
+/// Check whether the given directory has any uncommitted git changes.
+/// Returns `Err` with a descriptive message if there are any.
+pub(crate) fn check_uncommitted_changes(path: &Path) -> Result<(), String> {
+    let output = Command::new("git")
+        .args(["status", "--porcelain"])
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run git status: {e}"))?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    if !stdout.trim().is_empty() {
+        return Err(format!(
+            "Worktree has uncommitted changes. Please commit all work before \
+             the agent exits:\n{stdout}"
+        ));
+    }
+    Ok(())
+}
+
+/// Run the project's test suite.
+///
+/// Uses `script/test` if present, treating it as the canonical single test entry point.
+/// Falls back to `cargo nextest run` / `cargo test` when `script/test` is absent.
+/// Returns `(tests_passed, output)`.
+pub(crate) fn run_project_tests(path: &Path) -> Result<(bool, String), String> {
+    let script_test = path.join("script").join("test");
+    if script_test.exists() {
+        let mut output = String::from("=== script/test ===\n");
+        let (success, out) = run_command_with_timeout(&script_test, &[], path)?;
+        output.push_str(&out);
+        output.push('\n');
+        return Ok((success, output));
+    }
+
+    // Fallback: cargo nextest run / cargo test
+    let mut output = String::from("=== tests ===\n");
+    let (success, test_out) = match run_command_with_timeout("cargo", &["nextest", "run"], path) {
+        Ok(result) => result,
+        Err(_) => {
+            // nextest not available — fall back to cargo test
+            run_command_with_timeout("cargo", &["test"], path)
+                .map_err(|e| format!("Failed to run cargo test: {e}"))?
+        }
+    };
+    output.push_str(&test_out);
+    output.push('\n');
+    Ok((success, output))
+}
+
+/// Run a command with a timeout. Returns `(success, combined_output)`.
+/// Kills the child process if it exceeds `TEST_TIMEOUT`.
+///
+/// Stdout and stderr are drained in background threads to avoid a pipe-buffer
+/// deadlock: if the child fills the 64 KB OS pipe buffer while the parent
+/// blocks on `waitpid`, neither side can make progress.
+fn run_command_with_timeout(
+    program: impl AsRef<std::ffi::OsStr>,
+    args: &[&str],
+    dir: &Path,
+) -> Result<(bool, String), String> {
+    let mut child = Command::new(program)
+        .args(args)
+        .current_dir(dir)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        .spawn()
+        .map_err(|e| format!("Failed to spawn command: {e}"))?;
+
+    // Drain stdout/stderr in background threads so the pipe buffers never fill.
+    let stdout_handle = child.stdout.take().map(|r| {
+        std::thread::spawn(move || {
+            let mut s = String::new();
+            let mut r = r;
+            std::io::Read::read_to_string(&mut r, &mut s).ok();
+            s
+        })
+    });
+    let stderr_handle = child.stderr.take().map(|r| {
+        std::thread::spawn(move || {
+            let mut s = String::new();
+            let mut r = r;
+            std::io::Read::read_to_string(&mut r, &mut s).ok();
+            s
+        })
+    });
+
+    match child.wait_timeout(TEST_TIMEOUT) {
+        Ok(Some(status)) => {
+            let stdout = stdout_handle
+                .and_then(|h| h.join().ok())
+                .unwrap_or_default();
+            let stderr = stderr_handle
+                .and_then(|h| h.join().ok())
+                .unwrap_or_default();
+            Ok((status.success(), format!("{stdout}{stderr}")))
+        }
+        Ok(None) => {
+            // Timed out — kill the child.
+            let _ = child.kill();
+            let _ = child.wait();
+            Err(format!(
+                "Command timed out after {} seconds",
+                TEST_TIMEOUT.as_secs()
+            ))
+        }
+        Err(e) => Err(format!("Failed to wait for command: {e}")),
+    }
+}
+
+/// Run `cargo clippy` and the project test suite (via `script/test` if present,
+/// otherwise `cargo nextest run` / `cargo test`) in the given directory.
+/// Returns `(gates_passed, combined_output)`.
+pub(crate) fn run_acceptance_gates(path: &Path) -> Result<(bool, String), String> {
+    let mut all_output = String::new();
+    let mut all_passed = true;
+
+    // ── cargo clippy ──────────────────────────────────────────────
+    let clippy = Command::new("cargo")
+        .args(["clippy", "--all-targets", "--all-features"])
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run cargo clippy: {e}"))?;
+
+    all_output.push_str("=== cargo clippy ===\n");
+    let clippy_stdout = String::from_utf8_lossy(&clippy.stdout);
+    let clippy_stderr = String::from_utf8_lossy(&clippy.stderr);
+    if !clippy_stdout.is_empty() {
+        all_output.push_str(&clippy_stdout);
+    }
+    if !clippy_stderr.is_empty() {
+        all_output.push_str(&clippy_stderr);
+    }
+    all_output.push('\n');
+
+    if !clippy.status.success() {
+        all_passed = false;
+    }
+
+    // ── tests (script/test if available, else cargo nextest/test) ─
+    let (test_success, test_out) = run_project_tests(path)?;
+    all_output.push_str(&test_out);
+    if !test_success {
+        all_passed = false;
+    }
+
+    Ok((all_passed, all_output))
+}
+
+/// Run `script/test_coverage` in the given directory if the script exists.
+///
+/// Used as a QA gate before advancing a story from `3_qa/` to `4_merge/`.
+/// Returns `(passed, output)`.  If the script does not exist, returns `(true, …)`.
+pub(crate) fn run_coverage_gate(path: &Path) -> Result<(bool, String), String> {
+    let script = path.join("script").join("test_coverage");
+    if !script.exists() {
+        return Ok((
+            true,
+            "script/test_coverage not found; coverage gate skipped.\n".to_string(),
+        ));
+    }
+
+    let mut output = String::from("=== script/test_coverage ===\n");
+    let result = Command::new(&script)
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run script/test_coverage: {e}"))?;
+
+    let combined = format!(
+        "{}{}",
+        String::from_utf8_lossy(&result.stdout),
+        String::from_utf8_lossy(&result.stderr)
+    );
+    output.push_str(&combined);
+    output.push('\n');
+
+    Ok((result.status.success(), output))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn init_git_repo(repo: &std::path::Path) {
+        Command::new("git")
+            .args(["init"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+    }
+
+    // ── run_project_tests tests ───────────────────────────────────
+
+    #[cfg(unix)]
+    #[test]
+    fn run_project_tests_uses_script_test_when_present_and_passes() {
+        use std::fs;
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path();
+        let script_dir = path.join("script");
+        fs::create_dir_all(&script_dir).unwrap();
+        let script_test = script_dir.join("test");
+        fs::write(&script_test, "#!/usr/bin/env bash\necho 'all tests passed'\nexit 0\n").unwrap();
+        let mut perms = fs::metadata(&script_test).unwrap().permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&script_test, perms).unwrap();
+
+        let (passed, output) = run_project_tests(path).unwrap();
+        assert!(passed, "script/test exiting 0 should pass");
+        assert!(output.contains("script/test"), "output should mention script/test");
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn run_project_tests_reports_failure_when_script_test_exits_nonzero() {
+        use std::fs;
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path();
+        let script_dir = path.join("script");
+        fs::create_dir_all(&script_dir).unwrap();
+        let script_test = script_dir.join("test");
+        fs::write(&script_test, "#!/usr/bin/env bash\nexit 1\n").unwrap();
+        let mut perms = fs::metadata(&script_test).unwrap().permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&script_test, perms).unwrap();
+
+        let (passed, output) = run_project_tests(path).unwrap();
+        assert!(!passed, "script/test exiting 1 should fail");
+        assert!(output.contains("script/test"), "output should mention script/test");
+    }
+
+    // ── run_coverage_gate tests ───────────────────────────────────────────────
+
+    #[cfg(unix)]
+    #[test]
+    fn coverage_gate_passes_when_script_absent() {
+        use tempfile::tempdir;
+        let tmp = tempdir().unwrap();
+        let (passed, output) = run_coverage_gate(tmp.path()).unwrap();
+        assert!(passed, "coverage gate should pass when script is absent");
+        assert!(
+            output.contains("not found"),
+            "output should mention script not found"
+        );
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn coverage_gate_passes_when_script_exits_zero() {
+        use std::fs;
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path();
+        let script_dir = path.join("script");
+        fs::create_dir_all(&script_dir).unwrap();
+        let script = script_dir.join("test_coverage");
+        fs::write(
+            &script,
+            "#!/usr/bin/env bash\necho 'Rust line coverage: 85%'\necho 'PASS: Coverage 85% meets threshold 0%'\nexit 0\n",
+        )
+        .unwrap();
+        let mut perms = fs::metadata(&script).unwrap().permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&script, perms).unwrap();
+
+        let (passed, output) = run_coverage_gate(path).unwrap();
+        assert!(passed, "coverage gate should pass when script exits 0");
+        assert!(
+            output.contains("script/test_coverage"),
+            "output should mention script/test_coverage"
+        );
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn coverage_gate_fails_when_script_exits_nonzero() {
+        use std::fs;
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path();
+        let script_dir = path.join("script");
+        fs::create_dir_all(&script_dir).unwrap();
+        let script = script_dir.join("test_coverage");
+        fs::write(
+            &script,
+            "#!/usr/bin/env bash\necho 'FAIL: Coverage 40% is below threshold 80%'\nexit 1\n",
+        )
+        .unwrap();
+        let mut perms = fs::metadata(&script).unwrap().permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&script, perms).unwrap();
+
+        let (passed, output) = run_coverage_gate(path).unwrap();
+        assert!(!passed, "coverage gate should fail when script exits 1");
+        assert!(
+            output.contains("script/test_coverage"),
+            "output should mention script/test_coverage"
+        );
+    }
+
+    // ── worktree_has_committed_work tests ─────────────────────────────────────
+
+    #[test]
+    fn worktree_has_committed_work_false_on_fresh_repo() {
+        let tmp = tempfile::tempdir().unwrap();
+        let repo = tmp.path();
+        // init_git_repo creates the initial commit on the default branch.
+        // HEAD IS the base branch — no commits ahead.
+        init_git_repo(repo);
+        assert!(!worktree_has_committed_work(repo));
+    }
+
+    #[test]
+    fn worktree_has_committed_work_true_after_commit_on_feature_branch() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let project_root = tmp.path().join("project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        // Create a git worktree on a feature branch.
+        let wt_path = tmp.path().join("wt");
+        Command::new("git")
+            .args([
+                "worktree",
+                "add",
+                &wt_path.to_string_lossy(),
+                "-b",
+                "feature/story-99_test",
+            ])
+            .current_dir(&project_root)
+            .output()
+            .unwrap();
+
+        // No commits on the feature branch yet — same as base branch.
+        assert!(!worktree_has_committed_work(&wt_path));
+
+        // Add a commit to the feature branch in the worktree.
+        fs::write(wt_path.join("work.txt"), "done").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args([
+                "-c",
+                "user.email=test@test.com",
+                "-c",
+                "user.name=Test",
+                "commit",
+                "-m",
+                "coder: implement story",
+            ])
+            .current_dir(&wt_path)
+            .output()
+            .unwrap();
+
+        // Now the feature branch is ahead of the base branch.
+        assert!(worktree_has_committed_work(&wt_path));
+    }
+}
--- a/server/src/agents/lifecycle.rs
+++ b/server/src/agents/lifecycle.rs
@@ -0,0 +1,829 @@
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use crate::io::story_metadata::{clear_front_matter_field, write_rejection_notes};
+use crate::slog;
+
+pub(super) fn item_type_from_id(item_id: &str) -> &'static str {
+    // New format: {digits}_{type}_{slug}
+    let after_num = item_id.trim_start_matches(|c: char| c.is_ascii_digit());
+    if after_num.starts_with("_bug_") {
+        "bug"
+    } else if after_num.starts_with("_spike_") {
+        "spike"
+    } else {
+        "story"
+    }
+}
+
+/// Return the source directory path for a work item (always work/1_backlog/).
+fn item_source_dir(project_root: &Path, _item_id: &str) -> PathBuf {
+    project_root.join(".storkit").join("work").join("1_backlog")
+}
+
+/// Return the done directory path for a work item (always work/5_done/).
+fn item_archive_dir(project_root: &Path, _item_id: &str) -> PathBuf {
+    project_root.join(".storkit").join("work").join("5_done")
+}
+
+/// Move a work item (story, bug, or spike) from `work/1_backlog/` to `work/2_current/`.
+///
+/// Idempotent: if the item is already in `2_current/`, returns Ok without committing.
+/// If the item is not found in `1_backlog/`, logs a warning and returns Ok.
+pub fn move_story_to_current(project_root: &Path, story_id: &str) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let current_dir = sk.join("2_current");
+    let current_path = current_dir.join(format!("{story_id}.md"));
+
+    if current_path.exists() {
+        // Already in 2_current/ — idempotent, nothing to do.
+        return Ok(());
+    }
+
+    let source_dir = item_source_dir(project_root, story_id);
+    let source_path = source_dir.join(format!("{story_id}.md"));
+
+    if !source_path.exists() {
+        slog!(
+            "[lifecycle] Work item '{story_id}' not found in {}; skipping move to 2_current/",
+            source_dir.display()
+        );
+        return Ok(());
+    }
+
+    std::fs::create_dir_all(&current_dir)
+        .map_err(|e| format!("Failed to create work/2_current/ directory: {e}"))?;
+
+    std::fs::rename(&source_path, &current_path)
+        .map_err(|e| format!("Failed to move '{story_id}' to 2_current/: {e}"))?;
+
+    slog!(
+        "[lifecycle] Moved '{story_id}' from {} to work/2_current/",
+        source_dir.display()
+    );
+
+    Ok(())
+}
+
+/// Check whether a feature branch `feature/story-{story_id}` exists and has
+/// commits that are not yet on master.  Returns `true` when there is unmerged
+/// work, `false` when there is no branch or all its commits are already
+/// reachable from master.
+pub fn feature_branch_has_unmerged_changes(project_root: &Path, story_id: &str) -> bool {
+    let branch = format!("feature/story-{story_id}");
+
+    // Check if the branch exists.
+    let branch_check = Command::new("git")
+        .args(["rev-parse", "--verify", &branch])
+        .current_dir(project_root)
+        .output();
+    match branch_check {
+        Ok(out) if out.status.success() => {}
+        _ => return false, // No feature branch → nothing to merge.
+    }
+
+    // Check if the branch has commits not reachable from master.
+    let log = Command::new("git")
+        .args(["log", &format!("master..{branch}"), "--oneline"])
+        .current_dir(project_root)
+        .output();
+    match log {
+        Ok(out) => {
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            !stdout.trim().is_empty()
+        }
+        Err(_) => false,
+    }
+}
+
+/// Move a story from `work/2_current/` to `work/5_done/` and auto-commit.
+///
+/// * If the story is in `2_current/`, it is moved to `5_done/` and committed.
+/// * If the story is in `4_merge/`, it is moved to `5_done/` and committed.
+/// * If the story is already in `5_done/` or `6_archived/`, this is a no-op (idempotent).
+/// * If the story is not found in `2_current/`, `4_merge/`, `5_done/`, or `6_archived/`, an error is returned.
+pub fn move_story_to_archived(project_root: &Path, story_id: &str) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let current_path = sk.join("2_current").join(format!("{story_id}.md"));
+    let merge_path = sk.join("4_merge").join(format!("{story_id}.md"));
+    let done_dir = sk.join("5_done");
+    let done_path = done_dir.join(format!("{story_id}.md"));
+    let archived_path = sk.join("6_archived").join(format!("{story_id}.md"));
+
+    if done_path.exists() || archived_path.exists() {
+        // Already in done or archived — idempotent, nothing to do.
+        return Ok(());
+    }
+
+    // Check 2_current/ first, then 4_merge/
+    let source_path = if current_path.exists() {
+        current_path.clone()
+    } else if merge_path.exists() {
+        merge_path.clone()
+    } else {
+        return Err(format!(
+            "Story '{story_id}' not found in work/2_current/ or work/4_merge/. Cannot accept story."
+        ));
+    };
+
+    std::fs::create_dir_all(&done_dir)
+        .map_err(|e| format!("Failed to create work/5_done/ directory: {e}"))?;
+    std::fs::rename(&source_path, &done_path)
+        .map_err(|e| format!("Failed to move story '{story_id}' to 5_done/: {e}"))?;
+
+    // Strip stale pipeline fields from front matter now that the story is done.
+    for field in &["merge_failure", "retry_count", "blocked"] {
+        if let Err(e) = clear_front_matter_field(&done_path, field) {
+            slog!("[lifecycle] Warning: could not clear {field} from '{story_id}': {e}");
+        }
+    }
+
+    let from_dir = if source_path == current_path {
+        "work/2_current/"
+    } else {
+        "work/4_merge/"
+    };
+    slog!("[lifecycle] Moved story '{story_id}' from {from_dir} to work/5_done/");
+
+    Ok(())
+}
+
+/// Move a story/bug from `work/2_current/` or `work/3_qa/` to `work/4_merge/`.
+///
+/// This stages a work item as ready for the mergemaster to pick up and merge into master.
+/// Idempotent: if already in `4_merge/`, returns Ok without committing.
+pub fn move_story_to_merge(project_root: &Path, story_id: &str) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let current_path = sk.join("2_current").join(format!("{story_id}.md"));
+    let qa_path = sk.join("3_qa").join(format!("{story_id}.md"));
+    let merge_dir = sk.join("4_merge");
+    let merge_path = merge_dir.join(format!("{story_id}.md"));
+
+    if merge_path.exists() {
+        // Already in 4_merge/ — idempotent, nothing to do.
+        return Ok(());
+    }
+
+    // Accept from 2_current/ (manual trigger) or 3_qa/ (pipeline advancement from QA stage).
+    let source_path = if current_path.exists() {
+        current_path.clone()
+    } else if qa_path.exists() {
+        qa_path.clone()
+    } else {
+        return Err(format!(
+            "Work item '{story_id}' not found in work/2_current/ or work/3_qa/. Cannot move to 4_merge/."
+        ));
+    };
+
+    std::fs::create_dir_all(&merge_dir)
+        .map_err(|e| format!("Failed to create work/4_merge/ directory: {e}"))?;
+    std::fs::rename(&source_path, &merge_path)
+        .map_err(|e| format!("Failed to move '{story_id}' to 4_merge/: {e}"))?;
+
+    let from_dir = if source_path == current_path {
+        "work/2_current/"
+    } else {
+        "work/3_qa/"
+    };
+    // Reset retry count and blocked for the new stage.
+    if let Err(e) = clear_front_matter_field(&merge_path, "retry_count") {
+        slog!("[lifecycle] Warning: could not clear retry_count for '{story_id}': {e}");
+    }
+    if let Err(e) = clear_front_matter_field(&merge_path, "blocked") {
+        slog!("[lifecycle] Warning: could not clear blocked for '{story_id}': {e}");
+    }
+
+    slog!("[lifecycle] Moved '{story_id}' from {from_dir} to work/4_merge/");
+
+    Ok(())
+}
+
+/// Move a story/bug from `work/2_current/` to `work/3_qa/` and auto-commit.
+///
+/// This stages a work item for QA review before merging to master.
+/// Idempotent: if already in `3_qa/`, returns Ok without committing.
+pub fn move_story_to_qa(project_root: &Path, story_id: &str) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let current_path = sk.join("2_current").join(format!("{story_id}.md"));
+    let qa_dir = sk.join("3_qa");
+    let qa_path = qa_dir.join(format!("{story_id}.md"));
+
+    if qa_path.exists() {
+        // Already in 3_qa/ — idempotent, nothing to do.
+        return Ok(());
+    }
+
+    if !current_path.exists() {
+        return Err(format!(
+            "Work item '{story_id}' not found in work/2_current/. Cannot move to 3_qa/."
+        ));
+    }
+
+    std::fs::create_dir_all(&qa_dir)
+        .map_err(|e| format!("Failed to create work/3_qa/ directory: {e}"))?;
+    std::fs::rename(&current_path, &qa_path)
+        .map_err(|e| format!("Failed to move '{story_id}' to 3_qa/: {e}"))?;
+
+    // Reset retry count for the new stage.
+    if let Err(e) = clear_front_matter_field(&qa_path, "retry_count") {
+        slog!("[lifecycle] Warning: could not clear retry_count for '{story_id}': {e}");
+    }
+    if let Err(e) = clear_front_matter_field(&qa_path, "blocked") {
+        slog!("[lifecycle] Warning: could not clear blocked for '{story_id}': {e}");
+    }
+
+    slog!("[lifecycle] Moved '{story_id}' from work/2_current/ to work/3_qa/");
+
+    Ok(())
+}
+
+/// Move a story from `work/3_qa/` back to `work/2_current/` and write rejection notes.
+///
+/// Used when a human reviewer rejects a story during manual QA.
+/// Clears the `review_hold` front matter field and appends rejection notes to the story file.
+pub fn reject_story_from_qa(
+    project_root: &Path,
+    story_id: &str,
+    notes: &str,
+) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let qa_path = sk.join("3_qa").join(format!("{story_id}.md"));
+    let current_dir = sk.join("2_current");
+    let current_path = current_dir.join(format!("{story_id}.md"));
+
+    if current_path.exists() {
+        return Ok(()); // Already in 2_current — idempotent.
+    }
+
+    if !qa_path.exists() {
+        return Err(format!(
+            "Work item '{story_id}' not found in work/3_qa/. Cannot reject."
+        ));
+    }
+
+    std::fs::create_dir_all(&current_dir)
+        .map_err(|e| format!("Failed to create work/2_current/ directory: {e}"))?;
+    std::fs::rename(&qa_path, &current_path)
+        .map_err(|e| format!("Failed to move '{story_id}' from 3_qa/ to 2_current/: {e}"))?;
+
+    // Clear review_hold since the story is going back for rework.
+    if let Err(e) = clear_front_matter_field(&current_path, "review_hold") {
+        slog!("[lifecycle] Warning: could not clear review_hold from '{story_id}': {e}");
+    }
+
+    // Write rejection notes into the story file so the coder can see what needs fixing.
+    if !notes.is_empty()
+        && let Err(e) = write_rejection_notes(&current_path, notes)
+    {
+        slog!("[lifecycle] Warning: could not write rejection notes to '{story_id}': {e}");
+    }
+
+    slog!("[lifecycle] Rejected '{story_id}' from work/3_qa/ back to work/2_current/");
+
+    Ok(())
+}
+
+/// Move any work item to an arbitrary pipeline stage by searching all stages.
+///
+/// Accepts `target_stage` as one of: `backlog`, `current`, `qa`, `merge`, `done`.
+/// Idempotent: if the item is already in the target stage, returns Ok.
+/// Returns `(from_stage, to_stage)` on success.
+pub fn move_story_to_stage(
+    project_root: &Path,
+    story_id: &str,
+    target_stage: &str,
+) -> Result<(String, String), String> {
+    let stage_dirs: &[(&str, &str)] = &[
+        ("backlog", "1_backlog"),
+        ("current", "2_current"),
+        ("qa", "3_qa"),
+        ("merge", "4_merge"),
+        ("done", "5_done"),
+    ];
+
+    let target_dir_name = stage_dirs
+        .iter()
+        .find(|(name, _)| *name == target_stage)
+        .map(|(_, dir)| *dir)
+        .ok_or_else(|| {
+            format!(
+                "Invalid target_stage '{target_stage}'. Must be one of: backlog, current, qa, merge, done"
+            )
+        })?;
+
+    let sk = project_root.join(".storkit").join("work");
+    let target_dir = sk.join(target_dir_name);
+    let target_path = target_dir.join(format!("{story_id}.md"));
+
+    if target_path.exists() {
+        return Ok((target_stage.to_string(), target_stage.to_string()));
+    }
+
+    // Search all named stages plus the archive stage.
+    let search_dirs: &[(&str, &str)] = &[
+        ("backlog", "1_backlog"),
+        ("current", "2_current"),
+        ("qa", "3_qa"),
+        ("merge", "4_merge"),
+        ("done", "5_done"),
+        ("archived", "6_archived"),
+    ];
+
+    let mut found_path: Option<std::path::PathBuf> = None;
+    let mut from_stage = "";
+    for (stage_name, dir_name) in search_dirs {
+        let candidate = sk.join(dir_name).join(format!("{story_id}.md"));
+        if candidate.exists() {
+            found_path = Some(candidate);
+            from_stage = stage_name;
+            break;
+        }
+    }
+
+    let source_path =
+        found_path.ok_or_else(|| format!("Work item '{story_id}' not found in any pipeline stage."))?;
+
+    std::fs::create_dir_all(&target_dir)
+        .map_err(|e| format!("Failed to create work/{target_dir_name}/ directory: {e}"))?;
+    std::fs::rename(&source_path, &target_path)
+        .map_err(|e| format!("Failed to move '{story_id}' to work/{target_dir_name}/: {e}"))?;
+
+    slog!(
+        "[lifecycle] Moved '{story_id}' from work/{from_stage}/ to work/{target_dir_name}/"
+    );
+
+    Ok((from_stage.to_string(), target_stage.to_string()))
+}
+
+/// Move a bug from `work/2_current/` or `work/1_backlog/` to `work/5_done/` and auto-commit.
+///
+/// * If the bug is in `2_current/`, it is moved to `5_done/` and committed.
+/// * If the bug is still in `1_backlog/` (never started), it is moved directly to `5_done/`.
+/// * If the bug is already in `5_done/`, this is a no-op (idempotent).
+/// * If the bug is not found anywhere, an error is returned.
+pub fn close_bug_to_archive(project_root: &Path, bug_id: &str) -> Result<(), String> {
+    let sk = project_root.join(".storkit").join("work");
+    let current_path = sk.join("2_current").join(format!("{bug_id}.md"));
+    let backlog_path = sk.join("1_backlog").join(format!("{bug_id}.md"));
+    let archive_dir = item_archive_dir(project_root, bug_id);
+    let archive_path = archive_dir.join(format!("{bug_id}.md"));
+
+    if archive_path.exists() {
+        return Ok(());
+    }
+
+    let source_path = if current_path.exists() {
+        current_path.clone()
+    } else if backlog_path.exists() {
+        backlog_path.clone()
+    } else {
+        return Err(format!(
+            "Bug '{bug_id}' not found in work/2_current/ or work/1_backlog/. Cannot close bug."
+        ));
+    };
+
+    std::fs::create_dir_all(&archive_dir)
+        .map_err(|e| format!("Failed to create work/5_done/ directory: {e}"))?;
+    std::fs::rename(&source_path, &archive_path)
+        .map_err(|e| format!("Failed to move bug '{bug_id}' to 5_done/: {e}"))?;
+
+    slog!(
+        "[lifecycle] Closed bug '{bug_id}' → work/5_done/"
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ── move_story_to_current tests ────────────────────────────────────────────
+
+    #[test]
+    fn move_story_to_current_moves_file() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let backlog = root.join(".storkit/work/1_backlog");
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&current).unwrap();
+        fs::write(backlog.join("10_story_foo.md"), "test").unwrap();
+
+        move_story_to_current(root, "10_story_foo").unwrap();
+
+        assert!(!backlog.join("10_story_foo.md").exists());
+        assert!(current.join("10_story_foo.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_current_is_idempotent_when_already_current() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("11_story_foo.md"), "test").unwrap();
+
+        move_story_to_current(root, "11_story_foo").unwrap();
+        assert!(current.join("11_story_foo.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_current_noop_when_not_in_backlog() {
+        let tmp = tempfile::tempdir().unwrap();
+        assert!(move_story_to_current(tmp.path(), "99_missing").is_ok());
+    }
+
+    #[test]
+    fn move_bug_to_current_moves_from_backlog() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let backlog = root.join(".storkit/work/1_backlog");
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&current).unwrap();
+        fs::write(backlog.join("1_bug_test.md"), "# Bug 1\n").unwrap();
+
+        move_story_to_current(root, "1_bug_test").unwrap();
+
+        assert!(!backlog.join("1_bug_test.md").exists());
+        assert!(current.join("1_bug_test.md").exists());
+    }
+
+    // ── close_bug_to_archive tests ─────────────────────────────────────────────
+
+    #[test]
+    fn close_bug_moves_from_current_to_archive() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("2_bug_test.md"), "# Bug 2\n").unwrap();
+
+        close_bug_to_archive(root, "2_bug_test").unwrap();
+
+        assert!(!current.join("2_bug_test.md").exists());
+        assert!(root.join(".storkit/work/5_done/2_bug_test.md").exists());
+    }
+
+    #[test]
+    fn close_bug_moves_from_backlog_when_not_started() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let backlog = root.join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join("3_bug_test.md"), "# Bug 3\n").unwrap();
+
+        close_bug_to_archive(root, "3_bug_test").unwrap();
+
+        assert!(!backlog.join("3_bug_test.md").exists());
+        assert!(root.join(".storkit/work/5_done/3_bug_test.md").exists());
+    }
+
+    // ── item_type_from_id tests ────────────────────────────────────────────────
+
+    #[test]
+    fn item_type_from_id_detects_types() {
+        assert_eq!(item_type_from_id("1_bug_test"), "bug");
+        assert_eq!(item_type_from_id("1_spike_research"), "spike");
+        assert_eq!(item_type_from_id("50_story_my_story"), "story");
+        assert_eq!(item_type_from_id("1_story_simple"), "story");
+    }
+
+    // ── move_story_to_merge tests ──────────────────────────────────────────────
+
+    #[test]
+    fn move_story_to_merge_moves_file() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("20_story_foo.md"), "test").unwrap();
+
+        move_story_to_merge(root, "20_story_foo").unwrap();
+
+        assert!(!current.join("20_story_foo.md").exists());
+        assert!(root.join(".storkit/work/4_merge/20_story_foo.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_merge_from_qa_dir() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let qa_dir = root.join(".storkit/work/3_qa");
+        fs::create_dir_all(&qa_dir).unwrap();
+        fs::write(qa_dir.join("40_story_test.md"), "test").unwrap();
+
+        move_story_to_merge(root, "40_story_test").unwrap();
+
+        assert!(!qa_dir.join("40_story_test.md").exists());
+        assert!(root.join(".storkit/work/4_merge/40_story_test.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_merge_idempotent_when_already_in_merge() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let merge_dir = root.join(".storkit/work/4_merge");
+        fs::create_dir_all(&merge_dir).unwrap();
+        fs::write(merge_dir.join("21_story_test.md"), "test").unwrap();
+
+        move_story_to_merge(root, "21_story_test").unwrap();
+        assert!(merge_dir.join("21_story_test.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_merge_errors_when_not_in_current_or_qa() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = move_story_to_merge(tmp.path(), "99_nonexistent");
+        assert!(result.unwrap_err().contains("not found in work/2_current/ or work/3_qa/"));
+    }
+
+    // ── move_story_to_qa tests ────────────────────────────────────────────────
+
+    #[test]
+    fn move_story_to_qa_moves_file() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("30_story_qa.md"), "test").unwrap();
+
+        move_story_to_qa(root, "30_story_qa").unwrap();
+
+        assert!(!current.join("30_story_qa.md").exists());
+        assert!(root.join(".storkit/work/3_qa/30_story_qa.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_qa_idempotent_when_already_in_qa() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let qa_dir = root.join(".storkit/work/3_qa");
+        fs::create_dir_all(&qa_dir).unwrap();
+        fs::write(qa_dir.join("31_story_test.md"), "test").unwrap();
+
+        move_story_to_qa(root, "31_story_test").unwrap();
+        assert!(qa_dir.join("31_story_test.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_qa_errors_when_not_in_current() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = move_story_to_qa(tmp.path(), "99_nonexistent");
+        assert!(result.unwrap_err().contains("not found in work/2_current/"));
+    }
+
+    // ── move_story_to_archived tests ──────────────────────────────────────────
+
+    #[test]
+    fn move_story_to_archived_finds_in_merge_dir() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let merge_dir = root.join(".storkit/work/4_merge");
+        fs::create_dir_all(&merge_dir).unwrap();
+        fs::write(merge_dir.join("22_story_test.md"), "test").unwrap();
+
+        move_story_to_archived(root, "22_story_test").unwrap();
+
+        assert!(!merge_dir.join("22_story_test.md").exists());
+        assert!(root.join(".storkit/work/5_done/22_story_test.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_archived_error_when_not_in_current_or_merge() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = move_story_to_archived(tmp.path(), "99_nonexistent");
+        assert!(result.unwrap_err().contains("4_merge"));
+    }
+
+    // ── feature_branch_has_unmerged_changes tests ────────────────────────────
+
+    fn init_git_repo(repo: &std::path::Path) {
+        Command::new("git")
+            .args(["init"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+    }
+
+    /// Bug 226: feature_branch_has_unmerged_changes returns true when the
+    /// feature branch has commits not on master.
+    #[test]
+    fn feature_branch_has_unmerged_changes_detects_unmerged_code() {
+        use std::fs;
+        use tempfile::tempdir;
+
+        let tmp = tempdir().unwrap();
+        let repo = tmp.path();
+        init_git_repo(repo);
+
+        // Create a feature branch with a code commit.
+        Command::new("git")
+            .args(["checkout", "-b", "feature/story-50_story_test"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        fs::write(repo.join("feature.rs"), "fn main() {}").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "add feature"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["checkout", "master"])
+            .current_dir(repo)
+            .output()
+            .unwrap();
+
+        assert!(
+            feature_branch_has_unmerged_changes(repo, "50_story_test"),
+            "should detect unmerged changes on feature branch"
+        );
+    }
+
+    /// Bug 226: feature_branch_has_unmerged_changes returns false when no
+    /// feature branch exists.
+    #[test]
+    fn feature_branch_has_unmerged_changes_false_when_no_branch() {
+        use tempfile::tempdir;
+
+        let tmp = tempdir().unwrap();
+        let repo = tmp.path();
+        init_git_repo(repo);
+
+        assert!(
+            !feature_branch_has_unmerged_changes(repo, "99_nonexistent"),
+            "should return false when no feature branch"
+        );
+    }
+
+    // ── reject_story_from_qa tests ────────────────────────────────────────────
+
+    #[test]
+    fn reject_story_from_qa_moves_to_current() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let qa_dir = root.join(".storkit/work/3_qa");
+        let current_dir = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&qa_dir).unwrap();
+        fs::create_dir_all(&current_dir).unwrap();
+        fs::write(
+            qa_dir.join("50_story_test.md"),
+            "---\nname: Test\nreview_hold: true\n---\n# Story\n",
+        )
+        .unwrap();
+
+        reject_story_from_qa(root, "50_story_test", "Button color wrong").unwrap();
+
+        assert!(!qa_dir.join("50_story_test.md").exists());
+        assert!(current_dir.join("50_story_test.md").exists());
+        let contents = fs::read_to_string(current_dir.join("50_story_test.md")).unwrap();
+        assert!(contents.contains("Button color wrong"));
+        assert!(contents.contains("## QA Rejection Notes"));
+        assert!(!contents.contains("review_hold"));
+    }
+
+    #[test]
+    fn reject_story_from_qa_errors_when_not_in_qa() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = reject_story_from_qa(tmp.path(), "99_nonexistent", "notes");
+        assert!(result.unwrap_err().contains("not found in work/3_qa/"));
+    }
+
+    #[test]
+    fn reject_story_from_qa_idempotent_when_in_current() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current_dir = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current_dir).unwrap();
+        fs::write(current_dir.join("51_story_test.md"), "---\nname: Test\n---\n# Story\n").unwrap();
+
+        reject_story_from_qa(root, "51_story_test", "notes").unwrap();
+        assert!(current_dir.join("51_story_test.md").exists());
+    }
+
+    // ── move_story_to_stage tests ─────────────────────────────────
+
+    #[test]
+    fn move_story_to_stage_moves_from_backlog_to_current() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let backlog = root.join(".storkit/work/1_backlog");
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&current).unwrap();
+        fs::write(backlog.join("60_story_move.md"), "test").unwrap();
+
+        let (from, to) = move_story_to_stage(root, "60_story_move", "current").unwrap();
+
+        assert_eq!(from, "backlog");
+        assert_eq!(to, "current");
+        assert!(!backlog.join("60_story_move.md").exists());
+        assert!(current.join("60_story_move.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_stage_moves_from_current_to_backlog() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        let backlog = root.join(".storkit/work/1_backlog");
+        fs::create_dir_all(&current).unwrap();
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(current.join("61_story_back.md"), "test").unwrap();
+
+        let (from, to) = move_story_to_stage(root, "61_story_back", "backlog").unwrap();
+
+        assert_eq!(from, "current");
+        assert_eq!(to, "backlog");
+        assert!(!current.join("61_story_back.md").exists());
+        assert!(backlog.join("61_story_back.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_stage_idempotent_when_already_in_target() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("62_story_idem.md"), "test").unwrap();
+
+        let (from, to) = move_story_to_stage(root, "62_story_idem", "current").unwrap();
+
+        assert_eq!(from, "current");
+        assert_eq!(to, "current");
+        assert!(current.join("62_story_idem.md").exists());
+    }
+
+    #[test]
+    fn move_story_to_stage_invalid_target_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = move_story_to_stage(tmp.path(), "1_story_test", "invalid");
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("Invalid target_stage"));
+    }
+
+    #[test]
+    fn move_story_to_stage_not_found_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = move_story_to_stage(tmp.path(), "99_story_ghost", "current");
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not found in any pipeline stage"));
+    }
+
+    #[test]
+    fn move_story_to_stage_finds_in_qa_dir() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let qa_dir = root.join(".storkit/work/3_qa");
+        let backlog = root.join(".storkit/work/1_backlog");
+        fs::create_dir_all(&qa_dir).unwrap();
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(qa_dir.join("63_story_qa.md"), "test").unwrap();
+
+        let (from, to) = move_story_to_stage(root, "63_story_qa", "backlog").unwrap();
+
+        assert_eq!(from, "qa");
+        assert_eq!(to, "backlog");
+        assert!(!qa_dir.join("63_story_qa.md").exists());
+        assert!(backlog.join("63_story_qa.md").exists());
+    }
+}
--- a/server/src/agents/merge.rs
+++ b/server/src/agents/merge.rs
--- a/server/src/agents/mod.rs
+++ b/server/src/agents/mod.rs
@@ -0,0 +1,222 @@
+pub mod gates;
+pub mod lifecycle;
+pub mod merge;
+mod pool;
+pub(crate) mod pty;
+pub mod runtime;
+pub mod token_usage;
+
+use crate::config::AgentConfig;
+use serde::{Deserialize, Serialize};
+
+pub use lifecycle::{
+    close_bug_to_archive, feature_branch_has_unmerged_changes, move_story_to_archived,
+    move_story_to_merge, move_story_to_qa, move_story_to_stage, reject_story_from_qa,
+};
+pub use pool::AgentPool;
+
+/// Events emitted during server startup reconciliation to broadcast real-time
+/// progress to connected WebSocket clients.
+#[derive(Debug, Clone, Serialize)]
+pub struct ReconciliationEvent {
+    /// The story being reconciled, or empty string for the overall "done" event.
+    pub story_id: String,
+    /// Coarse status: "checking", "gates_running", "advanced", "skipped", "failed", "done"
+    pub status: String,
+    /// Human-readable details.
+    pub message: String,
+}
+
+/// Events streamed from a running agent to SSE clients.
+#[derive(Debug, Clone, Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum AgentEvent {
+    /// Agent status changed.
+    Status {
+        story_id: String,
+        agent_name: String,
+        status: String,
+    },
+    /// Raw text output from the agent process.
+    Output {
+        story_id: String,
+        agent_name: String,
+        text: String,
+    },
+    /// Agent produced a JSON event from `--output-format stream-json`.
+    AgentJson {
+        story_id: String,
+        agent_name: String,
+        data: serde_json::Value,
+    },
+    /// Agent finished.
+    Done {
+        story_id: String,
+        agent_name: String,
+        session_id: Option<String>,
+    },
+    /// Agent errored.
+    Error {
+        story_id: String,
+        agent_name: String,
+        message: String,
+    },
+    /// Thinking tokens from an extended-thinking block.
+    Thinking {
+        story_id: String,
+        agent_name: String,
+        text: String,
+    },
+}
+
+#[derive(Debug, Clone, Serialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum AgentStatus {
+    Pending,
+    Running,
+    Completed,
+    Failed,
+}
+
+impl std::fmt::Display for AgentStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Pending => write!(f, "pending"),
+            Self::Running => write!(f, "running"),
+            Self::Completed => write!(f, "completed"),
+            Self::Failed => write!(f, "failed"),
+        }
+    }
+}
+
+/// Pipeline stages for automatic story advancement.
+#[derive(Debug, Clone, PartialEq)]
+pub enum PipelineStage {
+    /// Coding agents (coder-1, coder-2, etc.)
+    Coder,
+    /// QA review agent
+    Qa,
+    /// Mergemaster agent
+    Mergemaster,
+    /// Supervisors and unknown agents — no automatic advancement.
+    Other,
+}
+
+/// Determine the pipeline stage from an agent name.
+pub fn pipeline_stage(agent_name: &str) -> PipelineStage {
+    match agent_name {
+        "qa" => PipelineStage::Qa,
+        "mergemaster" => PipelineStage::Mergemaster,
+        name if name.starts_with("coder") => PipelineStage::Coder,
+        _ => PipelineStage::Other,
+    }
+}
+
+/// Determine the pipeline stage for a configured agent.
+///
+/// Prefers the explicit `stage` config field (added in Bug 150) over the
+/// legacy name-based heuristic so that agents with non-standard names
+/// (e.g. `qa-2`, `coder-opus`) are assigned to the correct stage.
+pub(crate) fn agent_config_stage(cfg: &AgentConfig) -> PipelineStage {
+    match cfg.stage.as_deref() {
+        Some("coder") => PipelineStage::Coder,
+        Some("qa") => PipelineStage::Qa,
+        Some("mergemaster") => PipelineStage::Mergemaster,
+        Some(_) => PipelineStage::Other,
+        None => pipeline_stage(&cfg.name),
+    }
+}
+
+/// Completion report produced when acceptance gates are run.
+///
+/// Created automatically by the server when an agent process exits normally,
+/// or via the internal `report_completion` method.
+#[derive(Debug, Serialize, Clone)]
+pub struct CompletionReport {
+    pub summary: String,
+    pub gates_passed: bool,
+    pub gate_output: String,
+}
+
+/// Token usage from a Claude Code session's `result` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct TokenUsage {
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_creation_input_tokens: u64,
+    pub cache_read_input_tokens: u64,
+    pub total_cost_usd: f64,
+}
+
+impl TokenUsage {
+    /// Parse token usage from a Claude Code `result` JSON event.
+    pub fn from_result_event(json: &serde_json::Value) -> Option<Self> {
+        let usage = json.get("usage")?;
+        Some(Self {
+            input_tokens: usage
+                .get("input_tokens")
+                .and_then(|v| v.as_u64())
+                .unwrap_or(0),
+            output_tokens: usage
+                .get("output_tokens")
+                .and_then(|v| v.as_u64())
+                .unwrap_or(0),
+            cache_creation_input_tokens: usage
+                .get("cache_creation_input_tokens")
+                .and_then(|v| v.as_u64())
+                .unwrap_or(0),
+            cache_read_input_tokens: usage
+                .get("cache_read_input_tokens")
+                .and_then(|v| v.as_u64())
+                .unwrap_or(0),
+            total_cost_usd: json
+                .get("total_cost_usd")
+                .and_then(|v| v.as_f64())
+                .unwrap_or(0.0),
+        })
+    }
+}
+
+#[derive(Debug, Serialize, Clone)]
+pub struct AgentInfo {
+    pub story_id: String,
+    pub agent_name: String,
+    pub status: AgentStatus,
+    pub session_id: Option<String>,
+    pub worktree_path: Option<String>,
+    pub base_branch: Option<String>,
+    pub completion: Option<CompletionReport>,
+    /// UUID identifying the persistent log file for this session.
+    pub log_session_id: Option<String>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ── pipeline_stage tests ──────────────────────────────────────────────────
+
+    #[test]
+    fn pipeline_stage_detects_coders() {
+        assert_eq!(pipeline_stage("coder-1"), PipelineStage::Coder);
+        assert_eq!(pipeline_stage("coder-2"), PipelineStage::Coder);
+        assert_eq!(pipeline_stage("coder-3"), PipelineStage::Coder);
+    }
+
+    #[test]
+    fn pipeline_stage_detects_qa() {
+        assert_eq!(pipeline_stage("qa"), PipelineStage::Qa);
+    }
+
+    #[test]
+    fn pipeline_stage_detects_mergemaster() {
+        assert_eq!(pipeline_stage("mergemaster"), PipelineStage::Mergemaster);
+    }
+
+    #[test]
+    fn pipeline_stage_supervisor_is_other() {
+        assert_eq!(pipeline_stage("supervisor"), PipelineStage::Other);
+        assert_eq!(pipeline_stage("default"), PipelineStage::Other);
+        assert_eq!(pipeline_stage("unknown"), PipelineStage::Other);
+    }
+}
--- a/server/src/agents/pool/auto_assign.rs
+++ b/server/src/agents/pool/auto_assign.rs
--- a/server/src/agents/pool/mod.rs
+++ b/server/src/agents/pool/mod.rs
--- a/server/src/agents/pool/pipeline.rs
+++ b/server/src/agents/pool/pipeline.rs
--- a/server/src/agents/pty.rs
+++ b/server/src/agents/pty.rs
@@ -0,0 +1,591 @@
+use std::collections::HashMap;
+use std::io::{BufRead, BufReader};
+use std::sync::{Arc, Mutex};
+
+use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
+use tokio::sync::broadcast;
+
+use super::{AgentEvent, TokenUsage};
+use crate::agent_log::AgentLogWriter;
+use crate::io::watcher::WatcherEvent;
+use crate::slog;
+use crate::slog_warn;
+
+/// Result from a PTY agent session, containing the session ID and token usage.
+pub(in crate::agents) struct PtyResult {
+    pub session_id: Option<String>,
+    pub token_usage: Option<TokenUsage>,
+}
+
+fn composite_key(story_id: &str, agent_name: &str) -> String {
+    format!("{story_id}:{agent_name}")
+}
+
+struct ChildKillerGuard {
+    killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    key: String,
+}
+
+impl Drop for ChildKillerGuard {
+    fn drop(&mut self) {
+        if let Ok(mut killers) = self.killers.lock() {
+            killers.remove(&self.key);
+        }
+    }
+}
+
+/// Spawn claude agent in a PTY and stream events through the broadcast channel.
+#[allow(clippy::too_many_arguments)]
+pub(in crate::agents) async fn run_agent_pty_streaming(
+    story_id: &str,
+    agent_name: &str,
+    command: &str,
+    args: &[String],
+    prompt: &str,
+    cwd: &str,
+    tx: &broadcast::Sender<AgentEvent>,
+    event_log: &Arc<Mutex<Vec<AgentEvent>>>,
+    log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    inactivity_timeout_secs: u64,
+    child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: broadcast::Sender<WatcherEvent>,
+) -> Result<PtyResult, String> {
+    let sid = story_id.to_string();
+    let aname = agent_name.to_string();
+    let cmd = command.to_string();
+    let args = args.to_vec();
+    let prompt = prompt.to_string();
+    let cwd = cwd.to_string();
+    let tx = tx.clone();
+    let event_log = event_log.clone();
+
+    tokio::task::spawn_blocking(move || {
+        run_agent_pty_blocking(
+            &sid,
+            &aname,
+            &cmd,
+            &args,
+            &prompt,
+            &cwd,
+            &tx,
+            &event_log,
+            log_writer.as_deref(),
+            inactivity_timeout_secs,
+            &child_killers,
+            &watcher_tx,
+        )
+    })
+    .await
+    .map_err(|e| format!("Agent task panicked: {e}"))?
+}
+
+/// Dispatch a `stream_event` from Claude Code's `--include-partial-messages` output.
+///
+/// Extracts `thinking_delta` and `text_delta` from `content_block_delta` events
+/// and routes them as `AgentEvent::Thinking` and `AgentEvent::Output` respectively.
+/// This ensures thinking traces flow through the dedicated `ThinkingBlock` UI
+/// component rather than appearing as unbounded regular output.
+fn handle_agent_stream_event(
+    event: &serde_json::Value,
+    story_id: &str,
+    agent_name: &str,
+    tx: &broadcast::Sender<AgentEvent>,
+    event_log: &Mutex<Vec<AgentEvent>>,
+    log_writer: Option<&Mutex<AgentLogWriter>>,
+) {
+    let event_type = event.get("type").and_then(|t| t.as_str()).unwrap_or("");
+
+    if event_type == "content_block_delta"
+        && let Some(delta) = event.get("delta")
+    {
+        let delta_type = delta.get("type").and_then(|t| t.as_str()).unwrap_or("");
+        match delta_type {
+            "thinking_delta" => {
+                if let Some(thinking) = delta.get("thinking").and_then(|t| t.as_str()) {
+                    emit_event(
+                        AgentEvent::Thinking {
+                            story_id: story_id.to_string(),
+                            agent_name: agent_name.to_string(),
+                            text: thinking.to_string(),
+                        },
+                        tx,
+                        event_log,
+                        log_writer,
+                    );
+                }
+            }
+            "text_delta" => {
+                if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
+                    emit_event(
+                        AgentEvent::Output {
+                            story_id: story_id.to_string(),
+                            agent_name: agent_name.to_string(),
+                            text: text.to_string(),
+                        },
+                        tx,
+                        event_log,
+                        log_writer,
+                    );
+                }
+            }
+            _ => {}
+        }
+    }
+}
+
+/// Helper to send an event to broadcast, event log, and optional persistent log file.
+pub(super) fn emit_event(
+    event: AgentEvent,
+    tx: &broadcast::Sender<AgentEvent>,
+    event_log: &Mutex<Vec<AgentEvent>>,
+    log_writer: Option<&Mutex<AgentLogWriter>>,
+) {
+    if let Ok(mut log) = event_log.lock() {
+        log.push(event.clone());
+    }
+    if let Some(writer) = log_writer
+        && let Ok(mut w) = writer.lock()
+        && let Err(e) = w.write_event(&event)
+    {
+        eprintln!("[agent_log] Failed to write event to log file: {e}");
+    }
+    let _ = tx.send(event);
+}
+
+#[allow(clippy::too_many_arguments)]
+fn run_agent_pty_blocking(
+    story_id: &str,
+    agent_name: &str,
+    command: &str,
+    args: &[String],
+    prompt: &str,
+    cwd: &str,
+    tx: &broadcast::Sender<AgentEvent>,
+    event_log: &Mutex<Vec<AgentEvent>>,
+    log_writer: Option<&Mutex<AgentLogWriter>>,
+    inactivity_timeout_secs: u64,
+    child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: &broadcast::Sender<WatcherEvent>,
+) -> Result<PtyResult, String> {
+    let pty_system = native_pty_system();
+
+    let pair = pty_system
+        .openpty(PtySize {
+            rows: 50,
+            cols: 200,
+            pixel_width: 0,
+            pixel_height: 0,
+        })
+        .map_err(|e| format!("Failed to open PTY: {e}"))?;
+
+    let mut cmd = CommandBuilder::new(command);
+
+    // -p <prompt> must come first
+    cmd.arg("-p");
+    cmd.arg(prompt);
+
+    // Add configured args (e.g., --directory /path/to/worktree, --model, etc.)
+    for arg in args {
+        cmd.arg(arg);
+    }
+
+    cmd.arg("--output-format");
+    cmd.arg("stream-json");
+    cmd.arg("--verbose");
+    // Enable partial streaming so we receive thinking_delta and text_delta
+    // events in real-time, rather than only complete assistant events.
+    // Without this, thinking traces may not appear in the structured output
+    // and instead leak as unstructured PTY text.
+    cmd.arg("--include-partial-messages");
+
+    // Supervised agents don't need interactive permission prompts
+    cmd.arg("--permission-mode");
+    cmd.arg("bypassPermissions");
+
+    cmd.cwd(cwd);
+    cmd.env("NO_COLOR", "1");
+
+    // Allow spawning Claude Code from within a Claude Code session
+    cmd.env_remove("CLAUDECODE");
+    cmd.env_remove("CLAUDE_CODE_ENTRYPOINT");
+
+    slog!("[agent:{story_id}:{agent_name}] Spawning {command} in {cwd} with args: {args:?}");
+
+    let mut child = pair
+        .slave
+        .spawn_command(cmd)
+        .map_err(|e| format!("Failed to spawn agent for {story_id}:{agent_name}: {e}"))?;
+
+    // Register the child killer so that kill_all_children() / stop_agent() can
+    // terminate this process on server shutdown, even if the blocking thread
+    // cannot be interrupted.  The ChildKillerGuard deregisters on function exit.
+    let killer_key = composite_key(story_id, agent_name);
+    {
+        let killer = child.clone_killer();
+        if let Ok(mut killers) = child_killers.lock() {
+            killers.insert(killer_key.clone(), killer);
+        }
+    }
+    let _killer_guard = ChildKillerGuard {
+        killers: Arc::clone(child_killers),
+        key: killer_key,
+    };
+
+    drop(pair.slave);
+
+    let reader = pair
+        .master
+        .try_clone_reader()
+        .map_err(|e| format!("Failed to clone PTY reader: {e}"))?;
+
+    drop(pair.master);
+
+    // Spawn a reader thread to collect PTY output lines.
+    // We use a channel so the main thread can apply an inactivity deadline
+    // via recv_timeout: if no output arrives within the configured window
+    // the process is killed and the agent is marked Failed.
+    let (line_tx, line_rx) = std::sync::mpsc::channel::<std::io::Result<String>>();
+    std::thread::spawn(move || {
+        let buf_reader = BufReader::new(reader);
+        for line in buf_reader.lines() {
+            if line_tx.send(line).is_err() {
+                break;
+            }
+        }
+    });
+
+    let timeout_dur = if inactivity_timeout_secs > 0 {
+        Some(std::time::Duration::from_secs(inactivity_timeout_secs))
+    } else {
+        None
+    };
+
+    let mut session_id: Option<String> = None;
+    let mut token_usage: Option<TokenUsage> = None;
+
+    loop {
+        let recv_result = match timeout_dur {
+            Some(dur) => line_rx.recv_timeout(dur),
+            None => line_rx
+                .recv()
+                .map_err(|_| std::sync::mpsc::RecvTimeoutError::Disconnected),
+        };
+
+        let line = match recv_result {
+            Ok(Ok(l)) => l,
+            Ok(Err(_)) => {
+                // IO error reading from PTY — treat as EOF.
+                break;
+            }
+            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
+                // Reader thread exited (EOF from PTY).
+                break;
+            }
+            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
+                slog_warn!(
+                    "[agent:{story_id}:{agent_name}] Inactivity timeout after \
+                     {inactivity_timeout_secs}s with no output. Killing process."
+                );
+                let _ = child.kill();
+                let _ = child.wait();
+                return Err(format!(
+                    "Agent inactivity timeout: no output received for {inactivity_timeout_secs}s"
+                ));
+            }
+        };
+
+        let trimmed = line.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+
+        // Try to parse as JSON
+        let json: serde_json::Value = match serde_json::from_str(trimmed) {
+            Ok(j) => j,
+            Err(_) => {
+                // Non-JSON output (terminal escapes etc.) — send as raw output
+                emit_event(
+                    AgentEvent::Output {
+                        story_id: story_id.to_string(),
+                        agent_name: agent_name.to_string(),
+                        text: trimmed.to_string(),
+                    },
+                    tx,
+                    event_log,
+                    log_writer,
+                );
+                continue;
+            }
+        };
+
+        let event_type = json.get("type").and_then(|t| t.as_str()).unwrap_or("");
+
+        match event_type {
+            "system" => {
+                session_id = json
+                    .get("session_id")
+                    .and_then(|s| s.as_str())
+                    .map(|s| s.to_string());
+            }
+            // With --include-partial-messages, thinking and text arrive
+            // incrementally via stream_event → content_block_delta. Handle
+            // them here for real-time streaming to the frontend.
+            "stream_event" => {
+                if let Some(event) = json.get("event") {
+                    handle_agent_stream_event(
+                        event,
+                        story_id,
+                        agent_name,
+                        tx,
+                        event_log,
+                        log_writer,
+                    );
+                }
+            }
+            // Complete assistant events are skipped for content extraction
+            // because thinking and text already arrived via stream_event.
+            // The raw JSON is still forwarded as AgentJson below.
+            "assistant" | "user" => {}
+            "rate_limit_event" => {
+                slog!(
+                    "[agent:{story_id}:{agent_name}] API rate limit warning received"
+                );
+                let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
+                    story_id: story_id.to_string(),
+                    agent_name: agent_name.to_string(),
+                });
+            }
+            "result" => {
+                // Extract token usage from the result event.
+                if let Some(usage) = TokenUsage::from_result_event(&json) {
+                    slog!(
+                        "[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
+                        usage.input_tokens,
+                        usage.output_tokens,
+                        usage.cache_creation_input_tokens,
+                        usage.cache_read_input_tokens,
+                        usage.total_cost_usd,
+                    );
+                    token_usage = Some(usage);
+                }
+            }
+            _ => {}
+        }
+
+        // Forward all JSON events
+        emit_event(
+            AgentEvent::AgentJson {
+                story_id: story_id.to_string(),
+                agent_name: agent_name.to_string(),
+                data: json,
+            },
+            tx,
+            event_log,
+            log_writer,
+        );
+    }
+
+    let _ = child.kill();
+    let _ = child.wait();
+
+    slog!(
+        "[agent:{story_id}:{agent_name}] Done. Session: {:?}",
+        session_id
+    );
+
+    Ok(PtyResult {
+        session_id,
+        token_usage,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentEvent;
+    use crate::io::watcher::WatcherEvent;
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    // ── AC1: pty detects rate_limit_event and emits RateLimitWarning ─────────
+
+    /// Verify that when a `rate_limit_event` JSON line appears in PTY output,
+    /// `run_agent_pty_streaming` sends a `WatcherEvent::RateLimitWarning` with
+    /// the correct story_id and agent_name.
+    ///
+    /// The command invoked is: `sh -p -- <script>` where `--` terminates
+    /// option parsing so the script path is treated as the operand.
+    #[tokio::test]
+    async fn rate_limit_event_json_sends_watcher_warning() {
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let script = tmp.path().join("emit_rate_limit.sh");
+        std::fs::write(
+            &script,
+            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"allowed_warning\"}}'\n",
+        )
+        .unwrap();
+        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
+
+        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
+        let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
+        let event_log = Arc::new(Mutex::new(Vec::new()));
+        let child_killers = Arc::new(Mutex::new(HashMap::new()));
+
+        // sh -p "--" <script>: -p = privileged mode, "--" = end options,
+        // then the script path is the file operand.
+        let result = run_agent_pty_streaming(
+            "365_story_test",
+            "coder-1",
+            "sh",
+            &[script.to_string_lossy().to_string()],
+            "--",
+            "/tmp",
+            &tx,
+            &event_log,
+            None,
+            0,
+            child_killers,
+            watcher_tx,
+        )
+        .await;
+
+        assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());
+
+        let evt = watcher_rx
+            .try_recv()
+            .expect("Expected a RateLimitWarning to be sent on watcher_tx");
+        match evt {
+            WatcherEvent::RateLimitWarning {
+                story_id,
+                agent_name,
+            } => {
+                assert_eq!(story_id, "365_story_test");
+                assert_eq!(agent_name, "coder-1");
+            }
+            other => panic!("Expected RateLimitWarning, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_emit_event_writes_to_log_writer() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+
+        let log_writer =
+            AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-emit").unwrap();
+        let log_mutex = Mutex::new(log_writer);
+
+        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
+        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());
+
+        let event = AgentEvent::Status {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-1".to_string(),
+            status: "running".to_string(),
+        };
+
+        emit_event(event, &tx, &event_log, Some(&log_mutex));
+
+        // Verify event was added to in-memory log
+        let mem_events = event_log.lock().unwrap();
+        assert_eq!(mem_events.len(), 1);
+        drop(mem_events);
+
+        // Verify event was written to the log file
+        let log_path =
+            crate::agent_log::log_file_path(root, "42_story_foo", "coder-1", "sess-emit");
+        let entries = crate::agent_log::read_log(&log_path).unwrap();
+        assert_eq!(entries.len(), 1);
+        assert_eq!(entries[0].event["type"], "status");
+        assert_eq!(entries[0].event["status"], "running");
+    }
+
+    // ── bug 167: handle_agent_stream_event routes thinking/text correctly ───
+
+    #[test]
+    fn stream_event_thinking_delta_emits_thinking_event() {
+        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
+        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());
+
+        let event = serde_json::json!({
+            "type": "content_block_delta",
+            "delta": {"type": "thinking_delta", "thinking": "Let me analyze this..."}
+        });
+
+        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);
+
+        let received = rx.try_recv().unwrap();
+        match received {
+            AgentEvent::Thinking {
+                story_id,
+                agent_name,
+                text,
+            } => {
+                assert_eq!(story_id, "s1");
+                assert_eq!(agent_name, "coder-1");
+                assert_eq!(text, "Let me analyze this...");
+            }
+            other => panic!("Expected Thinking event, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn stream_event_text_delta_emits_output_event() {
+        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
+        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());
+
+        let event = serde_json::json!({
+            "type": "content_block_delta",
+            "delta": {"type": "text_delta", "text": "Here is the result."}
+        });
+
+        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);
+
+        let received = rx.try_recv().unwrap();
+        match received {
+            AgentEvent::Output {
+                story_id,
+                agent_name,
+                text,
+            } => {
+                assert_eq!(story_id, "s1");
+                assert_eq!(agent_name, "coder-1");
+                assert_eq!(text, "Here is the result.");
+            }
+            other => panic!("Expected Output event, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn stream_event_input_json_delta_ignored() {
+        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
+        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());
+
+        let event = serde_json::json!({
+            "type": "content_block_delta",
+            "delta": {"type": "input_json_delta", "partial_json": "{\"file\":"}
+        });
+
+        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);
+
+        // No event should be emitted for tool argument deltas
+        assert!(rx.try_recv().is_err());
+    }
+
+    #[test]
+    fn stream_event_non_delta_type_ignored() {
+        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
+        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());
+
+        let event = serde_json::json!({
+            "type": "message_start",
+            "message": {"role": "assistant"}
+        });
+
+        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);
+
+        assert!(rx.try_recv().is_err());
+    }
+}
--- a/server/src/agents/runtime/claude_code.rs
+++ b/server/src/agents/runtime/claude_code.rs
@@ -0,0 +1,73 @@
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+
+use portable_pty::ChildKiller;
+use tokio::sync::broadcast;
+
+use crate::agent_log::AgentLogWriter;
+use crate::io::watcher::WatcherEvent;
+
+use super::{AgentEvent, AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
+
+/// Agent runtime that spawns the `claude` CLI in a PTY and streams JSON events.
+///
+/// This is the default runtime (`runtime = "claude-code"` in project.toml).
+/// It wraps the existing PTY-based execution logic, preserving all streaming,
+/// token tracking, and inactivity timeout behaviour.
+pub struct ClaudeCodeRuntime {
+    child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: broadcast::Sender<WatcherEvent>,
+}
+
+impl ClaudeCodeRuntime {
+    pub fn new(
+        child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+        watcher_tx: broadcast::Sender<WatcherEvent>,
+    ) -> Self {
+        Self {
+            child_killers,
+            watcher_tx,
+        }
+    }
+}
+
+impl AgentRuntime for ClaudeCodeRuntime {
+    async fn start(
+        &self,
+        ctx: RuntimeContext,
+        tx: broadcast::Sender<AgentEvent>,
+        event_log: Arc<Mutex<Vec<AgentEvent>>>,
+        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    ) -> Result<RuntimeResult, String> {
+        let pty_result = super::super::pty::run_agent_pty_streaming(
+            &ctx.story_id,
+            &ctx.agent_name,
+            &ctx.command,
+            &ctx.args,
+            &ctx.prompt,
+            &ctx.cwd,
+            &tx,
+            &event_log,
+            log_writer,
+            ctx.inactivity_timeout_secs,
+            Arc::clone(&self.child_killers),
+            self.watcher_tx.clone(),
+        )
+        .await?;
+
+        Ok(RuntimeResult {
+            session_id: pty_result.session_id,
+            token_usage: pty_result.token_usage,
+        })
+    }
+
+    fn stop(&self) {
+        // Stopping is handled externally by the pool via kill_child_for_key().
+        // The ChildKillerGuard in pty.rs deregisters automatically on process exit.
+    }
+
+    fn get_status(&self) -> RuntimeStatus {
+        // Lifecycle status is tracked by the pool; the runtime itself is stateless.
+        RuntimeStatus::Idle
+    }
+}
--- a/server/src/agents/runtime/gemini.rs
+++ b/server/src/agents/runtime/gemini.rs
@@ -0,0 +1,809 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use serde_json::{json, Value};
+use tokio::sync::broadcast;
+
+use crate::agent_log::AgentLogWriter;
+use crate::slog;
+
+use super::super::{AgentEvent, TokenUsage};
+use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
+
+// ── Public runtime struct ────────────────────────────────────────────
+
+/// Agent runtime that drives a Gemini model through the Google AI
+/// `generateContent` REST API.
+///
+/// The runtime:
+/// 1. Fetches MCP tool definitions from storkit's MCP server.
+/// 2. Converts them to Gemini function-calling format.
+/// 3. Sends the agent prompt + tools to the Gemini API.
+/// 4. Executes any requested function calls via MCP `tools/call`.
+/// 5. Loops until the model produces a text-only response or an error.
+/// 6. Tracks token usage from the API response metadata.
+pub struct GeminiRuntime {
+    /// Whether a stop has been requested.
+    cancelled: Arc<AtomicBool>,
+}
+
+impl GeminiRuntime {
+    pub fn new() -> Self {
+        Self {
+            cancelled: Arc::new(AtomicBool::new(false)),
+        }
+    }
+}
+
+impl AgentRuntime for GeminiRuntime {
+    async fn start(
+        &self,
+        ctx: RuntimeContext,
+        tx: broadcast::Sender<AgentEvent>,
+        event_log: Arc<Mutex<Vec<AgentEvent>>>,
+        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    ) -> Result<RuntimeResult, String> {
+        let api_key = std::env::var("GOOGLE_AI_API_KEY").map_err(|_| {
+            "GOOGLE_AI_API_KEY environment variable is not set. \
+             Set it to your Google AI API key to use the Gemini runtime."
+                .to_string()
+        })?;
+
+        let model = if ctx.command.starts_with("gemini") {
+            // The pool puts the model into `command` for non-CLI runtimes,
+            // but also check args for a --model flag.
+            ctx.command.clone()
+        } else {
+            // Fall back to args: look for --model <value>
+            ctx.args
+                .iter()
+                .position(|a| a == "--model")
+                .and_then(|i| ctx.args.get(i + 1))
+                .cloned()
+                .unwrap_or_else(|| "gemini-2.5-pro".to_string())
+        };
+
+        let mcp_port = ctx.mcp_port;
+        let mcp_base = format!("http://localhost:{mcp_port}/mcp");
+
+        let client = Client::new();
+        let cancelled = Arc::clone(&self.cancelled);
+
+        // Step 1: Fetch MCP tool definitions and convert to Gemini format.
+        let gemini_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;
+
+        // Step 2: Build the initial conversation contents.
+        let system_instruction = build_system_instruction(&ctx);
+        let mut contents: Vec<Value> = vec![json!({
+            "role": "user",
+            "parts": [{ "text": ctx.prompt }]
+        })];
+
+        let mut total_usage = TokenUsage {
+            input_tokens: 0,
+            output_tokens: 0,
+            cache_creation_input_tokens: 0,
+            cache_read_input_tokens: 0,
+            total_cost_usd: 0.0,
+        };
+
+        let emit = |event: AgentEvent| {
+            super::super::pty::emit_event(
+                event,
+                &tx,
+                &event_log,
+                log_writer.as_ref().map(|w| w.as_ref()),
+            );
+        };
+
+        emit(AgentEvent::Status {
+            story_id: ctx.story_id.clone(),
+            agent_name: ctx.agent_name.clone(),
+            status: "running".to_string(),
+        });
+
+        // Step 3: Conversation loop.
+        let mut turn = 0u32;
+        let max_turns = 200; // Safety limit
+
+        loop {
+            if cancelled.load(Ordering::Relaxed) {
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: "Agent was stopped by user".to_string(),
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            turn += 1;
+            if turn > max_turns {
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: format!("Exceeded maximum turns ({max_turns})"),
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            slog!("[gemini] Turn {turn} for {}:{}", ctx.story_id, ctx.agent_name);
+
+            let request_body = build_generate_content_request(
+                &system_instruction,
+                &contents,
+                &gemini_tools,
+            );
+
+            let url = format!(
+                "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
+            );
+
+            let response = client
+                .post(&url)
+                .json(&request_body)
+                .send()
+                .await
+                .map_err(|e| format!("Gemini API request failed: {e}"))?;
+
+            let status = response.status();
+            let body: Value = response
+                .json()
+                .await
+                .map_err(|e| format!("Failed to parse Gemini API response: {e}"))?;
+
+            if !status.is_success() {
+                let error_msg = body["error"]["message"]
+                    .as_str()
+                    .unwrap_or("Unknown API error");
+                let err = format!("Gemini API error ({status}): {error_msg}");
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: err.clone(),
+                });
+                return Err(err);
+            }
+
+            // Accumulate token usage.
+            if let Some(usage) = parse_usage_metadata(&body) {
+                total_usage.input_tokens += usage.input_tokens;
+                total_usage.output_tokens += usage.output_tokens;
+            }
+
+            // Extract the candidate response.
+            let candidate = body["candidates"]
+                .as_array()
+                .and_then(|c| c.first())
+                .ok_or_else(|| "No candidates in Gemini response".to_string())?;
+
+            let parts = candidate["content"]["parts"]
+                .as_array()
+                .ok_or_else(|| "No parts in Gemini response candidate".to_string())?;
+
+            // Check finish reason.
+            let finish_reason = candidate["finishReason"].as_str().unwrap_or("");
+
+            // Separate text parts and function call parts.
+            let mut text_parts: Vec<String> = Vec::new();
+            let mut function_calls: Vec<GeminiFunctionCall> = Vec::new();
+
+            for part in parts {
+                if let Some(text) = part["text"].as_str() {
+                    text_parts.push(text.to_string());
+                }
+                if let Some(fc) = part.get("functionCall")
+                    && let (Some(name), Some(args)) =
+                        (fc["name"].as_str(), fc.get("args"))
+                {
+                    function_calls.push(GeminiFunctionCall {
+                        name: name.to_string(),
+                        args: args.clone(),
+                    });
+                }
+            }
+
+            // Emit any text output.
+            for text in &text_parts {
+                if !text.is_empty() {
+                    emit(AgentEvent::Output {
+                        story_id: ctx.story_id.clone(),
+                        agent_name: ctx.agent_name.clone(),
+                        text: text.clone(),
+                    });
+                }
+            }
+
+            // If no function calls, the model is done.
+            if function_calls.is_empty() {
+                emit(AgentEvent::Done {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    session_id: None,
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            // Add the model's response to the conversation.
+            let model_parts: Vec<Value> = parts.to_vec();
+            contents.push(json!({
+                "role": "model",
+                "parts": model_parts
+            }));
+
+            // Execute function calls via MCP and build response parts.
+            let mut response_parts: Vec<Value> = Vec::new();
+
+            for fc in &function_calls {
+                if cancelled.load(Ordering::Relaxed) {
+                    break;
+                }
+
+                slog!(
+                    "[gemini] Calling MCP tool '{}' for {}:{}",
+                    fc.name,
+                    ctx.story_id,
+                    ctx.agent_name
+                );
+
+                emit(AgentEvent::Output {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    text: format!("\n[Tool call: {}]\n", fc.name),
+                });
+
+                let tool_result =
+                    call_mcp_tool(&client, &mcp_base, &fc.name, &fc.args).await;
+
+                let response_value = match &tool_result {
+                    Ok(result) => {
+                        emit(AgentEvent::Output {
+                            story_id: ctx.story_id.clone(),
+                            agent_name: ctx.agent_name.clone(),
+                            text: format!(
+                                "[Tool result: {} chars]\n",
+                                result.len()
+                            ),
+                        });
+                        json!({ "result": result })
+                    }
+                    Err(e) => {
+                        emit(AgentEvent::Output {
+                            story_id: ctx.story_id.clone(),
+                            agent_name: ctx.agent_name.clone(),
+                            text: format!("[Tool error: {e}]\n"),
+                        });
+                        json!({ "error": e })
+                    }
+                };
+
+                response_parts.push(json!({
+                    "functionResponse": {
+                        "name": fc.name,
+                        "response": response_value
+                    }
+                }));
+            }
+
+            // Add function responses to the conversation.
+            contents.push(json!({
+                "role": "user",
+                "parts": response_parts
+            }));
+
+            // If the model indicated it's done despite having function calls,
+            // respect the finish reason.
+            if finish_reason == "STOP" && function_calls.is_empty() {
+                break;
+            }
+        }
+
+        emit(AgentEvent::Done {
+            story_id: ctx.story_id.clone(),
+            agent_name: ctx.agent_name.clone(),
+            session_id: None,
+        });
+
+        Ok(RuntimeResult {
+            session_id: None,
+            token_usage: Some(total_usage),
+        })
+    }
+
+    fn stop(&self) {
+        self.cancelled.store(true, Ordering::Relaxed);
+    }
+
+    fn get_status(&self) -> RuntimeStatus {
+        if self.cancelled.load(Ordering::Relaxed) {
+            RuntimeStatus::Failed
+        } else {
+            RuntimeStatus::Idle
+        }
+    }
+}
+
+// ── Internal types ───────────────────────────────────────────────────
+
+struct GeminiFunctionCall {
+    name: String,
+    args: Value,
+}
+
+// ── Gemini API types (for serde) ─────────────────────────────────────
+
+#[derive(Debug, Serialize, Deserialize)]
+struct GeminiFunctionDeclaration {
+    name: String,
+    description: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    parameters: Option<Value>,
+}
+
+// ── Helper functions ─────────────────────────────────────────────────
+
+/// Build the system instruction content from the RuntimeContext.
+fn build_system_instruction(ctx: &RuntimeContext) -> Value {
+    // Use system_prompt from args if provided via --append-system-prompt,
+    // otherwise use a sensible default.
+    let system_text = ctx
+        .args
+        .iter()
+        .position(|a| a == "--append-system-prompt")
+        .and_then(|i| ctx.args.get(i + 1))
+        .cloned()
+        .unwrap_or_else(|| {
+            format!(
+                "You are an AI coding agent working on story {}. \
+                 You have access to tools via function calling. \
+                 Use them to complete the task. \
+                 Work in the directory: {}",
+                ctx.story_id, ctx.cwd
+            )
+        });
+
+    json!({
+        "parts": [{ "text": system_text }]
+    })
+}
+
+/// Build the full `generateContent` request body.
+fn build_generate_content_request(
+    system_instruction: &Value,
+    contents: &[Value],
+    gemini_tools: &[GeminiFunctionDeclaration],
+) -> Value {
+    let mut body = json!({
+        "system_instruction": system_instruction,
+        "contents": contents,
+        "generationConfig": {
+            "temperature": 0.2,
+            "maxOutputTokens": 65536,
+        }
+    });
+
+    if !gemini_tools.is_empty() {
+        body["tools"] = json!([{
+            "functionDeclarations": gemini_tools
+        }]);
+    }
+
+    body
+}
+
+/// Fetch MCP tool definitions from storkit's MCP server and convert
+/// them to Gemini function declaration format.
+async fn fetch_and_convert_mcp_tools(
+    client: &Client,
+    mcp_base: &str,
+) -> Result<Vec<GeminiFunctionDeclaration>, String> {
+    let request = json!({
+        "jsonrpc": "2.0",
+        "id": 1,
+        "method": "tools/list",
+        "params": {}
+    });
+
+    let response = client
+        .post(mcp_base)
+        .json(&request)
+        .send()
+        .await
+        .map_err(|e| format!("Failed to fetch MCP tools: {e}"))?;
+
+    let body: Value = response
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;
+
+    let tools = body["result"]["tools"]
+        .as_array()
+        .ok_or_else(|| "No tools array in MCP response".to_string())?;
+
+    let mut declarations = Vec::new();
+
+    for tool in tools {
+        let name = tool["name"].as_str().unwrap_or("").to_string();
+        let description = tool["description"].as_str().unwrap_or("").to_string();
+
+        if name.is_empty() {
+            continue;
+        }
+
+        // Convert MCP inputSchema (JSON Schema) to Gemini parameters
+        // (OpenAPI-subset schema). They are structurally compatible for
+        // simple object schemas.
+        let parameters = convert_mcp_schema_to_gemini(tool.get("inputSchema"));
+
+        declarations.push(GeminiFunctionDeclaration {
+            name,
+            description,
+            parameters,
+        });
+    }
+
+    slog!("[gemini] Loaded {} MCP tools as function declarations", declarations.len());
+    Ok(declarations)
+}
+
+/// Convert an MCP inputSchema (JSON Schema) to a Gemini-compatible
+/// OpenAPI-subset parameter schema.
+///
+/// Gemini function calling expects parameters in OpenAPI format, which
+/// is structurally similar to JSON Schema for simple object types.
+/// We strip unsupported fields and ensure the type is "object".
+fn convert_mcp_schema_to_gemini(schema: Option<&Value>) -> Option<Value> {
+    let schema = schema?;
+
+    // If the schema has no properties (empty tool), return None.
+    let properties = schema.get("properties")?;
+    if properties.as_object().is_some_and(|p| p.is_empty()) {
+        return None;
+    }
+
+    let mut result = json!({
+        "type": "object",
+        "properties": clean_schema_properties(properties),
+    });
+
+    // Preserve required fields if present.
+    if let Some(required) = schema.get("required") {
+        result["required"] = required.clone();
+    }
+
+    Some(result)
+}
+
+/// Recursively clean schema properties to be Gemini-compatible.
+/// Removes unsupported JSON Schema keywords.
+fn clean_schema_properties(properties: &Value) -> Value {
+    let Some(obj) = properties.as_object() else {
+        return properties.clone();
+    };
+
+    let mut cleaned = serde_json::Map::new();
+    for (key, value) in obj {
+        let mut prop = value.clone();
+        // Remove JSON Schema keywords not supported by Gemini
+        if let Some(p) = prop.as_object_mut() {
+            p.remove("$schema");
+            p.remove("additionalProperties");
+
+            // Recursively clean nested object properties
+            if let Some(nested_props) = p.get("properties").cloned() {
+                p.insert(
+                    "properties".to_string(),
+                    clean_schema_properties(&nested_props),
+                );
+            }
+
+            // Clean items schema for arrays
+            if let Some(items) = p.get("items").cloned()
+                && let Some(items_obj) = items.as_object()
+            {
+                let mut cleaned_items = items_obj.clone();
+                cleaned_items.remove("$schema");
+                cleaned_items.remove("additionalProperties");
+                p.insert("items".to_string(), Value::Object(cleaned_items));
+            }
+        }
+        cleaned.insert(key.clone(), prop);
+    }
+    Value::Object(cleaned)
+}
+
+/// Call an MCP tool via storkit's MCP server.
+async fn call_mcp_tool(
+    client: &Client,
+    mcp_base: &str,
+    tool_name: &str,
+    args: &Value,
+) -> Result<String, String> {
+    let request = json!({
+        "jsonrpc": "2.0",
+        "id": 1,
+        "method": "tools/call",
+        "params": {
+            "name": tool_name,
+            "arguments": args
+        }
+    });
+
+    let response = client
+        .post(mcp_base)
+        .json(&request)
+        .send()
+        .await
+        .map_err(|e| format!("MCP tool call failed: {e}"))?;
+
+    let body: Value = response
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;
+
+    if let Some(error) = body.get("error") {
+        let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
+        return Err(format!("MCP tool '{tool_name}' error: {msg}"));
+    }
+
+    // MCP tools/call returns { result: { content: [{ type: "text", text: "..." }] } }
+    let content = &body["result"]["content"];
+    if let Some(arr) = content.as_array() {
+        let texts: Vec<&str> = arr
+            .iter()
+            .filter_map(|c| c["text"].as_str())
+            .collect();
+        if !texts.is_empty() {
+            return Ok(texts.join("\n"));
+        }
+    }
+
+    // Fall back to serializing the entire result.
+    Ok(body["result"].to_string())
+}
+
+/// Parse token usage metadata from a Gemini API response.
+fn parse_usage_metadata(response: &Value) -> Option<TokenUsage> {
+    let metadata = response.get("usageMetadata")?;
+    Some(TokenUsage {
+        input_tokens: metadata
+            .get("promptTokenCount")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(0),
+        output_tokens: metadata
+            .get("candidatesTokenCount")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(0),
+        // Gemini doesn't have cache token fields, but we keep the struct uniform.
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        // Google AI API doesn't report cost; leave at 0.
+        total_cost_usd: 0.0,
+    })
+}
+
+// ── Tests ────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn convert_mcp_schema_simple_object() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "story_id": {
+                    "type": "string",
+                    "description": "Story identifier"
+                }
+            },
+            "required": ["story_id"]
+        });
+
+        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
+        assert_eq!(result["type"], "object");
+        assert!(result["properties"]["story_id"].is_object());
+        assert_eq!(result["required"][0], "story_id");
+    }
+
+    #[test]
+    fn convert_mcp_schema_empty_properties_returns_none() {
+        let schema = json!({
+            "type": "object",
+            "properties": {}
+        });
+
+        assert!(convert_mcp_schema_to_gemini(Some(&schema)).is_none());
+    }
+
+    #[test]
+    fn convert_mcp_schema_none_returns_none() {
+        assert!(convert_mcp_schema_to_gemini(None).is_none());
+    }
+
+    #[test]
+    fn convert_mcp_schema_strips_additional_properties() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string",
+                    "additionalProperties": false,
+                    "$schema": "http://json-schema.org/draft-07/schema#"
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
+        let name_prop = &result["properties"]["name"];
+        assert!(name_prop.get("additionalProperties").is_none());
+        assert!(name_prop.get("$schema").is_none());
+        assert_eq!(name_prop["type"], "string");
+    }
+
+    #[test]
+    fn convert_mcp_schema_with_nested_objects() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "config": {
+                    "type": "object",
+                    "properties": {
+                        "key": { "type": "string" }
+                    }
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
+        assert!(result["properties"]["config"]["properties"]["key"].is_object());
+    }
+
+    #[test]
+    fn convert_mcp_schema_with_array_items() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "items": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": { "type": "string" }
+                        },
+                        "additionalProperties": false
+                    }
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
+        let items_schema = &result["properties"]["items"]["items"];
+        assert!(items_schema.get("additionalProperties").is_none());
+    }
+
+    #[test]
+    fn build_system_instruction_uses_args() {
+        let ctx = RuntimeContext {
+            story_id: "42_story_test".to_string(),
+            agent_name: "coder-1".to_string(),
+            command: "gemini-2.5-pro".to_string(),
+            args: vec![
+                "--append-system-prompt".to_string(),
+                "Custom system prompt".to_string(),
+            ],
+            prompt: "Do the thing".to_string(),
+            cwd: "/tmp/wt".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+
+        let instruction = build_system_instruction(&ctx);
+        assert_eq!(instruction["parts"][0]["text"], "Custom system prompt");
+    }
+
+    #[test]
+    fn build_system_instruction_default() {
+        let ctx = RuntimeContext {
+            story_id: "42_story_test".to_string(),
+            agent_name: "coder-1".to_string(),
+            command: "gemini-2.5-pro".to_string(),
+            args: vec![],
+            prompt: "Do the thing".to_string(),
+            cwd: "/tmp/wt".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+
+        let instruction = build_system_instruction(&ctx);
+        let text = instruction["parts"][0]["text"].as_str().unwrap();
+        assert!(text.contains("42_story_test"));
+        assert!(text.contains("/tmp/wt"));
+    }
+
+    #[test]
+    fn build_generate_content_request_includes_tools() {
+        let system = json!({"parts": [{"text": "system"}]});
+        let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
+        let tools = vec![GeminiFunctionDeclaration {
+            name: "my_tool".to_string(),
+            description: "A tool".to_string(),
+            parameters: Some(json!({"type": "object", "properties": {"x": {"type": "string"}}})),
+        }];
+
+        let body = build_generate_content_request(&system, &contents, &tools);
+        assert!(body["tools"][0]["functionDeclarations"].is_array());
+        assert_eq!(body["tools"][0]["functionDeclarations"][0]["name"], "my_tool");
+    }
+
+    #[test]
+    fn build_generate_content_request_no_tools() {
+        let system = json!({"parts": [{"text": "system"}]});
+        let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
+        let tools: Vec<GeminiFunctionDeclaration> = vec![];
+
+        let body = build_generate_content_request(&system, &contents, &tools);
+        assert!(body.get("tools").is_none());
+    }
+
+    #[test]
+    fn parse_usage_metadata_valid() {
+        let response = json!({
+            "usageMetadata": {
+                "promptTokenCount": 100,
+                "candidatesTokenCount": 50,
+                "totalTokenCount": 150
+            }
+        });
+
+        let usage = parse_usage_metadata(&response).unwrap();
+        assert_eq!(usage.input_tokens, 100);
+        assert_eq!(usage.output_tokens, 50);
+        assert_eq!(usage.cache_creation_input_tokens, 0);
+        assert_eq!(usage.total_cost_usd, 0.0);
+    }
+
+    #[test]
+    fn parse_usage_metadata_missing() {
+        let response = json!({"candidates": []});
+        assert!(parse_usage_metadata(&response).is_none());
+    }
+
+    #[test]
+    fn gemini_runtime_stop_sets_cancelled() {
+        let runtime = GeminiRuntime::new();
+        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
+        runtime.stop();
+        assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
+    }
+
+    #[test]
+    fn model_extraction_from_command() {
+        // When command starts with "gemini", use it as model name
+        let ctx = RuntimeContext {
+            story_id: "1".to_string(),
+            agent_name: "coder".to_string(),
+            command: "gemini-2.5-pro".to_string(),
+            args: vec![],
+            prompt: "test".to_string(),
+            cwd: "/tmp".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+
+        // The model extraction logic is inside start(), but we test the
+        // condition here.
+        assert!(ctx.command.starts_with("gemini"));
+    }
+}
--- a/server/src/agents/runtime/mod.rs
+++ b/server/src/agents/runtime/mod.rs
@@ -0,0 +1,163 @@
+mod claude_code;
+mod gemini;
+mod openai;
+
+pub use claude_code::ClaudeCodeRuntime;
+pub use gemini::GeminiRuntime;
+pub use openai::OpenAiRuntime;
+
+use std::sync::{Arc, Mutex};
+use tokio::sync::broadcast;
+
+use crate::agent_log::AgentLogWriter;
+
+use super::{AgentEvent, TokenUsage};
+
+/// Context passed to a runtime when launching an agent session.
+pub struct RuntimeContext {
+    pub story_id: String,
+    pub agent_name: String,
+    pub command: String,
+    pub args: Vec<String>,
+    pub prompt: String,
+    pub cwd: String,
+    pub inactivity_timeout_secs: u64,
+    /// Port of the storkit MCP server, used by API-based runtimes (Gemini, OpenAI)
+    /// to call back for tool execution.
+    pub mcp_port: u16,
+}
+
+/// Result returned by a runtime after the agent session completes.
+pub struct RuntimeResult {
+    pub session_id: Option<String>,
+    pub token_usage: Option<TokenUsage>,
+}
+
+/// Runtime status reported by the backend.
+#[derive(Debug, Clone, PartialEq)]
+#[allow(dead_code)]
+pub enum RuntimeStatus {
+    Idle,
+    Running,
+    Completed,
+    Failed,
+}
+
+/// Abstraction over different agent execution backends.
+///
+/// Implementations:
+/// - [`ClaudeCodeRuntime`]: spawns the `claude` CLI via a PTY (default, `runtime = "claude-code"`)
+///
+/// Future implementations could include OpenAI and Gemini API runtimes.
+#[allow(dead_code)]
+pub trait AgentRuntime: Send + Sync {
+    /// Start the agent and drive it to completion, streaming events through
+    /// the provided broadcast sender and event log.
+    ///
+    /// Returns when the agent session finishes (success or error).
+    async fn start(
+        &self,
+        ctx: RuntimeContext,
+        tx: broadcast::Sender<AgentEvent>,
+        event_log: Arc<Mutex<Vec<AgentEvent>>>,
+        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    ) -> Result<RuntimeResult, String>;
+
+    /// Stop the running agent.
+    fn stop(&self);
+
+    /// Get the current runtime status.
+    fn get_status(&self) -> RuntimeStatus;
+
+    /// Return any events buffered outside the broadcast channel.
+    ///
+    /// PTY-based runtimes stream directly to the broadcast channel; this
+    /// returns empty by default. API-based runtimes may buffer events here.
+    fn stream_events(&self) -> Vec<AgentEvent> {
+        vec![]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn runtime_context_fields() {
+        let ctx = RuntimeContext {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-1".to_string(),
+            command: "claude".to_string(),
+            args: vec!["--model".to_string(), "sonnet".to_string()],
+            prompt: "Do the thing".to_string(),
+            cwd: "/tmp/wt".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+        assert_eq!(ctx.story_id, "42_story_foo");
+        assert_eq!(ctx.agent_name, "coder-1");
+        assert_eq!(ctx.command, "claude");
+        assert_eq!(ctx.args.len(), 2);
+        assert_eq!(ctx.prompt, "Do the thing");
+        assert_eq!(ctx.cwd, "/tmp/wt");
+        assert_eq!(ctx.inactivity_timeout_secs, 300);
+        assert_eq!(ctx.mcp_port, 3001);
+    }
+
+    #[test]
+    fn runtime_result_fields() {
+        let result = RuntimeResult {
+            session_id: Some("sess-123".to_string()),
+            token_usage: Some(TokenUsage {
+                input_tokens: 100,
+                output_tokens: 50,
+                cache_creation_input_tokens: 0,
+                cache_read_input_tokens: 0,
+                total_cost_usd: 0.01,
+            }),
+        };
+        assert_eq!(result.session_id, Some("sess-123".to_string()));
+        assert!(result.token_usage.is_some());
+        let usage = result.token_usage.unwrap();
+        assert_eq!(usage.input_tokens, 100);
+        assert_eq!(usage.output_tokens, 50);
+        assert_eq!(usage.total_cost_usd, 0.01);
+    }
+
+    #[test]
+    fn runtime_result_no_usage() {
+        let result = RuntimeResult {
+            session_id: None,
+            token_usage: None,
+        };
+        assert!(result.session_id.is_none());
+        assert!(result.token_usage.is_none());
+    }
+
+    #[test]
+    fn runtime_status_variants() {
+        assert_eq!(RuntimeStatus::Idle, RuntimeStatus::Idle);
+        assert_ne!(RuntimeStatus::Running, RuntimeStatus::Completed);
+        assert_ne!(RuntimeStatus::Failed, RuntimeStatus::Idle);
+    }
+
+    #[test]
+    fn claude_code_runtime_get_status_returns_idle() {
+        use std::collections::HashMap;
+        use crate::io::watcher::WatcherEvent;
+        let killers = Arc::new(Mutex::new(HashMap::new()));
+        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
+        let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
+        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
+    }
+
+    #[test]
+    fn claude_code_runtime_stream_events_empty() {
+        use std::collections::HashMap;
+        use crate::io::watcher::WatcherEvent;
+        let killers = Arc::new(Mutex::new(HashMap::new()));
+        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
+        let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
+        assert!(runtime.stream_events().is_empty());
+    }
+}
--- a/server/src/agents/runtime/openai.rs
+++ b/server/src/agents/runtime/openai.rs
@@ -0,0 +1,704 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+
+use reqwest::Client;
+use serde_json::{json, Value};
+use tokio::sync::broadcast;
+
+use crate::agent_log::AgentLogWriter;
+use crate::slog;
+
+use super::super::{AgentEvent, TokenUsage};
+use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
+
+// ── Public runtime struct ────────────────────────────────────────────
+
+/// Agent runtime that drives an OpenAI model (GPT-4o, o3, etc.) through
+/// the OpenAI Chat Completions API.
+///
+/// The runtime:
+/// 1. Fetches MCP tool definitions from storkit's MCP server.
+/// 2. Converts them to OpenAI function-calling format.
+/// 3. Sends the agent prompt + tools to the Chat Completions API.
+/// 4. Executes any requested tool calls via MCP `tools/call`.
+/// 5. Loops until the model produces a response with no tool calls.
+/// 6. Tracks token usage from the API response.
+pub struct OpenAiRuntime {
+    /// Whether a stop has been requested.
+    cancelled: Arc<AtomicBool>,
+}
+
+impl OpenAiRuntime {
+    pub fn new() -> Self {
+        Self {
+            cancelled: Arc::new(AtomicBool::new(false)),
+        }
+    }
+}
+
+impl AgentRuntime for OpenAiRuntime {
+    async fn start(
+        &self,
+        ctx: RuntimeContext,
+        tx: broadcast::Sender<AgentEvent>,
+        event_log: Arc<Mutex<Vec<AgentEvent>>>,
+        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    ) -> Result<RuntimeResult, String> {
+        let api_key = std::env::var("OPENAI_API_KEY").map_err(|_| {
+            "OPENAI_API_KEY environment variable is not set. \
+             Set it to your OpenAI API key to use the OpenAI runtime."
+                .to_string()
+        })?;
+
+        let model = if ctx.command.starts_with("gpt") || ctx.command.starts_with("o") {
+            // The pool puts the model into `command` for non-CLI runtimes.
+            ctx.command.clone()
+        } else {
+            // Fall back to args: look for --model <value>
+            ctx.args
+                .iter()
+                .position(|a| a == "--model")
+                .and_then(|i| ctx.args.get(i + 1))
+                .cloned()
+                .unwrap_or_else(|| "gpt-4o".to_string())
+        };
+
+        let mcp_port = ctx.mcp_port;
+        let mcp_base = format!("http://localhost:{mcp_port}/mcp");
+
+        let client = Client::new();
+        let cancelled = Arc::clone(&self.cancelled);
+
+        // Step 1: Fetch MCP tool definitions and convert to OpenAI format.
+        let openai_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;
+
+        // Step 2: Build the initial conversation messages.
+        let system_text = build_system_text(&ctx);
+        let mut messages: Vec<Value> = vec![
+            json!({ "role": "system", "content": system_text }),
+            json!({ "role": "user", "content": ctx.prompt }),
+        ];
+
+        let mut total_usage = TokenUsage {
+            input_tokens: 0,
+            output_tokens: 0,
+            cache_creation_input_tokens: 0,
+            cache_read_input_tokens: 0,
+            total_cost_usd: 0.0,
+        };
+
+        let emit = |event: AgentEvent| {
+            super::super::pty::emit_event(
+                event,
+                &tx,
+                &event_log,
+                log_writer.as_ref().map(|w| w.as_ref()),
+            );
+        };
+
+        emit(AgentEvent::Status {
+            story_id: ctx.story_id.clone(),
+            agent_name: ctx.agent_name.clone(),
+            status: "running".to_string(),
+        });
+
+        // Step 3: Conversation loop.
+        let mut turn = 0u32;
+        let max_turns = 200; // Safety limit
+
+        loop {
+            if cancelled.load(Ordering::Relaxed) {
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: "Agent was stopped by user".to_string(),
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            turn += 1;
+            if turn > max_turns {
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: format!("Exceeded maximum turns ({max_turns})"),
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            slog!(
+                "[openai] Turn {turn} for {}:{}",
+                ctx.story_id,
+                ctx.agent_name
+            );
+
+            let mut request_body = json!({
+                "model": model,
+                "messages": messages,
+                "temperature": 0.2,
+            });
+
+            if !openai_tools.is_empty() {
+                request_body["tools"] = json!(openai_tools);
+            }
+
+            let response = client
+                .post("https://api.openai.com/v1/chat/completions")
+                .bearer_auth(&api_key)
+                .json(&request_body)
+                .send()
+                .await
+                .map_err(|e| format!("OpenAI API request failed: {e}"))?;
+
+            let status = response.status();
+            let body: Value = response
+                .json()
+                .await
+                .map_err(|e| format!("Failed to parse OpenAI API response: {e}"))?;
+
+            if !status.is_success() {
+                let error_msg = body["error"]["message"]
+                    .as_str()
+                    .unwrap_or("Unknown API error");
+                let err = format!("OpenAI API error ({status}): {error_msg}");
+                emit(AgentEvent::Error {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    message: err.clone(),
+                });
+                return Err(err);
+            }
+
+            // Accumulate token usage.
+            if let Some(usage) = parse_usage(&body) {
+                total_usage.input_tokens += usage.input_tokens;
+                total_usage.output_tokens += usage.output_tokens;
+            }
+
+            // Extract the first choice.
+            let choice = body["choices"]
+                .as_array()
+                .and_then(|c| c.first())
+                .ok_or_else(|| "No choices in OpenAI response".to_string())?;
+
+            let message = &choice["message"];
+            let content = message["content"].as_str().unwrap_or("");
+
+            // Emit any text content.
+            if !content.is_empty() {
+                emit(AgentEvent::Output {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    text: content.to_string(),
+                });
+            }
+
+            // Check for tool calls.
+            let tool_calls = message["tool_calls"].as_array();
+
+            if tool_calls.is_none() || tool_calls.is_some_and(|tc| tc.is_empty()) {
+                // No tool calls — model is done.
+                emit(AgentEvent::Done {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    session_id: None,
+                });
+                return Ok(RuntimeResult {
+                    session_id: None,
+                    token_usage: Some(total_usage),
+                });
+            }
+
+            let tool_calls = tool_calls.unwrap();
+
+            // Add the assistant message (with tool_calls) to the conversation.
+            messages.push(message.clone());
+
+            // Execute each tool call via MCP and add results.
+            for tc in tool_calls {
+                if cancelled.load(Ordering::Relaxed) {
+                    break;
+                }
+
+                let call_id = tc["id"].as_str().unwrap_or("");
+                let function = &tc["function"];
+                let tool_name = function["name"].as_str().unwrap_or("");
+                let arguments_str = function["arguments"].as_str().unwrap_or("{}");
+
+                let args: Value = serde_json::from_str(arguments_str).unwrap_or(json!({}));
+
+                slog!(
+                    "[openai] Calling MCP tool '{}' for {}:{}",
+                    tool_name,
+                    ctx.story_id,
+                    ctx.agent_name
+                );
+
+                emit(AgentEvent::Output {
+                    story_id: ctx.story_id.clone(),
+                    agent_name: ctx.agent_name.clone(),
+                    text: format!("\n[Tool call: {tool_name}]\n"),
+                });
+
+                let tool_result = call_mcp_tool(&client, &mcp_base, tool_name, &args).await;
+
+                let result_content = match &tool_result {
+                    Ok(result) => {
+                        emit(AgentEvent::Output {
+                            story_id: ctx.story_id.clone(),
+                            agent_name: ctx.agent_name.clone(),
+                            text: format!("[Tool result: {} chars]\n", result.len()),
+                        });
+                        result.clone()
+                    }
+                    Err(e) => {
+                        emit(AgentEvent::Output {
+                            story_id: ctx.story_id.clone(),
+                            agent_name: ctx.agent_name.clone(),
+                            text: format!("[Tool error: {e}]\n"),
+                        });
+                        format!("Error: {e}")
+                    }
+                };
+
+                // OpenAI expects tool results as role=tool messages with
+                // the matching tool_call_id.
+                messages.push(json!({
+                    "role": "tool",
+                    "tool_call_id": call_id,
+                    "content": result_content,
+                }));
+            }
+        }
+    }
+
+    fn stop(&self) {
+        self.cancelled.store(true, Ordering::Relaxed);
+    }
+
+    fn get_status(&self) -> RuntimeStatus {
+        if self.cancelled.load(Ordering::Relaxed) {
+            RuntimeStatus::Failed
+        } else {
+            RuntimeStatus::Idle
+        }
+    }
+}
+
+// ── Helper functions ─────────────────────────────────────────────────
+
+/// Build the system message text from the RuntimeContext.
+fn build_system_text(ctx: &RuntimeContext) -> String {
+    ctx.args
+        .iter()
+        .position(|a| a == "--append-system-prompt")
+        .and_then(|i| ctx.args.get(i + 1))
+        .cloned()
+        .unwrap_or_else(|| {
+            format!(
+                "You are an AI coding agent working on story {}. \
+                 You have access to tools via function calling. \
+                 Use them to complete the task. \
+                 Work in the directory: {}",
+                ctx.story_id, ctx.cwd
+            )
+        })
+}
+
+/// Fetch MCP tool definitions from storkit's MCP server and convert
+/// them to OpenAI function-calling format.
+async fn fetch_and_convert_mcp_tools(
+    client: &Client,
+    mcp_base: &str,
+) -> Result<Vec<Value>, String> {
+    let request = json!({
+        "jsonrpc": "2.0",
+        "id": 1,
+        "method": "tools/list",
+        "params": {}
+    });
+
+    let response = client
+        .post(mcp_base)
+        .json(&request)
+        .send()
+        .await
+        .map_err(|e| format!("Failed to fetch MCP tools: {e}"))?;
+
+    let body: Value = response
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;
+
+    let tools = body["result"]["tools"]
+        .as_array()
+        .ok_or_else(|| "No tools array in MCP response".to_string())?;
+
+    let mut openai_tools = Vec::new();
+
+    for tool in tools {
+        let name = tool["name"].as_str().unwrap_or("").to_string();
+        let description = tool["description"].as_str().unwrap_or("").to_string();
+
+        if name.is_empty() {
+            continue;
+        }
+
+        // OpenAI function calling uses JSON Schema natively for parameters,
+        // so the MCP inputSchema can be used with minimal cleanup.
+        let parameters = convert_mcp_schema_to_openai(tool.get("inputSchema"));
+
+        openai_tools.push(json!({
+            "type": "function",
+            "function": {
+                "name": name,
+                "description": description,
+                "parameters": parameters.unwrap_or_else(|| json!({"type": "object", "properties": {}})),
+            }
+        }));
+    }
+
+    slog!(
+        "[openai] Loaded {} MCP tools as function definitions",
+        openai_tools.len()
+    );
+    Ok(openai_tools)
+}
+
+/// Convert an MCP inputSchema (JSON Schema) to OpenAI-compatible
+/// function parameters.
+///
+/// OpenAI uses JSON Schema natively, so less transformation is needed
+/// compared to Gemini. We still strip `$schema` to keep payloads clean.
+fn convert_mcp_schema_to_openai(schema: Option<&Value>) -> Option<Value> {
+    let schema = schema?;
+
+    let mut result = json!({
+        "type": "object",
+    });
+
+    if let Some(properties) = schema.get("properties") {
+        result["properties"] = clean_schema_properties(properties);
+    } else {
+        result["properties"] = json!({});
+    }
+
+    if let Some(required) = schema.get("required") {
+        result["required"] = required.clone();
+    }
+
+    // OpenAI recommends additionalProperties: false for strict mode.
+    result["additionalProperties"] = json!(false);
+
+    Some(result)
+}
+
+/// Recursively clean schema properties, removing unsupported keywords.
+fn clean_schema_properties(properties: &Value) -> Value {
+    let Some(obj) = properties.as_object() else {
+        return properties.clone();
+    };
+
+    let mut cleaned = serde_json::Map::new();
+    for (key, value) in obj {
+        let mut prop = value.clone();
+        if let Some(p) = prop.as_object_mut() {
+            p.remove("$schema");
+
+            // Recursively clean nested object properties.
+            if let Some(nested_props) = p.get("properties").cloned() {
+                p.insert(
+                    "properties".to_string(),
+                    clean_schema_properties(&nested_props),
+                );
+            }
+
+            // Clean items schema for arrays.
+            if let Some(items) = p.get("items").cloned()
+                && let Some(items_obj) = items.as_object()
+            {
+                let mut cleaned_items = items_obj.clone();
+                cleaned_items.remove("$schema");
+                p.insert("items".to_string(), Value::Object(cleaned_items));
+            }
+        }
+        cleaned.insert(key.clone(), prop);
+    }
+    Value::Object(cleaned)
+}
+
+/// Call an MCP tool via storkit's MCP server.
+async fn call_mcp_tool(
+    client: &Client,
+    mcp_base: &str,
+    tool_name: &str,
+    args: &Value,
+) -> Result<String, String> {
+    let request = json!({
+        "jsonrpc": "2.0",
+        "id": 1,
+        "method": "tools/call",
+        "params": {
+            "name": tool_name,
+            "arguments": args
+        }
+    });
+
+    let response = client
+        .post(mcp_base)
+        .json(&request)
+        .send()
+        .await
+        .map_err(|e| format!("MCP tool call failed: {e}"))?;
+
+    let body: Value = response
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;
+
+    if let Some(error) = body.get("error") {
+        let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
+        return Err(format!("MCP tool '{tool_name}' error: {msg}"));
+    }
+
+    // MCP tools/call returns { result: { content: [{ type: "text", text: "..." }] } }
+    let content = &body["result"]["content"];
+    if let Some(arr) = content.as_array() {
+        let texts: Vec<&str> = arr
+            .iter()
+            .filter_map(|c| c["text"].as_str())
+            .collect();
+        if !texts.is_empty() {
+            return Ok(texts.join("\n"));
+        }
+    }
+
+    // Fall back to serializing the entire result.
+    Ok(body["result"].to_string())
+}
+
+/// Parse token usage from an OpenAI API response.
+fn parse_usage(response: &Value) -> Option<TokenUsage> {
+    let usage = response.get("usage")?;
+    Some(TokenUsage {
+        input_tokens: usage
+            .get("prompt_tokens")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(0),
+        output_tokens: usage
+            .get("completion_tokens")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(0),
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        // OpenAI API doesn't report cost directly; leave at 0.
+        total_cost_usd: 0.0,
+    })
+}
+
+// ── Tests ────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn convert_mcp_schema_simple_object() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "story_id": {
+                    "type": "string",
+                    "description": "Story identifier"
+                }
+            },
+            "required": ["story_id"]
+        });
+
+        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
+        assert_eq!(result["type"], "object");
+        assert!(result["properties"]["story_id"].is_object());
+        assert_eq!(result["required"][0], "story_id");
+        assert_eq!(result["additionalProperties"], false);
+    }
+
+    #[test]
+    fn convert_mcp_schema_empty_properties() {
+        let schema = json!({
+            "type": "object",
+            "properties": {}
+        });
+
+        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
+        assert_eq!(result["type"], "object");
+        assert!(result["properties"].as_object().unwrap().is_empty());
+    }
+
+    #[test]
+    fn convert_mcp_schema_none_returns_none() {
+        assert!(convert_mcp_schema_to_openai(None).is_none());
+    }
+
+    #[test]
+    fn convert_mcp_schema_strips_dollar_schema() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string",
+                    "$schema": "http://json-schema.org/draft-07/schema#"
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
+        let name_prop = &result["properties"]["name"];
+        assert!(name_prop.get("$schema").is_none());
+        assert_eq!(name_prop["type"], "string");
+    }
+
+    #[test]
+    fn convert_mcp_schema_with_nested_objects() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "config": {
+                    "type": "object",
+                    "properties": {
+                        "key": { "type": "string" }
+                    }
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
+        assert!(result["properties"]["config"]["properties"]["key"].is_object());
+    }
+
+    #[test]
+    fn convert_mcp_schema_with_array_items() {
+        let schema = json!({
+            "type": "object",
+            "properties": {
+                "items": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": { "type": "string" }
+                        },
+                        "$schema": "http://json-schema.org/draft-07/schema#"
+                    }
+                }
+            }
+        });
+
+        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
+        let items_schema = &result["properties"]["items"]["items"];
+        assert!(items_schema.get("$schema").is_none());
+    }
+
+    #[test]
+    fn build_system_text_uses_args() {
+        let ctx = RuntimeContext {
+            story_id: "42_story_test".to_string(),
+            agent_name: "coder-1".to_string(),
+            command: "gpt-4o".to_string(),
+            args: vec![
+                "--append-system-prompt".to_string(),
+                "Custom system prompt".to_string(),
+            ],
+            prompt: "Do the thing".to_string(),
+            cwd: "/tmp/wt".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+
+        assert_eq!(build_system_text(&ctx), "Custom system prompt");
+    }
+
+    #[test]
+    fn build_system_text_default() {
+        let ctx = RuntimeContext {
+            story_id: "42_story_test".to_string(),
+            agent_name: "coder-1".to_string(),
+            command: "gpt-4o".to_string(),
+            args: vec![],
+            prompt: "Do the thing".to_string(),
+            cwd: "/tmp/wt".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+
+        let text = build_system_text(&ctx);
+        assert!(text.contains("42_story_test"));
+        assert!(text.contains("/tmp/wt"));
+    }
+
+    #[test]
+    fn parse_usage_valid() {
+        let response = json!({
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            }
+        });
+
+        let usage = parse_usage(&response).unwrap();
+        assert_eq!(usage.input_tokens, 100);
+        assert_eq!(usage.output_tokens, 50);
+        assert_eq!(usage.cache_creation_input_tokens, 0);
+        assert_eq!(usage.total_cost_usd, 0.0);
+    }
+
+    #[test]
+    fn parse_usage_missing() {
+        let response = json!({"choices": []});
+        assert!(parse_usage(&response).is_none());
+    }
+
+    #[test]
+    fn openai_runtime_stop_sets_cancelled() {
+        let runtime = OpenAiRuntime::new();
+        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
+        runtime.stop();
+        assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
+    }
+
+    #[test]
+    fn model_extraction_from_command_gpt() {
+        let ctx = RuntimeContext {
+            story_id: "1".to_string(),
+            agent_name: "coder".to_string(),
+            command: "gpt-4o".to_string(),
+            args: vec![],
+            prompt: "test".to_string(),
+            cwd: "/tmp".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+        assert!(ctx.command.starts_with("gpt"));
+    }
+
+    #[test]
+    fn model_extraction_from_command_o3() {
+        let ctx = RuntimeContext {
+            story_id: "1".to_string(),
+            agent_name: "coder".to_string(),
+            command: "o3".to_string(),
+            args: vec![],
+            prompt: "test".to_string(),
+            cwd: "/tmp".to_string(),
+            inactivity_timeout_secs: 300,
+            mcp_port: 3001,
+        };
+        assert!(ctx.command.starts_with("o"));
+    }
+}
--- a/server/src/agents/token_usage.rs
+++ b/server/src/agents/token_usage.rs
@@ -0,0 +1,202 @@
+use std::fs;
+use std::path::Path;
+
+use chrono::Utc;
+use serde::{Deserialize, Serialize};
+
+use super::TokenUsage;
+
+/// A single token usage record persisted to disk.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct TokenUsageRecord {
+    pub story_id: String,
+    pub agent_name: String,
+    pub timestamp: String,
+    #[serde(default)]
+    pub model: Option<String>,
+    pub usage: TokenUsage,
+}
+
+/// Append a token usage record to the persistent JSONL file.
+///
+/// Each line is a self-contained JSON object, making appends atomic and
+/// reads simple. The file lives at `.storkit/token_usage.jsonl`.
+pub fn append_record(project_root: &Path, record: &TokenUsageRecord) -> Result<(), String> {
+    let path = token_usage_path(project_root);
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent)
+            .map_err(|e| format!("Failed to create token_usage directory: {e}"))?;
+    }
+    let mut line =
+        serde_json::to_string(record).map_err(|e| format!("Failed to serialize record: {e}"))?;
+    line.push('\n');
+    use std::io::Write;
+    let file = fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&path)
+        .map_err(|e| format!("Failed to open token_usage file: {e}"))?;
+    let mut writer = std::io::BufWriter::new(file);
+    writer
+        .write_all(line.as_bytes())
+        .map_err(|e| format!("Failed to write token_usage record: {e}"))?;
+    writer
+        .flush()
+        .map_err(|e| format!("Failed to flush token_usage file: {e}"))?;
+    Ok(())
+}
+
+/// Read all token usage records from the persistent file.
+pub fn read_all(project_root: &Path) -> Result<Vec<TokenUsageRecord>, String> {
+    let path = token_usage_path(project_root);
+    if !path.exists() {
+        return Ok(Vec::new());
+    }
+    let content =
+        fs::read_to_string(&path).map_err(|e| format!("Failed to read token_usage file: {e}"))?;
+    let mut records = Vec::new();
+    for line in content.lines() {
+        let trimmed = line.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+        match serde_json::from_str::<TokenUsageRecord>(trimmed) {
+            Ok(record) => records.push(record),
+            Err(e) => {
+                crate::slog_warn!("[token_usage] Skipping malformed line: {e}");
+            }
+        }
+    }
+    Ok(records)
+}
+
+/// Build a `TokenUsageRecord` from the parts available at completion time.
+pub fn build_record(
+    story_id: &str,
+    agent_name: &str,
+    model: Option<String>,
+    usage: TokenUsage,
+) -> TokenUsageRecord {
+    TokenUsageRecord {
+        story_id: story_id.to_string(),
+        agent_name: agent_name.to_string(),
+        timestamp: Utc::now().to_rfc3339(),
+        model,
+        usage,
+    }
+}
+
+fn token_usage_path(project_root: &Path) -> std::path::PathBuf {
+    project_root.join(".storkit").join("token_usage.jsonl")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    fn sample_usage() -> TokenUsage {
+        TokenUsage {
+            input_tokens: 100,
+            output_tokens: 200,
+            cache_creation_input_tokens: 5000,
+            cache_read_input_tokens: 10000,
+            total_cost_usd: 1.57,
+        }
+    }
+
+    #[test]
+    fn append_and_read_roundtrip() {
+        let dir = TempDir::new().unwrap();
+        let root = dir.path();
+
+        let record = build_record("42_story_foo", "coder-1", None, sample_usage());
+        append_record(root, &record).unwrap();
+
+        let records = read_all(root).unwrap();
+        assert_eq!(records.len(), 1);
+        assert_eq!(records[0].story_id, "42_story_foo");
+        assert_eq!(records[0].agent_name, "coder-1");
+        assert_eq!(records[0].usage, sample_usage());
+    }
+
+    #[test]
+    fn multiple_appends_accumulate() {
+        let dir = TempDir::new().unwrap();
+        let root = dir.path();
+
+        let r1 = build_record("s1", "coder-1", None, sample_usage());
+        let r2 = build_record("s2", "coder-2", None, sample_usage());
+        append_record(root, &r1).unwrap();
+        append_record(root, &r2).unwrap();
+
+        let records = read_all(root).unwrap();
+        assert_eq!(records.len(), 2);
+        assert_eq!(records[0].story_id, "s1");
+        assert_eq!(records[1].story_id, "s2");
+    }
+
+    #[test]
+    fn read_empty_returns_empty() {
+        let dir = TempDir::new().unwrap();
+        let records = read_all(dir.path()).unwrap();
+        assert!(records.is_empty());
+    }
+
+    #[test]
+    fn malformed_lines_are_skipped() {
+        let dir = TempDir::new().unwrap();
+        let root = dir.path();
+        let path = root.join(".storkit").join("token_usage.jsonl");
+        fs::create_dir_all(path.parent().unwrap()).unwrap();
+        fs::write(&path, "not json\n{\"bad\":true}\n").unwrap();
+
+        let records = read_all(root).unwrap();
+        assert!(records.is_empty());
+    }
+
+    #[test]
+    fn token_usage_from_result_event() {
+        let json = serde_json::json!({
+            "type": "result",
+            "total_cost_usd": 1.57,
+            "usage": {
+                "input_tokens": 7,
+                "output_tokens": 475,
+                "cache_creation_input_tokens": 185020,
+                "cache_read_input_tokens": 810585
+            }
+        });
+
+        let usage = TokenUsage::from_result_event(&json).unwrap();
+        assert_eq!(usage.input_tokens, 7);
+        assert_eq!(usage.output_tokens, 475);
+        assert_eq!(usage.cache_creation_input_tokens, 185020);
+        assert_eq!(usage.cache_read_input_tokens, 810585);
+        assert!((usage.total_cost_usd - 1.57).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn token_usage_from_result_event_missing_usage() {
+        let json = serde_json::json!({"type": "result"});
+        assert!(TokenUsage::from_result_event(&json).is_none());
+    }
+
+    #[test]
+    fn token_usage_from_result_event_partial_fields() {
+        let json = serde_json::json!({
+            "type": "result",
+            "total_cost_usd": 0.5,
+            "usage": {
+                "input_tokens": 10,
+                "output_tokens": 20
+            }
+        });
+
+        let usage = TokenUsage::from_result_event(&json).unwrap();
+        assert_eq!(usage.input_tokens, 10);
+        assert_eq!(usage.output_tokens, 20);
+        assert_eq!(usage.cache_creation_input_tokens, 0);
+        assert_eq!(usage.cache_read_input_tokens, 0);
+    }
+}
--- a/server/src/config.rs
+++ b/server/src/config.rs
@@ -0,0 +1,880 @@
+use crate::slog;
+use serde::Deserialize;
+use std::collections::HashSet;
+use std::path::Path;
+
+#[derive(Debug, Clone, Deserialize)]
+pub struct ProjectConfig {
+    #[serde(default)]
+    pub component: Vec<ComponentConfig>,
+    #[serde(default)]
+    pub agent: Vec<AgentConfig>,
+    #[serde(default)]
+    pub watcher: WatcherConfig,
+    /// Project-wide default QA mode: "server", "agent", or "human".
+    /// Per-story `qa` front matter overrides this. Default: "server".
+    #[serde(default = "default_qa")]
+    pub default_qa: String,
+    /// Default model for coder-stage agents (e.g. "sonnet").
+    /// When set, `find_free_agent_for_stage` only considers coder agents whose
+    /// model matches this value, so opus agents are only used when explicitly
+    /// requested via story front matter `agent:` field.
+    #[serde(default)]
+    pub default_coder_model: Option<String>,
+    /// Maximum number of concurrent coder-stage agents.
+    /// When set, `auto_assign_available_work` will not start more than this many
+    /// coder agents at once. Stories wait in `2_current/` until a slot frees up.
+    #[serde(default)]
+    pub max_coders: Option<usize>,
+    /// Maximum number of retries per story per pipeline stage before marking as blocked.
+    /// Default: 2. Set to 0 to disable retry limits.
+    #[serde(default = "default_max_retries")]
+    pub max_retries: u32,
+}
+
+/// Configuration for the filesystem watcher's sweep behaviour.
+///
+/// Controls how often the watcher checks `5_done/` for items to promote to
+/// `6_archived/`, and how long items must remain in `5_done/` before promotion.
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+pub struct WatcherConfig {
+    /// How often (in seconds) to check `5_done/` for items to archive.
+    /// Default: 60 seconds.
+    #[serde(default = "default_sweep_interval_secs")]
+    pub sweep_interval_secs: u64,
+    /// How long (in seconds) an item must remain in `5_done/` before being
+    /// moved to `6_archived/`. Default: 14400 (4 hours).
+    #[serde(default = "default_done_retention_secs")]
+    pub done_retention_secs: u64,
+}
+
+impl Default for WatcherConfig {
+    fn default() -> Self {
+        Self {
+            sweep_interval_secs: default_sweep_interval_secs(),
+            done_retention_secs: default_done_retention_secs(),
+        }
+    }
+}
+
+fn default_sweep_interval_secs() -> u64 {
+    60
+}
+
+fn default_done_retention_secs() -> u64 {
+    4 * 60 * 60 // 4 hours
+}
+
+fn default_qa() -> String {
+    "server".to_string()
+}
+
+fn default_max_retries() -> u32 {
+    2
+}
+
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)]
+pub struct ComponentConfig {
+    pub name: String,
+    #[serde(default = "default_path")]
+    pub path: String,
+    #[serde(default)]
+    pub setup: Vec<String>,
+    #[serde(default)]
+    pub teardown: Vec<String>,
+}
+
+#[derive(Debug, Clone, Deserialize)]
+pub struct AgentConfig {
+    #[serde(default = "default_agent_name")]
+    pub name: String,
+    #[serde(default)]
+    pub role: String,
+    #[serde(default = "default_agent_command")]
+    pub command: String,
+    #[serde(default)]
+    pub args: Vec<String>,
+    #[serde(default = "default_agent_prompt")]
+    pub prompt: String,
+    #[serde(default)]
+    pub model: Option<String>,
+    #[serde(default)]
+    pub allowed_tools: Option<Vec<String>>,
+    #[serde(default)]
+    pub max_turns: Option<u32>,
+    #[serde(default)]
+    pub max_budget_usd: Option<f64>,
+    #[serde(default)]
+    pub system_prompt: Option<String>,
+    /// Pipeline stage this agent belongs to. Supported values: "coder", "qa",
+    /// "mergemaster", "other". When set, overrides the legacy name-based
+    /// detection used by `pipeline_stage()`.
+    #[serde(default)]
+    pub stage: Option<String>,
+    /// Inactivity timeout in seconds for the PTY read loop.
+    /// If no output is received within this duration, the agent process is killed
+    /// and marked as Failed. Default: 300 (5 minutes). Set to 0 to disable.
+    #[serde(default = "default_inactivity_timeout_secs")]
+    pub inactivity_timeout_secs: u64,
+    /// Agent runtime backend. Controls how the agent process is spawned and
+    /// how events are streamed. Default: `"claude-code"` (spawns the `claude`
+    /// CLI in a PTY). Future values: `"openai"`, `"gemini"`.
+    #[serde(default)]
+    pub runtime: Option<String>,
+}
+
+fn default_path() -> String {
+    ".".to_string()
+}
+
+fn default_agent_name() -> String {
+    "default".to_string()
+}
+
+fn default_inactivity_timeout_secs() -> u64 {
+    300
+}
+
+fn default_agent_command() -> String {
+    "claude".to_string()
+}
+
+fn default_agent_prompt() -> String {
+    "You are working in a git worktree on story {{story_id}}. \
+     Read .storkit/README.md to understand the dev process, then pick up the story. \
+     Commit all your work when done — the server will automatically run acceptance \
+     gates (cargo clippy + tests) when your process exits."
+        .to_string()
+}
+
+/// Legacy config format with `agent` as an optional single table (`[agent]`).
+#[derive(Debug, Deserialize)]
+struct LegacyProjectConfig {
+    #[serde(default)]
+    component: Vec<ComponentConfig>,
+    agent: Option<AgentConfig>,
+    #[serde(default)]
+    watcher: WatcherConfig,
+    #[serde(default = "default_qa")]
+    default_qa: String,
+    #[serde(default)]
+    default_coder_model: Option<String>,
+    #[serde(default)]
+    max_coders: Option<usize>,
+    #[serde(default = "default_max_retries")]
+    max_retries: u32,
+}
+
+impl Default for ProjectConfig {
+    fn default() -> Self {
+        Self {
+            component: Vec::new(),
+            agent: vec![AgentConfig {
+                name: default_agent_name(),
+                role: String::new(),
+                command: default_agent_command(),
+                args: vec![],
+                prompt: default_agent_prompt(),
+                model: None,
+                allowed_tools: None,
+                max_turns: None,
+                max_budget_usd: None,
+                system_prompt: None,
+                stage: None,
+                inactivity_timeout_secs: default_inactivity_timeout_secs(),
+                runtime: None,
+            }],
+            watcher: WatcherConfig::default(),
+            default_qa: default_qa(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: default_max_retries(),
+        }
+    }
+}
+
+impl ProjectConfig {
+    /// Load from `.storkit/project.toml` relative to the given root.
+    /// Falls back to sensible defaults if the file doesn't exist.
+    ///
+    /// Supports both the new `[[agent]]` array format and the legacy
+    /// `[agent]` single-table format (with a deprecation warning).
+    pub fn load(project_root: &Path) -> Result<Self, String> {
+        let config_path = project_root.join(".storkit/project.toml");
+        if !config_path.exists() {
+            return Ok(Self::default());
+        }
+        let content =
+            std::fs::read_to_string(&config_path).map_err(|e| format!("Read config: {e}"))?;
+        Self::parse(&content)
+    }
+
+    /// Parse config from a TOML string, supporting both new and legacy formats.
+    pub fn parse(content: &str) -> Result<Self, String> {
+        // Try new format first (agent as array of tables)
+        match toml::from_str::<ProjectConfig>(content) {
+            Ok(config) if !config.agent.is_empty() => {
+                validate_agents(&config.agent)?;
+                Ok(config)
+            }
+            Ok(config) => {
+                // Parsed successfully but no agents — could be legacy or no agent section.
+                // Try legacy format.
+                if let Ok(legacy) = toml::from_str::<LegacyProjectConfig>(content)
+                    && let Some(agent) = legacy.agent {
+                        slog!(
+                            "[config] Warning: [agent] table is deprecated. \
+                             Use [[agent]] array format instead."
+                        );
+                        let config = ProjectConfig {
+                            component: legacy.component,
+                            agent: vec![agent],
+                            watcher: legacy.watcher,
+                            default_qa: legacy.default_qa,
+                            default_coder_model: legacy.default_coder_model,
+                            max_coders: legacy.max_coders,
+                            max_retries: legacy.max_retries,
+                        };
+                        validate_agents(&config.agent)?;
+                        return Ok(config);
+                    }
+                // No agent section at all
+                Ok(config)
+            }
+            Err(_) => {
+                // New format failed — try legacy
+                let legacy: LegacyProjectConfig =
+                    toml::from_str(content).map_err(|e| format!("Parse config: {e}"))?;
+                if let Some(agent) = legacy.agent {
+                    slog!(
+                        "[config] Warning: [agent] table is deprecated. \
+                         Use [[agent]] array format instead."
+                    );
+                    let config = ProjectConfig {
+                        component: legacy.component,
+                        agent: vec![agent],
+                        watcher: legacy.watcher,
+                        default_qa: legacy.default_qa,
+                        default_coder_model: legacy.default_coder_model,
+                        max_coders: legacy.max_coders,
+                        max_retries: legacy.max_retries,
+                    };
+                    validate_agents(&config.agent)?;
+                    Ok(config)
+                } else {
+                    Ok(ProjectConfig {
+                        component: legacy.component,
+                        agent: Vec::new(),
+                        watcher: legacy.watcher,
+                        default_qa: legacy.default_qa,
+                        default_coder_model: legacy.default_coder_model,
+                        max_coders: legacy.max_coders,
+                        max_retries: legacy.max_retries,
+                    })
+                }
+            }
+        }
+    }
+
+    /// Return the project-wide default QA mode parsed from `default_qa`.
+    /// Falls back to `Server` if the value is unrecognised.
+    pub fn default_qa_mode(&self) -> crate::io::story_metadata::QaMode {
+        crate::io::story_metadata::QaMode::from_str(&self.default_qa)
+            .unwrap_or(crate::io::story_metadata::QaMode::Server)
+    }
+
+    /// Look up an agent config by name.
+    pub fn find_agent(&self, name: &str) -> Option<&AgentConfig> {
+        self.agent.iter().find(|a| a.name == name)
+    }
+
+    /// Get the default (first) agent config.
+    pub fn default_agent(&self) -> Option<&AgentConfig> {
+        self.agent.first()
+    }
+
+    /// Render template variables in agent args and prompt for the given agent.
+    /// If `agent_name` is None, uses the first (default) agent.
+    pub fn render_agent_args(
+        &self,
+        worktree_path: &str,
+        story_id: &str,
+        agent_name: Option<&str>,
+        base_branch: Option<&str>,
+    ) -> Result<(String, Vec<String>, String), String> {
+        let agent = match agent_name {
+            Some(name) => self
+                .find_agent(name)
+                .ok_or_else(|| format!("No agent named '{name}' in config"))?,
+            None => self
+                .default_agent()
+                .ok_or_else(|| "No agents configured".to_string())?,
+        };
+
+        let bb = base_branch.unwrap_or("master");
+        let aname = agent.name.as_str();
+        let render = |s: &str| {
+            s.replace("{{worktree_path}}", worktree_path)
+                .replace("{{story_id}}", story_id)
+                .replace("{{base_branch}}", bb)
+                .replace("{{agent_name}}", aname)
+        };
+
+        let command = render(&agent.command);
+        let mut args: Vec<String> = agent.args.iter().map(|a| render(a)).collect();
+        let prompt = render(&agent.prompt);
+
+        // Append structured CLI flags
+        if let Some(ref model) = agent.model {
+            args.push("--model".to_string());
+            args.push(model.clone());
+        }
+        if let Some(ref tools) = agent.allowed_tools
+            && !tools.is_empty() {
+                args.push("--allowedTools".to_string());
+                args.push(tools.join(","));
+            }
+        if let Some(turns) = agent.max_turns {
+            args.push("--max-turns".to_string());
+            args.push(turns.to_string());
+        }
+        if let Some(budget) = agent.max_budget_usd {
+            args.push("--max-budget-usd".to_string());
+            args.push(budget.to_string());
+        }
+        if let Some(ref sp) = agent.system_prompt {
+            args.push("--append-system-prompt".to_string());
+            args.push(render(sp));
+        }
+
+        Ok((command, args, prompt))
+    }
+}
+
+/// Validate agent configs: no duplicate names, no empty names, positive budgets/turns.
+fn validate_agents(agents: &[AgentConfig]) -> Result<(), String> {
+    let mut names = HashSet::new();
+    for agent in agents {
+        if agent.name.trim().is_empty() {
+            return Err("Agent name must not be empty".to_string());
+        }
+        if !names.insert(&agent.name) {
+            return Err(format!("Duplicate agent name: '{}'", agent.name));
+        }
+        if let Some(budget) = agent.max_budget_usd
+            && budget <= 0.0 {
+                return Err(format!(
+                    "Agent '{}': max_budget_usd must be positive, got {budget}",
+                    agent.name
+                ));
+            }
+        if let Some(turns) = agent.max_turns
+            && turns == 0 {
+                return Err(format!(
+                    "Agent '{}': max_turns must be positive, got 0",
+                    agent.name
+                ));
+            }
+        if let Some(ref runtime) = agent.runtime {
+            match runtime.as_str() {
+                "claude-code" | "gemini" => {}
+                other => {
+                    return Err(format!(
+                        "Agent '{}': unknown runtime '{other}'. Supported: 'claude-code', 'gemini'",
+                        agent.name
+                    ));
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+
+    #[test]
+    fn default_config_when_missing() {
+        let tmp = tempfile::tempdir().unwrap();
+        let config = ProjectConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.agent.len(), 1);
+        assert_eq!(config.agent[0].name, "default");
+        assert!(config.component.is_empty());
+    }
+
+    #[test]
+    fn parse_multi_agent_toml() {
+        let toml_str = r#"
+[[component]]
+name = "server"
+path = "."
+setup = ["cargo check"]
+
+[[agent]]
+name = "supervisor"
+role = "Coordinates work"
+model = "opus"
+max_turns = 50
+max_budget_usd = 10.00
+system_prompt = "You are a senior engineer"
+
+[[agent]]
+name = "coder-1"
+role = "Full-stack engineer"
+model = "sonnet"
+max_turns = 30
+max_budget_usd = 5.00
+"#;
+
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.agent.len(), 2);
+        assert_eq!(config.agent[0].name, "supervisor");
+        assert_eq!(config.agent[0].role, "Coordinates work");
+        assert_eq!(config.agent[0].model, Some("opus".to_string()));
+        assert_eq!(config.agent[0].max_turns, Some(50));
+        assert_eq!(config.agent[0].max_budget_usd, Some(10.0));
+        assert_eq!(
+            config.agent[0].system_prompt,
+            Some("You are a senior engineer".to_string())
+        );
+        assert_eq!(config.agent[1].name, "coder-1");
+        assert_eq!(config.agent[1].model, Some("sonnet".to_string()));
+        assert_eq!(config.component.len(), 1);
+    }
+
+    #[test]
+    fn parse_legacy_single_agent() {
+        let toml_str = r#"
+[[component]]
+name = "server"
+path = "."
+
+[agent]
+command = "claude"
+args = ["--print", "--directory", "{{worktree_path}}"]
+prompt = "Pick up story {{story_id}}"
+"#;
+
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.agent.len(), 1);
+        assert_eq!(config.agent[0].name, "default");
+        assert_eq!(config.agent[0].command, "claude");
+    }
+
+    #[test]
+    fn validate_duplicate_names() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+role = "Engineer"
+
+[[agent]]
+name = "coder"
+role = "Another engineer"
+"#;
+
+        let err = ProjectConfig::parse(toml_str).unwrap_err();
+        assert!(err.contains("Duplicate agent name: 'coder'"));
+    }
+
+    #[test]
+    fn validate_empty_name() {
+        let toml_str = r#"
+[[agent]]
+name = ""
+role = "Engineer"
+"#;
+
+        let err = ProjectConfig::parse(toml_str).unwrap_err();
+        assert!(err.contains("Agent name must not be empty"));
+    }
+
+    #[test]
+    fn validate_non_positive_budget() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+max_budget_usd = -1.0
+"#;
+
+        let err = ProjectConfig::parse(toml_str).unwrap_err();
+        assert!(err.contains("must be positive"));
+    }
+
+    #[test]
+    fn validate_zero_max_turns() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+max_turns = 0
+"#;
+
+        let err = ProjectConfig::parse(toml_str).unwrap_err();
+        assert!(err.contains("max_turns must be positive"));
+    }
+
+    #[test]
+    fn render_agent_args_default() {
+        let config = ProjectConfig::default();
+        let (cmd, args, prompt) = config
+            .render_agent_args("/tmp/wt", "42_foo", None, None)
+            .unwrap();
+        assert_eq!(cmd, "claude");
+        assert!(args.is_empty());
+        assert!(prompt.contains("42_foo"));
+    }
+
+    #[test]
+    fn render_agent_args_by_name() {
+        let toml_str = r#"
+[[agent]]
+name = "supervisor"
+model = "opus"
+max_turns = 50
+max_budget_usd = 10.00
+system_prompt = "You lead story {{story_id}}"
+allowed_tools = ["Read", "Write", "Bash"]
+
+[[agent]]
+name = "coder"
+model = "sonnet"
+max_turns = 30
+"#;
+
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        let (cmd, args, prompt) = config
+            .render_agent_args("/tmp/wt", "42_foo", Some("supervisor"), Some("master"))
+            .unwrap();
+        assert_eq!(cmd, "claude");
+        assert!(args.contains(&"--model".to_string()));
+        assert!(args.contains(&"opus".to_string()));
+        assert!(args.contains(&"--max-turns".to_string()));
+        assert!(args.contains(&"50".to_string()));
+        assert!(args.contains(&"--max-budget-usd".to_string()));
+        assert!(args.contains(&"10".to_string()));
+        assert!(args.contains(&"--allowedTools".to_string()));
+        assert!(args.contains(&"Read,Write,Bash".to_string()));
+        assert!(args.contains(&"--append-system-prompt".to_string()));
+        // System prompt should have template rendered
+        assert!(args.contains(&"You lead story 42_foo".to_string()));
+        assert!(prompt.contains("42_foo"));
+
+        // Render for coder
+        let (_, coder_args, _) = config
+            .render_agent_args("/tmp/wt", "42_foo", Some("coder"), Some("master"))
+            .unwrap();
+        assert!(coder_args.contains(&"sonnet".to_string()));
+        assert!(coder_args.contains(&"30".to_string()));
+        assert!(!coder_args.contains(&"--max-budget-usd".to_string()));
+        assert!(!coder_args.contains(&"--append-system-prompt".to_string()));
+    }
+
+    #[test]
+    fn render_agent_args_not_found() {
+        let config = ProjectConfig::default();
+        let result = config.render_agent_args("/tmp/wt", "42_foo", Some("nonexistent"), None);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("No agent named 'nonexistent'"));
+    }
+
+    #[test]
+    fn find_agent_and_default() {
+        let toml_str = r#"
+[[agent]]
+name = "first"
+
+[[agent]]
+name = "second"
+"#;
+
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.default_agent().unwrap().name, "first");
+        assert_eq!(config.find_agent("second").unwrap().name, "second");
+        assert!(config.find_agent("missing").is_none());
+    }
+
+    #[test]
+    fn parse_project_toml_from_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("project.toml"),
+            r#"
+[[component]]
+name = "server"
+path = "."
+setup = ["cargo check"]
+teardown = []
+
+[[component]]
+name = "frontend"
+path = "frontend"
+setup = ["pnpm install"]
+
+[[agent]]
+name = "main"
+command = "claude"
+args = ["--print", "--directory", "{{worktree_path}}"]
+prompt = "Pick up story {{story_id}}"
+model = "sonnet"
+"#,
+        )
+        .unwrap();
+
+        let config = ProjectConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.component.len(), 2);
+        assert_eq!(config.component[0].name, "server");
+        assert_eq!(config.component[1].setup, vec!["pnpm install"]);
+        assert_eq!(config.agent.len(), 1);
+        assert_eq!(config.agent[0].name, "main");
+        assert_eq!(config.agent[0].model, Some("sonnet".to_string()));
+    }
+
+    // ── WatcherConfig ──────────────────────────────────────────────────────
+
+    #[test]
+    fn watcher_config_defaults_when_omitted() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.watcher.sweep_interval_secs, 60);
+        assert_eq!(config.watcher.done_retention_secs, 4 * 60 * 60);
+    }
+
+    #[test]
+    fn watcher_config_custom_values() {
+        let toml_str = r#"
+[watcher]
+sweep_interval_secs = 30
+done_retention_secs = 7200
+
+[[agent]]
+name = "coder"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.watcher.sweep_interval_secs, 30);
+        assert_eq!(config.watcher.done_retention_secs, 7200);
+    }
+
+    #[test]
+    fn watcher_config_partial_override() {
+        let toml_str = r#"
+[watcher]
+sweep_interval_secs = 10
+
+[[agent]]
+name = "coder"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.watcher.sweep_interval_secs, 10);
+        // done_retention_secs should fall back to the default (4 hours).
+        assert_eq!(config.watcher.done_retention_secs, 4 * 60 * 60);
+    }
+
+    #[test]
+    fn watcher_config_from_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("project.toml"),
+            r#"
+[watcher]
+sweep_interval_secs = 120
+done_retention_secs = 3600
+
+[[agent]]
+name = "coder"
+"#,
+        )
+        .unwrap();
+
+        let config = ProjectConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.watcher.sweep_interval_secs, 120);
+        assert_eq!(config.watcher.done_retention_secs, 3600);
+    }
+
+    #[test]
+    fn watcher_config_default_when_no_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let config = ProjectConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.watcher, WatcherConfig::default());
+    }
+
+    #[test]
+    fn coder_agents_have_root_cause_guidance() {
+        // Load the actual project.toml and verify all coder-stage agents
+        // include root cause investigation guidance for bugs.
+        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+        let project_root = manifest_dir.parent().unwrap();
+        let config = ProjectConfig::load(project_root).unwrap();
+
+        let coder_agents: Vec<_> = config
+            .agent
+            .iter()
+            .filter(|a| a.stage.as_deref() == Some("coder"))
+            .collect();
+
+        assert!(
+            !coder_agents.is_empty(),
+            "Expected at least one coder-stage agent in project.toml"
+        );
+
+        for agent in coder_agents {
+            let prompt = &agent.prompt;
+            let system_prompt = agent.system_prompt.as_deref().unwrap_or("");
+            let combined = format!("{prompt} {system_prompt}");
+
+            assert!(
+                combined.contains("root cause"),
+                "Coder agent '{}' must mention 'root cause' in prompt or system_prompt",
+                agent.name
+            );
+            assert!(
+                combined.contains("git bisect") || combined.contains("git log"),
+                "Coder agent '{}' must mention 'git bisect' or 'git log' for bug investigation",
+                agent.name
+            );
+            assert!(
+                combined.to_lowercase().contains("do not") || combined.contains("surgical"),
+                "Coder agent '{}' must discourage adding abstractions/workarounds",
+                agent.name
+            );
+        }
+    }
+
+    #[test]
+    fn watcher_config_preserved_in_legacy_format() {
+        let toml_str = r#"
+[watcher]
+sweep_interval_secs = 15
+done_retention_secs = 900
+
+[agent]
+command = "claude"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.watcher.sweep_interval_secs, 15);
+        assert_eq!(config.watcher.done_retention_secs, 900);
+        assert_eq!(config.agent.len(), 1);
+    }
+
+    // ── default_coder_model & max_coders ─────────────────────────────────
+
+    #[test]
+    fn parse_default_coder_model_and_max_coders() {
+        let toml_str = r#"
+default_coder_model = "sonnet"
+max_coders = 3
+
+[[agent]]
+name = "coder-1"
+stage = "coder"
+model = "sonnet"
+
+[[agent]]
+name = "coder-opus"
+stage = "coder"
+model = "opus"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.default_coder_model, Some("sonnet".to_string()));
+        assert_eq!(config.max_coders, Some(3));
+    }
+
+    #[test]
+    fn default_coder_model_and_max_coders_default_to_none() {
+        let toml_str = r#"
+[[agent]]
+name = "coder-1"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.default_coder_model, None);
+        assert_eq!(config.max_coders, None);
+    }
+
+    #[test]
+    fn project_toml_has_default_coder_model_and_max_coders() {
+        // Verify the actual project.toml has the new settings.
+        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+        let project_root = manifest_dir.parent().unwrap();
+        let config = ProjectConfig::load(project_root).unwrap();
+        assert_eq!(config.default_coder_model, Some("sonnet".to_string()));
+        assert_eq!(config.max_coders, Some(3));
+    }
+
+    // ── runtime config ────────────────────────────────────────────────
+
+    #[test]
+    fn runtime_defaults_to_none() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.agent[0].runtime, None);
+    }
+
+    #[test]
+    fn runtime_claude_code_accepted() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+runtime = "claude-code"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(
+            config.agent[0].runtime,
+            Some("claude-code".to_string())
+        );
+    }
+
+    #[test]
+    fn runtime_gemini_accepted() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+runtime = "gemini"
+model = "gemini-2.5-pro"
+"#;
+        let config = ProjectConfig::parse(toml_str).unwrap();
+        assert_eq!(config.agent[0].runtime, Some("gemini".to_string()));
+    }
+
+    #[test]
+    fn runtime_unknown_rejected() {
+        let toml_str = r#"
+[[agent]]
+name = "coder"
+runtime = "openai"
+"#;
+        let err = ProjectConfig::parse(toml_str).unwrap_err();
+        assert!(err.contains("unknown runtime 'openai'"));
+    }
+
+    #[test]
+    fn project_toml_has_three_sonnet_coders() {
+        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+        let project_root = manifest_dir.parent().unwrap();
+        let config = ProjectConfig::load(project_root).unwrap();
+
+        let sonnet_coders: Vec<_> = config
+            .agent
+            .iter()
+            .filter(|a| a.stage.as_deref() == Some("coder") && a.model.as_deref() == Some("sonnet"))
+            .collect();
+
+        assert_eq!(
+            sonnet_coders.len(),
+            3,
+            "Expected 3 sonnet coders (coder-1, coder-2, coder-3), found {}",
+            sonnet_coders.len()
+        );
+    }
+}
--- a/server/src/http/agents.rs
+++ b/server/src/http/agents.rs
--- a/server/src/http/agents_sse.rs
+++ b/server/src/http/agents_sse.rs
@@ -0,0 +1,208 @@
+use crate::http::context::AppContext;
+use poem::handler;
+use poem::http::StatusCode;
+use poem::web::{Data, Path};
+use poem::{Body, IntoResponse, Response};
+use std::sync::Arc;
+
+/// SSE endpoint: `GET /agents/:story_id/:agent_name/stream`
+///
+/// Streams `AgentEvent`s as Server-Sent Events. Each event is JSON-encoded
+/// with `data:` prefix and double newline terminator per the SSE spec.
+///
+/// `AgentEvent::Thinking` events are intentionally excluded — thinking traces
+/// are internal model state and must never be displayed in the UI.
+#[handler]
+pub async fn agent_stream(
+    Path((story_id, agent_name)): Path<(String, String)>,
+    ctx: Data<&Arc<AppContext>>,
+) -> impl IntoResponse {
+    let mut rx = match ctx.agents.subscribe(&story_id, &agent_name) {
+        Ok(rx) => rx,
+        Err(e) => {
+            return Response::builder()
+                .status(StatusCode::NOT_FOUND)
+                .body(Body::from_string(e));
+        }
+    };
+
+    let stream = async_stream::stream! {
+        loop {
+            match rx.recv().await {
+                Ok(event) => {
+                    // Never forward thinking traces to the UI — they are
+                    // internal model state and must not be displayed.
+                    if matches!(event, crate::agents::AgentEvent::Thinking { .. }) {
+                        continue;
+                    }
+                    if let Ok(json) = serde_json::to_string(&event) {
+                        yield Ok::<_, std::io::Error>(format!("data: {json}\n\n"));
+                    }
+                    // Check for terminal events
+                    match &event {
+                        crate::agents::AgentEvent::Done { .. }
+                        | crate::agents::AgentEvent::Error { .. } => break,
+                        crate::agents::AgentEvent::Status { status, .. }
+                            if status == "stopped" => break,
+                        _ => {}
+                    }
+                }
+                Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
+                    let msg = format!("{{\"type\":\"warning\",\"message\":\"Skipped {n} events\"}}");
+                    yield Ok::<_, std::io::Error>(format!("data: {msg}\n\n"));
+                }
+                Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
+            }
+        }
+    };
+
+    Response::builder()
+        .header("Content-Type", "text/event-stream")
+        .header("Cache-Control", "no-cache")
+        .header("Connection", "keep-alive")
+        .body(Body::from_bytes_stream(
+            futures::StreamExt::map(stream, |r| r.map(bytes::Bytes::from)),
+        ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::{AgentEvent, AgentStatus};
+    use crate::http::context::AppContext;
+    use poem::{EndpointExt, Route, get};
+    use std::sync::Arc;
+    use tempfile::tempdir;
+
+    fn test_app(ctx: Arc<AppContext>) -> impl poem::Endpoint {
+        Route::new()
+            .at(
+                "/agents/:story_id/:agent_name/stream",
+                get(agent_stream),
+            )
+            .data(ctx)
+    }
+
+    #[tokio::test]
+    async fn thinking_events_are_not_forwarded_via_sse() {
+        let tmp = tempdir().unwrap();
+        let ctx = Arc::new(AppContext::new_test(tmp.path().to_path_buf()));
+
+        // Inject a running agent and get its broadcast sender.
+        let tx = ctx
+            .agents
+            .inject_test_agent("1_story", "coder-1", AgentStatus::Running);
+
+        // Spawn a task that sends events after the SSE connection is established.
+        let tx_clone = tx.clone();
+        tokio::spawn(async move {
+            // Brief pause so the SSE handler has subscribed before we emit.
+            tokio::time::sleep(std::time::Duration::from_millis(5)).await;
+
+            // Thinking event — must be filtered out.
+            let _ = tx_clone.send(AgentEvent::Thinking {
+                story_id: "1_story".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "secret thinking text".to_string(),
+            });
+
+            // Output event — must be forwarded.
+            let _ = tx_clone.send(AgentEvent::Output {
+                story_id: "1_story".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "visible output".to_string(),
+            });
+
+            // Done event — closes the stream.
+            let _ = tx_clone.send(AgentEvent::Done {
+                story_id: "1_story".to_string(),
+                agent_name: "coder-1".to_string(),
+                session_id: None,
+            });
+        });
+
+        let cli = poem::test::TestClient::new(test_app(ctx));
+        let resp = cli
+            .get("/agents/1_story/coder-1/stream")
+            .send()
+            .await;
+
+        let body = resp.0.into_body().into_string().await.unwrap();
+
+        // Thinking content must not appear anywhere in the SSE output.
+        assert!(
+            !body.contains("secret thinking text"),
+            "Thinking text must not be forwarded via SSE: {body}"
+        );
+        assert!(
+            !body.contains("\"type\":\"thinking\""),
+            "Thinking event type must not appear in SSE output: {body}"
+        );
+
+        // Output event must be present.
+        assert!(
+            body.contains("visible output"),
+            "Output event must be forwarded via SSE: {body}"
+        );
+        assert!(
+            body.contains("\"type\":\"output\""),
+            "Output event type must appear in SSE output: {body}"
+        );
+    }
+
+    #[tokio::test]
+    async fn output_and_done_events_are_forwarded_via_sse() {
+        let tmp = tempdir().unwrap();
+        let ctx = Arc::new(AppContext::new_test(tmp.path().to_path_buf()));
+
+        let tx = ctx
+            .agents
+            .inject_test_agent("2_story", "coder-1", AgentStatus::Running);
+
+        let tx_clone = tx.clone();
+        tokio::spawn(async move {
+            tokio::time::sleep(std::time::Duration::from_millis(5)).await;
+
+            let _ = tx_clone.send(AgentEvent::Output {
+                story_id: "2_story".to_string(),
+                agent_name: "coder-1".to_string(),
+                text: "step 1 output".to_string(),
+            });
+
+            let _ = tx_clone.send(AgentEvent::Done {
+                story_id: "2_story".to_string(),
+                agent_name: "coder-1".to_string(),
+                session_id: Some("sess-abc".to_string()),
+            });
+        });
+
+        let cli = poem::test::TestClient::new(test_app(ctx));
+        let resp = cli
+            .get("/agents/2_story/coder-1/stream")
+            .send()
+            .await;
+
+        let body = resp.0.into_body().into_string().await.unwrap();
+
+        assert!(body.contains("step 1 output"), "Output must be forwarded: {body}");
+        assert!(body.contains("\"type\":\"done\""), "Done event must be forwarded: {body}");
+    }
+
+    #[tokio::test]
+    async fn unknown_agent_returns_404() {
+        let tmp = tempdir().unwrap();
+        let ctx = Arc::new(AppContext::new_test(tmp.path().to_path_buf()));
+
+        let cli = poem::test::TestClient::new(test_app(ctx));
+        let resp = cli
+            .get("/agents/nonexistent/coder-1/stream")
+            .send()
+            .await;
+
+        assert_eq!(
+            resp.0.status(),
+            poem::http::StatusCode::NOT_FOUND,
+            "Unknown agent must return 404"
+        );
+    }
+}
--- a/server/src/http/anthropic.rs
+++ b/server/src/http/anthropic.rs
@@ -0,0 +1,318 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::llm::chat;
+use crate::store::StoreOps;
+use poem_openapi::{Object, OpenApi, Tags, payload::Json};
+use reqwest::header::{HeaderMap, HeaderValue};
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+const ANTHROPIC_MODELS_URL: &str = "https://api.anthropic.com/v1/models";
+const ANTHROPIC_VERSION: &str = "2023-06-01";
+const KEY_ANTHROPIC_API_KEY: &str = "anthropic_api_key";
+
+#[derive(Deserialize)]
+struct AnthropicModelsResponse {
+    data: Vec<AnthropicModelInfo>,
+}
+
+#[derive(Deserialize)]
+struct AnthropicModelInfo {
+    id: String,
+    context_window: u64,
+}
+
+#[derive(Serialize, Object)]
+struct AnthropicModelSummary {
+    id: String,
+    context_window: u64,
+}
+
+fn get_anthropic_api_key(ctx: &AppContext) -> Result<String, String> {
+    match ctx.store.get(KEY_ANTHROPIC_API_KEY) {
+        Some(value) => {
+            if let Some(key) = value.as_str() {
+                if key.is_empty() {
+                    Err("Anthropic API key is empty. Please set your API key.".to_string())
+                } else {
+                    Ok(key.to_string())
+                }
+            } else {
+                Err("Stored API key is not a string".to_string())
+            }
+        }
+        None => Err("Anthropic API key not found. Please set your API key.".to_string()),
+    }
+}
+
+#[derive(Deserialize, Object)]
+struct ApiKeyPayload {
+    api_key: String,
+}
+
+#[derive(Tags)]
+enum AnthropicTags {
+    Anthropic,
+}
+
+pub struct AnthropicApi {
+    ctx: Arc<AppContext>,
+}
+
+impl AnthropicApi {
+    pub fn new(ctx: Arc<AppContext>) -> Self {
+        Self { ctx }
+    }
+}
+
+#[OpenApi(tag = "AnthropicTags::Anthropic")]
+impl AnthropicApi {
+    /// Check whether an Anthropic API key is stored.
+    ///
+    /// Returns `true` if a non-empty key is present, otherwise `false`.
+    #[oai(path = "/anthropic/key/exists", method = "get")]
+    async fn get_anthropic_api_key_exists(&self) -> OpenApiResult<Json<bool>> {
+        let exists =
+            chat::get_anthropic_api_key_exists(self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(exists))
+    }
+
+    /// Store or update the Anthropic API key used for requests.
+    ///
+    /// Returns `true` when the key is saved successfully.
+    #[oai(path = "/anthropic/key", method = "post")]
+    async fn set_anthropic_api_key(
+        &self,
+        payload: Json<ApiKeyPayload>,
+    ) -> OpenApiResult<Json<bool>> {
+        chat::set_anthropic_api_key(self.ctx.store.as_ref(), payload.0.api_key)
+            .map_err(bad_request)?;
+        Ok(Json(true))
+    }
+
+    /// List available Anthropic models.
+    #[oai(path = "/anthropic/models", method = "get")]
+    async fn list_anthropic_models(&self) -> OpenApiResult<Json<Vec<AnthropicModelSummary>>> {
+        self.list_anthropic_models_from(ANTHROPIC_MODELS_URL).await
+    }
+}
+
+impl AnthropicApi {
+    async fn list_anthropic_models_from(
+        &self,
+        url: &str,
+    ) -> OpenApiResult<Json<Vec<AnthropicModelSummary>>> {
+        let api_key = get_anthropic_api_key(self.ctx.as_ref()).map_err(bad_request)?;
+        let client = reqwest::Client::new();
+        let mut headers = HeaderMap::new();
+        headers.insert(
+            "x-api-key",
+            HeaderValue::from_str(&api_key).map_err(|e| bad_request(e.to_string()))?,
+        );
+        headers.insert(
+            "anthropic-version",
+            HeaderValue::from_static(ANTHROPIC_VERSION),
+        );
+
+        let response = client
+            .get(url)
+            .headers(headers)
+            .send()
+            .await
+            .map_err(|e| bad_request(e.to_string()))?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let error_text = response
+                .text()
+                .await
+                .unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(bad_request(format!(
+                "Anthropic API error {status}: {error_text}"
+            )));
+        }
+
+        let body = response
+            .json::<AnthropicModelsResponse>()
+            .await
+            .map_err(|e| bad_request(e.to_string()))?;
+        let models = body
+            .data
+            .into_iter()
+            .map(|m| AnthropicModelSummary {
+                id: m.id,
+                context_window: m.context_window,
+            })
+            .collect();
+
+        Ok(Json(models))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use serde_json::json;
+    use std::sync::Arc;
+    use tempfile::TempDir;
+
+    fn test_ctx(dir: &TempDir) -> AppContext {
+        AppContext::new_test(dir.path().to_path_buf())
+    }
+
+    fn make_api(dir: &TempDir) -> AnthropicApi {
+        AnthropicApi::new(Arc::new(test_ctx(dir)))
+    }
+
+    // -- get_anthropic_api_key (private helper) --
+
+    #[test]
+    fn get_api_key_returns_err_when_not_set() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        let result = get_anthropic_api_key(&ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not found"));
+    }
+
+    #[test]
+    fn get_api_key_returns_err_when_empty() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        ctx.store.set(KEY_ANTHROPIC_API_KEY, json!(""));
+        let result = get_anthropic_api_key(&ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("empty"));
+    }
+
+    #[test]
+    fn get_api_key_returns_err_when_not_string() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        ctx.store.set(KEY_ANTHROPIC_API_KEY, json!(12345));
+        let result = get_anthropic_api_key(&ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not a string"));
+    }
+
+    #[test]
+    fn get_api_key_returns_key_when_set() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        ctx.store.set(KEY_ANTHROPIC_API_KEY, json!("sk-ant-test123"));
+        let result = get_anthropic_api_key(&ctx);
+        assert_eq!(result.unwrap(), "sk-ant-test123");
+    }
+
+    // -- get_anthropic_api_key_exists endpoint --
+
+    #[tokio::test]
+    async fn key_exists_returns_false_when_not_set() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.get_anthropic_api_key_exists().await.unwrap();
+        assert!(!result.0);
+    }
+
+    #[tokio::test]
+    async fn key_exists_returns_true_when_set() {
+        let dir = TempDir::new().unwrap();
+        let ctx = AppContext::new_test(dir.path().to_path_buf());
+        ctx.store.set(KEY_ANTHROPIC_API_KEY, json!("sk-ant-test123"));
+        let api = AnthropicApi::new(Arc::new(ctx));
+        let result = api.get_anthropic_api_key_exists().await.unwrap();
+        assert!(result.0);
+    }
+
+    // -- set_anthropic_api_key endpoint --
+
+    #[tokio::test]
+    async fn set_api_key_returns_true() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(ApiKeyPayload {
+            api_key: "sk-ant-test123".to_string(),
+        });
+        let result = api.set_anthropic_api_key(payload).await.unwrap();
+        assert!(result.0);
+    }
+
+    #[tokio::test]
+    async fn set_then_exists_returns_true() {
+        let dir = TempDir::new().unwrap();
+        let ctx = Arc::new(AppContext::new_test(dir.path().to_path_buf()));
+        let api = AnthropicApi::new(ctx);
+        api.set_anthropic_api_key(Json(ApiKeyPayload {
+            api_key: "sk-ant-test123".to_string(),
+        }))
+        .await
+        .unwrap();
+        let result = api.get_anthropic_api_key_exists().await.unwrap();
+        assert!(result.0);
+    }
+
+    // -- list_anthropic_models endpoint --
+
+    #[tokio::test]
+    async fn list_models_fails_when_no_key() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.list_anthropic_models().await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn list_models_fails_with_invalid_header_value() {
+        let dir = TempDir::new().unwrap();
+        let ctx = AppContext::new_test(dir.path().to_path_buf());
+        // A header value containing a newline is invalid
+        ctx.store
+            .set(KEY_ANTHROPIC_API_KEY, json!("bad\nvalue"));
+        let api = AnthropicApi::new(Arc::new(ctx));
+        let result = api.list_anthropic_models_from("http://127.0.0.1:1").await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn list_models_fails_when_server_unreachable() {
+        let dir = TempDir::new().unwrap();
+        let ctx = AppContext::new_test(dir.path().to_path_buf());
+        ctx.store
+            .set(KEY_ANTHROPIC_API_KEY, json!("sk-ant-test123"));
+        let api = AnthropicApi::new(Arc::new(ctx));
+        // Port 1 is reserved and should immediately refuse the connection
+        let result = api.list_anthropic_models_from("http://127.0.0.1:1").await;
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn new_creates_api_instance() {
+        let dir = TempDir::new().unwrap();
+        let _api = make_api(&dir);
+    }
+
+    #[test]
+    fn anthropic_model_info_deserializes_context_window() {
+        let json = json!({
+            "id": "claude-opus-4-5",
+            "context_window": 200000
+        });
+        let info: AnthropicModelInfo = serde_json::from_value(json).unwrap();
+        assert_eq!(info.id, "claude-opus-4-5");
+        assert_eq!(info.context_window, 200000);
+    }
+
+    #[test]
+    fn anthropic_models_response_deserializes_multiple_models() {
+        let json = json!({
+            "data": [
+                { "id": "claude-opus-4-5", "context_window": 200000 },
+                { "id": "claude-haiku-4-5-20251001", "context_window": 100000 }
+            ]
+        });
+        let response: AnthropicModelsResponse = serde_json::from_value(json).unwrap();
+        assert_eq!(response.data.len(), 2);
+        assert_eq!(response.data[0].context_window, 200000);
+        assert_eq!(response.data[1].context_window, 100000);
+    }
+}
--- a/server/src/http/assets.rs
+++ b/server/src/http/assets.rs
@@ -0,0 +1,148 @@
+use poem::{
+    Response, handler,
+    http::{StatusCode, header},
+    web::Path,
+};
+use rust_embed::RustEmbed;
+
+#[derive(RustEmbed)]
+#[folder = "../frontend/dist"]
+struct EmbeddedAssets;
+
+fn serve_embedded(path: &str) -> Response {
+    let normalized = if path.is_empty() {
+        "index.html"
+    } else {
+        path.trim_start_matches('/')
+    };
+
+    let is_asset_request = normalized.starts_with("assets/");
+    let asset = if is_asset_request {
+        EmbeddedAssets::get(normalized)
+    } else {
+        EmbeddedAssets::get(normalized).or_else(|| {
+            if normalized == "index.html" {
+                None
+            } else {
+                EmbeddedAssets::get("index.html")
+            }
+        })
+    };
+
+    match asset {
+        Some(content) => {
+            let body = content.data.into_owned();
+            let mime = mime_guess::from_path(normalized)
+                .first_or_octet_stream()
+                .to_string();
+
+            Response::builder()
+                .status(StatusCode::OK)
+                .header(header::CONTENT_TYPE, mime)
+                .body(body)
+        }
+        None => Response::builder()
+            .status(StatusCode::NOT_FOUND)
+            .body("Not Found"),
+    }
+}
+
+/// Serve a single embedded asset from the `assets/` folder.
+#[handler]
+pub fn embedded_asset(Path(path): Path<String>) -> Response {
+    let asset_path = format!("assets/{path}");
+    serve_embedded(&asset_path)
+}
+
+/// Serve an embedded file by path (falls back to `index.html` for SPA routing).
+#[handler]
+pub fn embedded_file(Path(path): Path<String>) -> Response {
+    serve_embedded(&path)
+}
+
+/// Serve the embedded SPA entrypoint.
+#[handler]
+pub fn embedded_index() -> Response {
+    serve_embedded("index.html")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use poem::http::StatusCode;
+
+    #[test]
+    fn non_asset_path_spa_fallback_or_not_found() {
+        // Non-asset paths fall back to index.html for SPA client-side routing.
+        // In release builds (with embedded dist/) this returns 200.
+        // In debug builds without a built frontend dist/ it returns 404.
+        let response = serve_embedded("__nonexistent_spa_route__.html");
+        let status = response.status();
+        assert!(
+            status == StatusCode::OK || status == StatusCode::NOT_FOUND,
+            "unexpected status: {status}",
+        );
+    }
+
+    #[test]
+    fn missing_asset_path_prefix_returns_not_found() {
+        // assets/ prefix: no SPA fallback – returns 404 if the file does not exist
+        let response = serve_embedded("assets/__nonexistent__.js");
+        assert_eq!(response.status(), StatusCode::NOT_FOUND);
+    }
+
+    #[test]
+    fn serve_embedded_does_not_panic_on_empty_path() {
+        // Empty path normalises to index.html; OK in release, 404 in debug without dist/
+        let response = serve_embedded("");
+        let status = response.status();
+        assert!(
+            status == StatusCode::OK || status == StatusCode::NOT_FOUND,
+            "unexpected status: {status}",
+        );
+    }
+
+    #[test]
+    fn embedded_assets_struct_is_iterable() {
+        // Verifies that rust-embed compiled the EmbeddedAssets struct correctly.
+        // In debug builds without a built frontend dist/ directory the iterator is empty; that is
+        // expected.  In release builds it will contain all bundled frontend files.
+        let _files: Vec<_> = EmbeddedAssets::iter().collect();
+        // No assertion needed – the test passes as long as it compiles and does not panic.
+    }
+
+    #[tokio::test]
+    async fn embedded_index_handler_returns_ok_or_not_found() {
+        // Route the handler through TestClient; index.html is the SPA entry point.
+        let app = poem::Route::new().at("/", poem::get(embedded_index));
+        let cli = poem::test::TestClient::new(app);
+        let resp = cli.get("/").send().await;
+        let status = resp.0.status();
+        assert!(
+            status == StatusCode::OK || status == StatusCode::NOT_FOUND,
+            "unexpected status: {status}",
+        );
+    }
+
+    #[tokio::test]
+    async fn embedded_file_handler_with_path_returns_ok_or_not_found() {
+        // Non-asset paths fall back to index.html (SPA routing) or 404.
+        let app = poem::Route::new().at("/*path", poem::get(embedded_file));
+        let cli = poem::test::TestClient::new(app);
+        let resp = cli.get("/__spa_route__").send().await;
+        let status = resp.0.status();
+        assert!(
+            status == StatusCode::OK || status == StatusCode::NOT_FOUND,
+            "unexpected status: {status}",
+        );
+    }
+
+    #[tokio::test]
+    async fn embedded_asset_handler_missing_file_returns_not_found() {
+        // The assets/ prefix disables SPA fallback; missing files must return 404.
+        let app = poem::Route::new().at("/assets/*path", poem::get(embedded_asset));
+        let cli = poem::test::TestClient::new(app);
+        let resp = cli.get("/assets/__nonexistent__.js").send().await;
+        assert_eq!(resp.0.status(), StatusCode::NOT_FOUND);
+    }
+}
--- a/server/src/http/chat.rs
+++ b/server/src/http/chat.rs
@@ -0,0 +1,58 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::llm::chat;
+use poem_openapi::{OpenApi, Tags, payload::Json};
+use std::sync::Arc;
+
+#[derive(Tags)]
+enum ChatTags {
+    Chat,
+}
+
+pub struct ChatApi {
+    pub ctx: Arc<AppContext>,
+}
+
+#[OpenApi(tag = "ChatTags::Chat")]
+impl ChatApi {
+    /// Cancel the currently running chat stream, if any.
+    ///
+    /// Returns `true` once the cancellation signal is issued.
+    #[oai(path = "/chat/cancel", method = "post")]
+    async fn cancel_chat(&self) -> OpenApiResult<Json<bool>> {
+        chat::cancel_chat(&self.ctx.state).map_err(bad_request)?;
+        Ok(Json(true))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    fn test_api(dir: &TempDir) -> ChatApi {
+        ChatApi {
+            ctx: Arc::new(AppContext::new_test(dir.path().to_path_buf())),
+        }
+    }
+
+    #[tokio::test]
+    async fn cancel_chat_returns_true() {
+        let dir = TempDir::new().unwrap();
+        let api = test_api(&dir);
+        let result = api.cancel_chat().await;
+        assert!(result.is_ok());
+        assert!(result.unwrap().0);
+    }
+
+    #[tokio::test]
+    async fn cancel_chat_sends_cancel_signal() {
+        let dir = TempDir::new().unwrap();
+        let api = test_api(&dir);
+        let mut cancel_rx = api.ctx.state.cancel_rx.clone();
+        cancel_rx.borrow_and_update();
+
+        api.cancel_chat().await.unwrap();
+
+        assert!(*cancel_rx.borrow());
+    }
+}
--- a/server/src/http/context.rs
+++ b/server/src/http/context.rs
@@ -0,0 +1,120 @@
+use crate::agents::{AgentPool, ReconciliationEvent};
+use crate::io::watcher::WatcherEvent;
+use crate::state::SessionState;
+use crate::store::JsonFileStore;
+use crate::workflow::WorkflowState;
+use poem::http::StatusCode;
+use std::sync::Arc;
+use tokio::sync::{broadcast, mpsc, oneshot};
+
+/// The user's decision when responding to a permission dialog.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PermissionDecision {
+    /// One-time denial.
+    Deny,
+    /// One-time approval.
+    Approve,
+    /// Approve and persist the rule to `.claude/settings.json` so Claude Code's
+    /// built-in permission system handles future checks without prompting.
+    AlwaysAllow,
+}
+
+/// A permission request forwarded from the MCP `prompt_permission` tool to the
+/// active WebSocket session. The MCP handler blocks on `response_tx` until the
+/// user approves or denies via the frontend dialog.
+pub struct PermissionForward {
+    pub request_id: String,
+    pub tool_name: String,
+    pub tool_input: serde_json::Value,
+    pub response_tx: oneshot::Sender<PermissionDecision>,
+}
+
+#[derive(Clone)]
+pub struct AppContext {
+    pub state: Arc<SessionState>,
+    pub store: Arc<JsonFileStore>,
+    pub workflow: Arc<std::sync::Mutex<WorkflowState>>,
+    pub agents: Arc<AgentPool>,
+    /// Broadcast channel for filesystem watcher events. WebSocket handlers
+    /// subscribe to this to push lifecycle notifications to connected clients.
+    pub watcher_tx: broadcast::Sender<WatcherEvent>,
+    /// Broadcast channel for startup reconciliation progress events.
+    /// WebSocket handlers subscribe to this to push real-time reconciliation
+    /// updates to connected clients.
+    pub reconciliation_tx: broadcast::Sender<ReconciliationEvent>,
+    /// Sender for permission requests originating from the MCP
+    /// `prompt_permission` tool. The MCP handler sends a [`PermissionForward`]
+    /// and awaits the oneshot response.
+    pub perm_tx: mpsc::UnboundedSender<PermissionForward>,
+    /// Receiver for permission requests. The active WebSocket handler locks
+    /// this and polls for incoming permission forwards.
+    pub perm_rx: Arc<tokio::sync::Mutex<mpsc::UnboundedReceiver<PermissionForward>>>,
+    /// Child process of the QA app launched for manual testing.
+    /// Only one instance runs at a time.
+    pub qa_app_process: Arc<std::sync::Mutex<Option<std::process::Child>>>,
+}
+
+#[cfg(test)]
+impl AppContext {
+    pub fn new_test(project_root: std::path::PathBuf) -> Self {
+        let state = SessionState::default();
+        *state.project_root.lock().unwrap() = Some(project_root.clone());
+        let store_path = project_root.join(".storkit_store.json");
+        let (watcher_tx, _) = broadcast::channel(64);
+        let (reconciliation_tx, _) = broadcast::channel(64);
+        let (perm_tx, perm_rx) = mpsc::unbounded_channel();
+        Self {
+            state: Arc::new(state),
+            store: Arc::new(JsonFileStore::new(store_path).unwrap()),
+            workflow: Arc::new(std::sync::Mutex::new(WorkflowState::default())),
+            agents: Arc::new(AgentPool::new(3001, watcher_tx.clone())),
+            watcher_tx,
+            reconciliation_tx,
+            perm_tx,
+            perm_rx: Arc::new(tokio::sync::Mutex::new(perm_rx)),
+            qa_app_process: Arc::new(std::sync::Mutex::new(None)),
+        }
+    }
+}
+
+pub type OpenApiResult<T> = poem::Result<T>;
+
+pub fn bad_request(message: String) -> poem::Error {
+    poem::Error::from_string(message, StatusCode::BAD_REQUEST)
+}
+
+pub fn not_found(message: String) -> poem::Error {
+    poem::Error::from_string(message, StatusCode::NOT_FOUND)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn bad_request_returns_400_status() {
+        let err = bad_request("something went wrong".to_string());
+        assert_eq!(err.status(), StatusCode::BAD_REQUEST);
+    }
+
+    #[test]
+    fn bad_request_accepts_empty_message() {
+        let err = bad_request(String::new());
+        assert_eq!(err.status(), StatusCode::BAD_REQUEST);
+    }
+
+    #[test]
+    fn permission_decision_equality() {
+        assert_eq!(PermissionDecision::Deny, PermissionDecision::Deny);
+        assert_eq!(PermissionDecision::Approve, PermissionDecision::Approve);
+        assert_eq!(PermissionDecision::AlwaysAllow, PermissionDecision::AlwaysAllow);
+        assert_ne!(PermissionDecision::Deny, PermissionDecision::Approve);
+        assert_ne!(PermissionDecision::Approve, PermissionDecision::AlwaysAllow);
+    }
+
+    #[test]
+    fn not_found_returns_404_status() {
+        let err = not_found("item not found".to_string());
+        assert_eq!(err.status(), StatusCode::NOT_FOUND);
+    }
+}
--- a/server/src/http/health.rs
+++ b/server/src/http/health.rs
@@ -0,0 +1,66 @@
+use poem::handler;
+use poem_openapi::{Object, OpenApi, Tags, payload::Json};
+use serde::Serialize;
+
+/// Health check endpoint.
+///
+/// Returns a static "ok" response to indicate the server is running.
+#[handler]
+pub fn health() -> &'static str {
+    "ok"
+}
+
+#[derive(Tags)]
+enum HealthTags {
+    Health,
+}
+
+#[derive(Serialize, Object)]
+pub struct HealthStatus {
+    status: String,
+}
+
+pub struct HealthApi;
+
+#[OpenApi(tag = "HealthTags::Health")]
+impl HealthApi {
+    /// Health check endpoint.
+    ///
+    /// Returns a JSON status object to confirm the server is running.
+    #[oai(path = "/health", method = "get")]
+    async fn health(&self) -> Json<HealthStatus> {
+        Json(HealthStatus {
+            status: "ok".to_string(),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn handler_health_returns_ok() {
+        let app = poem::Route::new().at("/health", poem::get(health));
+        let cli = poem::test::TestClient::new(app);
+        let resp = cli.get("/health").send().await;
+        resp.assert_status_is_ok();
+        resp.assert_text("ok").await;
+    }
+
+    #[test]
+    fn health_status_serializes_to_json() {
+        let status = HealthStatus {
+            status: "ok".to_string(),
+        };
+        let json = serde_json::to_value(&status).unwrap();
+        assert_eq!(json["status"], "ok");
+    }
+
+    #[tokio::test]
+    async fn api_health_returns_ok_status() {
+        let api = HealthApi;
+        let response = api.health().await;
+        assert_eq!(response.0.status, "ok");
+    }
+}
--- a/server/src/http/io.rs
+++ b/server/src/http/io.rs
@@ -0,0 +1,405 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::io::fs as io_fs;
+use poem_openapi::{Object, OpenApi, Tags, payload::Json};
+use serde::Deserialize;
+use std::sync::Arc;
+
+#[derive(Tags)]
+enum IoTags {
+    Io,
+}
+
+#[derive(Deserialize, Object)]
+struct FilePathPayload {
+    pub path: String,
+}
+
+#[derive(Deserialize, Object)]
+struct WriteFilePayload {
+    pub path: String,
+    pub content: String,
+}
+
+#[derive(Deserialize, Object)]
+struct SearchPayload {
+    query: String,
+}
+
+#[derive(Deserialize, Object)]
+struct CreateDirectoryPayload {
+    pub path: String,
+}
+
+#[derive(Deserialize, Object)]
+struct ExecShellPayload {
+    pub command: String,
+    pub args: Vec<String>,
+}
+
+pub struct IoApi {
+    pub ctx: Arc<AppContext>,
+}
+
+#[OpenApi(tag = "IoTags::Io")]
+impl IoApi {
+    /// Read a file from the currently open project and return its contents.
+    #[oai(path = "/io/fs/read", method = "post")]
+    async fn read_file(&self, payload: Json<FilePathPayload>) -> OpenApiResult<Json<String>> {
+        let content = io_fs::read_file(payload.0.path, &self.ctx.state)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(content))
+    }
+
+    /// Write a file to the currently open project, creating parent directories if needed.
+    #[oai(path = "/io/fs/write", method = "post")]
+    async fn write_file(&self, payload: Json<WriteFilePayload>) -> OpenApiResult<Json<bool>> {
+        io_fs::write_file(payload.0.path, payload.0.content, &self.ctx.state)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(true))
+    }
+
+    /// List files and folders in a directory within the currently open project.
+    #[oai(path = "/io/fs/list", method = "post")]
+    async fn list_directory(
+        &self,
+        payload: Json<FilePathPayload>,
+    ) -> OpenApiResult<Json<Vec<io_fs::FileEntry>>> {
+        let entries = io_fs::list_directory(payload.0.path, &self.ctx.state)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(entries))
+    }
+
+    /// List files and folders at an absolute path (not scoped to the project root).
+    #[oai(path = "/io/fs/list/absolute", method = "post")]
+    async fn list_directory_absolute(
+        &self,
+        payload: Json<FilePathPayload>,
+    ) -> OpenApiResult<Json<Vec<io_fs::FileEntry>>> {
+        let entries = io_fs::list_directory_absolute(payload.0.path)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(entries))
+    }
+
+    /// Create a directory at an absolute path.
+    #[oai(path = "/io/fs/create/absolute", method = "post")]
+    async fn create_directory_absolute(
+        &self,
+        payload: Json<CreateDirectoryPayload>,
+    ) -> OpenApiResult<Json<bool>> {
+        io_fs::create_directory_absolute(payload.0.path)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(true))
+    }
+
+    /// Get the user's home directory.
+    #[oai(path = "/io/fs/home", method = "get")]
+    async fn get_home_directory(&self) -> OpenApiResult<Json<String>> {
+        let home = io_fs::get_home_directory().map_err(bad_request)?;
+        Ok(Json(home))
+    }
+
+    /// List all files in the project recursively, respecting .gitignore.
+    #[oai(path = "/io/fs/files", method = "get")]
+    async fn list_project_files(&self) -> OpenApiResult<Json<Vec<String>>> {
+        let files = io_fs::list_project_files(&self.ctx.state)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(files))
+    }
+
+    /// Search the currently open project for files containing the provided query string.
+    #[oai(path = "/io/search", method = "post")]
+    async fn search_files(
+        &self,
+        payload: Json<SearchPayload>,
+    ) -> OpenApiResult<Json<Vec<crate::io::search::SearchResult>>> {
+        let results = crate::io::search::search_files(payload.0.query, &self.ctx.state)
+            .await
+            .map_err(bad_request)?;
+        Ok(Json(results))
+    }
+
+    /// Execute an allowlisted shell command in the currently open project.
+    #[oai(path = "/io/shell/exec", method = "post")]
+    async fn exec_shell(
+        &self,
+        payload: Json<ExecShellPayload>,
+    ) -> OpenApiResult<Json<crate::io::shell::CommandOutput>> {
+        let output =
+            crate::io::shell::exec_shell(payload.0.command, payload.0.args, &self.ctx.state)
+                .await
+                .map_err(bad_request)?;
+        Ok(Json(output))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use tempfile::TempDir;
+
+    fn make_api(dir: &TempDir) -> IoApi {
+        IoApi {
+            ctx: Arc::new(AppContext::new_test(dir.path().to_path_buf())),
+        }
+    }
+
+    // --- list_directory_absolute ---
+
+    #[tokio::test]
+    async fn list_directory_absolute_returns_entries_for_valid_path() {
+        let dir = TempDir::new().unwrap();
+        std::fs::create_dir(dir.path().join("subdir")).unwrap();
+        std::fs::write(dir.path().join("file.txt"), "content").unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: dir.path().to_string_lossy().to_string(),
+        });
+        let result = api.list_directory_absolute(payload).await.unwrap();
+        let entries = &result.0;
+
+        assert!(entries.len() >= 2);
+        assert!(entries.iter().any(|e| e.name == "subdir" && e.kind == "dir"));
+        assert!(entries.iter().any(|e| e.name == "file.txt" && e.kind == "file"));
+    }
+
+    #[tokio::test]
+    async fn list_directory_absolute_returns_empty_for_empty_dir() {
+        let dir = TempDir::new().unwrap();
+        let empty = dir.path().join("empty");
+        std::fs::create_dir(&empty).unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: empty.to_string_lossy().to_string(),
+        });
+        let result = api.list_directory_absolute(payload).await.unwrap();
+        assert!(result.0.is_empty());
+    }
+
+    #[tokio::test]
+    async fn list_directory_absolute_errors_on_nonexistent_path() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: dir.path().join("nonexistent").to_string_lossy().to_string(),
+        });
+        let result = api.list_directory_absolute(payload).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn list_directory_absolute_errors_on_file_path() {
+        let dir = TempDir::new().unwrap();
+        let file = dir.path().join("not_a_dir.txt");
+        std::fs::write(&file, "content").unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: file.to_string_lossy().to_string(),
+        });
+        let result = api.list_directory_absolute(payload).await;
+        assert!(result.is_err());
+    }
+
+    // --- create_directory_absolute ---
+
+    #[tokio::test]
+    async fn create_directory_absolute_creates_new_dir() {
+        let dir = TempDir::new().unwrap();
+        let new_dir = dir.path().join("new_dir");
+
+        let api = make_api(&dir);
+        let payload = Json(CreateDirectoryPayload {
+            path: new_dir.to_string_lossy().to_string(),
+        });
+        let result = api.create_directory_absolute(payload).await.unwrap();
+        assert!(result.0);
+        assert!(new_dir.is_dir());
+    }
+
+    #[tokio::test]
+    async fn create_directory_absolute_succeeds_for_existing_dir() {
+        let dir = TempDir::new().unwrap();
+        let existing = dir.path().join("existing");
+        std::fs::create_dir(&existing).unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(CreateDirectoryPayload {
+            path: existing.to_string_lossy().to_string(),
+        });
+        let result = api.create_directory_absolute(payload).await.unwrap();
+        assert!(result.0);
+    }
+
+    #[tokio::test]
+    async fn create_directory_absolute_creates_nested_dirs() {
+        let dir = TempDir::new().unwrap();
+        let nested = dir.path().join("a").join("b").join("c");
+
+        let api = make_api(&dir);
+        let payload = Json(CreateDirectoryPayload {
+            path: nested.to_string_lossy().to_string(),
+        });
+        let result = api.create_directory_absolute(payload).await.unwrap();
+        assert!(result.0);
+        assert!(nested.is_dir());
+    }
+
+    // --- get_home_directory ---
+
+    #[tokio::test]
+    async fn get_home_directory_returns_a_path() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.get_home_directory().await.unwrap();
+        let home = &result.0;
+        assert!(!home.is_empty());
+        assert!(std::path::Path::new(home).is_absolute());
+    }
+
+    // --- read_file (project-scoped) ---
+
+    #[tokio::test]
+    async fn read_file_returns_content() {
+        let dir = TempDir::new().unwrap();
+        std::fs::write(dir.path().join("hello.txt"), "hello world").unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: "hello.txt".to_string(),
+        });
+        let result = api.read_file(payload).await.unwrap();
+        assert_eq!(result.0, "hello world");
+    }
+
+    #[tokio::test]
+    async fn read_file_errors_on_missing_file() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: "nonexistent.txt".to_string(),
+        });
+        let result = api.read_file(payload).await;
+        assert!(result.is_err());
+    }
+
+    // --- write_file (project-scoped) ---
+
+    #[tokio::test]
+    async fn write_file_creates_file() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(WriteFilePayload {
+            path: "output.txt".to_string(),
+            content: "written content".to_string(),
+        });
+        let result = api.write_file(payload).await.unwrap();
+        assert!(result.0);
+        assert_eq!(
+            std::fs::read_to_string(dir.path().join("output.txt")).unwrap(),
+            "written content"
+        );
+    }
+
+    #[tokio::test]
+    async fn write_file_creates_parent_dirs() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(WriteFilePayload {
+            path: "sub/dir/file.txt".to_string(),
+            content: "nested".to_string(),
+        });
+        let result = api.write_file(payload).await.unwrap();
+        assert!(result.0);
+        assert_eq!(
+            std::fs::read_to_string(dir.path().join("sub/dir/file.txt")).unwrap(),
+            "nested"
+        );
+    }
+
+    // --- list_project_files ---
+
+    #[tokio::test]
+    async fn list_project_files_returns_file_paths() {
+        let dir = TempDir::new().unwrap();
+        std::fs::create_dir(dir.path().join("src")).unwrap();
+        std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();
+        std::fs::write(dir.path().join("README.md"), "# readme").unwrap();
+
+        let api = make_api(&dir);
+        let result = api.list_project_files().await.unwrap();
+        let files = &result.0;
+
+        assert!(files.contains(&"README.md".to_string()));
+        assert!(files.contains(&"src/main.rs".to_string()));
+    }
+
+    #[tokio::test]
+    async fn list_project_files_excludes_directories() {
+        let dir = TempDir::new().unwrap();
+        std::fs::create_dir(dir.path().join("subdir")).unwrap();
+        std::fs::write(dir.path().join("file.txt"), "").unwrap();
+
+        let api = make_api(&dir);
+        let result = api.list_project_files().await.unwrap();
+        let files = &result.0;
+
+        assert!(files.contains(&"file.txt".to_string()));
+        // Directories should not appear
+        assert!(!files.iter().any(|f| f == "subdir"));
+    }
+
+    #[tokio::test]
+    async fn list_project_files_returns_sorted_paths() {
+        let dir = TempDir::new().unwrap();
+        std::fs::write(dir.path().join("z_last.txt"), "").unwrap();
+        std::fs::write(dir.path().join("a_first.txt"), "").unwrap();
+
+        let api = make_api(&dir);
+        let result = api.list_project_files().await.unwrap();
+        let files = &result.0;
+
+        let a_idx = files.iter().position(|f| f == "a_first.txt").unwrap();
+        let z_idx = files.iter().position(|f| f == "z_last.txt").unwrap();
+        assert!(a_idx < z_idx);
+    }
+
+    // --- list_directory (project-scoped) ---
+
+    #[tokio::test]
+    async fn list_directory_returns_entries() {
+        let dir = TempDir::new().unwrap();
+        std::fs::create_dir(dir.path().join("adir")).unwrap();
+        std::fs::write(dir.path().join("bfile.txt"), "").unwrap();
+
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: ".".to_string(),
+        });
+        let result = api.list_directory(payload).await.unwrap();
+        let entries = &result.0;
+
+        assert!(entries.iter().any(|e| e.name == "adir" && e.kind == "dir"));
+        assert!(entries.iter().any(|e| e.name == "bfile.txt" && e.kind == "file"));
+    }
+
+    #[tokio::test]
+    async fn list_directory_errors_on_nonexistent() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(FilePathPayload {
+            path: "nonexistent_dir".to_string(),
+        });
+        let result = api.list_directory(payload).await;
+        assert!(result.is_err());
+    }
+
+}
--- a/server/src/http/mcp/agent_tools.rs
+++ b/server/src/http/mcp/agent_tools.rs
@@ -0,0 +1,731 @@
+use crate::agents::PipelineStage;
+use crate::config::ProjectConfig;
+use crate::http::context::AppContext;
+use crate::http::settings::get_editor_command_from_store;
+use crate::slog_warn;
+use crate::worktree;
+use serde_json::{json, Value};
+
+pub(super) async fn tool_start_agent(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args.get("agent_name").and_then(|v| v.as_str());
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let info = ctx
+        .agents
+        .start_agent(&project_root, story_id, agent_name, None)
+        .await?;
+
+    // Snapshot coverage baseline from the most recent coverage report (best-effort).
+    if let Some(pct) = read_coverage_percent_from_json(&project_root)
+        && let Err(e) = crate::http::workflow::write_coverage_baseline_to_story_file(
+            &project_root,
+            story_id,
+            pct,
+        )
+    {
+        slog_warn!("[start_agent] Could not write coverage baseline to story file: {e}");
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": info.story_id,
+        "agent_name": info.agent_name,
+        "status": info.status.to_string(),
+        "session_id": info.session_id,
+        "worktree_path": info.worktree_path,
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// Try to read the overall line coverage percentage from the llvm-cov JSON report.
+///
+/// Expects the file at `{project_root}/.storkit/coverage/server.json`.
+/// Returns `None` if the file is absent, unreadable, or cannot be parsed.
+pub(super) fn read_coverage_percent_from_json(project_root: &std::path::Path) -> Option<f64> {
+    let path = project_root
+        .join(".storkit")
+        .join("coverage")
+        .join("server.json");
+    let contents = std::fs::read_to_string(&path).ok()?;
+    let json: Value = serde_json::from_str(&contents).ok()?;
+    // cargo llvm-cov --json format: data[0].totals.lines.percent
+    json.pointer("/data/0/totals/lines/percent")
+        .and_then(|v| v.as_f64())
+}
+
+pub(super) async fn tool_stop_agent(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args
+        .get("agent_name")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: agent_name")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    ctx.agents
+        .stop_agent(&project_root, story_id, agent_name)
+        .await?;
+
+    Ok(format!("Agent '{agent_name}' for story '{story_id}' stopped."))
+}
+
+pub(super) fn tool_list_agents(ctx: &AppContext) -> Result<String, String> {
+    let project_root = ctx.agents.get_project_root(&ctx.state).ok();
+    let agents = ctx.agents.list_agents()?;
+    serde_json::to_string_pretty(&json!(agents
+        .iter()
+        .filter(|a| {
+            project_root
+                .as_deref()
+                .map(|root| !crate::http::agents::story_is_archived(root, &a.story_id))
+                .unwrap_or(true)
+        })
+        .map(|a| json!({
+            "story_id": a.story_id,
+            "agent_name": a.agent_name,
+            "status": a.status.to_string(),
+            "session_id": a.session_id,
+            "worktree_path": a.worktree_path,
+        }))
+        .collect::<Vec<_>>()))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_get_agent_output_poll(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args
+        .get("agent_name")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: agent_name")?;
+
+    // Try draining in-memory events first.
+    match ctx.agents.drain_events(story_id, agent_name) {
+        Ok(drained) => {
+            let done = drained.iter().any(|e| {
+                matches!(
+                    e,
+                    crate::agents::AgentEvent::Done { .. }
+                        | crate::agents::AgentEvent::Error { .. }
+                )
+            });
+
+            let events: Vec<serde_json::Value> = drained
+                .into_iter()
+                .filter_map(|e| serde_json::to_value(&e).ok())
+                .collect();
+
+            serde_json::to_string_pretty(&json!({
+                "events": events,
+                "done": done,
+                "event_count": events.len(),
+                "message": if done { "Agent stream ended." } else if events.is_empty() { "No new events. Call again to continue." } else { "Events returned. Call again to continue." }
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        Err(_) => {
+            // Agent not in memory — fall back to persistent log file.
+            get_agent_output_from_log(story_id, agent_name, ctx)
+        }
+    }
+}
+
+/// Fall back to reading agent output from the persistent log file on disk.
+///
+/// Tries to find the log file via the agent's stored log_session_id first,
+/// then falls back to `find_latest_log` scanning the log directory.
+pub(super) fn get_agent_output_from_log(
+    story_id: &str,
+    agent_name: &str,
+    ctx: &AppContext,
+) -> Result<String, String> {
+    use crate::agent_log;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Try to find the log file: first from in-memory agent info, then by scanning.
+    let log_path = ctx
+        .agents
+        .get_log_info(story_id, agent_name)
+        .map(|(session_id, root)| agent_log::log_file_path(&root, story_id, agent_name, &session_id))
+        .filter(|p| p.exists())
+        .or_else(|| agent_log::find_latest_log(&project_root, story_id, agent_name));
+
+    let log_path = match log_path {
+        Some(p) => p,
+        None => {
+            return serde_json::to_string_pretty(&json!({
+                "events": [],
+                "done": true,
+                "event_count": 0,
+                "message": format!("No agent '{agent_name}' for story '{story_id}' and no log file found."),
+                "source": "none",
+            }))
+            .map_err(|e| format!("Serialization error: {e}"));
+        }
+    };
+
+    match agent_log::read_log(&log_path) {
+        Ok(entries) => {
+            let events: Vec<serde_json::Value> = entries
+                .into_iter()
+                .map(|e| {
+                    let mut val = e.event;
+                    if let serde_json::Value::Object(ref mut map) = val {
+                        map.insert(
+                            "timestamp".to_string(),
+                            serde_json::Value::String(e.timestamp),
+                        );
+                    }
+                    val
+                })
+                .collect();
+
+            let count = events.len();
+            serde_json::to_string_pretty(&json!({
+                "events": events,
+                "done": true,
+                "event_count": count,
+                "message": "Events loaded from persistent log file.",
+                "source": "log_file",
+                "log_file": log_path.to_string_lossy(),
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        Err(e) => Err(format!("Failed to read log file: {e}")),
+    }
+}
+
+pub(super) fn tool_get_agent_config(ctx: &AppContext) -> Result<String, String> {
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let config = ProjectConfig::load(&project_root)?;
+
+    // Collect available (idle) agent names across all stages so the caller can
+    // see at a glance which agents are free to start (story 190).
+    let mut available_names: std::collections::HashSet<String> =
+        std::collections::HashSet::new();
+    for stage in &[
+        PipelineStage::Coder,
+        PipelineStage::Qa,
+        PipelineStage::Mergemaster,
+        PipelineStage::Other,
+    ] {
+        if let Ok(names) = ctx.agents.available_agents_for_stage(&config, stage) {
+            available_names.extend(names);
+        }
+    }
+
+    serde_json::to_string_pretty(&json!(config
+        .agent
+        .iter()
+        .map(|a| json!({
+            "name": a.name,
+            "role": a.role,
+            "model": a.model,
+            "allowed_tools": a.allowed_tools,
+            "max_turns": a.max_turns,
+            "max_budget_usd": a.max_budget_usd,
+            "available": available_names.contains(&a.name),
+        }))
+        .collect::<Vec<_>>()))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_wait_for_agent(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args
+        .get("agent_name")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: agent_name")?;
+    let timeout_ms = args
+        .get("timeout_ms")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(300_000); // default: 5 minutes
+
+    let info = ctx
+        .agents
+        .wait_for_agent(story_id, agent_name, timeout_ms)
+        .await?;
+
+    let commits = match (&info.worktree_path, &info.base_branch) {
+        (Some(wt_path), Some(base)) => get_worktree_commits(wt_path, base).await,
+        _ => None,
+    };
+
+    let completion = info.completion.as_ref().map(|r| json!({
+        "summary": r.summary,
+        "gates_passed": r.gates_passed,
+        "gate_output": r.gate_output,
+    }));
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": info.story_id,
+        "agent_name": info.agent_name,
+        "status": info.status.to_string(),
+        "session_id": info.session_id,
+        "worktree_path": info.worktree_path,
+        "base_branch": info.base_branch,
+        "commits": commits,
+        "completion": completion,
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_create_worktree(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let info = ctx.agents.create_worktree(&project_root, story_id).await?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": story_id,
+        "worktree_path": info.path.to_string_lossy(),
+        "branch": info.branch,
+        "base_branch": info.base_branch,
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) fn tool_list_worktrees(ctx: &AppContext) -> Result<String, String> {
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let entries = worktree::list_worktrees(&project_root)?;
+
+    serde_json::to_string_pretty(&json!(entries
+        .iter()
+        .map(|e| json!({
+            "story_id": e.story_id,
+            "path": e.path.to_string_lossy(),
+        }))
+        .collect::<Vec<_>>()))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_remove_worktree(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let config = ProjectConfig::load(&project_root)?;
+    worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await?;
+
+    Ok(format!("Worktree for story '{story_id}' removed."))
+}
+
+pub(super) fn tool_get_editor_command(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let editor = get_editor_command_from_store(ctx)
+        .ok_or_else(|| "No editor configured. Set one via PUT /api/settings/editor.".to_string())?;
+
+    Ok(format!("{editor} {worktree_path}"))
+}
+
+/// Run `git log <base>..HEAD --oneline` in the worktree and return the commit
+/// summaries, or `None` if git is unavailable or there are no new commits.
+pub(super) async fn get_worktree_commits(worktree_path: &str, base_branch: &str) -> Option<Vec<String>> {
+    let wt = worktree_path.to_string();
+    let base = base_branch.to_string();
+    tokio::task::spawn_blocking(move || {
+        let output = std::process::Command::new("git")
+            .args(["log", &format!("{base}..HEAD"), "--oneline"])
+            .current_dir(&wt)
+            .output()
+            .ok()?;
+
+        if output.status.success() {
+            let lines: Vec<String> = String::from_utf8(output.stdout)
+                .ok()?
+                .lines()
+                .filter(|l| !l.is_empty())
+                .map(|l| l.to_string())
+                .collect();
+            Some(lines)
+        } else {
+            None
+        }
+    })
+    .await
+    .ok()
+    .flatten()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use crate::store::StoreOps;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    #[test]
+    fn tool_list_agents_empty() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_list_agents(&ctx).unwrap();
+        let parsed: Vec<Value> = serde_json::from_str(&result).unwrap();
+        assert!(parsed.is_empty());
+    }
+
+    #[test]
+    fn tool_get_agent_config_no_project_toml_returns_default_agent() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // No project.toml → default config with one fallback agent
+        let result = tool_get_agent_config(&ctx).unwrap();
+        let parsed: Vec<Value> = serde_json::from_str(&result).unwrap();
+        // Default config contains one agent entry with default values
+        assert_eq!(parsed.len(), 1, "default config should have one fallback agent");
+        assert!(parsed[0].get("name").is_some());
+        assert!(parsed[0].get("role").is_some());
+    }
+
+    #[tokio::test]
+    async fn tool_get_agent_output_poll_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_get_agent_output_poll(&json!({"agent_name": "bot"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_get_agent_output_poll_missing_agent_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result =
+            tool_get_agent_output_poll(&json!({"story_id": "1_test"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("agent_name"));
+    }
+
+    #[tokio::test]
+    async fn tool_get_agent_output_poll_no_agent_falls_back_to_empty_log() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // No agent registered, no log file → returns empty response from log fallback
+        let result = tool_get_agent_output_poll(
+            &json!({"story_id": "99_nope", "agent_name": "bot"}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["done"], true);
+        assert_eq!(parsed["event_count"], 0);
+        assert!(
+            parsed["message"].as_str().unwrap_or("").contains("No agent"),
+            "expected 'No agent' message: {parsed}"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_get_agent_output_poll_with_running_agent_returns_empty_events() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // Inject a running agent — no events broadcast yet
+        ctx.agents
+            .inject_test_agent("10_story", "worker", crate::agents::AgentStatus::Running);
+        let result = tool_get_agent_output_poll(
+            &json!({"story_id": "10_story", "agent_name": "worker"}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["done"], false);
+        assert_eq!(parsed["event_count"], 0);
+        assert!(parsed["events"].is_array());
+    }
+
+    #[tokio::test]
+    async fn tool_stop_agent_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_stop_agent(&json!({"agent_name": "bot"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_stop_agent_missing_agent_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_stop_agent(&json!({"story_id": "1_test"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("agent_name"));
+    }
+
+    #[tokio::test]
+    async fn tool_start_agent_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_start_agent(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_start_agent_no_agent_name_no_coder_returns_clear_error() {
+        // Config has only a supervisor — start_agent without agent_name should
+        // refuse rather than silently assigning supervisor.
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        std::fs::create_dir_all(&sk).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            r#"
+[[agent]]
+name = "supervisor"
+stage = "other"
+"#,
+        )
+        .unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_start_agent(&json!({"story_id": "42_my_story"}), &ctx).await;
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(
+            err.contains("coder"),
+            "error should mention 'coder', got: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_start_agent_no_agent_name_picks_coder_not_supervisor() {
+        // Config has supervisor first, then coder-1. Without agent_name the
+        // coder should be selected, not supervisor. The call will fail due to
+        // missing git repo / worktree, but the error must NOT be about
+        // "No coder agent configured".
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        std::fs::create_dir_all(&sk).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            r#"
+[[agent]]
+name = "supervisor"
+stage = "other"
+
+[[agent]]
+name = "coder-1"
+stage = "coder"
+"#,
+        )
+        .unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_start_agent(&json!({"story_id": "42_my_story"}), &ctx).await;
+        // May succeed or fail for infrastructure reasons (no git repo), but
+        // must NOT fail with "No coder agent configured".
+        if let Err(err) = result {
+            assert!(
+                !err.contains("No coder agent configured"),
+                "should not fail on agent selection, got: {err}"
+            );
+            // Should also not complain about supervisor being absent.
+            assert!(
+                !err.contains("supervisor"),
+                "should not select supervisor, got: {err}"
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn tool_create_worktree_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_create_worktree(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_remove_worktree_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_remove_worktree(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[test]
+    fn tool_list_worktrees_empty_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_list_worktrees(&ctx).unwrap();
+        let parsed: Vec<Value> = serde_json::from_str(&result).unwrap();
+        assert!(parsed.is_empty());
+    }
+
+    // ── Editor command tool tests ─────────────────────────────────
+
+    #[test]
+    fn tool_get_editor_command_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_get_editor_command(&json!({}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[test]
+    fn tool_get_editor_command_no_editor_configured() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_get_editor_command(
+            &json!({"worktree_path": "/some/path"}),
+            &ctx,
+        );
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("No editor configured"));
+    }
+
+    #[test]
+    fn tool_get_editor_command_formats_correctly() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        ctx.store.set("editor_command", json!("zed"));
+
+        let result = tool_get_editor_command(
+            &json!({"worktree_path": "/home/user/worktrees/37_my_story"}),
+            &ctx,
+        )
+        .unwrap();
+        assert_eq!(result, "zed /home/user/worktrees/37_my_story");
+    }
+
+    #[test]
+    fn tool_get_editor_command_works_with_vscode() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        ctx.store.set("editor_command", json!("code"));
+
+        let result = tool_get_editor_command(
+            &json!({"worktree_path": "/path/to/worktree"}),
+            &ctx,
+        )
+        .unwrap();
+        assert_eq!(result, "code /path/to/worktree");
+    }
+
+    #[test]
+    fn get_editor_command_in_tools_list() {
+        use super::super::{handle_tools_list};
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "get_editor_command");
+        assert!(tool.is_some(), "get_editor_command missing from tools list");
+        let t = tool.unwrap();
+        assert!(t["description"].is_string());
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_tool_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_wait_for_agent(&json!({"agent_name": "bot"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_tool_missing_agent_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_wait_for_agent(&json!({"story_id": "1_test"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("agent_name"));
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_tool_nonexistent_agent_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result =
+            tool_wait_for_agent(&json!({"story_id": "99_nope", "agent_name": "bot", "timeout_ms": 50}), &ctx)
+                .await;
+        // No agent registered — should error
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_tool_returns_completed_agent() {
+        use crate::agents::AgentStatus;
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        ctx.agents
+            .inject_test_agent("41_story", "worker", AgentStatus::Completed);
+
+        let result = tool_wait_for_agent(
+            &json!({"story_id": "41_story", "agent_name": "worker"}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["status"], "completed");
+        assert_eq!(parsed["story_id"], "41_story");
+        assert_eq!(parsed["agent_name"], "worker");
+        // commits key present (may be null since no real worktree)
+        assert!(parsed.get("commits").is_some());
+        // completion key present (null for agents that didn't call report_completion)
+        assert!(parsed.get("completion").is_some());
+    }
+
+    #[test]
+    fn wait_for_agent_tool_in_list() {
+        use super::super::{handle_tools_list};
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let wait_tool = tools.iter().find(|t| t["name"] == "wait_for_agent");
+        assert!(wait_tool.is_some(), "wait_for_agent missing from tools list");
+        let t = wait_tool.unwrap();
+        assert!(t["description"].as_str().unwrap().contains("block") || t["description"].as_str().unwrap().contains("Block"));
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        assert!(req_names.contains(&"agent_name"));
+    }
+
+    #[test]
+    fn read_coverage_percent_from_json_parses_llvm_cov_format() {
+        use std::fs;
+        let tmp = tempfile::tempdir().unwrap();
+        let cov_dir = tmp.path().join(".storkit/coverage");
+        fs::create_dir_all(&cov_dir).unwrap();
+        let json_content = r#"{"data":[{"totals":{"lines":{"count":100,"covered":78,"percent":78.0}}}]}"#;
+        fs::write(cov_dir.join("server.json"), json_content).unwrap();
+
+        let pct = read_coverage_percent_from_json(tmp.path());
+        assert_eq!(pct, Some(78.0));
+    }
+
+    #[test]
+    fn read_coverage_percent_from_json_returns_none_when_absent() {
+        let tmp = tempfile::tempdir().unwrap();
+        let pct = read_coverage_percent_from_json(tmp.path());
+        assert!(pct.is_none());
+    }
+}
--- a/server/src/http/mcp/diagnostics.rs
+++ b/server/src/http/mcp/diagnostics.rs
@@ -0,0 +1,735 @@
+use crate::agents::move_story_to_stage;
+use crate::http::context::AppContext;
+use crate::log_buffer;
+use crate::slog;
+use crate::slog_warn;
+use serde_json::{Value, json};
+use std::fs;
+
+pub(super) fn tool_get_server_logs(args: &Value) -> Result<String, String> {
+    let lines_count = args
+        .get("lines")
+        .and_then(|v| v.as_u64())
+        .map(|n| n.min(1000) as usize)
+        .unwrap_or(100);
+    let filter = args.get("filter").and_then(|v| v.as_str());
+    let severity = args
+        .get("severity")
+        .and_then(|v| v.as_str())
+        .and_then(log_buffer::LogLevel::from_str_ci);
+
+    let recent = log_buffer::global().get_recent(lines_count, filter, severity.as_ref());
+    let joined = recent.join("\n");
+    // Clamp to lines_count actual lines in case any entry contains embedded newlines.
+    let all_lines: Vec<&str> = joined.lines().collect();
+    let start = all_lines.len().saturating_sub(lines_count);
+    Ok(all_lines[start..].join("\n"))
+}
+
+/// Rebuild the server binary and re-exec (delegates to `crate::rebuild`).
+pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> {
+    slog!("[rebuild] Rebuild and restart requested via MCP tool");
+    let project_root = ctx.state.get_project_root().unwrap_or_default();
+    crate::rebuild::rebuild_and_restart(&ctx.agents, &project_root).await
+}
+
+/// Generate a Claude Code permission rule string for the given tool name and input.
+///
+/// - `Edit` / `Write` / `Read` / `Grep` / `Glob` etc. → just the tool name
+/// - `Bash` → `Bash(first_word *)` derived from the `command` field in `tool_input`
+/// - `mcp__*` → the full tool name (e.g. `mcp__storkit__create_story`)
+fn generate_permission_rule(tool_name: &str, tool_input: &Value) -> String {
+    if tool_name == "Bash" {
+        // Extract command from tool_input.command and use first word as prefix
+        let command_str = tool_input
+            .get("command")
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        let first_word = command_str.split_whitespace().next().unwrap_or("unknown");
+        format!("Bash({first_word} *)")
+    } else {
+        // For Edit, Write, Read, Glob, Grep, MCP tools, etc. — use the tool name directly
+        tool_name.to_string()
+    }
+}
+
+/// Add a permission rule to `.claude/settings.json` in the project root.
+/// Does nothing if the rule already exists. Creates the file if missing.
+pub(super) fn add_permission_rule(
+    project_root: &std::path::Path,
+    rule: &str,
+) -> Result<(), String> {
+    let claude_dir = project_root.join(".claude");
+    fs::create_dir_all(&claude_dir)
+        .map_err(|e| format!("Failed to create .claude/ directory: {e}"))?;
+
+    let settings_path = claude_dir.join("settings.json");
+    let mut settings: Value = if settings_path.exists() {
+        let content = fs::read_to_string(&settings_path)
+            .map_err(|e| format!("Failed to read settings.json: {e}"))?;
+        serde_json::from_str(&content).map_err(|e| format!("Failed to parse settings.json: {e}"))?
+    } else {
+        json!({ "permissions": { "allow": [] } })
+    };
+
+    let allow_arr = settings
+        .pointer_mut("/permissions/allow")
+        .and_then(|v| v.as_array_mut());
+
+    let allow = match allow_arr {
+        Some(arr) => arr,
+        None => {
+            // Ensure the structure exists
+            settings
+                .as_object_mut()
+                .unwrap()
+                .entry("permissions")
+                .or_insert(json!({ "allow": [] }));
+            settings
+                .pointer_mut("/permissions/allow")
+                .unwrap()
+                .as_array_mut()
+                .unwrap()
+        }
+    };
+
+    // Check for duplicates — exact string match
+    let rule_value = Value::String(rule.to_string());
+    if allow.contains(&rule_value) {
+        return Ok(());
+    }
+
+    // Also check for wildcard coverage: if "mcp__storkit__*" exists, don't add
+    // a more specific "mcp__storkit__create_story".
+    let dominated = allow.iter().any(|existing| {
+        if let Some(pat) = existing.as_str()
+            && let Some(prefix) = pat.strip_suffix('*')
+        {
+            return rule.starts_with(prefix);
+        }
+        false
+    });
+    if dominated {
+        return Ok(());
+    }
+
+    allow.push(rule_value);
+
+    let pretty =
+        serde_json::to_string_pretty(&settings).map_err(|e| format!("Failed to serialize: {e}"))?;
+    fs::write(&settings_path, pretty).map_err(|e| format!("Failed to write settings.json: {e}"))?;
+    Ok(())
+}
+
+/// MCP tool called by Claude Code via `--permission-prompt-tool`.
+///
+/// Forwards the permission request through the shared channel to the active
+/// WebSocket session, which presents a dialog to the user. Blocks until the
+/// user approves or denies (with a 5-minute timeout).
+pub(super) async fn tool_prompt_permission(
+    args: &Value,
+    ctx: &AppContext,
+) -> Result<String, String> {
+    let tool_name = args
+        .get("tool_name")
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown")
+        .to_string();
+    let tool_input = args.get("input").cloned().unwrap_or(json!({}));
+
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let (response_tx, response_rx) = tokio::sync::oneshot::channel();
+
+    ctx.perm_tx
+        .send(crate::http::context::PermissionForward {
+            request_id: request_id.clone(),
+            tool_name: tool_name.clone(),
+            tool_input: tool_input.clone(),
+            response_tx,
+        })
+        .map_err(|_| "No active WebSocket session to receive permission request".to_string())?;
+
+    use crate::http::context::PermissionDecision;
+
+    let decision = tokio::time::timeout(std::time::Duration::from_secs(300), response_rx)
+        .await
+        .map_err(|_| {
+            let msg = format!("Permission request for '{tool_name}' timed out after 5 minutes");
+            slog_warn!("[permission] {msg}");
+            msg
+        })?
+        .map_err(|_| "Permission response channel closed unexpectedly".to_string())?;
+
+    if decision == PermissionDecision::AlwaysAllow {
+        // Persist the rule so Claude Code won't prompt again for this tool.
+        if let Some(root) = ctx.state.project_root.lock().unwrap().clone() {
+            let rule = generate_permission_rule(&tool_name, &tool_input);
+            if let Err(e) = add_permission_rule(&root, &rule) {
+                slog_warn!("[permission] Failed to write always-allow rule: {e}");
+            } else {
+                slog!("[permission] Added always-allow rule: {rule}");
+            }
+        }
+    }
+
+    if decision == PermissionDecision::Approve || decision == PermissionDecision::AlwaysAllow {
+        // Claude Code SDK expects:
+        //   Allow:  { behavior: "allow", updatedInput: <record> }
+        //   Deny:   { behavior: "deny", message: string }
+        Ok(json!({"behavior": "allow", "updatedInput": tool_input}).to_string())
+    } else {
+        slog_warn!("[permission] User denied permission for '{tool_name}'");
+        Ok(json!({
+            "behavior": "deny",
+            "message": format!("User denied permission for '{tool_name}'")
+        })
+        .to_string())
+    }
+}
+
+pub(super) fn tool_get_token_usage(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let root = ctx.state.get_project_root()?;
+    let filter_story = args.get("story_id").and_then(|v| v.as_str());
+
+    let all_records = crate::agents::token_usage::read_all(&root)?;
+    let records: Vec<_> = all_records
+        .into_iter()
+        .filter(|r| filter_story.is_none_or(|s| r.story_id == s))
+        .collect();
+
+    let total_cost: f64 = records.iter().map(|r| r.usage.total_cost_usd).sum();
+    let total_input: u64 = records.iter().map(|r| r.usage.input_tokens).sum();
+    let total_output: u64 = records.iter().map(|r| r.usage.output_tokens).sum();
+    let total_cache_create: u64 = records
+        .iter()
+        .map(|r| r.usage.cache_creation_input_tokens)
+        .sum();
+    let total_cache_read: u64 = records
+        .iter()
+        .map(|r| r.usage.cache_read_input_tokens)
+        .sum();
+
+    serde_json::to_string_pretty(&json!({
+        "records": records.iter().map(|r| json!({
+            "story_id": r.story_id,
+            "agent_name": r.agent_name,
+            "timestamp": r.timestamp,
+            "input_tokens": r.usage.input_tokens,
+            "output_tokens": r.usage.output_tokens,
+            "cache_creation_input_tokens": r.usage.cache_creation_input_tokens,
+            "cache_read_input_tokens": r.usage.cache_read_input_tokens,
+            "total_cost_usd": r.usage.total_cost_usd,
+        })).collect::<Vec<_>>(),
+        "totals": {
+            "records": records.len(),
+            "input_tokens": total_input,
+            "output_tokens": total_output,
+            "cache_creation_input_tokens": total_cache_create,
+            "cache_read_input_tokens": total_cache_read,
+            "total_cost_usd": total_cost,
+        }
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) fn tool_move_story(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let target_stage = args
+        .get("target_stage")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: target_stage")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    let (from_stage, to_stage) = move_story_to_stage(&project_root, story_id, target_stage)?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": story_id,
+        "from_stage": from_stage,
+        "to_stage": to_stage,
+        "message": format!("Work item '{story_id}' moved from '{from_stage}' to '{to_stage}'.")
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    #[test]
+    fn tool_get_server_logs_no_args_returns_string() {
+        let result = tool_get_server_logs(&json!({})).unwrap();
+        // Returns recent log lines (possibly empty in tests) — just verify no panic
+        let _ = result;
+    }
+
+    #[test]
+    fn tool_get_server_logs_with_filter_returns_matching_lines() {
+        let result = tool_get_server_logs(&json!({"filter": "xyz_unlikely_match_999"})).unwrap();
+        assert_eq!(
+            result, "",
+            "filter with no matches should return empty string"
+        );
+    }
+
+    #[test]
+    fn tool_get_server_logs_with_line_limit() {
+        let result = tool_get_server_logs(&json!({"lines": 5})).unwrap();
+        assert!(result.lines().count() <= 5);
+    }
+
+    #[test]
+    fn tool_get_server_logs_max_cap_is_1000() {
+        // Lines > 1000 are capped — just verify it returns without error
+        let result = tool_get_server_logs(&json!({"lines": 9999})).unwrap();
+        let _ = result;
+    }
+
+    #[test]
+    fn tool_get_token_usage_empty_returns_zero_totals() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_get_token_usage(&json!({}), &ctx).unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["records"].as_array().unwrap().len(), 0);
+        assert_eq!(parsed["totals"]["records"], 0);
+        assert_eq!(parsed["totals"]["total_cost_usd"], 0.0);
+    }
+
+    #[test]
+    fn tool_get_token_usage_returns_written_records() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let ctx = test_ctx(root);
+
+        let usage = crate::agents::TokenUsage {
+            input_tokens: 100,
+            output_tokens: 200,
+            cache_creation_input_tokens: 5000,
+            cache_read_input_tokens: 10000,
+            total_cost_usd: 1.57,
+        };
+        let record =
+            crate::agents::token_usage::build_record("42_story_foo", "coder-1", None, usage);
+        crate::agents::token_usage::append_record(root, &record).unwrap();
+
+        let result = tool_get_token_usage(&json!({}), &ctx).unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["records"].as_array().unwrap().len(), 1);
+        assert_eq!(parsed["records"][0]["story_id"], "42_story_foo");
+        assert_eq!(parsed["records"][0]["agent_name"], "coder-1");
+        assert_eq!(parsed["records"][0]["input_tokens"], 100);
+        assert_eq!(parsed["totals"]["records"], 1);
+        assert!((parsed["totals"]["total_cost_usd"].as_f64().unwrap() - 1.57).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn tool_get_token_usage_filters_by_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let ctx = test_ctx(root);
+
+        let usage = crate::agents::TokenUsage {
+            input_tokens: 50,
+            output_tokens: 60,
+            cache_creation_input_tokens: 0,
+            cache_read_input_tokens: 0,
+            total_cost_usd: 0.5,
+        };
+        let r1 =
+            crate::agents::token_usage::build_record("10_story_a", "coder-1", None, usage.clone());
+        let r2 = crate::agents::token_usage::build_record("20_story_b", "coder-2", None, usage);
+        crate::agents::token_usage::append_record(root, &r1).unwrap();
+        crate::agents::token_usage::append_record(root, &r2).unwrap();
+
+        let result = tool_get_token_usage(&json!({"story_id": "10_story_a"}), &ctx).unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["records"].as_array().unwrap().len(), 1);
+        assert_eq!(parsed["records"][0]["story_id"], "10_story_a");
+        assert_eq!(parsed["totals"]["records"], 1);
+    }
+
+    #[tokio::test]
+    async fn tool_prompt_permission_approved_returns_updated_input() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+
+        // Spawn a task that immediately sends approval through the channel.
+        let perm_rx = ctx.perm_rx.clone();
+        tokio::spawn(async move {
+            let mut rx = perm_rx.lock().await;
+            if let Some(forward) = rx.recv().await {
+                let _ = forward
+                    .response_tx
+                    .send(crate::http::context::PermissionDecision::Approve);
+            }
+        });
+
+        let result = tool_prompt_permission(
+            &json!({"tool_name": "Bash", "input": {"command": "echo hello"}}),
+            &ctx,
+        )
+        .await
+        .expect("should succeed on approval");
+
+        let parsed: Value = serde_json::from_str(&result).expect("result should be valid JSON");
+        assert_eq!(
+            parsed["behavior"], "allow",
+            "approved must return behavior:allow"
+        );
+        assert_eq!(
+            parsed["updatedInput"]["command"], "echo hello",
+            "approved must return updatedInput with original tool input for Claude Code SDK compatibility"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_prompt_permission_denied_returns_deny_json() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+
+        // Spawn a task that immediately sends denial through the channel.
+        let perm_rx = ctx.perm_rx.clone();
+        tokio::spawn(async move {
+            let mut rx = perm_rx.lock().await;
+            if let Some(forward) = rx.recv().await {
+                let _ = forward
+                    .response_tx
+                    .send(crate::http::context::PermissionDecision::Deny);
+            }
+        });
+
+        let result = tool_prompt_permission(&json!({"tool_name": "Write", "input": {}}), &ctx)
+            .await
+            .expect("denial must return Ok, not Err");
+
+        let parsed: Value = serde_json::from_str(&result).expect("result should be valid JSON");
+        assert_eq!(
+            parsed["behavior"], "deny",
+            "denied must return behavior:deny"
+        );
+        assert!(parsed["message"].is_string(), "deny must include a message");
+    }
+
+    // ── Permission rule generation tests ─────────────────────────
+
+    #[test]
+    fn generate_rule_for_edit_tool() {
+        let rule = generate_permission_rule("Edit", &json!({}));
+        assert_eq!(rule, "Edit");
+    }
+
+    #[test]
+    fn generate_rule_for_write_tool() {
+        let rule = generate_permission_rule("Write", &json!({}));
+        assert_eq!(rule, "Write");
+    }
+
+    #[test]
+    fn generate_rule_for_bash_git() {
+        let rule = generate_permission_rule("Bash", &json!({"command": "git status"}));
+        assert_eq!(rule, "Bash(git *)");
+    }
+
+    #[test]
+    fn generate_rule_for_bash_cargo() {
+        let rule = generate_permission_rule("Bash", &json!({"command": "cargo test --all"}));
+        assert_eq!(rule, "Bash(cargo *)");
+    }
+
+    #[test]
+    fn generate_rule_for_bash_empty_command() {
+        let rule = generate_permission_rule("Bash", &json!({}));
+        assert_eq!(rule, "Bash(unknown *)");
+    }
+
+    #[test]
+    fn generate_rule_for_mcp_tool() {
+        let rule = generate_permission_rule("mcp__storkit__create_story", &json!({"name": "foo"}));
+        assert_eq!(rule, "mcp__storkit__create_story");
+    }
+
+    // ── Settings.json writing tests ──────────────────────────────
+
+    #[test]
+    fn add_rule_creates_settings_file_when_missing() {
+        let tmp = tempfile::tempdir().unwrap();
+        add_permission_rule(tmp.path(), "Edit").unwrap();
+
+        let content = fs::read_to_string(tmp.path().join(".claude/settings.json")).unwrap();
+        let settings: Value = serde_json::from_str(&content).unwrap();
+        let allow = settings["permissions"]["allow"].as_array().unwrap();
+        assert!(allow.contains(&json!("Edit")));
+    }
+
+    #[test]
+    fn add_rule_does_not_duplicate_existing() {
+        let tmp = tempfile::tempdir().unwrap();
+        add_permission_rule(tmp.path(), "Edit").unwrap();
+        add_permission_rule(tmp.path(), "Edit").unwrap();
+
+        let content = fs::read_to_string(tmp.path().join(".claude/settings.json")).unwrap();
+        let settings: Value = serde_json::from_str(&content).unwrap();
+        let allow = settings["permissions"]["allow"].as_array().unwrap();
+        let count = allow.iter().filter(|v| v == &&json!("Edit")).count();
+        assert_eq!(count, 1);
+    }
+
+    #[test]
+    fn add_rule_skips_when_wildcard_already_covers() {
+        let tmp = tempfile::tempdir().unwrap();
+        let claude_dir = tmp.path().join(".claude");
+        fs::create_dir_all(&claude_dir).unwrap();
+        fs::write(
+            claude_dir.join("settings.json"),
+            r#"{"permissions":{"allow":["mcp__storkit__*"]}}"#,
+        )
+        .unwrap();
+
+        add_permission_rule(tmp.path(), "mcp__storkit__create_story").unwrap();
+
+        let content = fs::read_to_string(claude_dir.join("settings.json")).unwrap();
+        let settings: Value = serde_json::from_str(&content).unwrap();
+        let allow = settings["permissions"]["allow"].as_array().unwrap();
+        assert_eq!(allow.len(), 1);
+        assert_eq!(allow[0], "mcp__storkit__*");
+    }
+
+    #[test]
+    fn add_rule_appends_to_existing_rules() {
+        let tmp = tempfile::tempdir().unwrap();
+        let claude_dir = tmp.path().join(".claude");
+        fs::create_dir_all(&claude_dir).unwrap();
+        fs::write(
+            claude_dir.join("settings.json"),
+            r#"{"permissions":{"allow":["Edit"]}}"#,
+        )
+        .unwrap();
+
+        add_permission_rule(tmp.path(), "Write").unwrap();
+
+        let content = fs::read_to_string(claude_dir.join("settings.json")).unwrap();
+        let settings: Value = serde_json::from_str(&content).unwrap();
+        let allow = settings["permissions"]["allow"].as_array().unwrap();
+        assert_eq!(allow.len(), 2);
+        assert!(allow.contains(&json!("Edit")));
+        assert!(allow.contains(&json!("Write")));
+    }
+
+    #[test]
+    fn add_rule_preserves_other_settings_fields() {
+        let tmp = tempfile::tempdir().unwrap();
+        let claude_dir = tmp.path().join(".claude");
+        fs::create_dir_all(&claude_dir).unwrap();
+        fs::write(
+            claude_dir.join("settings.json"),
+            r#"{"permissions":{"allow":["Edit"]},"enabledMcpjsonServers":["storkit"]}"#,
+        )
+        .unwrap();
+
+        add_permission_rule(tmp.path(), "Write").unwrap();
+
+        let content = fs::read_to_string(claude_dir.join("settings.json")).unwrap();
+        let settings: Value = serde_json::from_str(&content).unwrap();
+        let servers = settings["enabledMcpjsonServers"].as_array().unwrap();
+        assert_eq!(servers.len(), 1);
+        assert_eq!(servers[0], "storkit");
+    }
+
+    #[test]
+    fn rebuild_and_restart_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "rebuild_and_restart");
+        assert!(
+            tool.is_some(),
+            "rebuild_and_restart missing from tools list"
+        );
+        let t = tool.unwrap();
+        assert!(t["description"].as_str().unwrap().contains("Rebuild"));
+        assert!(t["inputSchema"].is_object());
+    }
+
+    #[tokio::test]
+    async fn rebuild_and_restart_kills_agents_before_build() {
+        // Verify that calling rebuild_and_restart on an empty pool doesn't
+        // panic and proceeds to the build step. We can't test exec() in a
+        // unit test, but we can verify the build attempt happens.
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+
+        // The build will succeed (we're running in the real workspace) and
+        // then exec() will be called — which would replace our test process.
+        // So we only test that the function *runs* without panicking up to
+        // the agent-kill step. We do this by checking the pool is empty.
+        assert_eq!(ctx.agents.list_agents().unwrap().len(), 0);
+        ctx.agents.kill_all_children(); // should not panic on empty pool
+    }
+
+    #[test]
+    fn rebuild_uses_matching_build_profile() {
+        // The build must use the same profile (debug/release) as the running
+        // binary, otherwise cargo build outputs to a different target dir and
+        // current_exe() still points at the old binary.
+        let build_args: Vec<&str> = if cfg!(debug_assertions) {
+            vec!["build", "-p", "storkit"]
+        } else {
+            vec!["build", "--release", "-p", "storkit"]
+        };
+
+        // Tests always run in debug mode, so --release must NOT be present.
+        assert!(
+            !build_args.contains(&"--release"),
+            "In debug builds, rebuild must not pass --release (would put \
+             the binary in target/release/ while current_exe() points to \
+             target/debug/)"
+        );
+    }
+
+    // ── move_story tool tests ─────────────────────────────────────
+
+    #[test]
+    fn move_story_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "move_story");
+        assert!(tool.is_some(), "move_story missing from tools list");
+        let t = tool.unwrap();
+        assert!(t["description"].is_string());
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        assert!(req_names.contains(&"target_stage"));
+    }
+
+    #[test]
+    fn tool_move_story_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_move_story(&json!({"target_stage": "current"}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[test]
+    fn tool_move_story_missing_target_stage() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_move_story(&json!({"story_id": "1_story_test"}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("target_stage"));
+    }
+
+    #[test]
+    fn tool_move_story_invalid_target_stage() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        // Seed project root in state so get_project_root works
+        let backlog = root.join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join("1_story_test.md"), "---\nname: Test\n---\n").unwrap();
+        let ctx = test_ctx(root);
+        let result = tool_move_story(
+            &json!({"story_id": "1_story_test", "target_stage": "invalid"}),
+            &ctx,
+        );
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("Invalid target_stage"));
+    }
+
+    #[test]
+    fn tool_move_story_moves_from_backlog_to_current() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let backlog = root.join(".storkit/work/1_backlog");
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&current).unwrap();
+        fs::write(backlog.join("5_story_test.md"), "---\nname: Test\n---\n").unwrap();
+
+        let ctx = test_ctx(root);
+        let result = tool_move_story(
+            &json!({"story_id": "5_story_test", "target_stage": "current"}),
+            &ctx,
+        )
+        .unwrap();
+
+        assert!(!backlog.join("5_story_test.md").exists());
+        assert!(current.join("5_story_test.md").exists());
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["story_id"], "5_story_test");
+        assert_eq!(parsed["from_stage"], "backlog");
+        assert_eq!(parsed["to_stage"], "current");
+    }
+
+    #[test]
+    fn tool_move_story_moves_from_current_to_backlog() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        let backlog = root.join(".storkit/work/1_backlog");
+        fs::create_dir_all(&current).unwrap();
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(current.join("6_story_back.md"), "---\nname: Back\n---\n").unwrap();
+
+        let ctx = test_ctx(root);
+        let result = tool_move_story(
+            &json!({"story_id": "6_story_back", "target_stage": "backlog"}),
+            &ctx,
+        )
+        .unwrap();
+
+        assert!(!current.join("6_story_back.md").exists());
+        assert!(backlog.join("6_story_back.md").exists());
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["from_stage"], "current");
+        assert_eq!(parsed["to_stage"], "backlog");
+    }
+
+    #[test]
+    fn tool_move_story_idempotent_when_already_in_target() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("7_story_idem.md"), "---\nname: Idem\n---\n").unwrap();
+
+        let ctx = test_ctx(root);
+        let result = tool_move_story(
+            &json!({"story_id": "7_story_idem", "target_stage": "current"}),
+            &ctx,
+        )
+        .unwrap();
+
+        assert!(current.join("7_story_idem.md").exists());
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["from_stage"], "current");
+        assert_eq!(parsed["to_stage"], "current");
+    }
+
+    #[test]
+    fn tool_move_story_error_when_not_found() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_move_story(
+            &json!({"story_id": "99_story_ghost", "target_stage": "current"}),
+            &ctx,
+        );
+        assert!(result.is_err());
+        assert!(
+            result
+                .unwrap_err()
+                .contains("not found in any pipeline stage")
+        );
+    }
+}
--- a/server/src/http/mcp/git_tools.rs
+++ b/server/src/http/mcp/git_tools.rs
@@ -0,0 +1,766 @@
+use crate::http::context::AppContext;
+use serde_json::{json, Value};
+use std::path::PathBuf;
+
+/// Validates that `worktree_path` exists and is inside the project's
+/// `.storkit/worktrees/` directory. Returns the canonicalized path.
+fn validate_worktree_path(worktree_path: &str, ctx: &AppContext) -> Result<PathBuf, String> {
+    let wd = PathBuf::from(worktree_path);
+
+    if !wd.is_absolute() {
+        return Err("worktree_path must be an absolute path".to_string());
+    }
+    if !wd.exists() {
+        return Err(format!(
+            "worktree_path does not exist: {worktree_path}"
+        ));
+    }
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let worktrees_root = project_root.join(".storkit").join("worktrees");
+
+    let canonical_wd = wd
+        .canonicalize()
+        .map_err(|e| format!("Cannot canonicalize worktree_path: {e}"))?;
+
+    let canonical_wt = if worktrees_root.exists() {
+        worktrees_root
+            .canonicalize()
+            .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))?
+    } else {
+        return Err("No worktrees directory found in project".to_string());
+    };
+
+    if !canonical_wd.starts_with(&canonical_wt) {
+        return Err(format!(
+            "worktree_path must be inside .storkit/worktrees/. Got: {worktree_path}"
+        ));
+    }
+
+    Ok(canonical_wd)
+}
+
+/// Run a git command in the given directory and return its output.
+async fn run_git(args: Vec<&'static str>, dir: PathBuf) -> Result<std::process::Output, String> {
+    tokio::task::spawn_blocking(move || {
+        std::process::Command::new("git")
+            .args(&args)
+            .current_dir(&dir)
+            .output()
+    })
+    .await
+    .map_err(|e| format!("Task join error: {e}"))?
+    .map_err(|e| format!("Failed to run git: {e}"))
+}
+
+/// Run a git command with owned args in the given directory.
+async fn run_git_owned(args: Vec<String>, dir: PathBuf) -> Result<std::process::Output, String> {
+    tokio::task::spawn_blocking(move || {
+        std::process::Command::new("git")
+            .args(&args)
+            .current_dir(&dir)
+            .output()
+    })
+    .await
+    .map_err(|e| format!("Task join error: {e}"))?
+    .map_err(|e| format!("Failed to run git: {e}"))
+}
+
+/// git_status — returns working tree status (staged, unstaged, untracked files).
+pub(super) async fn tool_git_status(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let dir = validate_worktree_path(worktree_path, ctx)?;
+
+    let output = run_git(vec!["status", "--porcelain=v1", "-u"], dir).await?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    if !output.status.success() {
+        return Err(format!(
+            "git status failed (exit {}): {stderr}",
+            output.status.code().unwrap_or(-1)
+        ));
+    }
+
+    let mut staged: Vec<String> = Vec::new();
+    let mut unstaged: Vec<String> = Vec::new();
+    let mut untracked: Vec<String> = Vec::new();
+
+    for line in stdout.lines() {
+        if line.len() < 3 {
+            continue;
+        }
+        let x = line.chars().next().unwrap_or(' ');
+        let y = line.chars().nth(1).unwrap_or(' ');
+        let path = line[3..].to_string();
+
+        match (x, y) {
+            ('?', '?') => untracked.push(path),
+            (' ', _) => unstaged.push(path),
+            (_, ' ') => staged.push(path),
+            _ => {
+                // Both staged and unstaged modifications
+                staged.push(path.clone());
+                unstaged.push(path);
+            }
+        }
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "staged": staged,
+        "unstaged": unstaged,
+        "untracked": untracked,
+        "clean": staged.is_empty() && unstaged.is_empty() && untracked.is_empty(),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// git_diff — returns diff output. Supports staged/unstaged/commit range.
+pub(super) async fn tool_git_diff(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let dir = validate_worktree_path(worktree_path, ctx)?;
+
+    let staged = args
+        .get("staged")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);
+
+    let commit_range = args
+        .get("commit_range")
+        .and_then(|v| v.as_str())
+        .map(|s| s.to_string());
+
+    let mut git_args: Vec<String> = vec!["diff".to_string()];
+
+    if staged {
+        git_args.push("--staged".to_string());
+    }
+
+    if let Some(range) = commit_range {
+        git_args.push(range);
+    }
+
+    let output = run_git_owned(git_args, dir).await?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    if !output.status.success() {
+        return Err(format!(
+            "git diff failed (exit {}): {stderr}",
+            output.status.code().unwrap_or(-1)
+        ));
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "diff": stdout.as_ref(),
+        "exit_code": output.status.code().unwrap_or(-1),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// git_add — stages files by path.
+pub(super) async fn tool_git_add(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let paths: Vec<String> = args
+        .get("paths")
+        .and_then(|v| v.as_array())
+        .ok_or("Missing required argument: paths (must be an array of strings)")?
+        .iter()
+        .filter_map(|v| v.as_str().map(|s| s.to_string()))
+        .collect();
+
+    if paths.is_empty() {
+        return Err("paths must be a non-empty array of strings".to_string());
+    }
+
+    let dir = validate_worktree_path(worktree_path, ctx)?;
+
+    let mut git_args: Vec<String> = vec!["add".to_string(), "--".to_string()];
+    git_args.extend(paths.clone());
+
+    let output = run_git_owned(git_args, dir).await?;
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    if !output.status.success() {
+        return Err(format!(
+            "git add failed (exit {}): {stderr}",
+            output.status.code().unwrap_or(-1)
+        ));
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "staged": paths,
+        "exit_code": output.status.code().unwrap_or(0),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// git_commit — commits staged changes with a message.
+pub(super) async fn tool_git_commit(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let message = args
+        .get("message")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: message")?
+        .to_string();
+
+    if message.trim().is_empty() {
+        return Err("message must not be empty".to_string());
+    }
+
+    let dir = validate_worktree_path(worktree_path, ctx)?;
+
+    let git_args: Vec<String> = vec![
+        "commit".to_string(),
+        "--message".to_string(),
+        message,
+    ];
+
+    let output = run_git_owned(git_args, dir).await?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    if !output.status.success() {
+        return Err(format!(
+            "git commit failed (exit {}): {stderr}",
+            output.status.code().unwrap_or(-1)
+        ));
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "output": stdout.as_ref(),
+        "exit_code": output.status.code().unwrap_or(0),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// git_log — returns commit history with configurable count and format.
+pub(super) async fn tool_git_log(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let worktree_path = args
+        .get("worktree_path")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: worktree_path")?;
+
+    let dir = validate_worktree_path(worktree_path, ctx)?;
+
+    let count = args
+        .get("count")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(10)
+        .min(500);
+
+    let format = args
+        .get("format")
+        .and_then(|v| v.as_str())
+        .unwrap_or("%H%x09%s%x09%an%x09%ai")
+        .to_string();
+
+    let git_args: Vec<String> = vec![
+        "log".to_string(),
+        format!("--max-count={count}"),
+        format!("--pretty=format:{format}"),
+    ];
+
+    let output = run_git_owned(git_args, dir).await?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    if !output.status.success() {
+        return Err(format!(
+            "git log failed (exit {}): {stderr}",
+            output.status.code().unwrap_or(-1)
+        ));
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "log": stdout.as_ref(),
+        "exit_code": output.status.code().unwrap_or(0),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use serde_json::json;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    /// Create a temp directory with a git worktree structure and init a repo.
+    fn setup_worktree() -> (tempfile::TempDir, PathBuf, AppContext) {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("42_test_story");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        // Init git repo in the worktree
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        (tmp, story_wt, ctx)
+    }
+
+    // ── validate_worktree_path ─────────────────────────────────────────
+
+    #[test]
+    fn validate_rejects_relative_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_worktree_path("relative/path", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("absolute"));
+    }
+
+    #[test]
+    fn validate_rejects_nonexistent_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_worktree_path("/nonexistent_path_xyz_git", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("does not exist"));
+    }
+
+    #[test]
+    fn validate_rejects_path_outside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let wt_dir = tmp.path().join(".storkit").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_worktree_path(tmp.path().to_str().unwrap(), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("inside .storkit/worktrees"));
+    }
+
+    #[test]
+    fn validate_accepts_path_inside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("42_test_story");
+        std::fs::create_dir_all(&story_wt).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_worktree_path(story_wt.to_str().unwrap(), &ctx);
+        assert!(result.is_ok(), "expected Ok, got: {:?}", result);
+    }
+
+    // ── git_status ────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn git_status_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_git_status(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn git_status_clean_repo() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Make an initial commit so HEAD exists
+        std::fs::write(story_wt.join("readme.txt"), "hello").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        let result = tool_git_status(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["clean"], true);
+        assert!(parsed["staged"].as_array().unwrap().is_empty());
+        assert!(parsed["unstaged"].as_array().unwrap().is_empty());
+        assert!(parsed["untracked"].as_array().unwrap().is_empty());
+    }
+
+    #[tokio::test]
+    async fn git_status_shows_untracked_file() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Make initial commit
+        std::fs::write(story_wt.join("readme.txt"), "hello").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        // Add untracked file
+        std::fs::write(story_wt.join("new_file.txt"), "content").unwrap();
+
+        let result = tool_git_status(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["clean"], false);
+        let untracked = parsed["untracked"].as_array().unwrap();
+        assert!(
+            untracked.iter().any(|v| v.as_str().unwrap().contains("new_file.txt")),
+            "expected new_file.txt in untracked: {parsed}"
+        );
+    }
+
+    // ── git_diff ──────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn git_diff_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_git_diff(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn git_diff_returns_diff() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Create initial commit
+        std::fs::write(story_wt.join("file.txt"), "line1\n").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        // Modify file (unstaged)
+        std::fs::write(story_wt.join("file.txt"), "line1\nline2\n").unwrap();
+
+        let result = tool_git_diff(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert!(
+            parsed["diff"].as_str().unwrap().contains("line2"),
+            "expected diff output: {parsed}"
+        );
+    }
+
+    #[tokio::test]
+    async fn git_diff_staged_flag() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Create initial commit
+        std::fs::write(story_wt.join("file.txt"), "line1\n").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        // Stage a modification
+        std::fs::write(story_wt.join("file.txt"), "line1\nstaged_change\n").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "file.txt"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        let result = tool_git_diff(
+            &json!({"worktree_path": story_wt.to_str().unwrap(), "staged": true}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert!(
+            parsed["diff"].as_str().unwrap().contains("staged_change"),
+            "expected staged diff: {parsed}"
+        );
+    }
+
+    // ── git_add ───────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn git_add_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_git_add(&json!({"paths": ["file.txt"]}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn git_add_missing_paths() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+        let result = tool_git_add(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("paths"));
+    }
+
+    #[tokio::test]
+    async fn git_add_empty_paths() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+        let result = tool_git_add(
+            &json!({"worktree_path": story_wt.to_str().unwrap(), "paths": []}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("non-empty"));
+    }
+
+    #[tokio::test]
+    async fn git_add_stages_file() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
+
+        let result = tool_git_add(
+            &json!({
+                "worktree_path": story_wt.to_str().unwrap(),
+                "paths": ["file.txt"]
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 0);
+        let staged = parsed["staged"].as_array().unwrap();
+        assert!(staged.iter().any(|v| v.as_str().unwrap() == "file.txt"));
+
+        // Verify file is actually staged
+        let status = std::process::Command::new("git")
+            .args(["status", "--porcelain"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        let output = String::from_utf8_lossy(&status.stdout);
+        assert!(output.contains("A  file.txt"), "file should be staged: {output}");
+    }
+
+    // ── git_commit ────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn git_commit_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_git_commit(&json!({"message": "test"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn git_commit_missing_message() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+        let result = tool_git_commit(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("message"));
+    }
+
+    #[tokio::test]
+    async fn git_commit_empty_message() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+        let result = tool_git_commit(
+            &json!({"worktree_path": story_wt.to_str().unwrap(), "message": "   "}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("empty"));
+    }
+
+    #[tokio::test]
+    async fn git_commit_creates_commit() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Stage a file
+        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "file.txt"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        let result = tool_git_commit(
+            &json!({
+                "worktree_path": story_wt.to_str().unwrap(),
+                "message": "test commit message"
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 0);
+
+        // Verify commit exists
+        let log = std::process::Command::new("git")
+            .args(["log", "--oneline"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        let log_output = String::from_utf8_lossy(&log.stdout);
+        assert!(
+            log_output.contains("test commit message"),
+            "expected commit in log: {log_output}"
+        );
+    }
+
+    // ── git_log ───────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn git_log_missing_worktree_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_git_log(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("worktree_path"));
+    }
+
+    #[tokio::test]
+    async fn git_log_returns_history() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Make a commit
+        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "first commit"])
+            .current_dir(&story_wt)
+            .output()
+            .unwrap();
+
+        let result = tool_git_log(
+            &json!({"worktree_path": story_wt.to_str().unwrap()}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 0);
+        assert!(
+            parsed["log"].as_str().unwrap().contains("first commit"),
+            "expected commit in log: {parsed}"
+        );
+    }
+
+    #[tokio::test]
+    async fn git_log_respects_count() {
+        let (_tmp, story_wt, ctx) = setup_worktree();
+
+        // Make multiple commits
+        for i in 0..5 {
+            std::fs::write(story_wt.join("file.txt"), format!("content {i}")).unwrap();
+            std::process::Command::new("git")
+                .args(["add", "."])
+                .current_dir(&story_wt)
+                .output()
+                .unwrap();
+            std::process::Command::new("git")
+                .args(["commit", "-m", &format!("commit {i}")])
+                .current_dir(&story_wt)
+                .output()
+                .unwrap();
+        }
+
+        let result = tool_git_log(
+            &json!({"worktree_path": story_wt.to_str().unwrap(), "count": 2}),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+        // With count=2, only 2 commit entries should appear
+        let log = parsed["log"].as_str().unwrap();
+        // Each log line is tab-separated; count newlines
+        let lines: Vec<&str> = log.lines().collect();
+        assert_eq!(lines.len(), 2, "expected 2 log entries, got: {log}");
+    }
+}
--- a/server/src/http/mcp/merge_tools.rs
+++ b/server/src/http/mcp/merge_tools.rs
@@ -0,0 +1,380 @@
+use crate::agents::move_story_to_merge;
+use crate::http::context::AppContext;
+use crate::io::story_metadata::write_merge_failure;
+use crate::slog;
+use crate::slog_warn;
+use serde_json::{json, Value};
+
+pub(super) fn tool_merge_agent_work(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    ctx.agents.start_merge_agent_work(&project_root, story_id)?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": story_id,
+        "status": "started",
+        "message": "Merge pipeline started. Poll get_merge_status(story_id) every 10-15 seconds until status is 'completed' or 'failed'."
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) fn tool_get_merge_status(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let job = ctx.agents.get_merge_status(story_id)
+        .ok_or_else(|| format!("No merge job found for story '{story_id}'. Call merge_agent_work first."))?;
+
+    match &job.status {
+        crate::agents::merge::MergeJobStatus::Running => {
+            serde_json::to_string_pretty(&json!({
+                "story_id": story_id,
+                "status": "running",
+                "message": "Merge pipeline is still running. Poll again in 10-15 seconds."
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        crate::agents::merge::MergeJobStatus::Completed(report) => {
+            let status_msg = if report.success && report.gates_passed && report.conflicts_resolved {
+                "Merge complete: conflicts were auto-resolved and all quality gates passed. Story moved to done and worktree cleaned up."
+            } else if report.success && report.gates_passed {
+                "Merge complete: all quality gates passed. Story moved to done and worktree cleaned up."
+            } else if report.had_conflicts && !report.conflicts_resolved {
+                "Merge failed: conflicts detected that could not be auto-resolved. Merge was aborted — master is untouched. Call report_merge_failure with the conflict details so the human can resolve them. Do NOT manually move the story file or call accept_story."
+            } else if report.success && !report.gates_passed {
+                "Merge committed but quality gates failed. Review gate_output and fix issues before re-running."
+            } else {
+                "Merge failed. Review gate_output for details. Call report_merge_failure to record the failure. Do NOT manually move the story file or call accept_story."
+            };
+
+            serde_json::to_string_pretty(&json!({
+                "story_id": story_id,
+                "status": "completed",
+                "success": report.success,
+                "had_conflicts": report.had_conflicts,
+                "conflicts_resolved": report.conflicts_resolved,
+                "conflict_details": report.conflict_details,
+                "gates_passed": report.gates_passed,
+                "gate_output": report.gate_output,
+                "worktree_cleaned_up": report.worktree_cleaned_up,
+                "story_archived": report.story_archived,
+                "message": status_msg,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        crate::agents::merge::MergeJobStatus::Failed(err) => {
+            serde_json::to_string_pretty(&json!({
+                "story_id": story_id,
+                "status": "failed",
+                "error": err,
+                "message": format!("Merge pipeline failed: {err}. Call report_merge_failure to record the failure.")
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+    }
+}
+
+pub(super) async fn tool_move_story_to_merge(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args
+        .get("agent_name")
+        .and_then(|v| v.as_str())
+        .unwrap_or("mergemaster");
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Move story from work/2_current/ to work/4_merge/
+    move_story_to_merge(&project_root, story_id)?;
+
+    // Start the mergemaster agent on the story worktree
+    let info = ctx
+        .agents
+        .start_agent(&project_root, story_id, Some(agent_name), None)
+        .await?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": info.story_id,
+        "agent_name": info.agent_name,
+        "status": info.status.to_string(),
+        "worktree_path": info.worktree_path,
+        "message": format!(
+            "Story '{story_id}' moved to work/4_merge/ and mergemaster agent '{}' started.",
+            info.agent_name
+        ),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) fn tool_report_merge_failure(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let reason = args
+        .get("reason")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: reason")?;
+
+    slog!("[mergemaster] Merge failure reported for '{story_id}': {reason}");
+    ctx.agents.set_merge_failure_reported(story_id);
+
+    // Broadcast the failure so the Matrix notification listener can post an
+    // error message to configured rooms without coupling this tool to the bot.
+    let _ = ctx.watcher_tx.send(crate::io::watcher::WatcherEvent::MergeFailure {
+        story_id: story_id.to_string(),
+        reason: reason.to_string(),
+    });
+
+    // Persist the failure reason to the story file's front matter so it
+    // survives server restarts and is visible in the web UI.
+    if let Ok(project_root) = ctx.state.get_project_root() {
+        let story_file = project_root
+            .join(".storkit")
+            .join("work")
+            .join("4_merge")
+            .join(format!("{story_id}.md"));
+        if story_file.exists() {
+            if let Err(e) = write_merge_failure(&story_file, reason) {
+                slog_warn!(
+                    "[mergemaster] Failed to persist merge_failure to story file for '{story_id}': {e}"
+                );
+            }
+        } else {
+            slog_warn!(
+                "[mergemaster] Story file not found in 4_merge/ for '{story_id}'; \
+                 merge_failure not persisted to front matter"
+            );
+        }
+    }
+
+    Ok(format!(
+        "Merge failure for '{story_id}' recorded. Story remains in work/4_merge/. Reason: {reason}"
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    fn setup_git_repo_in(dir: &std::path::Path) {
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(dir)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(dir)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(dir)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(dir)
+            .output()
+            .unwrap();
+    }
+
+    #[test]
+    fn merge_agent_work_in_tools_list() {
+        use super::super::{handle_tools_list};
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "merge_agent_work");
+        assert!(tool.is_some(), "merge_agent_work missing from tools list");
+        let t = tool.unwrap();
+        assert!(t["description"].is_string());
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        // agent_name is optional
+        assert!(!req_names.contains(&"agent_name"));
+    }
+
+    #[test]
+    fn move_story_to_merge_in_tools_list() {
+        use super::super::{handle_tools_list};
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "move_story_to_merge");
+        assert!(tool.is_some(), "move_story_to_merge missing from tools list");
+        let t = tool.unwrap();
+        assert!(t["description"].is_string());
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        // agent_name is optional
+        assert!(!req_names.contains(&"agent_name"));
+    }
+
+    #[test]
+    fn tool_merge_agent_work_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_merge_agent_work(&json!({}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_move_story_to_merge_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_move_story_to_merge(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_move_story_to_merge_moves_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo_in(tmp.path());
+        let current_dir = tmp.path().join(".storkit/work/2_current");
+        std::fs::create_dir_all(&current_dir).unwrap();
+        let story_file = current_dir.join("24_story_test.md");
+        std::fs::write(&story_file, "---\nname: Test\n---\n").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "add story"])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        // The agent start will fail in test (no worktree/config), but the file move should succeed
+        let result = tool_move_story_to_merge(&json!({"story_id": "24_story_test"}), &ctx).await;
+        // File should have been moved regardless of agent start outcome
+        assert!(!story_file.exists(), "2_current file should be gone");
+        assert!(
+            tmp.path().join(".storkit/work/4_merge/24_story_test.md").exists(),
+            "4_merge file should exist"
+        );
+        // Result is either Ok (agent started) or Err (agent failed - acceptable in tests)
+        let _ = result;
+    }
+
+    #[tokio::test]
+    async fn tool_merge_agent_work_returns_started() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo_in(tmp.path());
+        let ctx = test_ctx(tmp.path());
+
+        let result = tool_merge_agent_work(
+            &json!({"story_id": "99_nonexistent", "agent_name": "coder-1"}),
+            &ctx,
+        )
+        .unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["story_id"], "99_nonexistent");
+        assert_eq!(parsed["status"], "started");
+        assert!(parsed.get("message").is_some());
+    }
+
+    #[test]
+    fn tool_get_merge_status_no_job() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_get_merge_status(&json!({"story_id": "99_nonexistent"}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("No merge job"));
+    }
+
+    #[tokio::test]
+    async fn tool_get_merge_status_returns_running() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo_in(tmp.path());
+        let ctx = test_ctx(tmp.path());
+
+        // Start a merge (it will run in background)
+        tool_merge_agent_work(
+            &json!({"story_id": "99_nonexistent"}),
+            &ctx,
+        )
+        .unwrap();
+
+        // Immediately check — should be running (or already finished if very fast)
+        let result = tool_get_merge_status(&json!({"story_id": "99_nonexistent"}), &ctx).unwrap();
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        let status = parsed["status"].as_str().unwrap();
+        assert!(
+            status == "running" || status == "completed" || status == "failed",
+            "unexpected status: {status}"
+        );
+    }
+
+    #[test]
+    fn report_merge_failure_in_tools_list() {
+        use super::super::{handle_tools_list};
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "report_merge_failure");
+        assert!(
+            tool.is_some(),
+            "report_merge_failure missing from tools list"
+        );
+        let t = tool.unwrap();
+        assert!(t["description"].is_string());
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        assert!(req_names.contains(&"reason"));
+    }
+
+    #[test]
+    fn tool_report_merge_failure_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_report_merge_failure(&json!({"reason": "conflicts"}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[test]
+    fn tool_report_merge_failure_missing_reason() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_report_merge_failure(&json!({"story_id": "42_story_foo"}), &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("reason"));
+    }
+
+    #[test]
+    fn tool_report_merge_failure_returns_confirmation() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_report_merge_failure(
+            &json!({
+                "story_id": "42_story_foo",
+                "reason": "Unresolvable merge conflicts in src/main.rs"
+            }),
+            &ctx,
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap();
+        assert!(msg.contains("42_story_foo"));
+        assert!(msg.contains("work/4_merge/"));
+        assert!(msg.contains("Unresolvable merge conflicts"));
+    }
+}
--- a/server/src/http/mcp/mod.rs
+++ b/server/src/http/mcp/mod.rs
--- a/server/src/http/mcp/qa_tools.rs
+++ b/server/src/http/mcp/qa_tools.rs
@@ -0,0 +1,293 @@
+use crate::agents::{move_story_to_merge, move_story_to_qa, reject_story_from_qa};
+use crate::http::context::AppContext;
+use crate::slog;
+use crate::slog_warn;
+use serde_json::{Value, json};
+
+pub(super) async fn tool_request_qa(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let agent_name = args
+        .get("agent_name")
+        .and_then(|v| v.as_str())
+        .unwrap_or("qa");
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Move story from work/2_current/ to work/3_qa/
+    move_story_to_qa(&project_root, story_id)?;
+
+    // Start the QA agent on the story worktree
+    let info = ctx
+        .agents
+        .start_agent(&project_root, story_id, Some(agent_name), None)
+        .await?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": info.story_id,
+        "agent_name": info.agent_name,
+        "status": info.status.to_string(),
+        "worktree_path": info.worktree_path,
+        "message": format!(
+            "Story '{story_id}' moved to work/3_qa/ and QA agent '{}' started.",
+            info.agent_name
+        ),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_approve_qa(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Clear review_hold before moving
+    let qa_path = project_root
+        .join(".storkit/work/3_qa")
+        .join(format!("{story_id}.md"));
+    if qa_path.exists() {
+        let _ = crate::io::story_metadata::clear_front_matter_field(&qa_path, "review_hold");
+    }
+
+    // Move story from work/3_qa/ to work/4_merge/
+    move_story_to_merge(&project_root, story_id)?;
+
+    // Start the mergemaster agent
+    let info = ctx
+        .agents
+        .start_agent(&project_root, story_id, Some("mergemaster"), None)
+        .await?;
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": info.story_id,
+        "agent_name": info.agent_name,
+        "status": info.status.to_string(),
+        "message": format!(
+            "Story '{story_id}' approved. Moved to work/4_merge/ and mergemaster agent '{}' started.",
+            info.agent_name
+        ),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+pub(super) async fn tool_reject_qa(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+    let notes = args
+        .get("notes")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: notes")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Move story from work/3_qa/ back to work/2_current/ with rejection notes
+    reject_story_from_qa(&project_root, story_id, notes)?;
+
+    // Restart the coder agent with rejection context
+    let story_path = project_root
+        .join(".storkit/work/2_current")
+        .join(format!("{story_id}.md"));
+    let agent_name = if story_path.exists() {
+        let contents = std::fs::read_to_string(&story_path).unwrap_or_default();
+        crate::io::story_metadata::parse_front_matter(&contents)
+            .ok()
+            .and_then(|meta| meta.agent)
+    } else {
+        None
+    };
+    let agent_name = agent_name.as_deref().unwrap_or("coder-opus");
+
+    let context = format!(
+        "\n\n---\n## QA Rejection\n\
+         Your previous implementation was rejected during human QA review.\n\
+         Rejection notes:\n{notes}\n\n\
+         Please fix the issues described above and try again."
+    );
+    if let Err(e) = ctx
+        .agents
+        .start_agent(&project_root, story_id, Some(agent_name), Some(&context))
+        .await
+    {
+        slog_warn!("[qa] Failed to restart coder for '{story_id}' after rejection: {e}");
+    }
+
+    Ok(format!(
+        "Story '{story_id}' rejected and moved back to work/2_current/. Coder agent '{agent_name}' restarted with rejection notes."
+    ))
+}
+
+pub(super) async fn tool_launch_qa_app(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+
+    // Find the worktree path for this story
+    let worktrees = crate::worktree::list_worktrees(&project_root)?;
+    let wt = worktrees
+        .iter()
+        .find(|w| w.story_id == story_id)
+        .ok_or_else(|| format!("No worktree found for story '{story_id}'"))?;
+    let wt_path = wt.path.clone();
+
+    // Stop any existing QA app instance
+    {
+        let mut guard = ctx.qa_app_process.lock().unwrap();
+        if let Some(mut child) = guard.take() {
+            let _ = child.kill();
+            let _ = child.wait();
+            slog!("[qa-app] Stopped previous QA app instance.");
+        }
+    }
+
+    // Find a free port starting from 3100
+    let port = find_free_port(3100);
+
+    // Write .storkit_port so the frontend dev server knows where to connect
+    let port_file = wt_path.join(".storkit_port");
+    std::fs::write(&port_file, port.to_string())
+        .map_err(|e| format!("Failed to write .storkit_port: {e}"))?;
+
+    // Launch the server from the worktree
+    let child = std::process::Command::new("cargo")
+        .args(["run"])
+        .env("STORKIT_PORT", port.to_string())
+        .current_dir(&wt_path)
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .spawn()
+        .map_err(|e| format!("Failed to launch QA app: {e}"))?;
+
+    {
+        let mut guard = ctx.qa_app_process.lock().unwrap();
+        *guard = Some(child);
+    }
+
+    serde_json::to_string_pretty(&json!({
+        "story_id": story_id,
+        "port": port,
+        "worktree_path": wt_path.to_string_lossy(),
+        "message": format!("QA app launched on port {port} from worktree at {}", wt_path.display()),
+    }))
+    .map_err(|e| format!("Serialization error: {e}"))
+}
+
+/// Find a free TCP port starting from `start`.
+pub(super) fn find_free_port(start: u16) -> u16 {
+    for port in start..start + 100 {
+        if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() {
+            return port;
+        }
+    }
+    start // fallback
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    #[test]
+    fn request_qa_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "request_qa");
+        assert!(tool.is_some(), "request_qa missing from tools list");
+        let t = tool.unwrap();
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        // agent_name is optional
+        assert!(!req_names.contains(&"agent_name"));
+    }
+
+    #[test]
+    fn approve_qa_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "approve_qa");
+        assert!(tool.is_some(), "approve_qa missing from tools list");
+        let t = tool.unwrap();
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+    }
+
+    #[test]
+    fn reject_qa_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "reject_qa");
+        assert!(tool.is_some(), "reject_qa missing from tools list");
+        let t = tool.unwrap();
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+        assert!(req_names.contains(&"notes"));
+    }
+
+    #[test]
+    fn launch_qa_app_in_tools_list() {
+        use super::super::handle_tools_list;
+        let resp = handle_tools_list(Some(json!(1)));
+        let tools = resp.result.unwrap()["tools"].as_array().unwrap().clone();
+        let tool = tools.iter().find(|t| t["name"] == "launch_qa_app");
+        assert!(tool.is_some(), "launch_qa_app missing from tools list");
+        let t = tool.unwrap();
+        let required = t["inputSchema"]["required"].as_array().unwrap();
+        let req_names: Vec<&str> = required.iter().map(|v| v.as_str().unwrap()).collect();
+        assert!(req_names.contains(&"story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_approve_qa_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_approve_qa(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_reject_qa_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_reject_qa(&json!({"notes": "broken"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+
+    #[tokio::test]
+    async fn tool_reject_qa_missing_notes() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_reject_qa(&json!({"story_id": "1_story_test"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("notes"));
+    }
+
+    #[tokio::test]
+    async fn tool_request_qa_missing_story_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_request_qa(&json!({}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("story_id"));
+    }
+}
--- a/server/src/http/mcp/shell_tools.rs
+++ b/server/src/http/mcp/shell_tools.rs
@@ -0,0 +1,626 @@
+use crate::http::context::AppContext;
+use bytes::Bytes;
+use futures::StreamExt;
+use poem::{Body, Response};
+use serde_json::{json, Value};
+use std::path::PathBuf;
+
+const DEFAULT_TIMEOUT_SECS: u64 = 120;
+const MAX_TIMEOUT_SECS: u64 = 600;
+
+/// Patterns that are unconditionally blocked regardless of context.
+static BLOCKED_PATTERNS: &[&str] = &[
+    "rm -rf /",
+    "rm -fr /",
+    "rm -rf /*",
+    "rm -fr /*",
+    "rm --no-preserve-root",
+    ":(){ :|:& };:",
+    "> /dev/sda",
+    "dd if=/dev",
+];
+
+/// Binaries that are unconditionally blocked.
+static BLOCKED_BINARIES: &[&str] = &[
+    "sudo",
+    "su",
+    "shutdown",
+    "reboot",
+    "halt",
+    "poweroff",
+    "mkfs",
+];
+
+/// Returns an error message if the command matches a blocked pattern or binary.
+fn is_dangerous(command: &str) -> Option<String> {
+    let trimmed = command.trim();
+
+    // Check each blocked pattern (substring match)
+    for &pattern in BLOCKED_PATTERNS {
+        if trimmed.contains(pattern) {
+            return Some(format!(
+                "Command blocked: dangerous pattern '{pattern}' detected"
+            ));
+        }
+    }
+
+    // Check first token of the command against blocked binaries
+    if let Some(first_token) = trimmed.split_whitespace().next() {
+        let binary = std::path::Path::new(first_token)
+            .file_name()
+            .and_then(|n| n.to_str())
+            .unwrap_or(first_token);
+        if BLOCKED_BINARIES.contains(&binary) {
+            return Some(format!("Command blocked: '{binary}' is not permitted"));
+        }
+    }
+
+    None
+}
+
+/// Validates that `working_dir` exists and is inside the project's
+/// `.storkit/worktrees/` directory. Returns the canonicalized path.
+fn validate_working_dir(working_dir: &str, ctx: &AppContext) -> Result<PathBuf, String> {
+    let wd = PathBuf::from(working_dir);
+
+    if !wd.is_absolute() {
+        return Err("working_dir must be an absolute path".to_string());
+    }
+    if !wd.exists() {
+        return Err(format!("working_dir does not exist: {working_dir}"));
+    }
+
+    let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let worktrees_root = project_root.join(".storkit").join("worktrees");
+
+    let canonical_wd = wd
+        .canonicalize()
+        .map_err(|e| format!("Cannot canonicalize working_dir: {e}"))?;
+
+    // If worktrees_root doesn't exist yet, we can't allow anything
+    let canonical_wt = if worktrees_root.exists() {
+        worktrees_root
+            .canonicalize()
+            .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))?
+    } else {
+        return Err("No worktrees directory found in project".to_string());
+    };
+
+    if !canonical_wd.starts_with(&canonical_wt) {
+        return Err(format!(
+            "working_dir must be inside .storkit/worktrees/. Got: {working_dir}"
+        ));
+    }
+
+    Ok(canonical_wd)
+}
+
+/// Regular (non-SSE) run_command: runs the bash command to completion and
+/// returns stdout, stderr, exit_code, and whether it timed out.
+pub(super) async fn tool_run_command(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let command = args
+        .get("command")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: command")?
+        .to_string();
+
+    let working_dir = args
+        .get("working_dir")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: working_dir")?;
+
+    let timeout_secs = args
+        .get("timeout")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(DEFAULT_TIMEOUT_SECS)
+        .min(MAX_TIMEOUT_SECS);
+
+    if let Some(reason) = is_dangerous(&command) {
+        return Err(reason);
+    }
+
+    let canonical_dir = validate_working_dir(working_dir, ctx)?;
+
+    let result = tokio::time::timeout(
+        std::time::Duration::from_secs(timeout_secs),
+        tokio::task::spawn_blocking({
+            let cmd = command.clone();
+            let dir = canonical_dir.clone();
+            move || {
+                std::process::Command::new("bash")
+                    .arg("-c")
+                    .arg(&cmd)
+                    .current_dir(&dir)
+                    .output()
+            }
+        }),
+    )
+    .await;
+
+    match result {
+        Err(_) => {
+            // timed out
+            serde_json::to_string_pretty(&json!({
+                "stdout": "",
+                "stderr": format!("Command timed out after {timeout_secs}s"),
+                "exit_code": -1,
+                "timed_out": true,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+        Ok(Err(e)) => Err(format!("Task join error: {e}")),
+        Ok(Ok(Err(e))) => Err(format!("Failed to execute command: {e}")),
+        Ok(Ok(Ok(output))) => {
+            serde_json::to_string_pretty(&json!({
+                "stdout": String::from_utf8_lossy(&output.stdout),
+                "stderr": String::from_utf8_lossy(&output.stderr),
+                "exit_code": output.status.code().unwrap_or(-1),
+                "timed_out": false,
+            }))
+            .map_err(|e| format!("Serialization error: {e}"))
+        }
+    }
+}
+
+/// SSE streaming run_command: spawns the process and emits stdout/stderr lines
+/// as JSON-RPC notifications, then a final response with exit_code.
+pub(super) fn handle_run_command_sse(
+    id: Option<Value>,
+    params: &Value,
+    ctx: &AppContext,
+) -> Response {
+    use super::{to_sse_response, JsonRpcResponse};
+
+    let args = params.get("arguments").cloned().unwrap_or(json!({}));
+
+    let command = match args.get("command").and_then(|v| v.as_str()) {
+        Some(c) => c.to_string(),
+        None => {
+            return to_sse_response(JsonRpcResponse::error(
+                id,
+                -32602,
+                "Missing required argument: command".into(),
+            ))
+        }
+    };
+
+    let working_dir = match args.get("working_dir").and_then(|v| v.as_str()) {
+        Some(d) => d.to_string(),
+        None => {
+            return to_sse_response(JsonRpcResponse::error(
+                id,
+                -32602,
+                "Missing required argument: working_dir".into(),
+            ))
+        }
+    };
+
+    let timeout_secs = args
+        .get("timeout")
+        .and_then(|v| v.as_u64())
+        .unwrap_or(DEFAULT_TIMEOUT_SECS)
+        .min(MAX_TIMEOUT_SECS);
+
+    if let Some(reason) = is_dangerous(&command) {
+        return to_sse_response(JsonRpcResponse::error(id, -32602, reason));
+    }
+
+    let canonical_dir = match validate_working_dir(&working_dir, ctx) {
+        Ok(d) => d,
+        Err(e) => return to_sse_response(JsonRpcResponse::error(id, -32602, e)),
+    };
+
+    let final_id = id;
+
+    let stream = async_stream::stream! {
+        use tokio::io::AsyncBufReadExt;
+
+        let mut child = match tokio::process::Command::new("bash")
+            .arg("-c")
+            .arg(&command)
+            .current_dir(&canonical_dir)
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+        {
+            Ok(c) => c,
+            Err(e) => {
+                let resp = JsonRpcResponse::success(
+                    final_id,
+                    json!({
+                        "content": [{"type": "text", "text": format!("Failed to spawn process: {e}")}],
+                        "isError": true
+                    }),
+                );
+                if let Ok(s) = serde_json::to_string(&resp) {
+                    yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                }
+                return;
+            }
+        };
+
+        let stdout = child.stdout.take().expect("stdout piped");
+        let stderr = child.stderr.take().expect("stderr piped");
+        let mut stdout_lines = tokio::io::BufReader::new(stdout).lines();
+        let mut stderr_lines = tokio::io::BufReader::new(stderr).lines();
+
+        let deadline = tokio::time::Instant::now()
+            + std::time::Duration::from_secs(timeout_secs);
+        let mut stdout_done = false;
+        let mut stderr_done = false;
+        let mut timed_out = false;
+
+        loop {
+            if stdout_done && stderr_done {
+                break;
+            }
+
+            let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+            if remaining.is_zero() {
+                timed_out = true;
+                let _ = child.kill().await;
+                break;
+            }
+
+            tokio::select! {
+                line = stdout_lines.next_line(), if !stdout_done => {
+                    match line {
+                        Ok(Some(l)) => {
+                            let notif = json!({
+                                "jsonrpc": "2.0",
+                                "method": "notifications/tools/progress",
+                                "params": { "stream": "stdout", "line": l }
+                            });
+                            if let Ok(s) = serde_json::to_string(&notif) {
+                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                            }
+                        }
+                        _ => { stdout_done = true; }
+                    }
+                }
+                line = stderr_lines.next_line(), if !stderr_done => {
+                    match line {
+                        Ok(Some(l)) => {
+                            let notif = json!({
+                                "jsonrpc": "2.0",
+                                "method": "notifications/tools/progress",
+                                "params": { "stream": "stderr", "line": l }
+                            });
+                            if let Ok(s) = serde_json::to_string(&notif) {
+                                yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+                            }
+                        }
+                        _ => { stderr_done = true; }
+                    }
+                }
+                _ = tokio::time::sleep(remaining) => {
+                    timed_out = true;
+                    let _ = child.kill().await;
+                    break;
+                }
+            }
+        }
+
+        let exit_code = child.wait().await.ok().and_then(|s| s.code()).unwrap_or(-1);
+
+        let summary = json!({
+            "exit_code": exit_code,
+            "timed_out": timed_out,
+        });
+
+        let final_resp = JsonRpcResponse::success(
+            final_id,
+            json!({
+                "content": [{"type": "text", "text": summary.to_string()}]
+            }),
+        );
+        if let Ok(s) = serde_json::to_string(&final_resp) {
+            yield Ok::<_, std::io::Error>(format!("data: {s}\n\n"));
+        }
+    };
+
+    Response::builder()
+        .status(poem::http::StatusCode::OK)
+        .header("Content-Type", "text/event-stream")
+        .header("Cache-Control", "no-cache")
+        .body(Body::from_bytes_stream(stream.map(|r| {
+            r.map(Bytes::from)
+        })))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use serde_json::json;
+
+    fn test_ctx(dir: &std::path::Path) -> AppContext {
+        AppContext::new_test(dir.to_path_buf())
+    }
+
+    // ── is_dangerous ─────────────────────────────────────────────────
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_root() {
+        assert!(is_dangerous("rm -rf /").is_some());
+        assert!(is_dangerous("  rm -rf /  ").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_fr_root() {
+        assert!(is_dangerous("rm -fr /").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_star() {
+        assert!(is_dangerous("rm -rf /*").is_some());
+        assert!(is_dangerous("rm -fr /*").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_sudo() {
+        assert!(is_dangerous("sudo ls").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_shutdown() {
+        assert!(is_dangerous("shutdown -h now").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_mkfs() {
+        assert!(is_dangerous("mkfs /dev/sda1").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_fork_bomb() {
+        assert!(is_dangerous(":(){ :|:& };:").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_allows_safe_commands() {
+        assert!(is_dangerous("cargo build").is_none());
+        assert!(is_dangerous("npm test").is_none());
+        assert!(is_dangerous("git status").is_none());
+        assert!(is_dangerous("ls -la").is_none());
+        assert!(is_dangerous("rm -rf target/").is_none());
+    }
+
+    // ── validate_working_dir ──────────────────────────────────────────
+
+    #[test]
+    fn validate_working_dir_rejects_relative_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir("relative/path", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("absolute"));
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_nonexistent_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir("/nonexistent_path_xyz_abc", &ctx);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("does not exist"));
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_path_outside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Create the worktrees dir so it exists
+        let wt_dir = tmp.path().join(".storkit").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        // Try to use /tmp (outside worktrees)
+        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
+        assert!(result.is_err());
+        assert!(
+            result.unwrap_err().contains("inside .storkit/worktrees"),
+            "expected sandbox error"
+        );
+    }
+
+    #[test]
+    fn validate_working_dir_accepts_path_inside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("42_test_story");
+        std::fs::create_dir_all(&story_wt).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir(story_wt.to_str().unwrap(), &ctx);
+        assert!(result.is_ok(), "expected Ok, got: {:?}", result);
+    }
+
+    #[test]
+    fn validate_working_dir_rejects_no_worktrees_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Do NOT create worktrees dir
+        let ctx = test_ctx(tmp.path());
+        let result = validate_working_dir(tmp.path().to_str().unwrap(), &ctx);
+        assert!(result.is_err());
+    }
+
+    // ── tool_run_command ───────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn tool_run_command_missing_command() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(&json!({"working_dir": "/tmp"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("command"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_missing_working_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(&json!({"command": "ls"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("working_dir"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_blocks_dangerous_command() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({"command": "rm -rf /", "working_dir": "/tmp"}),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("blocked"));
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_rejects_path_outside_worktrees() {
+        let tmp = tempfile::tempdir().unwrap();
+        let wt_dir = tmp.path().join(".storkit").join("worktrees");
+        std::fs::create_dir_all(&wt_dir).unwrap();
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "ls",
+                "working_dir": tmp.path().to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await;
+        assert!(result.is_err());
+        assert!(
+            result.unwrap_err().contains("worktrees"),
+            "expected sandbox error"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_runs_in_worktree_and_returns_output() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("42_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+        std::fs::write(story_wt.join("canary.txt"), "hello").unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "ls",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 0);
+        assert!(parsed["stdout"].as_str().unwrap().contains("canary.txt"));
+        assert_eq!(parsed["timed_out"], false);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_captures_nonzero_exit_code() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("43_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "exit 42",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["exit_code"], 42);
+        assert_eq!(parsed["timed_out"], false);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_timeout_returns_timed_out_true() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("44_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "sleep 10",
+                "working_dir": story_wt.to_str().unwrap(),
+                "timeout": 1
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert_eq!(parsed["timed_out"], true);
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_captures_stderr() {
+        let tmp = tempfile::tempdir().unwrap();
+        let story_wt = tmp
+            .path()
+            .join(".storkit")
+            .join("worktrees")
+            .join("45_test");
+        std::fs::create_dir_all(&story_wt).unwrap();
+
+        let ctx = test_ctx(tmp.path());
+        let result = tool_run_command(
+            &json!({
+                "command": "echo 'error msg' >&2",
+                "working_dir": story_wt.to_str().unwrap()
+            }),
+            &ctx,
+        )
+        .await
+        .unwrap();
+
+        let parsed: Value = serde_json::from_str(&result).unwrap();
+        assert!(
+            parsed["stderr"].as_str().unwrap().contains("error msg"),
+            "expected stderr: {parsed}"
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_run_command_clamps_timeout_to_max() {
+        // Verify timeout > 600 is clamped to 600. We don't run a 600s sleep;
+        // just confirm the tool accepts the arg without error (sandbox check will
+        // fail first in a different test, here we test the arg parsing path).
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = test_ctx(tmp.path());
+        // Will fail at working_dir validation, not timeout parsing — that's fine
+        let result = tool_run_command(
+            &json!({"command": "ls", "working_dir": "/tmp", "timeout": 9999}),
+            &ctx,
+        )
+        .await;
+        // Just ensure it doesn't panic and returns an Err about sandbox (not timeout)
+        assert!(result.is_err());
+    }
+}
--- a/server/src/http/mcp/story_tools.rs
+++ b/server/src/http/mcp/story_tools.rs
--- a/server/src/http/mcp/whatsup_tools.rs
+++ b/server/src/http/mcp/whatsup_tools.rs
@@ -0,0 +1,364 @@
+use crate::http::context::AppContext;
+use serde_json::{Value, json};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// Parse all AC items from a story file, returning (text, is_checked) pairs.
+fn parse_ac_items(contents: &str) -> Vec<(String, bool)> {
+    let mut in_ac_section = false;
+    let mut items = Vec::new();
+
+    for line in contents.lines() {
+        let trimmed = line.trim();
+        if trimmed == "## Acceptance Criteria" {
+            in_ac_section = true;
+            continue;
+        }
+        // Stop at the next heading
+        if in_ac_section && trimmed.starts_with("## ") {
+            break;
+        }
+        if in_ac_section {
+            if let Some(rest) = trimmed.strip_prefix("- [x] ").or(trimmed.strip_prefix("- [X] ")) {
+                items.push((rest.to_string(), true));
+            } else if let Some(rest) = trimmed.strip_prefix("- [ ] ") {
+                items.push((rest.to_string(), false));
+            }
+        }
+    }
+
+    items
+}
+
+/// Find the most recent log file for any agent under `.storkit/logs/{story_id}/`.
+fn find_most_recent_log(project_root: &Path, story_id: &str) -> Option<PathBuf> {
+    let dir = project_root
+        .join(".storkit")
+        .join("logs")
+        .join(story_id);
+
+    if !dir.is_dir() {
+        return None;
+    }
+
+    let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
+
+    let entries = fs::read_dir(&dir).ok()?;
+    for entry in entries.flatten() {
+        let path = entry.path();
+        let name = match path.file_name().and_then(|n| n.to_str()) {
+            Some(n) => n.to_string(),
+            None => continue,
+        };
+        if !name.ends_with(".log") {
+            continue;
+        }
+        let modified = match entry.metadata().and_then(|m| m.modified()) {
+            Ok(t) => t,
+            Err(_) => continue,
+        };
+        if best.as_ref().is_none_or(|(_, t)| modified > *t) {
+            best = Some((path, modified));
+        }
+    }
+
+    best.map(|(p, _)| p)
+}
+
+/// Return the last N raw lines from a file.
+fn last_n_lines(path: &Path, n: usize) -> Result<Vec<String>, String> {
+    let content =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read log file: {e}"))?;
+    let lines: Vec<String> = content
+        .lines()
+        .rev()
+        .take(n)
+        .map(|l| l.to_string())
+        .collect::<Vec<_>>()
+        .into_iter()
+        .rev()
+        .collect();
+    Ok(lines)
+}
+
+/// Run `git diff --stat {base}...HEAD` in the worktree.
+async fn git_diff_stat(worktree: &Path, base: &str) -> Option<String> {
+    let dir = worktree.to_path_buf();
+    let base_arg = format!("{base}...HEAD");
+    tokio::task::spawn_blocking(move || {
+        let output = std::process::Command::new("git")
+            .args(["diff", "--stat", &base_arg])
+            .current_dir(&dir)
+            .output()
+            .ok()?;
+        if output.status.success() {
+            Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
+        } else {
+            None
+        }
+    })
+    .await
+    .ok()
+    .flatten()
+}
+
+/// Return the last N commit messages on the current branch relative to base.
+async fn git_log_commits(worktree: &Path, base: &str, count: usize) -> Option<Vec<String>> {
+    let dir = worktree.to_path_buf();
+    let range = format!("{base}..HEAD");
+    let count_str = count.to_string();
+    tokio::task::spawn_blocking(move || {
+        let output = std::process::Command::new("git")
+            .args(["log", &range, "--oneline", &format!("-{count_str}")])
+            .current_dir(&dir)
+            .output()
+            .ok()?;
+        if output.status.success() {
+            let lines: Vec<String> = String::from_utf8(output.stdout)
+                .ok()?
+                .lines()
+                .filter(|l| !l.is_empty())
+                .map(|l| l.to_string())
+                .collect();
+            Some(lines)
+        } else {
+            None
+        }
+    })
+    .await
+    .ok()
+    .flatten()
+}
+
+/// Return the active branch name for the given directory.
+async fn git_branch(dir: &Path) -> Option<String> {
+    let dir = dir.to_path_buf();
+    tokio::task::spawn_blocking(move || {
+        let output = std::process::Command::new("git")
+            .args(["rev-parse", "--abbrev-ref", "HEAD"])
+            .current_dir(&dir)
+            .output()
+            .ok()?;
+        if output.status.success() {
+            Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
+        } else {
+            None
+        }
+    })
+    .await
+    .ok()
+    .flatten()
+}
+
+pub(super) async fn tool_whatsup(args: &Value, ctx: &AppContext) -> Result<String, String> {
+    let story_id = args
+        .get("story_id")
+        .and_then(|v| v.as_str())
+        .ok_or("Missing required argument: story_id")?;
+
+    let root = ctx.state.get_project_root()?;
+    let current_dir = root.join(".storkit").join("work").join("2_current");
+    let filepath = current_dir.join(format!("{story_id}.md"));
+
+    if !filepath.exists() {
+        return Err(format!(
+            "Story '{story_id}' not found in work/2_current/. Check the story_id and ensure it is in the current stage."
+        ));
+    }
+
+    let contents =
+        fs::read_to_string(&filepath).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    // --- Front matter ---
+    let mut front_matter = serde_json::Map::new();
+    if let Ok(meta) = crate::io::story_metadata::parse_front_matter(&contents) {
+        if let Some(name) = &meta.name {
+            front_matter.insert("name".to_string(), json!(name));
+        }
+        if let Some(agent) = &meta.agent {
+            front_matter.insert("agent".to_string(), json!(agent));
+        }
+        if let Some(true) = meta.blocked {
+            front_matter.insert("blocked".to_string(), json!(true));
+        }
+        if let Some(qa) = &meta.qa {
+            front_matter.insert("qa".to_string(), json!(qa.as_str()));
+        }
+        if let Some(rc) = meta.retry_count
+            && rc > 0
+        {
+            front_matter.insert("retry_count".to_string(), json!(rc));
+        }
+        if let Some(mf) = &meta.merge_failure {
+            front_matter.insert("merge_failure".to_string(), json!(mf));
+        }
+        if let Some(rh) = meta.review_hold
+            && rh
+        {
+            front_matter.insert("review_hold".to_string(), json!(rh));
+        }
+    }
+
+    // --- AC checklist ---
+    let ac_items: Vec<Value> = parse_ac_items(&contents)
+        .into_iter()
+        .map(|(text, checked)| json!({ "text": text, "checked": checked }))
+        .collect();
+
+    // --- Worktree ---
+    let worktree_path = root.join(".storkit").join("worktrees").join(story_id);
+    let (_, worktree_info) = if worktree_path.is_dir() {
+        let branch = git_branch(&worktree_path).await;
+        (
+            branch.clone(),
+            Some(json!({
+                "path": worktree_path.to_string_lossy(),
+                "branch": branch,
+            })),
+        )
+    } else {
+        (None, None)
+    };
+
+    // --- Git diff stat ---
+    let diff_stat = if worktree_path.is_dir() {
+        git_diff_stat(&worktree_path, "master").await
+    } else {
+        None
+    };
+
+    // --- Last 5 commits ---
+    let commits = if worktree_path.is_dir() {
+        git_log_commits(&worktree_path, "master", 5).await
+    } else {
+        None
+    };
+
+    // --- Most recent agent log (last 20 lines) ---
+    let agent_log = match find_most_recent_log(&root, story_id) {
+        Some(log_path) => {
+            let filename = log_path
+                .file_name()
+                .and_then(|n| n.to_str())
+                .unwrap_or("")
+                .to_string();
+            match last_n_lines(&log_path, 20) {
+                Ok(lines) => Some(json!({
+                    "file": filename,
+                    "lines": lines,
+                })),
+                Err(_) => None,
+            }
+        }
+        None => None,
+    };
+
+    let result = json!({
+        "story_id": story_id,
+        "front_matter": front_matter,
+        "acceptance_criteria": ac_items,
+        "worktree": worktree_info,
+        "git_diff_stat": diff_stat,
+        "commits": commits,
+        "agent_log": agent_log,
+    });
+
+    serde_json::to_string_pretty(&result).map_err(|e| format!("Serialization error: {e}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn parse_ac_items_returns_checked_and_unchecked() {
+        let content = "---\nname: test\n---\n\n## Acceptance Criteria\n\n- [ ] item one\n- [x] item two\n- [X] item three\n\n## Out of Scope\n\n- [ ] not an ac\n";
+        let items = parse_ac_items(content);
+        assert_eq!(items.len(), 3);
+        assert_eq!(items[0], ("item one".to_string(), false));
+        assert_eq!(items[1], ("item two".to_string(), true));
+        assert_eq!(items[2], ("item three".to_string(), true));
+    }
+
+    #[test]
+    fn parse_ac_items_empty_when_no_section() {
+        let content = "---\nname: test\n---\n\nNo AC section here.\n";
+        let items = parse_ac_items(content);
+        assert!(items.is_empty());
+    }
+
+    #[test]
+    fn find_most_recent_log_returns_none_for_missing_dir() {
+        let tmp = tempdir().unwrap();
+        let result = find_most_recent_log(tmp.path(), "nonexistent_story");
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn find_most_recent_log_returns_newest_file() {
+        let tmp = tempdir().unwrap();
+        let log_dir = tmp
+            .path()
+            .join(".storkit")
+            .join("logs")
+            .join("42_story_foo");
+        fs::create_dir_all(&log_dir).unwrap();
+
+        let old_path = log_dir.join("coder-1-sess-old.log");
+        fs::write(&old_path, "old content").unwrap();
+
+        // Ensure different mtime
+        std::thread::sleep(std::time::Duration::from_millis(50));
+
+        let new_path = log_dir.join("coder-1-sess-new.log");
+        fs::write(&new_path, "new content").unwrap();
+
+        let result = find_most_recent_log(tmp.path(), "42_story_foo").unwrap();
+        assert!(
+            result.to_string_lossy().contains("sess-new"),
+            "Expected newest file, got: {}",
+            result.display()
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_whatsup_returns_error_for_missing_story() {
+        let tmp = tempdir().unwrap();
+        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
+        let result = tool_whatsup(&json!({"story_id": "999_story_nonexistent"}), &ctx).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not found in work/2_current/"));
+    }
+
+    #[tokio::test]
+    async fn tool_whatsup_returns_story_data() {
+        let tmp = tempdir().unwrap();
+        let current_dir = tmp
+            .path()
+            .join(".storkit")
+            .join("work")
+            .join("2_current");
+        fs::create_dir_all(&current_dir).unwrap();
+
+        let story_content = "---\nname: My Test Story\nagent: coder-1\n---\n\n## Acceptance Criteria\n\n- [ ] First criterion\n- [x] Second criterion\n\n## Out of Scope\n\n- nothing\n";
+        fs::write(current_dir.join("42_story_test.md"), story_content).unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
+        let result = tool_whatsup(&json!({"story_id": "42_story_test"}), &ctx)
+            .await
+            .unwrap();
+        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
+
+        assert_eq!(parsed["story_id"], "42_story_test");
+        assert_eq!(parsed["front_matter"]["name"], "My Test Story");
+        assert_eq!(parsed["front_matter"]["agent"], "coder-1");
+
+        let ac = parsed["acceptance_criteria"].as_array().unwrap();
+        assert_eq!(ac.len(), 2);
+        assert_eq!(ac[0]["text"], "First criterion");
+        assert_eq!(ac[0]["checked"], false);
+        assert_eq!(ac[1]["text"], "Second criterion");
+        assert_eq!(ac[1]["checked"], true);
+    }
+}
--- a/server/src/http/mod.rs
+++ b/server/src/http/mod.rs
@@ -0,0 +1,215 @@
+pub mod agents;
+pub mod agents_sse;
+pub mod anthropic;
+pub mod assets;
+pub mod chat;
+pub mod context;
+pub mod health;
+pub mod io;
+pub mod mcp;
+pub mod model;
+pub mod settings;
+pub mod workflow;
+
+pub mod project;
+pub mod ws;
+
+use agents::AgentsApi;
+use anthropic::AnthropicApi;
+use chat::ChatApi;
+use context::AppContext;
+use health::HealthApi;
+use io::IoApi;
+use model::ModelApi;
+use poem::EndpointExt;
+use poem::{Route, get, post};
+use poem_openapi::OpenApiService;
+use project::ProjectApi;
+use settings::SettingsApi;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use crate::slack::SlackWebhookContext;
+use crate::whatsapp::WhatsAppWebhookContext;
+
+const DEFAULT_PORT: u16 = 3001;
+
+pub fn parse_port(value: Option<String>) -> u16 {
+    value
+        .and_then(|v| v.parse::<u16>().ok())
+        .unwrap_or(DEFAULT_PORT)
+}
+
+pub fn resolve_port() -> u16 {
+    parse_port(std::env::var("STORKIT_PORT").ok())
+}
+
+pub fn write_port_file(dir: &Path, port: u16) -> Option<PathBuf> {
+    let path = dir.join(".storkit_port");
+    std::fs::write(&path, port.to_string()).ok()?;
+    Some(path)
+}
+
+pub fn remove_port_file(path: &Path) {
+    let _ = std::fs::remove_file(path);
+}
+
+pub fn build_routes(
+    ctx: AppContext,
+    whatsapp_ctx: Option<Arc<WhatsAppWebhookContext>>,
+    slack_ctx: Option<Arc<SlackWebhookContext>>,
+) -> impl poem::Endpoint {
+    let ctx_arc = std::sync::Arc::new(ctx);
+
+    let (api_service, docs_service) = build_openapi_service(ctx_arc.clone());
+
+    let mut route = Route::new()
+        .nest("/api", api_service)
+        .nest("/docs", docs_service.swagger_ui())
+        .at("/ws", get(ws::ws_handler))
+        .at(
+            "/agents/:story_id/:agent_name/stream",
+            get(agents_sse::agent_stream),
+        )
+        .at(
+            "/mcp",
+            post(mcp::mcp_post_handler).get(mcp::mcp_get_handler),
+        )
+        .at("/health", get(health::health))
+        .at("/assets/*path", get(assets::embedded_asset))
+        .at("/", get(assets::embedded_index))
+        .at("/*path", get(assets::embedded_file));
+
+    if let Some(wa_ctx) = whatsapp_ctx {
+        route = route.at(
+            "/webhook/whatsapp",
+            get(crate::whatsapp::webhook_verify)
+                .post(crate::whatsapp::webhook_receive)
+                .data(wa_ctx),
+        );
+    }
+
+    if let Some(sl_ctx) = slack_ctx {
+        route = route
+            .at(
+                "/webhook/slack",
+                post(crate::slack::webhook_receive).data(sl_ctx.clone()),
+            )
+            .at(
+                "/webhook/slack/command",
+                post(crate::slack::slash_command_receive).data(sl_ctx),
+            );
+    }
+
+    route.data(ctx_arc)
+}
+
+type ApiTuple = (
+    ProjectApi,
+    ModelApi,
+    AnthropicApi,
+    IoApi,
+    ChatApi,
+    AgentsApi,
+    SettingsApi,
+    HealthApi,
+);
+
+type ApiService = OpenApiService<ApiTuple, ()>;
+
+/// All HTTP methods are documented by OpenAPI at /docs
+pub fn build_openapi_service(ctx: Arc<AppContext>) -> (ApiService, ApiService) {
+    let api = (
+        ProjectApi { ctx: ctx.clone() },
+        ModelApi { ctx: ctx.clone() },
+        AnthropicApi::new(ctx.clone()),
+        IoApi { ctx: ctx.clone() },
+        ChatApi { ctx: ctx.clone() },
+        AgentsApi { ctx: ctx.clone() },
+        SettingsApi { ctx: ctx.clone() },
+        HealthApi,
+    );
+
+    let api_service =
+        OpenApiService::new(api, "Storkit API", "1.0").server("http://127.0.0.1:3001/api");
+
+    let docs_api = (
+        ProjectApi { ctx: ctx.clone() },
+        ModelApi { ctx: ctx.clone() },
+        AnthropicApi::new(ctx.clone()),
+        IoApi { ctx: ctx.clone() },
+        ChatApi { ctx: ctx.clone() },
+        AgentsApi { ctx: ctx.clone() },
+        SettingsApi { ctx },
+        HealthApi,
+    );
+
+    let docs_service =
+        OpenApiService::new(docs_api, "Storkit API", "1.0").server("http://127.0.0.1:3001/api");
+
+    (api_service, docs_service)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_port_defaults_to_3001() {
+        assert_eq!(parse_port(None), 3001);
+    }
+
+    #[test]
+    fn parse_port_reads_valid_value() {
+        assert_eq!(parse_port(Some("4200".to_string())), 4200);
+    }
+
+    #[test]
+    fn parse_port_ignores_invalid_value() {
+        assert_eq!(parse_port(Some("not_a_number".to_string())), 3001);
+    }
+
+    #[test]
+    fn write_and_remove_port_file() {
+        let tmp = tempfile::tempdir().unwrap();
+
+        let path = write_port_file(tmp.path(), 4567).expect("should write port file");
+        assert_eq!(std::fs::read_to_string(&path).unwrap(), "4567");
+
+        remove_port_file(&path);
+        assert!(!path.exists());
+    }
+
+    #[test]
+    fn write_port_file_returns_none_on_nonexistent_dir() {
+        let bad = std::path::Path::new("/this_dir_does_not_exist_storykit_test_xyz");
+        assert!(write_port_file(bad, 1234).is_none());
+    }
+
+    #[test]
+    fn remove_port_file_does_not_panic_for_missing_file() {
+        let path = std::path::Path::new("/tmp/nonexistent_storykit_port_test_xyz_999");
+        remove_port_file(path);
+    }
+
+    #[test]
+    fn resolve_port_returns_a_valid_port() {
+        // Exercises the resolve_port code path (reads STORKIT_PORT env var or defaults).
+        let port = resolve_port();
+        assert!(port > 0);
+    }
+
+    #[test]
+    fn build_openapi_service_constructs_without_panic() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = Arc::new(context::AppContext::new_test(tmp.path().to_path_buf()));
+        let (_api_service, _docs_service) = build_openapi_service(ctx);
+    }
+
+    #[test]
+    fn build_routes_constructs_without_panic() {
+        let tmp = tempfile::tempdir().unwrap();
+        let ctx = context::AppContext::new_test(tmp.path().to_path_buf());
+        let _endpoint = build_routes(ctx, None, None);
+    }
+}
--- a/server/src/http/model.rs
+++ b/server/src/http/model.rs
@@ -0,0 +1,129 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::io::fs;
+use crate::llm::chat;
+use poem_openapi::{Object, OpenApi, Tags, param::Query, payload::Json};
+use serde::Deserialize;
+use std::sync::Arc;
+
+#[derive(Tags)]
+enum ModelTags {
+    Model,
+}
+
+#[derive(Deserialize, Object)]
+struct ModelPayload {
+    model: String,
+}
+
+pub struct ModelApi {
+    pub ctx: Arc<AppContext>,
+}
+
+#[OpenApi(tag = "ModelTags::Model")]
+impl ModelApi {
+    /// Get the currently selected model preference, if any.
+    #[oai(path = "/model", method = "get")]
+    async fn get_model_preference(&self) -> OpenApiResult<Json<Option<String>>> {
+        let result = fs::get_model_preference(self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(result))
+    }
+
+    /// Persist the selected model preference.
+    #[oai(path = "/model", method = "post")]
+    async fn set_model_preference(&self, payload: Json<ModelPayload>) -> OpenApiResult<Json<bool>> {
+        fs::set_model_preference(payload.0.model, self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(true))
+    }
+
+    /// Fetch available model names from an Ollama server.
+    /// Optionally override the base URL via query string.
+    /// Returns an empty list when Ollama is unreachable so the UI stays functional.
+    #[oai(path = "/ollama/models", method = "get")]
+    async fn get_ollama_models(
+        &self,
+        base_url: Query<Option<String>>,
+    ) -> OpenApiResult<Json<Vec<String>>> {
+        let models = chat::get_ollama_models(base_url.0)
+            .await
+            .unwrap_or_default();
+        Ok(Json(models))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use tempfile::TempDir;
+
+    fn make_api(dir: &TempDir) -> ModelApi {
+        ModelApi {
+            ctx: Arc::new(AppContext::new_test(dir.path().to_path_buf())),
+        }
+    }
+
+    #[tokio::test]
+    async fn get_model_preference_returns_none_when_unset() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.get_model_preference().await.unwrap();
+        assert!(result.0.is_none());
+    }
+
+    #[tokio::test]
+    async fn set_model_preference_returns_true() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(ModelPayload {
+            model: "claude-3-sonnet".to_string(),
+        });
+        let result = api.set_model_preference(payload).await.unwrap();
+        assert!(result.0);
+    }
+
+    #[tokio::test]
+    async fn get_model_preference_returns_value_after_set() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+
+        let payload = Json(ModelPayload {
+            model: "claude-3-sonnet".to_string(),
+        });
+        api.set_model_preference(payload).await.unwrap();
+
+        let result = api.get_model_preference().await.unwrap();
+        assert_eq!(result.0, Some("claude-3-sonnet".to_string()));
+    }
+
+    #[tokio::test]
+    async fn set_model_preference_overwrites_previous_value() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+
+        api.set_model_preference(Json(ModelPayload {
+            model: "model-a".to_string(),
+        }))
+        .await
+        .unwrap();
+
+        api.set_model_preference(Json(ModelPayload {
+            model: "model-b".to_string(),
+        }))
+        .await
+        .unwrap();
+
+        let result = api.get_model_preference().await.unwrap();
+        assert_eq!(result.0, Some("model-b".to_string()));
+    }
+
+    #[tokio::test]
+    async fn get_ollama_models_returns_empty_list_for_unreachable_url() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        // Port 1 is reserved and should immediately refuse the connection.
+        let base_url = Query(Some("http://127.0.0.1:1".to_string()));
+        let result = api.get_ollama_models(base_url).await;
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap().0, Vec::<String>::new());
+    }
+}
--- a/server/src/http/project.rs
+++ b/server/src/http/project.rs
@@ -0,0 +1,213 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::io::fs;
+use poem_openapi::{Object, OpenApi, Tags, payload::Json};
+use serde::Deserialize;
+use std::sync::Arc;
+
+#[derive(Tags)]
+enum ProjectTags {
+    Project,
+}
+
+#[derive(Deserialize, Object)]
+struct PathPayload {
+    path: String,
+}
+
+pub struct ProjectApi {
+    pub ctx: Arc<AppContext>,
+}
+
+#[OpenApi(tag = "ProjectTags::Project")]
+impl ProjectApi {
+    /// Get the currently open project path (if any).
+    ///
+    /// Returns null when no project is open.
+    #[oai(path = "/project", method = "get")]
+    async fn get_current_project(&self) -> OpenApiResult<Json<Option<String>>> {
+        let result = fs::get_current_project(&self.ctx.state, self.ctx.store.as_ref())
+            .map_err(bad_request)?;
+        Ok(Json(result))
+    }
+
+    /// Open a project and set it as the current project.
+    ///
+    /// Persists the selected path for later sessions.
+    #[oai(path = "/project", method = "post")]
+    async fn open_project(&self, payload: Json<PathPayload>) -> OpenApiResult<Json<String>> {
+        let confirmed = fs::open_project(
+            payload.0.path,
+            &self.ctx.state,
+            self.ctx.store.as_ref(),
+        )
+        .await
+        .map_err(bad_request)?;
+        Ok(Json(confirmed))
+    }
+
+    /// Close the current project and clear the stored selection.
+    #[oai(path = "/project", method = "delete")]
+    async fn close_project(&self) -> OpenApiResult<Json<bool>> {
+        // TRACE:MERGE-DEBUG — remove once root cause is found
+        crate::slog_error!(
+            "[MERGE-DEBUG] DELETE /project called! \
+             Backtrace: this is the only code path that clears project_root."
+        );
+        fs::close_project(&self.ctx.state, self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(true))
+    }
+
+    /// List known projects from the store.
+    #[oai(path = "/projects", method = "get")]
+    async fn list_known_projects(&self) -> OpenApiResult<Json<Vec<String>>> {
+        let projects = fs::get_known_projects(self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(projects))
+    }
+
+    /// Forget a known project path.
+    #[oai(path = "/projects/forget", method = "post")]
+    async fn forget_known_project(&self, payload: Json<PathPayload>) -> OpenApiResult<Json<bool>> {
+        fs::forget_known_project(payload.0.path, self.ctx.store.as_ref()).map_err(bad_request)?;
+        Ok(Json(true))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use tempfile::TempDir;
+
+    fn make_api(dir: &TempDir) -> ProjectApi {
+        ProjectApi {
+            ctx: Arc::new(AppContext::new_test(dir.path().to_path_buf())),
+        }
+    }
+
+    #[tokio::test]
+    async fn get_current_project_returns_none_when_unset() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        // Clear the project root that new_test sets
+        api.close_project().await.unwrap();
+        let result = api.get_current_project().await.unwrap();
+        assert!(result.0.is_none());
+    }
+
+    #[tokio::test]
+    async fn get_current_project_returns_path_from_state() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.get_current_project().await.unwrap();
+        assert_eq!(result.0, Some(dir.path().to_string_lossy().to_string()));
+    }
+
+    #[tokio::test]
+    async fn open_project_succeeds_with_valid_directory() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let path = dir.path().to_string_lossy().to_string();
+        let payload = Json(PathPayload { path: path.clone() });
+        let result = api.open_project(payload).await.unwrap();
+        assert_eq!(result.0, path);
+    }
+
+    #[tokio::test]
+    async fn open_project_fails_with_nonexistent_file_path() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        // Create a file (not a directory) to trigger validation error
+        let file_path = dir.path().join("not_a_dir.txt");
+        std::fs::write(&file_path, "content").unwrap();
+        let payload = Json(PathPayload {
+            path: file_path.to_string_lossy().to_string(),
+        });
+        let result = api.open_project(payload).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn close_project_returns_true() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.close_project().await.unwrap();
+        assert!(result.0);
+    }
+
+    #[tokio::test]
+    async fn close_project_clears_current_project() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+
+        // Verify project is set initially
+        let before = api.get_current_project().await.unwrap();
+        assert!(before.0.is_some());
+
+        // Close the project
+        api.close_project().await.unwrap();
+
+        // Verify project is now None
+        let after = api.get_current_project().await.unwrap();
+        assert!(after.0.is_none());
+    }
+
+    #[tokio::test]
+    async fn list_known_projects_returns_empty_initially() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        // Close the project so the store has no known projects
+        api.close_project().await.unwrap();
+        let result = api.list_known_projects().await.unwrap();
+        assert!(result.0.is_empty());
+    }
+
+    #[tokio::test]
+    async fn list_known_projects_returns_project_after_open() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let path = dir.path().to_string_lossy().to_string();
+
+        api.open_project(Json(PathPayload { path: path.clone() }))
+            .await
+            .unwrap();
+
+        let result = api.list_known_projects().await.unwrap();
+        assert!(result.0.contains(&path));
+    }
+
+    #[tokio::test]
+    async fn forget_known_project_removes_project() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let path = dir.path().to_string_lossy().to_string();
+
+        api.open_project(Json(PathPayload { path: path.clone() }))
+            .await
+            .unwrap();
+
+        let before = api.list_known_projects().await.unwrap();
+        assert!(before.0.contains(&path));
+
+        let result = api
+            .forget_known_project(Json(PathPayload { path: path.clone() }))
+            .await
+            .unwrap();
+        assert!(result.0);
+
+        let after = api.list_known_projects().await.unwrap();
+        assert!(!after.0.contains(&path));
+    }
+
+    #[tokio::test]
+    async fn forget_known_project_returns_true_for_nonexistent_path() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api
+            .forget_known_project(Json(PathPayload {
+                path: "/some/unknown/path".to_string(),
+            }))
+            .await
+            .unwrap();
+        assert!(result.0);
+    }
+}
--- a/server/src/http/settings.rs
+++ b/server/src/http/settings.rs
@@ -0,0 +1,369 @@
+use crate::http::context::{AppContext, OpenApiResult, bad_request};
+use crate::store::StoreOps;
+use poem_openapi::{Object, OpenApi, Tags, param::Query, payload::Json};
+use serde::Serialize;
+use serde_json::json;
+use std::sync::Arc;
+
+const EDITOR_COMMAND_KEY: &str = "editor_command";
+
+#[derive(Tags)]
+enum SettingsTags {
+    Settings,
+}
+
+#[derive(Object)]
+struct EditorCommandPayload {
+    editor_command: Option<String>,
+}
+
+#[derive(Object, Serialize)]
+struct EditorCommandResponse {
+    editor_command: Option<String>,
+}
+
+#[derive(Debug, Object, Serialize)]
+struct OpenFileResponse {
+    success: bool,
+}
+
+pub struct SettingsApi {
+    pub ctx: Arc<AppContext>,
+}
+
+#[OpenApi(tag = "SettingsTags::Settings")]
+impl SettingsApi {
+    /// Get the configured editor command (e.g. "zed", "code", "cursor"), or null if not set.
+    #[oai(path = "/settings/editor", method = "get")]
+    async fn get_editor(&self) -> OpenApiResult<Json<EditorCommandResponse>> {
+        let editor_command = self
+            .ctx
+            .store
+            .get(EDITOR_COMMAND_KEY)
+            .and_then(|v| v.as_str().map(|s| s.to_string()));
+        Ok(Json(EditorCommandResponse { editor_command }))
+    }
+
+    /// Open a file in the configured editor at the given line number.
+    ///
+    /// Invokes the stored editor CLI (e.g. "zed", "code") with `path:line` as the argument.
+    /// Returns an error if no editor is configured or if the process fails to spawn.
+    #[oai(path = "/settings/open-file", method = "post")]
+    async fn open_file(
+        &self,
+        path: Query<String>,
+        line: Query<Option<u32>>,
+    ) -> OpenApiResult<Json<OpenFileResponse>> {
+        let editor_command = get_editor_command_from_store(&self.ctx)
+            .ok_or_else(|| bad_request("No editor configured".to_string()))?;
+
+        let file_ref = match line.0 {
+            Some(l) => format!("{}:{}", path.0, l),
+            None => path.0.clone(),
+        };
+
+        std::process::Command::new(&editor_command)
+            .arg(&file_ref)
+            .spawn()
+            .map_err(|e| bad_request(format!("Failed to open editor: {e}")))?;
+
+        Ok(Json(OpenFileResponse { success: true }))
+    }
+
+    /// Set the preferred editor command (e.g. "zed", "code", "cursor").
+    /// Pass null or empty string to clear the preference.
+    #[oai(path = "/settings/editor", method = "put")]
+    async fn set_editor(
+        &self,
+        payload: Json<EditorCommandPayload>,
+    ) -> OpenApiResult<Json<EditorCommandResponse>> {
+        let editor_command = payload.0.editor_command;
+        let trimmed = editor_command.as_deref().map(str::trim).filter(|s| !s.is_empty());
+        match trimmed {
+            Some(cmd) => {
+                self.ctx.store.set(EDITOR_COMMAND_KEY, json!(cmd));
+                self.ctx.store.save().map_err(bad_request)?;
+                Ok(Json(EditorCommandResponse {
+                    editor_command: Some(cmd.to_string()),
+                }))
+            }
+            None => {
+                self.ctx.store.delete(EDITOR_COMMAND_KEY);
+                self.ctx.store.save().map_err(bad_request)?;
+                Ok(Json(EditorCommandResponse {
+                    editor_command: None,
+                }))
+            }
+        }
+    }
+}
+
+pub fn get_editor_command_from_store(ctx: &AppContext) -> Option<String> {
+    ctx.store
+        .get(EDITOR_COMMAND_KEY)
+        .and_then(|v| v.as_str().map(|s| s.to_string()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::http::context::AppContext;
+    use std::sync::Arc;
+    use tempfile::TempDir;
+
+    fn test_ctx(dir: &TempDir) -> AppContext {
+        AppContext::new_test(dir.path().to_path_buf())
+    }
+
+    fn make_api(dir: &TempDir) -> SettingsApi {
+        SettingsApi {
+            ctx: Arc::new(AppContext::new_test(dir.path().to_path_buf())),
+        }
+    }
+
+    #[tokio::test]
+    async fn get_editor_returns_none_when_unset() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api.get_editor().await.unwrap();
+        assert!(result.0.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn set_editor_stores_command() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let payload = Json(EditorCommandPayload {
+            editor_command: Some("zed".to_string()),
+        });
+        let result = api.set_editor(payload).await.unwrap();
+        assert_eq!(result.0.editor_command, Some("zed".to_string()));
+    }
+
+    #[tokio::test]
+    async fn set_editor_clears_command_on_null() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("zed".to_string()),
+        }))
+        .await
+        .unwrap();
+        let result = api
+            .set_editor(Json(EditorCommandPayload {
+                editor_command: None,
+            }))
+            .await
+            .unwrap();
+        assert!(result.0.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn set_editor_clears_command_on_empty_string() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api
+            .set_editor(Json(EditorCommandPayload {
+                editor_command: Some(String::new()),
+            }))
+            .await
+            .unwrap();
+        assert!(result.0.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn set_editor_trims_whitespace_only() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api
+            .set_editor(Json(EditorCommandPayload {
+                editor_command: Some("   ".to_string()),
+            }))
+            .await
+            .unwrap();
+        assert!(result.0.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn get_editor_returns_value_after_set() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("cursor".to_string()),
+        }))
+        .await
+        .unwrap();
+        let result = api.get_editor().await.unwrap();
+        assert_eq!(result.0.editor_command, Some("cursor".to_string()));
+    }
+
+    #[test]
+    fn editor_command_defaults_to_null() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        let result = get_editor_command_from_store(&ctx);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn set_editor_command_persists_in_store() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+
+        ctx.store.set(EDITOR_COMMAND_KEY, json!("zed"));
+        ctx.store.save().unwrap();
+
+        let result = get_editor_command_from_store(&ctx);
+        assert_eq!(result, Some("zed".to_string()));
+    }
+
+    #[test]
+    fn get_editor_command_from_store_returns_value() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+
+        ctx.store.set(EDITOR_COMMAND_KEY, json!("code"));
+        let result = get_editor_command_from_store(&ctx);
+        assert_eq!(result, Some("code".to_string()));
+    }
+
+    #[test]
+    fn delete_editor_command_returns_none() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+
+        ctx.store.set(EDITOR_COMMAND_KEY, json!("cursor"));
+        ctx.store.delete(EDITOR_COMMAND_KEY);
+        let result = get_editor_command_from_store(&ctx);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn editor_command_survives_reload() {
+        let dir = TempDir::new().unwrap();
+        let store_path = dir.path().join(".storkit_store.json");
+
+        {
+            let ctx = AppContext::new_test(dir.path().to_path_buf());
+            ctx.store.set(EDITOR_COMMAND_KEY, json!("zed"));
+            ctx.store.save().unwrap();
+        }
+
+        // Reload from disk
+        let store2 = crate::store::JsonFileStore::new(store_path).unwrap();
+        let val = store2.get(EDITOR_COMMAND_KEY);
+        assert_eq!(val, Some(json!("zed")));
+    }
+
+    #[tokio::test]
+    async fn get_editor_http_handler_returns_null_when_not_set() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        let api = SettingsApi {
+            ctx: Arc::new(ctx),
+        };
+        let result = api.get_editor().await.unwrap().0;
+        assert!(result.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn set_editor_http_handler_stores_value() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        let api = SettingsApi {
+            ctx: Arc::new(ctx),
+        };
+        let result = api
+            .set_editor(Json(EditorCommandPayload {
+                editor_command: Some("zed".to_string()),
+            }))
+            .await
+            .unwrap()
+            .0;
+        assert_eq!(result.editor_command, Some("zed".to_string()));
+    }
+
+    #[tokio::test]
+    async fn set_editor_http_handler_clears_value_when_null() {
+        let dir = TempDir::new().unwrap();
+        let ctx = test_ctx(&dir);
+        let api = SettingsApi {
+            ctx: Arc::new(ctx),
+        };
+        // First set a value
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("code".to_string()),
+        }))
+        .await
+        .unwrap();
+        // Now clear it
+        let result = api
+            .set_editor(Json(EditorCommandPayload {
+                editor_command: None,
+            }))
+            .await
+            .unwrap()
+            .0;
+        assert!(result.editor_command.is_none());
+    }
+
+    #[tokio::test]
+    async fn open_file_returns_error_when_no_editor_configured() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        let result = api
+            .open_file(Query("src/main.rs".to_string()), Query(Some(42)))
+            .await;
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert_eq!(err.status(), poem::http::StatusCode::BAD_REQUEST);
+    }
+
+    #[tokio::test]
+    async fn open_file_spawns_editor_with_path_and_line() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        // Configure the editor to "echo" which is a safe no-op command
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("echo".to_string()),
+        }))
+        .await
+        .unwrap();
+        let result = api
+            .open_file(Query("src/main.rs".to_string()), Query(Some(42)))
+            .await
+            .unwrap();
+        assert!(result.0.success);
+    }
+
+    #[tokio::test]
+    async fn open_file_spawns_editor_with_path_only_when_no_line() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("echo".to_string()),
+        }))
+        .await
+        .unwrap();
+        let result = api
+            .open_file(Query("src/lib.rs".to_string()), Query(None))
+            .await
+            .unwrap();
+        assert!(result.0.success);
+    }
+
+    #[tokio::test]
+    async fn open_file_returns_error_for_nonexistent_editor() {
+        let dir = TempDir::new().unwrap();
+        let api = make_api(&dir);
+        api.set_editor(Json(EditorCommandPayload {
+            editor_command: Some("this_editor_does_not_exist_xyz_abc".to_string()),
+        }))
+        .await
+        .unwrap();
+        let result = api
+            .open_file(Query("src/main.rs".to_string()), Query(Some(1)))
+            .await;
+        assert!(result.is_err());
+    }
+}
--- a/server/src/http/workflow/bug_ops.rs
+++ b/server/src/http/workflow/bug_ops.rs
@@ -0,0 +1,586 @@
+use crate::io::story_metadata::parse_front_matter;
+use std::fs;
+use std::path::Path;
+
+use super::{next_item_number, slugify_name};
+
+/// Create a bug file in `work/1_backlog/` with a deterministic filename and auto-commit.
+///
+/// Returns the bug_id (e.g. `"4_bug_login_crash"`).
+pub fn create_bug_file(
+    root: &Path,
+    name: &str,
+    description: &str,
+    steps_to_reproduce: &str,
+    actual_result: &str,
+    expected_result: &str,
+    acceptance_criteria: Option<&[String]>,
+) -> Result<String, String> {
+    let bug_number = next_item_number(root)?;
+    let slug = slugify_name(name);
+
+    if slug.is_empty() {
+        return Err("Name must contain at least one alphanumeric character.".to_string());
+    }
+
+    let filename = format!("{bug_number}_bug_{slug}.md");
+    let bugs_dir = root.join(".storkit").join("work").join("1_backlog");
+    fs::create_dir_all(&bugs_dir)
+        .map_err(|e| format!("Failed to create backlog directory: {e}"))?;
+
+    let filepath = bugs_dir.join(&filename);
+    if filepath.exists() {
+        return Err(format!("Bug file already exists: {filename}"));
+    }
+
+    let bug_id = filepath
+        .file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or_default()
+        .to_string();
+
+    let mut content = String::new();
+    content.push_str("---\n");
+    content.push_str(&format!("name: \"{}\"\n", name.replace('"', "\\\"")));
+    content.push_str("---\n\n");
+    content.push_str(&format!("# Bug {bug_number}: {name}\n\n"));
+    content.push_str("## Description\n\n");
+    content.push_str(description);
+    content.push_str("\n\n");
+    content.push_str("## How to Reproduce\n\n");
+    content.push_str(steps_to_reproduce);
+    content.push_str("\n\n");
+    content.push_str("## Actual Result\n\n");
+    content.push_str(actual_result);
+    content.push_str("\n\n");
+    content.push_str("## Expected Result\n\n");
+    content.push_str(expected_result);
+    content.push_str("\n\n");
+    content.push_str("## Acceptance Criteria\n\n");
+    if let Some(criteria) = acceptance_criteria {
+        for criterion in criteria {
+            content.push_str(&format!("- [ ] {criterion}\n"));
+        }
+    } else {
+        content.push_str("- [ ] Bug is fixed and verified\n");
+    }
+
+    fs::write(&filepath, &content).map_err(|e| format!("Failed to write bug file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+
+    Ok(bug_id)
+}
+
+/// Create a spike file in `work/1_backlog/` with a deterministic filename.
+///
+/// Returns the spike_id (e.g. `"4_spike_filesystem_watcher_architecture"`).
+pub fn create_spike_file(
+    root: &Path,
+    name: &str,
+    description: Option<&str>,
+) -> Result<String, String> {
+    let spike_number = next_item_number(root)?;
+    let slug = slugify_name(name);
+
+    if slug.is_empty() {
+        return Err("Name must contain at least one alphanumeric character.".to_string());
+    }
+
+    let filename = format!("{spike_number}_spike_{slug}.md");
+    let backlog_dir = root.join(".storkit").join("work").join("1_backlog");
+    fs::create_dir_all(&backlog_dir)
+        .map_err(|e| format!("Failed to create backlog directory: {e}"))?;
+
+    let filepath = backlog_dir.join(&filename);
+    if filepath.exists() {
+        return Err(format!("Spike file already exists: {filename}"));
+    }
+
+    let spike_id = filepath
+        .file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or_default()
+        .to_string();
+
+    let mut content = String::new();
+    content.push_str("---\n");
+    content.push_str(&format!("name: \"{}\"\n", name.replace('"', "\\\"")));
+    content.push_str("---\n\n");
+    content.push_str(&format!("# Spike {spike_number}: {name}\n\n"));
+    content.push_str("## Question\n\n");
+    if let Some(desc) = description {
+        content.push_str(desc);
+        content.push('\n');
+    } else {
+        content.push_str("- TBD\n");
+    }
+    content.push('\n');
+    content.push_str("## Hypothesis\n\n");
+    content.push_str("- TBD\n\n");
+    content.push_str("## Timebox\n\n");
+    content.push_str("- TBD\n\n");
+    content.push_str("## Investigation Plan\n\n");
+    content.push_str("- TBD\n\n");
+    content.push_str("## Findings\n\n");
+    content.push_str("- TBD\n\n");
+    content.push_str("## Recommendation\n\n");
+    content.push_str("- TBD\n");
+
+    fs::write(&filepath, &content).map_err(|e| format!("Failed to write spike file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+
+    Ok(spike_id)
+}
+
+/// Create a refactor work item file in `work/1_backlog/`.
+///
+/// Returns the refactor_id (e.g. `"5_refactor_split_agents_rs"`).
+pub fn create_refactor_file(
+    root: &Path,
+    name: &str,
+    description: Option<&str>,
+    acceptance_criteria: Option<&[String]>,
+) -> Result<String, String> {
+    let refactor_number = next_item_number(root)?;
+    let slug = slugify_name(name);
+
+    if slug.is_empty() {
+        return Err("Name must contain at least one alphanumeric character.".to_string());
+    }
+
+    let filename = format!("{refactor_number}_refactor_{slug}.md");
+    let backlog_dir = root.join(".storkit").join("work").join("1_backlog");
+    fs::create_dir_all(&backlog_dir)
+        .map_err(|e| format!("Failed to create backlog directory: {e}"))?;
+
+    let filepath = backlog_dir.join(&filename);
+    if filepath.exists() {
+        return Err(format!("Refactor file already exists: {filename}"));
+    }
+
+    let refactor_id = filepath
+        .file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or_default()
+        .to_string();
+
+    let mut content = String::new();
+    content.push_str("---\n");
+    content.push_str(&format!("name: \"{}\"\n", name.replace('"', "\\\"")));
+    content.push_str("---\n\n");
+    content.push_str(&format!("# Refactor {refactor_number}: {name}\n\n"));
+    content.push_str("## Current State\n\n");
+    content.push_str("- TBD\n\n");
+    content.push_str("## Desired State\n\n");
+    if let Some(desc) = description {
+        content.push_str(desc);
+        content.push('\n');
+    } else {
+        content.push_str("- TBD\n");
+    }
+    content.push('\n');
+    content.push_str("## Acceptance Criteria\n\n");
+    if let Some(criteria) = acceptance_criteria {
+        for criterion in criteria {
+            content.push_str(&format!("- [ ] {criterion}\n"));
+        }
+    } else {
+        content.push_str("- [ ] Refactoring complete and all tests pass\n");
+    }
+    content.push('\n');
+    content.push_str("## Out of Scope\n\n");
+    content.push_str("- TBD\n");
+
+    fs::write(&filepath, &content)
+        .map_err(|e| format!("Failed to write refactor file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+
+    Ok(refactor_id)
+}
+
+/// Returns true if the item stem (filename without extension) is a bug item.
+/// Bug items follow the pattern: {N}_bug_{slug}
+fn is_bug_item(stem: &str) -> bool {
+    // Format: {digits}_bug_{rest}
+    let after_num = stem.trim_start_matches(|c: char| c.is_ascii_digit());
+    after_num.starts_with("_bug_")
+}
+
+/// Extract the human-readable name from a bug file's first heading.
+fn extract_bug_name(path: &Path) -> Option<String> {
+    let contents = fs::read_to_string(path).ok()?;
+    for line in contents.lines() {
+        if let Some(rest) = line.strip_prefix("# Bug ") {
+            // Format: "N: Name"
+            if let Some(colon_pos) = rest.find(": ") {
+                return Some(rest[colon_pos + 2..].to_string());
+            }
+        }
+    }
+    None
+}
+
+/// List all open bugs — files in `work/1_backlog/` matching the `_bug_` naming pattern.
+///
+/// Returns a sorted list of `(bug_id, name)` pairs.
+pub fn list_bug_files(root: &Path) -> Result<Vec<(String, String)>, String> {
+    let backlog_dir = root.join(".storkit").join("work").join("1_backlog");
+    if !backlog_dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut bugs = Vec::new();
+    for entry in
+        fs::read_dir(&backlog_dir).map_err(|e| format!("Failed to read backlog directory: {e}"))?
+    {
+        let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
+        let path = entry.path();
+
+        if path.is_dir() {
+            continue;
+        }
+
+        if path.extension().and_then(|ext| ext.to_str()) != Some("md") {
+            continue;
+        }
+
+        let stem = path
+            .file_stem()
+            .and_then(|s| s.to_str())
+            .ok_or_else(|| "Invalid file name.".to_string())?;
+
+        // Only include bug items: {N}_bug_{slug}
+        if !is_bug_item(stem) {
+            continue;
+        }
+
+        let bug_id = stem.to_string();
+        let name = extract_bug_name(&path).unwrap_or_else(|| bug_id.clone());
+        bugs.push((bug_id, name));
+    }
+
+    bugs.sort_by(|a, b| a.0.cmp(&b.0));
+    Ok(bugs)
+}
+
+/// Returns true if the item stem (filename without extension) is a refactor item.
+/// Refactor items follow the pattern: {N}_refactor_{slug}
+fn is_refactor_item(stem: &str) -> bool {
+    let after_num = stem.trim_start_matches(|c: char| c.is_ascii_digit());
+    after_num.starts_with("_refactor_")
+}
+
+/// List all open refactors — files in `work/1_backlog/` matching the `_refactor_` naming pattern.
+///
+/// Returns a sorted list of `(refactor_id, name)` pairs.
+pub fn list_refactor_files(root: &Path) -> Result<Vec<(String, String)>, String> {
+    let backlog_dir = root.join(".storkit").join("work").join("1_backlog");
+    if !backlog_dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut refactors = Vec::new();
+    for entry in fs::read_dir(&backlog_dir)
+        .map_err(|e| format!("Failed to read backlog directory: {e}"))?
+    {
+        let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
+        let path = entry.path();
+
+        if path.is_dir() {
+            continue;
+        }
+
+        if path.extension().and_then(|ext| ext.to_str()) != Some("md") {
+            continue;
+        }
+
+        let stem = path
+            .file_stem()
+            .and_then(|s| s.to_str())
+            .ok_or_else(|| "Invalid file name.".to_string())?;
+
+        if !is_refactor_item(stem) {
+            continue;
+        }
+
+        let refactor_id = stem.to_string();
+        let name = fs::read_to_string(&path)
+            .ok()
+            .and_then(|contents| parse_front_matter(&contents).ok())
+            .and_then(|m| m.name)
+            .unwrap_or_else(|| refactor_id.clone());
+        refactors.push((refactor_id, name));
+    }
+
+    refactors.sort_by(|a, b| a.0.cmp(&b.0));
+    Ok(refactors)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn setup_git_repo(root: &std::path::Path) {
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+    }
+
+    // ── Bug file helper tests ──────────────────────────────────────────────────
+
+    #[test]
+    fn next_item_number_starts_at_1_when_empty_bugs() {
+        let tmp = tempfile::tempdir().unwrap();
+        assert_eq!(super::super::next_item_number(tmp.path()).unwrap(), 1);
+    }
+
+    #[test]
+    fn next_item_number_increments_from_existing_bugs() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join("1_bug_crash.md"), "").unwrap();
+        fs::write(backlog.join("3_bug_another.md"), "").unwrap();
+        assert_eq!(super::super::next_item_number(tmp.path()).unwrap(), 4);
+    }
+
+    #[test]
+    fn next_item_number_scans_archived_too() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        let archived = tmp.path().join(".storkit/work/5_done");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&archived).unwrap();
+        fs::write(archived.join("5_bug_old.md"), "").unwrap();
+        assert_eq!(super::super::next_item_number(tmp.path()).unwrap(), 6);
+    }
+
+    #[test]
+    fn list_bug_files_empty_when_no_bugs_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = list_bug_files(tmp.path()).unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn list_bug_files_excludes_archive_subdir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog_dir = tmp.path().join(".storkit/work/1_backlog");
+        let archived_dir = tmp.path().join(".storkit/work/5_done");
+        fs::create_dir_all(&backlog_dir).unwrap();
+        fs::create_dir_all(&archived_dir).unwrap();
+        fs::write(backlog_dir.join("1_bug_open.md"), "# Bug 1: Open Bug\n").unwrap();
+        fs::write(archived_dir.join("2_bug_closed.md"), "# Bug 2: Closed Bug\n").unwrap();
+
+        let result = list_bug_files(tmp.path()).unwrap();
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].0, "1_bug_open");
+        assert_eq!(result[0].1, "Open Bug");
+    }
+
+    #[test]
+    fn list_bug_files_sorted_by_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog_dir = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog_dir).unwrap();
+        fs::write(backlog_dir.join("3_bug_third.md"), "# Bug 3: Third\n").unwrap();
+        fs::write(backlog_dir.join("1_bug_first.md"), "# Bug 1: First\n").unwrap();
+        fs::write(backlog_dir.join("2_bug_second.md"), "# Bug 2: Second\n").unwrap();
+
+        let result = list_bug_files(tmp.path()).unwrap();
+        assert_eq!(result.len(), 3);
+        assert_eq!(result[0].0, "1_bug_first");
+        assert_eq!(result[1].0, "2_bug_second");
+        assert_eq!(result[2].0, "3_bug_third");
+    }
+
+    #[test]
+    fn extract_bug_name_parses_heading() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("bug-1-crash.md");
+        fs::write(&path, "# Bug 1: Login page crashes\n\n## Description\n").unwrap();
+        let name = extract_bug_name(&path).unwrap();
+        assert_eq!(name, "Login page crashes");
+    }
+
+    #[test]
+    fn create_bug_file_writes_correct_content() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo(tmp.path());
+
+        let bug_id = create_bug_file(
+            tmp.path(),
+            "Login Crash",
+            "The login page crashes on submit.",
+            "1. Go to /login\n2. Click submit",
+            "Page crashes with 500 error",
+            "Login succeeds",
+            Some(&["Login form submits without error".to_string()]),
+        )
+        .unwrap();
+
+        assert_eq!(bug_id, "1_bug_login_crash");
+
+        let filepath = tmp
+            .path()
+            .join(".storkit/work/1_backlog/1_bug_login_crash.md");
+        assert!(filepath.exists());
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(
+            contents.starts_with("---\nname: \"Login Crash\"\n---"),
+            "bug file must start with YAML front matter"
+        );
+        assert!(contents.contains("# Bug 1: Login Crash"));
+        assert!(contents.contains("## Description"));
+        assert!(contents.contains("The login page crashes on submit."));
+        assert!(contents.contains("## How to Reproduce"));
+        assert!(contents.contains("1. Go to /login"));
+        assert!(contents.contains("## Actual Result"));
+        assert!(contents.contains("Page crashes with 500 error"));
+        assert!(contents.contains("## Expected Result"));
+        assert!(contents.contains("Login succeeds"));
+        assert!(contents.contains("## Acceptance Criteria"));
+        assert!(contents.contains("- [ ] Login form submits without error"));
+    }
+
+    #[test]
+    fn create_bug_file_rejects_empty_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = create_bug_file(tmp.path(), "!!!", "desc", "steps", "actual", "expected", None);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("alphanumeric"));
+    }
+
+    #[test]
+    fn create_bug_file_uses_default_acceptance_criterion() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo(tmp.path());
+
+        create_bug_file(
+            tmp.path(),
+            "Some Bug",
+            "desc",
+            "steps",
+            "actual",
+            "expected",
+            None,
+        )
+        .unwrap();
+
+        let filepath = tmp.path().join(".storkit/work/1_backlog/1_bug_some_bug.md");
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(
+            contents.starts_with("---\nname: \"Some Bug\"\n---"),
+            "bug file must have YAML front matter"
+        );
+        assert!(contents.contains("- [ ] Bug is fixed and verified"));
+    }
+
+    // ── create_spike_file tests ────────────────────────────────────────────────
+
+    #[test]
+    fn create_spike_file_writes_correct_content() {
+        let tmp = tempfile::tempdir().unwrap();
+
+        let spike_id =
+            create_spike_file(tmp.path(), "Filesystem Watcher Architecture", None).unwrap();
+
+        assert_eq!(spike_id, "1_spike_filesystem_watcher_architecture");
+
+        let filepath = tmp
+            .path()
+            .join(".storkit/work/1_backlog/1_spike_filesystem_watcher_architecture.md");
+        assert!(filepath.exists());
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(
+            contents.starts_with("---\nname: \"Filesystem Watcher Architecture\"\n---"),
+            "spike file must start with YAML front matter"
+        );
+        assert!(contents.contains("# Spike 1: Filesystem Watcher Architecture"));
+        assert!(contents.contains("## Question"));
+        assert!(contents.contains("## Hypothesis"));
+        assert!(contents.contains("## Timebox"));
+        assert!(contents.contains("## Investigation Plan"));
+        assert!(contents.contains("## Findings"));
+        assert!(contents.contains("## Recommendation"));
+    }
+
+    #[test]
+    fn create_spike_file_uses_description_when_provided() {
+        let tmp = tempfile::tempdir().unwrap();
+        let description = "What is the best approach for watching filesystem events?";
+
+        create_spike_file(tmp.path(), "FS Watcher Spike", Some(description)).unwrap();
+
+        let filepath =
+            tmp.path().join(".storkit/work/1_backlog/1_spike_fs_watcher_spike.md");
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains(description));
+    }
+
+    #[test]
+    fn create_spike_file_uses_placeholder_when_no_description() {
+        let tmp = tempfile::tempdir().unwrap();
+        create_spike_file(tmp.path(), "My Spike", None).unwrap();
+
+        let filepath = tmp.path().join(".storkit/work/1_backlog/1_spike_my_spike.md");
+        let contents = fs::read_to_string(&filepath).unwrap();
+        // Should have placeholder TBD in Question section
+        assert!(contents.contains("## Question\n\n- TBD\n"));
+    }
+
+    #[test]
+    fn create_spike_file_rejects_empty_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = create_spike_file(tmp.path(), "!!!", None);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("alphanumeric"));
+    }
+
+    #[test]
+    fn create_spike_file_with_special_chars_in_name_produces_valid_yaml() {
+        let tmp = tempfile::tempdir().unwrap();
+        let name = "Spike: compare \"fast\" vs slow encoders";
+        let result = create_spike_file(tmp.path(), name, None);
+        assert!(result.is_ok(), "create_spike_file failed: {result:?}");
+
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        let spike_id = result.unwrap();
+        let filename = format!("{spike_id}.md");
+        let contents = fs::read_to_string(backlog.join(&filename)).unwrap();
+
+        let meta = parse_front_matter(&contents).expect("front matter should be valid YAML");
+        assert_eq!(meta.name.as_deref(), Some(name));
+    }
+
+    #[test]
+    fn create_spike_file_increments_from_existing_items() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join("5_story_existing.md"), "").unwrap();
+
+        let spike_id = create_spike_file(tmp.path(), "My Spike", None).unwrap();
+        assert!(spike_id.starts_with("6_spike_"), "expected spike number 6, got: {spike_id}");
+    }
+}
--- a/server/src/http/workflow/mod.rs
+++ b/server/src/http/workflow/mod.rs
@@ -0,0 +1,745 @@
+mod bug_ops;
+mod story_ops;
+mod test_results;
+
+pub use bug_ops::{
+    create_bug_file, create_refactor_file, create_spike_file, list_bug_files, list_refactor_files,
+};
+pub use story_ops::{
+    add_criterion_to_file, check_criterion_in_file, create_story_file, update_story_in_file,
+};
+pub use test_results::{
+    read_test_results_from_story_file, write_coverage_baseline_to_story_file,
+    write_test_results_to_story_file,
+};
+
+use crate::agents::AgentStatus;
+use crate::http::context::AppContext;
+use crate::io::story_metadata::parse_front_matter;
+use serde::Serialize;
+use std::collections::HashMap;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// Agent assignment embedded in a pipeline stage item.
+#[derive(Clone, Debug, Serialize)]
+pub struct AgentAssignment {
+    pub agent_name: String,
+    pub model: Option<String>,
+    pub status: String,
+}
+
+#[derive(Clone, Debug, Serialize)]
+pub struct UpcomingStory {
+    pub story_id: String,
+    pub name: Option<String>,
+    pub error: Option<String>,
+    /// Merge failure reason persisted to front matter by the mergemaster agent.
+    pub merge_failure: Option<String>,
+    /// Active agent working on this item, if any.
+    pub agent: Option<AgentAssignment>,
+    /// True when the item is held in QA for human review.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub review_hold: Option<bool>,
+    /// QA mode for this item: "human", "server", or "agent".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub qa: Option<String>,
+    /// Number of retries at the current pipeline stage.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub retry_count: Option<u32>,
+    /// True when the story has exceeded its retry limit and will not be auto-assigned.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub blocked: Option<bool>,
+}
+
+pub struct StoryValidationResult {
+    pub story_id: String,
+    pub valid: bool,
+    pub error: Option<String>,
+}
+
+/// Full pipeline state across all stages.
+#[derive(Clone, Debug, Serialize)]
+pub struct PipelineState {
+    pub backlog: Vec<UpcomingStory>,
+    pub current: Vec<UpcomingStory>,
+    pub qa: Vec<UpcomingStory>,
+    pub merge: Vec<UpcomingStory>,
+    pub done: Vec<UpcomingStory>,
+}
+
+/// Load the full pipeline state (all 5 active stages).
+pub fn load_pipeline_state(ctx: &AppContext) -> Result<PipelineState, String> {
+    let agent_map = build_active_agent_map(ctx);
+    Ok(PipelineState {
+        backlog: load_stage_items(ctx, "1_backlog", &HashMap::new())?,
+        current: load_stage_items(ctx, "2_current", &agent_map)?,
+        qa: load_stage_items(ctx, "3_qa", &agent_map)?,
+        merge: load_stage_items(ctx, "4_merge", &agent_map)?,
+        done: load_stage_items(ctx, "5_done", &HashMap::new())?,
+    })
+}
+
+/// Build a map from story_id → AgentAssignment for all pending/running agents.
+fn build_active_agent_map(ctx: &AppContext) -> HashMap<String, AgentAssignment> {
+    let agents = match ctx.agents.list_agents() {
+        Ok(a) => a,
+        Err(_) => return HashMap::new(),
+    };
+
+    let config_opt = ctx
+        .state
+        .get_project_root()
+        .ok()
+        .and_then(|root| crate::config::ProjectConfig::load(&root).ok());
+
+    let mut map = HashMap::new();
+    for agent in agents {
+        if !matches!(agent.status, AgentStatus::Pending | AgentStatus::Running) {
+            continue;
+        }
+        let model = config_opt
+            .as_ref()
+            .and_then(|cfg| cfg.find_agent(&agent.agent_name))
+            .and_then(|ac| ac.model.clone());
+        map.insert(
+            agent.story_id.clone(),
+            AgentAssignment {
+                agent_name: agent.agent_name,
+                model,
+                status: agent.status.to_string(),
+            },
+        );
+    }
+    map
+}
+
+/// Load work items from any pipeline stage directory.
+fn load_stage_items(
+    ctx: &AppContext,
+    stage_dir: &str,
+    agent_map: &HashMap<String, AgentAssignment>,
+) -> Result<Vec<UpcomingStory>, String> {
+    let root = ctx.state.get_project_root()?;
+    let dir = root.join(".storkit").join("work").join(stage_dir);
+
+    if !dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut stories = Vec::new();
+    for entry in fs::read_dir(&dir)
+        .map_err(|e| format!("Failed to read {stage_dir} directory: {e}"))?
+    {
+        let entry = entry.map_err(|e| format!("Failed to read {stage_dir} entry: {e}"))?;
+        let path = entry.path();
+        if path.extension().and_then(|ext| ext.to_str()) != Some("md") {
+            continue;
+        }
+        let story_id = path
+            .file_stem()
+            .and_then(|stem| stem.to_str())
+            .ok_or_else(|| "Invalid story file name.".to_string())?
+            .to_string();
+        let contents = fs::read_to_string(&path)
+            .map_err(|e| format!("Failed to read story file {}: {e}", path.display()))?;
+        let (name, error, merge_failure, review_hold, qa, retry_count, blocked) = match parse_front_matter(&contents) {
+            Ok(meta) => (meta.name, None, meta.merge_failure, meta.review_hold, meta.qa.map(|m| m.as_str().to_string()), meta.retry_count, meta.blocked),
+            Err(e) => (None, Some(e.to_string()), None, None, None, None, None),
+        };
+        let agent = agent_map.get(&story_id).cloned();
+        stories.push(UpcomingStory { story_id, name, error, merge_failure, agent, review_hold, qa, retry_count, blocked });
+    }
+
+    stories.sort_by(|a, b| a.story_id.cmp(&b.story_id));
+    Ok(stories)
+}
+
+pub fn load_upcoming_stories(ctx: &AppContext) -> Result<Vec<UpcomingStory>, String> {
+    load_stage_items(ctx, "1_backlog", &HashMap::new())
+}
+
+pub fn validate_story_dirs(
+    root: &std::path::Path,
+) -> Result<Vec<StoryValidationResult>, String> {
+    let mut results = Vec::new();
+
+    // Directories to validate: work/2_current/ + work/1_backlog/
+    let dirs_to_validate: Vec<PathBuf> = vec![
+        root.join(".storkit").join("work").join("2_current"),
+        root.join(".storkit").join("work").join("1_backlog"),
+    ];
+
+    for dir in &dirs_to_validate {
+        let subdir = dir.file_name().map(|n| n.to_string_lossy().into_owned()).unwrap_or_default();
+        if !dir.exists() {
+            continue;
+        }
+        for entry in
+            fs::read_dir(dir).map_err(|e| format!("Failed to read {subdir} directory: {e}"))?
+        {
+            let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
+            let path = entry.path();
+            if path.extension().and_then(|ext| ext.to_str()) != Some("md") {
+                continue;
+            }
+            let story_id = path
+                .file_stem()
+                .and_then(|stem| stem.to_str())
+                .unwrap_or_default()
+                .to_string();
+            let contents = fs::read_to_string(&path)
+                .map_err(|e| format!("Failed to read {}: {e}", path.display()))?;
+            match parse_front_matter(&contents) {
+                Ok(meta) => {
+                    let mut errors = Vec::new();
+                    if meta.name.is_none() {
+                        errors.push("Missing 'name' field".to_string());
+                    }
+                    if errors.is_empty() {
+                        results.push(StoryValidationResult {
+                            story_id,
+                            valid: true,
+                            error: None,
+                        });
+                    } else {
+                        results.push(StoryValidationResult {
+                            story_id,
+                            valid: false,
+                            error: Some(errors.join("; ")),
+                        });
+                    }
+                }
+                Err(e) => results.push(StoryValidationResult {
+                    story_id,
+                    valid: false,
+                    error: Some(e.to_string()),
+                }),
+            }
+        }
+    }
+
+    results.sort_by(|a, b| a.story_id.cmp(&b.story_id));
+    Ok(results)
+}
+
+// ── Shared utilities used by submodules ──────────────────────────
+
+/// Locate a work item file by searching all active pipeline stages.
+///
+/// Searches in priority order: 2_current, 1_backlog, 3_qa, 4_merge, 5_done, 6_archived.
+pub(super) fn find_story_file(project_root: &Path, story_id: &str) -> Result<PathBuf, String> {
+    let filename = format!("{story_id}.md");
+    let sk = project_root.join(".storkit").join("work");
+    for stage in &["2_current", "1_backlog", "3_qa", "4_merge", "5_done", "6_archived"] {
+        let path = sk.join(stage).join(&filename);
+        if path.exists() {
+            return Ok(path);
+        }
+    }
+    Err(format!(
+        "Story '{story_id}' not found in any pipeline stage."
+    ))
+}
+
+/// Replace the content of a named `## Section` in a story file.
+///
+/// Finds the first occurrence of `## {section_name}` and replaces everything
+/// until the next `##` heading (or end of file) with the provided text.
+/// Returns an error if the section is not found.
+pub(super) fn replace_section_content(content: &str, section_name: &str, new_text: &str) -> Result<String, String> {
+    let lines: Vec<&str> = content.lines().collect();
+    let heading = format!("## {section_name}");
+
+    let mut section_start: Option<usize> = None;
+    let mut section_end: Option<usize> = None;
+
+    for (i, line) in lines.iter().enumerate() {
+        let trimmed = line.trim();
+        if trimmed == heading {
+            section_start = Some(i);
+            continue;
+        }
+        if section_start.is_some() && trimmed.starts_with("## ") {
+            section_end = Some(i);
+            break;
+        }
+    }
+
+    let section_start =
+        section_start.ok_or_else(|| format!("Section '{heading}' not found in story file."))?;
+
+    let mut new_lines: Vec<String> = Vec::new();
+    // Keep everything up to and including the section heading.
+    for line in lines.iter().take(section_start + 1) {
+        new_lines.push(line.to_string());
+    }
+    // Blank line, new content, blank line.
+    new_lines.push(String::new());
+    new_lines.push(new_text.to_string());
+    new_lines.push(String::new());
+    // Resume from the next section heading (or EOF).
+    let resume_from = section_end.unwrap_or(lines.len());
+    for line in lines.iter().skip(resume_from) {
+        new_lines.push(line.to_string());
+    }
+
+    let mut new_str = new_lines.join("\n");
+    if content.ends_with('\n') {
+        new_str.push('\n');
+    }
+    Ok(new_str)
+}
+
+/// Replace the `## Test Results` section in `contents` with `new_section`,
+/// or append it if not present.
+pub(super) fn replace_or_append_section(contents: &str, header: &str, new_section: &str) -> String {
+    let lines: Vec<&str> = contents.lines().collect();
+    let header_trimmed = header.trim();
+
+    // Find the start of the existing section
+    let section_start = lines.iter().position(|l| l.trim() == header_trimmed);
+
+    if let Some(start) = section_start {
+        // Find the next `##` heading after the section start (the end of this section)
+        let section_end = lines[start + 1..]
+            .iter()
+            .position(|l| {
+                let t = l.trim();
+                t.starts_with("## ") && t != header_trimmed
+            })
+            .map(|i| start + 1 + i)
+            .unwrap_or(lines.len());
+
+        let mut result = lines[..start].join("\n");
+        if !result.is_empty() {
+            result.push('\n');
+        }
+        result.push_str(new_section);
+        if section_end < lines.len() {
+            result.push('\n');
+            result.push_str(&lines[section_end..].join("\n"));
+        }
+        if contents.ends_with('\n') {
+            result.push('\n');
+        }
+        result
+    } else {
+        // Append at the end
+        let mut result = contents.trim_end_matches('\n').to_string();
+        result.push_str("\n\n");
+        result.push_str(new_section);
+        if !result.ends_with('\n') {
+            result.push('\n');
+        }
+        result
+    }
+}
+
+pub(super) fn slugify_name(name: &str) -> String {
+    let slug: String = name
+        .chars()
+        .map(|c| {
+            if c.is_ascii_alphanumeric() {
+                c.to_ascii_lowercase()
+            } else {
+                '_'
+            }
+        })
+        .collect();
+    // Collapse consecutive underscores and trim edges
+    let mut result = String::new();
+    let mut prev_underscore = true; // start true to trim leading _
+    for ch in slug.chars() {
+        if ch == '_' {
+            if !prev_underscore {
+                result.push('_');
+            }
+            prev_underscore = true;
+        } else {
+            result.push(ch);
+            prev_underscore = false;
+        }
+    }
+    // Trim trailing underscore
+    if result.ends_with('_') {
+        result.pop();
+    }
+    result
+}
+
+/// Scan all `work/` subdirectories for the highest item number across all types (stories, bugs, spikes).
+pub(super) fn next_item_number(root: &std::path::Path) -> Result<u32, String> {
+    let work_base = root.join(".storkit").join("work");
+    let mut max_num: u32 = 0;
+
+    for subdir in &["1_backlog", "2_current", "3_qa", "4_merge", "5_done", "6_archived"] {
+        let dir = work_base.join(subdir);
+        if !dir.exists() {
+            continue;
+        }
+        for entry in
+            fs::read_dir(&dir).map_err(|e| format!("Failed to read {subdir} directory: {e}"))?
+        {
+            let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
+            let name = entry.file_name();
+            let name_str = name.to_string_lossy();
+            // Filename format: {N}_{type}_{slug}.md — extract leading N
+            let num_str: String = name_str.chars().take_while(|c| c.is_ascii_digit()).collect();
+            if let Ok(n) = num_str.parse::<u32>()
+                && n > max_num
+            {
+                max_num = n;
+            }
+        }
+    }
+
+    Ok(max_num + 1)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn load_pipeline_state_loads_all_stages() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path().to_path_buf();
+
+        for (stage, id) in &[
+            ("1_backlog", "10_story_upcoming"),
+            ("2_current", "20_story_current"),
+            ("3_qa", "30_story_qa"),
+            ("4_merge", "40_story_merge"),
+            ("5_done", "50_story_done"),
+        ] {
+            let dir = root.join(".storkit").join("work").join(stage);
+            fs::create_dir_all(&dir).unwrap();
+            fs::write(
+                dir.join(format!("{id}.md")),
+                format!("---\nname: {id}\n---\n"),
+            )
+            .unwrap();
+        }
+
+        let ctx = crate::http::context::AppContext::new_test(root);
+        let state = load_pipeline_state(&ctx).unwrap();
+
+        assert_eq!(state.backlog.len(), 1);
+        assert_eq!(state.backlog[0].story_id, "10_story_upcoming");
+
+        assert_eq!(state.current.len(), 1);
+        assert_eq!(state.current[0].story_id, "20_story_current");
+
+        assert_eq!(state.qa.len(), 1);
+        assert_eq!(state.qa[0].story_id, "30_story_qa");
+
+        assert_eq!(state.merge.len(), 1);
+        assert_eq!(state.merge[0].story_id, "40_story_merge");
+
+        assert_eq!(state.done.len(), 1);
+        assert_eq!(state.done[0].story_id, "50_story_done");
+    }
+
+    #[test]
+    fn load_upcoming_returns_empty_when_no_dir() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path().to_path_buf();
+        // No .storkit directory at all
+        let ctx = crate::http::context::AppContext::new_test(root);
+        let result = load_upcoming_stories(&ctx).unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn pipeline_state_includes_agent_for_running_story() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path().to_path_buf();
+
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("10_story_test.md"),
+            "---\nname: Test Story\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(root);
+        ctx.agents.inject_test_agent("10_story_test", "coder-1", crate::agents::AgentStatus::Running);
+
+        let state = load_pipeline_state(&ctx).unwrap();
+
+        assert_eq!(state.current.len(), 1);
+        let item = &state.current[0];
+        assert!(item.agent.is_some(), "running agent should appear on work item");
+        let agent = item.agent.as_ref().unwrap();
+        assert_eq!(agent.agent_name, "coder-1");
+        assert_eq!(agent.status, "running");
+    }
+
+    #[test]
+    fn pipeline_state_no_agent_for_completed_story() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path().to_path_buf();
+
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("11_story_done.md"),
+            "---\nname: Done Story\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(root);
+        ctx.agents.inject_test_agent("11_story_done", "coder-1", crate::agents::AgentStatus::Completed);
+
+        let state = load_pipeline_state(&ctx).unwrap();
+
+        assert_eq!(state.current.len(), 1);
+        assert!(
+            state.current[0].agent.is_none(),
+            "completed agent should not appear on work item"
+        );
+    }
+
+    #[test]
+    fn pipeline_state_pending_agent_included() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path().to_path_buf();
+
+        let current = root.join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("12_story_pending.md"),
+            "---\nname: Pending Story\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(root);
+        ctx.agents.inject_test_agent("12_story_pending", "coder-1", crate::agents::AgentStatus::Pending);
+
+        let state = load_pipeline_state(&ctx).unwrap();
+
+        assert_eq!(state.current.len(), 1);
+        let item = &state.current[0];
+        assert!(item.agent.is_some(), "pending agent should appear on work item");
+        assert_eq!(item.agent.as_ref().unwrap().status, "pending");
+    }
+
+    #[test]
+    fn load_upcoming_parses_metadata() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(
+            backlog.join("31_story_view_upcoming.md"),
+            "---\nname: View Upcoming\n---\n# Story\n",
+        )
+        .unwrap();
+        fs::write(
+            backlog.join("32_story_worktree.md"),
+            "---\nname: Worktree Orchestration\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
+        let stories = load_upcoming_stories(&ctx).unwrap();
+        assert_eq!(stories.len(), 2);
+        assert_eq!(stories[0].story_id, "31_story_view_upcoming");
+        assert_eq!(stories[0].name.as_deref(), Some("View Upcoming"));
+        assert_eq!(stories[1].story_id, "32_story_worktree");
+        assert_eq!(stories[1].name.as_deref(), Some("Worktree Orchestration"));
+    }
+
+    #[test]
+    fn load_upcoming_skips_non_md_files() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join(".gitkeep"), "").unwrap();
+        fs::write(
+            backlog.join("31_story_example.md"),
+            "---\nname: A Story\n---\n",
+        )
+        .unwrap();
+
+        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
+        let stories = load_upcoming_stories(&ctx).unwrap();
+        assert_eq!(stories.len(), 1);
+        assert_eq!(stories[0].story_id, "31_story_example");
+    }
+
+    #[test]
+    fn validate_story_dirs_valid_files() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&current).unwrap();
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(
+            current.join("28_story_todos.md"),
+            "---\nname: Show TODOs\n---\n# Story\n",
+        )
+        .unwrap();
+        fs::write(
+            backlog.join("36_story_front_matter.md"),
+            "---\nname: Enforce Front Matter\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let results = validate_story_dirs(tmp.path()).unwrap();
+        assert_eq!(results.len(), 2);
+        assert!(results.iter().all(|r| r.valid));
+        assert!(results.iter().all(|r| r.error.is_none()));
+    }
+
+    #[test]
+    fn validate_story_dirs_missing_front_matter() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("28_story_todos.md"), "# No front matter\n").unwrap();
+
+        let results = validate_story_dirs(tmp.path()).unwrap();
+        assert_eq!(results.len(), 1);
+        assert!(!results[0].valid);
+        assert_eq!(results[0].error.as_deref(), Some("Missing front matter"));
+    }
+
+    #[test]
+    fn validate_story_dirs_missing_required_fields() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("28_story_todos.md"), "---\n---\n# Story\n").unwrap();
+
+        let results = validate_story_dirs(tmp.path()).unwrap();
+        assert_eq!(results.len(), 1);
+        assert!(!results[0].valid);
+        let err = results[0].error.as_deref().unwrap();
+        assert!(err.contains("Missing 'name' field"));
+    }
+
+    #[test]
+    fn validate_story_dirs_empty_when_no_dirs() {
+        let tmp = tempfile::tempdir().unwrap();
+        let results = validate_story_dirs(tmp.path()).unwrap();
+        assert!(results.is_empty());
+    }
+
+    // --- slugify_name tests ---
+
+    #[test]
+    fn slugify_simple_name() {
+        assert_eq!(
+            slugify_name("Enforce Front Matter on All Story Files"),
+            "enforce_front_matter_on_all_story_files"
+        );
+    }
+
+    #[test]
+    fn slugify_with_special_chars() {
+        assert_eq!(slugify_name("Hello, World! (v2)"), "hello_world_v2");
+    }
+
+    #[test]
+    fn slugify_leading_trailing_underscores() {
+        assert_eq!(slugify_name("  spaces  "), "spaces");
+    }
+
+    #[test]
+    fn slugify_consecutive_separators() {
+        assert_eq!(slugify_name("a--b__c  d"), "a_b_c_d");
+    }
+
+    #[test]
+    fn slugify_empty_after_strip() {
+        assert_eq!(slugify_name("!!!"), "");
+    }
+
+    #[test]
+    fn slugify_already_snake_case() {
+        assert_eq!(slugify_name("my_story_name"), "my_story_name");
+    }
+
+    // --- next_item_number tests ---
+
+    #[test]
+    fn next_item_number_empty_dirs() {
+        let tmp = tempfile::tempdir().unwrap();
+        let base = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&base).unwrap();
+        assert_eq!(next_item_number(tmp.path()).unwrap(), 1);
+    }
+
+    #[test]
+    fn next_item_number_scans_all_dirs() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        let current = tmp.path().join(".storkit/work/2_current");
+        let archived = tmp.path().join(".storkit/work/5_done");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::create_dir_all(&current).unwrap();
+        fs::create_dir_all(&archived).unwrap();
+        fs::write(backlog.join("10_story_foo.md"), "").unwrap();
+        fs::write(current.join("20_story_bar.md"), "").unwrap();
+        fs::write(archived.join("15_story_baz.md"), "").unwrap();
+        assert_eq!(next_item_number(tmp.path()).unwrap(), 21);
+    }
+
+    #[test]
+    fn next_item_number_no_work_dirs() {
+        let tmp = tempfile::tempdir().unwrap();
+        // No .storkit at all
+        assert_eq!(next_item_number(tmp.path()).unwrap(), 1);
+    }
+
+    // --- find_story_file tests ---
+
+    #[test]
+    fn find_story_file_searches_current_then_backlog() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&current).unwrap();
+        fs::create_dir_all(&backlog).unwrap();
+
+        // Only in backlog
+        fs::write(backlog.join("6_test.md"), "").unwrap();
+        let found = find_story_file(tmp.path(), "6_test").unwrap();
+        assert!(found.ends_with("1_backlog/6_test.md") || found.ends_with("1_backlog\\6_test.md"));
+
+        // Also in current — current should win
+        fs::write(current.join("6_test.md"), "").unwrap();
+        let found = find_story_file(tmp.path(), "6_test").unwrap();
+        assert!(found.ends_with("2_current/6_test.md") || found.ends_with("2_current\\6_test.md"));
+    }
+
+    #[test]
+    fn find_story_file_returns_error_when_not_found() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = find_story_file(tmp.path(), "99_missing");
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not found"));
+    }
+
+    // --- replace_or_append_section tests ---
+
+    #[test]
+    fn replace_or_append_section_appends_when_absent() {
+        let contents = "---\nname: T\n---\n# Story\n";
+        let new = replace_or_append_section(contents, "## Test Results", "## Test Results\n\nfoo\n");
+        assert!(new.contains("## Test Results"));
+        assert!(new.contains("foo"));
+        assert!(new.contains("# Story"));
+    }
+
+    #[test]
+    fn replace_or_append_section_replaces_existing() {
+        let contents = "# Story\n\n## Test Results\n\nold content\n\n## Other\n\nother content\n";
+        let new = replace_or_append_section(contents, "## Test Results", "## Test Results\n\nnew content\n");
+        assert!(new.contains("new content"));
+        assert!(!new.contains("old content"));
+        assert!(new.contains("## Other"));
+    }
+}
--- a/server/src/http/workflow/story_ops.rs
+++ b/server/src/http/workflow/story_ops.rs
@@ -0,0 +1,592 @@
+use crate::io::story_metadata::set_front_matter_field;
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+
+use super::{find_story_file, next_item_number, replace_section_content, slugify_name};
+
+/// Shared create-story logic used by both the OpenApi and MCP handlers.
+///
+/// When `commit` is `true`, the new story file is git-added and committed to
+/// the current branch immediately after creation.
+pub fn create_story_file(
+    root: &std::path::Path,
+    name: &str,
+    user_story: Option<&str>,
+    acceptance_criteria: Option<&[String]>,
+    commit: bool,
+) -> Result<String, String> {
+    let story_number = next_item_number(root)?;
+    let slug = slugify_name(name);
+
+    if slug.is_empty() {
+        return Err("Name must contain at least one alphanumeric character.".to_string());
+    }
+
+    let filename = format!("{story_number}_story_{slug}.md");
+    let backlog_dir = root.join(".storkit").join("work").join("1_backlog");
+    fs::create_dir_all(&backlog_dir)
+        .map_err(|e| format!("Failed to create backlog directory: {e}"))?;
+
+    let filepath = backlog_dir.join(&filename);
+    if filepath.exists() {
+        return Err(format!("Story file already exists: {filename}"));
+    }
+
+    let story_id = filepath
+        .file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or_default()
+        .to_string();
+
+    let mut content = String::new();
+    content.push_str("---\n");
+    content.push_str(&format!("name: \"{}\"\n", name.replace('"', "\\\"")));
+    content.push_str("---\n\n");
+    content.push_str(&format!("# Story {story_number}: {name}\n\n"));
+
+    content.push_str("## User Story\n\n");
+    if let Some(us) = user_story {
+        content.push_str(us);
+        content.push('\n');
+    } else {
+        content.push_str("As a ..., I want ..., so that ...\n");
+    }
+    content.push('\n');
+
+    content.push_str("## Acceptance Criteria\n\n");
+    if let Some(criteria) = acceptance_criteria {
+        for criterion in criteria {
+            content.push_str(&format!("- [ ] {criterion}\n"));
+        }
+    } else {
+        content.push_str("- [ ] TODO\n");
+    }
+    content.push('\n');
+
+    content.push_str("## Out of Scope\n\n");
+    content.push_str("- TBD\n");
+
+    fs::write(&filepath, &content)
+        .map_err(|e| format!("Failed to write story file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+    let _ = commit; // kept for API compat, ignored
+
+    Ok(story_id)
+}
+
+/// Check off the Nth unchecked acceptance criterion in a story file and auto-commit.
+///
+/// `criterion_index` is 0-based among unchecked (`- [ ]`) items.
+pub fn check_criterion_in_file(
+    project_root: &Path,
+    story_id: &str,
+    criterion_index: usize,
+) -> Result<(), String> {
+    let filepath = find_story_file(project_root, story_id)?;
+    let contents = fs::read_to_string(&filepath)
+        .map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let mut unchecked_count: usize = 0;
+    let mut found = false;
+    let new_lines: Vec<String> = contents
+        .lines()
+        .map(|line| {
+            let trimmed = line.trim();
+            if let Some(rest) = trimmed.strip_prefix("- [ ] ") {
+                if unchecked_count == criterion_index {
+                    unchecked_count += 1;
+                    found = true;
+                    let indent_len = line.len() - trimmed.len();
+                    let indent = &line[..indent_len];
+                    return format!("{indent}- [x] {rest}");
+                }
+                unchecked_count += 1;
+            }
+            line.to_string()
+        })
+        .collect();
+
+    if !found {
+        return Err(format!(
+            "Criterion index {criterion_index} out of range. Story '{story_id}' has \
+             {unchecked_count} unchecked criteria (indices 0..{}).",
+            unchecked_count.saturating_sub(1)
+        ));
+    }
+
+    let mut new_str = new_lines.join("\n");
+    if contents.ends_with('\n') {
+        new_str.push('\n');
+    }
+    fs::write(&filepath, &new_str)
+        .map_err(|e| format!("Failed to write story file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+    Ok(())
+}
+
+/// Add a new acceptance criterion to a story file.
+///
+/// Appends `- [ ] {criterion}` after the last existing criterion line in the
+/// "## Acceptance Criteria" section, or directly after the section heading if
+/// the section is empty. The filesystem watcher auto-commits the change.
+pub fn add_criterion_to_file(
+    project_root: &Path,
+    story_id: &str,
+    criterion: &str,
+) -> Result<(), String> {
+    let filepath = find_story_file(project_root, story_id)?;
+    let contents = fs::read_to_string(&filepath)
+        .map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let lines: Vec<&str> = contents.lines().collect();
+    let mut in_ac_section = false;
+    let mut ac_section_start: Option<usize> = None;
+    let mut last_criterion_line: Option<usize> = None;
+
+    for (i, line) in lines.iter().enumerate() {
+        let trimmed = line.trim();
+        if trimmed == "## Acceptance Criteria" {
+            in_ac_section = true;
+            ac_section_start = Some(i);
+            continue;
+        }
+        if in_ac_section {
+            if trimmed.starts_with("## ") {
+                break;
+            }
+            if trimmed.starts_with("- [ ] ") || trimmed.starts_with("- [x] ") {
+                last_criterion_line = Some(i);
+            }
+        }
+    }
+
+    let insert_after = last_criterion_line
+        .or(ac_section_start)
+        .ok_or_else(|| {
+            format!("Story '{story_id}' has no '## Acceptance Criteria' section.")
+        })?;
+
+    let mut new_lines: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
+    new_lines.insert(insert_after + 1, format!("- [ ] {criterion}"));
+
+    let mut new_str = new_lines.join("\n");
+    if contents.ends_with('\n') {
+        new_str.push('\n');
+    }
+    fs::write(&filepath, &new_str)
+        .map_err(|e| format!("Failed to write story file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+    Ok(())
+}
+
+/// Update the user story text and/or description in a story file.
+///
+/// At least one of `user_story` or `description` must be provided.
+/// Replaces the content of the corresponding `##` section in place.
+/// The filesystem watcher auto-commits the change.
+pub fn update_story_in_file(
+    project_root: &Path,
+    story_id: &str,
+    user_story: Option<&str>,
+    description: Option<&str>,
+    front_matter: Option<&HashMap<String, String>>,
+) -> Result<(), String> {
+    let has_front_matter_updates = front_matter.map(|m| !m.is_empty()).unwrap_or(false);
+    if user_story.is_none() && description.is_none() && !has_front_matter_updates {
+        return Err(
+            "At least one of 'user_story', 'description', or 'front_matter' must be provided."
+                .to_string(),
+        );
+    }
+
+    let filepath = find_story_file(project_root, story_id)?;
+    let mut contents = fs::read_to_string(&filepath)
+        .map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    if let Some(fields) = front_matter {
+        for (key, value) in fields {
+            let yaml_value = format!("\"{}\"", value.replace('"', "\\\"").replace('\n', " ").replace('\r', ""));
+            contents = set_front_matter_field(&contents, key, &yaml_value);
+        }
+    }
+
+    if let Some(us) = user_story {
+        contents = replace_section_content(&contents, "User Story", us)?;
+    }
+    if let Some(desc) = description {
+        contents = replace_section_content(&contents, "Description", desc)?;
+    }
+
+    fs::write(&filepath, &contents)
+        .map_err(|e| format!("Failed to write story file: {e}"))?;
+
+    // Watcher handles the git commit asynchronously.
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::io::story_metadata::parse_front_matter;
+
+    fn setup_git_repo(root: &std::path::Path) {
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(root)
+            .output()
+            .unwrap();
+    }
+
+    fn story_with_criteria(n: usize) -> String {
+        let mut s = "---\nname: Test Story\n---\n\n## Acceptance Criteria\n\n".to_string();
+        for i in 0..n {
+            s.push_str(&format!("- [ ] Criterion {i}\n"));
+        }
+        s
+    }
+
+    // --- create_story integration tests ---
+
+    #[test]
+    fn create_story_writes_correct_content() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+        fs::write(backlog.join("36_story_existing.md"), "").unwrap();
+
+        let number = super::super::next_item_number(tmp.path()).unwrap();
+        assert_eq!(number, 37);
+
+        let slug = super::super::slugify_name("My New Feature");
+        assert_eq!(slug, "my_new_feature");
+
+        let filename = format!("{number}_{slug}.md");
+        let filepath = backlog.join(&filename);
+
+        let mut content = String::new();
+        content.push_str("---\n");
+        content.push_str("name: \"My New Feature\"\n");
+        content.push_str("---\n\n");
+        content.push_str(&format!("# Story {number}: My New Feature\n\n"));
+        content.push_str("## User Story\n\n");
+        content.push_str("As a dev, I want this feature\n\n");
+        content.push_str("## Acceptance Criteria\n\n");
+        content.push_str("- [ ] It works\n");
+        content.push_str("- [ ] It is tested\n\n");
+        content.push_str("## Out of Scope\n\n");
+        content.push_str("- TBD\n");
+
+        fs::write(&filepath, &content).unwrap();
+
+        let written = fs::read_to_string(&filepath).unwrap();
+        assert!(written.starts_with("---\nname: \"My New Feature\"\n---"));
+        assert!(written.contains("# Story 37: My New Feature"));
+        assert!(written.contains("- [ ] It works"));
+        assert!(written.contains("- [ ] It is tested"));
+        assert!(written.contains("## Out of Scope"));
+    }
+
+    #[test]
+    fn create_story_with_colon_in_name_produces_valid_yaml() {
+        let tmp = tempfile::tempdir().unwrap();
+        let name = "Server-owned agent completion: remove report_completion dependency";
+        let result = create_story_file(tmp.path(), name, None, None, false);
+        assert!(result.is_ok(), "create_story_file failed: {result:?}");
+
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        let story_id = result.unwrap();
+        let filename = format!("{story_id}.md");
+        let contents = fs::read_to_string(backlog.join(&filename)).unwrap();
+
+        let meta = parse_front_matter(&contents).expect("front matter should be valid YAML");
+        assert_eq!(meta.name.as_deref(), Some(name));
+    }
+
+    #[test]
+    fn create_story_rejects_duplicate() {
+        let tmp = tempfile::tempdir().unwrap();
+        let backlog = tmp.path().join(".storkit/work/1_backlog");
+        fs::create_dir_all(&backlog).unwrap();
+
+        let filepath = backlog.join("1_story_my_feature.md");
+        fs::write(&filepath, "existing").unwrap();
+
+        // Simulate the check
+        assert!(filepath.exists());
+    }
+
+    // ── check_criterion_in_file tests ─────────────────────────────────────────
+
+    #[test]
+    fn check_criterion_marks_first_unchecked() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo(tmp.path());
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("1_test.md");
+        fs::write(&filepath, story_with_criteria(3)).unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "add story"])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+
+        check_criterion_in_file(tmp.path(), "1_test", 0).unwrap();
+
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains("- [x] Criterion 0"), "first should be checked");
+        assert!(contents.contains("- [ ] Criterion 1"), "second should stay unchecked");
+        assert!(contents.contains("- [ ] Criterion 2"), "third should stay unchecked");
+    }
+
+    #[test]
+    fn check_criterion_marks_second_unchecked() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo(tmp.path());
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("2_test.md");
+        fs::write(&filepath, story_with_criteria(3)).unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "add story"])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+
+        check_criterion_in_file(tmp.path(), "2_test", 1).unwrap();
+
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains("- [ ] Criterion 0"), "first should stay unchecked");
+        assert!(contents.contains("- [x] Criterion 1"), "second should be checked");
+        assert!(contents.contains("- [ ] Criterion 2"), "third should stay unchecked");
+    }
+
+    #[test]
+    fn check_criterion_out_of_range_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        setup_git_repo(tmp.path());
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("3_test.md");
+        fs::write(&filepath, story_with_criteria(2)).unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "add story"])
+            .current_dir(tmp.path())
+            .output()
+            .unwrap();
+
+        let result = check_criterion_in_file(tmp.path(), "3_test", 5);
+        assert!(result.is_err(), "should fail for out-of-range index");
+        assert!(result.unwrap_err().contains("out of range"));
+    }
+
+    // ── add_criterion_to_file tests ───────────────────────────────────────────
+
+    fn story_with_ac_section(criteria: &[&str]) -> String {
+        let mut s = "---\nname: Test\n---\n\n## User Story\n\nAs a user...\n\n## Acceptance Criteria\n\n".to_string();
+        for c in criteria {
+            s.push_str(&format!("- [ ] {c}\n"));
+        }
+        s.push_str("\n## Out of Scope\n\n- N/A\n");
+        s
+    }
+
+    #[test]
+    fn add_criterion_appends_after_last_criterion() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("10_test.md");
+        fs::write(&filepath, story_with_ac_section(&["First", "Second"])).unwrap();
+
+        add_criterion_to_file(tmp.path(), "10_test", "Third").unwrap();
+
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains("- [ ] First\n"));
+        assert!(contents.contains("- [ ] Second\n"));
+        assert!(contents.contains("- [ ] Third\n"));
+        // Third should come after Second
+        let pos_second = contents.find("- [ ] Second").unwrap();
+        let pos_third = contents.find("- [ ] Third").unwrap();
+        assert!(pos_third > pos_second, "Third should appear after Second");
+    }
+
+    #[test]
+    fn add_criterion_to_empty_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("11_test.md");
+        let content = "---\nname: Test\n---\n\n## Acceptance Criteria\n\n## Out of Scope\n\n- N/A\n";
+        fs::write(&filepath, content).unwrap();
+
+        add_criterion_to_file(tmp.path(), "11_test", "New AC").unwrap();
+
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains("- [ ] New AC\n"), "criterion should be present");
+    }
+
+    #[test]
+    fn add_criterion_missing_section_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("12_test.md");
+        fs::write(&filepath, "---\nname: Test\n---\n\nNo AC section here.\n").unwrap();
+
+        let result = add_criterion_to_file(tmp.path(), "12_test", "X");
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("Acceptance Criteria"));
+    }
+
+    // ── update_story_in_file tests ─────────────────────────────────────────────
+
+    #[test]
+    fn update_story_replaces_user_story_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("20_test.md");
+        let content = "---\nname: T\n---\n\n## User Story\n\nOld text\n\n## Acceptance Criteria\n\n- [ ] AC\n";
+        fs::write(&filepath, content).unwrap();
+
+        update_story_in_file(tmp.path(), "20_test", Some("New user story text"), None, None).unwrap();
+
+        let result = fs::read_to_string(&filepath).unwrap();
+        assert!(result.contains("New user story text"), "new text should be present");
+        assert!(!result.contains("Old text"), "old text should be replaced");
+        assert!(result.contains("## Acceptance Criteria"), "other sections preserved");
+    }
+
+    #[test]
+    fn update_story_replaces_description_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("21_test.md");
+        let content = "---\nname: T\n---\n\n## Description\n\nOld description\n\n## Acceptance Criteria\n\n- [ ] AC\n";
+        fs::write(&filepath, content).unwrap();
+
+        update_story_in_file(tmp.path(), "21_test", None, Some("New description"), None).unwrap();
+
+        let result = fs::read_to_string(&filepath).unwrap();
+        assert!(result.contains("New description"), "new description present");
+        assert!(!result.contains("Old description"), "old description replaced");
+    }
+
+    #[test]
+    fn update_story_no_args_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(current.join("22_test.md"), "---\nname: T\n---\n").unwrap();
+
+        let result = update_story_in_file(tmp.path(), "22_test", None, None, None);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("At least one"));
+    }
+
+    #[test]
+    fn update_story_missing_section_returns_error() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("23_test.md"),
+            "---\nname: T\n---\n\nNo sections here.\n",
+        )
+        .unwrap();
+
+        let result = update_story_in_file(tmp.path(), "23_test", Some("new text"), None, None);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("User Story"));
+    }
+
+    #[test]
+    fn update_story_sets_agent_front_matter_field() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("24_test.md");
+        fs::write(&filepath, "---\nname: T\n---\n\n## User Story\n\nSome story\n").unwrap();
+
+        let mut fields = HashMap::new();
+        fields.insert("agent".to_string(), "dev".to_string());
+        update_story_in_file(tmp.path(), "24_test", None, None, Some(&fields)).unwrap();
+
+        let result = fs::read_to_string(&filepath).unwrap();
+        assert!(result.contains("agent: \"dev\""), "agent field should be set");
+        assert!(result.contains("name: T"), "name field preserved");
+    }
+
+    #[test]
+    fn update_story_sets_arbitrary_front_matter_fields() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let filepath = current.join("25_test.md");
+        fs::write(&filepath, "---\nname: T\n---\n\n## User Story\n\nSome story\n").unwrap();
+
+        let mut fields = HashMap::new();
+        fields.insert("qa".to_string(), "human".to_string());
+        fields.insert("priority".to_string(), "high".to_string());
+        update_story_in_file(tmp.path(), "25_test", None, None, Some(&fields)).unwrap();
+
+        let result = fs::read_to_string(&filepath).unwrap();
+        assert!(result.contains("qa: \"human\""), "qa field should be set");
+        assert!(result.contains("priority: \"high\""), "priority field should be set");
+        assert!(result.contains("name: T"), "name field preserved");
+    }
+
+    #[test]
+    fn update_story_front_matter_only_no_section_required() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        // File without a User Story section — front matter update should succeed
+        let filepath = current.join("26_test.md");
+        fs::write(&filepath, "---\nname: T\n---\n\nNo sections here.\n").unwrap();
+
+        let mut fields = HashMap::new();
+        fields.insert("agent".to_string(), "dev".to_string());
+        let result = update_story_in_file(tmp.path(), "26_test", None, None, Some(&fields));
+        assert!(result.is_ok(), "front-matter-only update should not require body sections");
+
+        let contents = fs::read_to_string(&filepath).unwrap();
+        assert!(contents.contains("agent: \"dev\""));
+    }
+}
--- a/server/src/http/workflow/test_results.rs
+++ b/server/src/http/workflow/test_results.rs
@@ -0,0 +1,307 @@
+use crate::io::story_metadata::write_coverage_baseline;
+use crate::workflow::{StoryTestResults, TestCaseResult, TestStatus};
+use std::fs;
+use std::path::Path;
+
+use super::{find_story_file, replace_or_append_section};
+
+const TEST_RESULTS_MARKER: &str = "<!-- storkit-test-results:";
+
+/// Write (or overwrite) the `## Test Results` section in a story file.
+///
+/// The section contains an HTML comment with JSON for machine parsing and a
+/// human-readable summary below it. If the section already exists it is
+/// replaced in-place. If the story file is not found, this is a no-op.
+pub fn write_test_results_to_story_file(
+    project_root: &Path,
+    story_id: &str,
+    results: &StoryTestResults,
+) -> Result<(), String> {
+    let path = find_story_file(project_root, story_id)?;
+    let contents =
+        fs::read_to_string(&path).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let json = serde_json::to_string(results)
+        .map_err(|e| format!("Failed to serialize test results: {e}"))?;
+
+    let section = build_test_results_section(&json, results);
+    let new_contents = replace_or_append_section(&contents, "## Test Results", &section);
+
+    fs::write(&path, &new_contents).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Read test results from the `## Test Results` section of a story file.
+///
+/// Returns `None` if the file is not found or contains no test results section.
+pub fn read_test_results_from_story_file(
+    project_root: &Path,
+    story_id: &str,
+) -> Option<StoryTestResults> {
+    let path = find_story_file(project_root, story_id).ok()?;
+    let contents = fs::read_to_string(&path).ok()?;
+    parse_test_results_from_contents(&contents)
+}
+
+/// Write coverage baseline to the front matter of a story file.
+///
+/// If the story file is not found, this is a no-op (returns Ok).
+pub fn write_coverage_baseline_to_story_file(
+    project_root: &Path,
+    story_id: &str,
+    coverage_pct: f64,
+) -> Result<(), String> {
+    let path = match find_story_file(project_root, story_id) {
+        Ok(p) => p,
+        Err(_) => return Ok(()), // No story file — skip silently
+    };
+    write_coverage_baseline(&path, coverage_pct)
+}
+
+/// Build the `## Test Results` section text including JSON comment and human-readable summary.
+fn build_test_results_section(json: &str, results: &StoryTestResults) -> String {
+    let mut s = String::from("## Test Results\n\n");
+    s.push_str(&format!("{TEST_RESULTS_MARKER} {json} -->\n\n"));
+
+    // Unit tests
+    let (unit_pass, unit_fail) = count_pass_fail(&results.unit);
+    s.push_str(&format!(
+        "### Unit Tests ({unit_pass} passed, {unit_fail} failed)\n\n"
+    ));
+    if results.unit.is_empty() {
+        s.push_str("*No unit tests recorded.*\n");
+    } else {
+        for t in &results.unit {
+            s.push_str(&format_test_line(t));
+        }
+    }
+    s.push('\n');
+
+    // Integration tests
+    let (int_pass, int_fail) = count_pass_fail(&results.integration);
+    s.push_str(&format!(
+        "### Integration Tests ({int_pass} passed, {int_fail} failed)\n\n"
+    ));
+    if results.integration.is_empty() {
+        s.push_str("*No integration tests recorded.*\n");
+    } else {
+        for t in &results.integration {
+            s.push_str(&format_test_line(t));
+        }
+    }
+
+    s
+}
+
+fn count_pass_fail(tests: &[TestCaseResult]) -> (usize, usize) {
+    let pass = tests
+        .iter()
+        .filter(|t| t.status == TestStatus::Pass)
+        .count();
+    (pass, tests.len() - pass)
+}
+
+fn format_test_line(t: &TestCaseResult) -> String {
+    let icon = if t.status == TestStatus::Pass {
+        "✅"
+    } else {
+        "❌"
+    };
+    match &t.details {
+        Some(d) if !d.is_empty() => format!("- {icon} {} — {d}\n", t.name),
+        _ => format!("- {icon} {}\n", t.name),
+    }
+}
+
+/// Parse `StoryTestResults` from the JSON embedded in the `## Test Results` section.
+fn parse_test_results_from_contents(contents: &str) -> Option<StoryTestResults> {
+    for line in contents.lines() {
+        let trimmed = line.trim();
+        if let Some(rest) = trimmed.strip_prefix(TEST_RESULTS_MARKER) {
+            // rest looks like: ` {...} -->`
+            if let Some(json_end) = rest.rfind("-->") {
+                let json_str = rest[..json_end].trim();
+                if let Ok(results) = serde_json::from_str::<StoryTestResults>(json_str) {
+                    return Some(results);
+                }
+            }
+        }
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::workflow::{StoryTestResults, TestCaseResult, TestStatus};
+
+    fn make_results() -> StoryTestResults {
+        StoryTestResults {
+            unit: vec![
+                TestCaseResult {
+                    name: "unit-pass".to_string(),
+                    status: TestStatus::Pass,
+                    details: None,
+                },
+                TestCaseResult {
+                    name: "unit-fail".to_string(),
+                    status: TestStatus::Fail,
+                    details: Some("assertion failed".to_string()),
+                },
+            ],
+            integration: vec![TestCaseResult {
+                name: "int-pass".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        }
+    }
+
+    #[test]
+    fn write_and_read_test_results_roundtrip() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("1_story_test.md"),
+            "---\nname: Test\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let results = make_results();
+        write_test_results_to_story_file(tmp.path(), "1_story_test", &results).unwrap();
+
+        let read_back = read_test_results_from_story_file(tmp.path(), "1_story_test")
+            .expect("should read back results");
+        assert_eq!(read_back.unit.len(), 2);
+        assert_eq!(read_back.integration.len(), 1);
+        assert_eq!(read_back.unit[0].name, "unit-pass");
+        assert_eq!(read_back.unit[1].status, TestStatus::Fail);
+        assert_eq!(
+            read_back.unit[1].details.as_deref(),
+            Some("assertion failed")
+        );
+    }
+
+    #[test]
+    fn write_test_results_creates_readable_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let story_path = current.join("2_story_check.md");
+        fs::write(
+            &story_path,
+            "---\nname: Check\n---\n# Story\n\n## Acceptance Criteria\n\n- [ ] AC1\n",
+        )
+        .unwrap();
+
+        let results = make_results();
+        write_test_results_to_story_file(tmp.path(), "2_story_check", &results).unwrap();
+
+        let contents = fs::read_to_string(&story_path).unwrap();
+        assert!(contents.contains("## Test Results"));
+        assert!(contents.contains("✅ unit-pass"));
+        assert!(contents.contains("❌ unit-fail"));
+        assert!(contents.contains("assertion failed"));
+        assert!(contents.contains("storkit-test-results:"));
+        // Original content still present
+        assert!(contents.contains("## Acceptance Criteria"));
+    }
+
+    #[test]
+    fn write_test_results_overwrites_existing_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        let story_path = current.join("3_story_overwrite.md");
+        fs::write(
+            &story_path,
+            "---\nname: Overwrite\n---\n# Story\n\n## Test Results\n\n<!-- storkit-test-results: {} -->\n\n### Unit Tests (0 passed, 0 failed)\n\n*No unit tests recorded.*\n",
+        )
+        .unwrap();
+
+        let results = make_results();
+        write_test_results_to_story_file(tmp.path(), "3_story_overwrite", &results).unwrap();
+
+        let contents = fs::read_to_string(&story_path).unwrap();
+        assert!(contents.contains("✅ unit-pass"));
+        // Should have only one ## Test Results header
+        let count = contents.matches("## Test Results").count();
+        assert_eq!(count, 1, "should have exactly one ## Test Results section");
+    }
+
+    #[test]
+    fn read_test_results_returns_none_when_no_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("4_story_empty.md"),
+            "---\nname: Empty\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let result = read_test_results_from_story_file(tmp.path(), "4_story_empty");
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn read_test_results_returns_none_for_unknown_story() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = read_test_results_from_story_file(tmp.path(), "99_story_unknown");
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn write_test_results_finds_story_in_any_stage() {
+        let tmp = tempfile::tempdir().unwrap();
+        let qa_dir = tmp.path().join(".storkit/work/3_qa");
+        fs::create_dir_all(&qa_dir).unwrap();
+        fs::write(
+            qa_dir.join("5_story_qa.md"),
+            "---\nname: QA Story\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "u1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![],
+        };
+        write_test_results_to_story_file(tmp.path(), "5_story_qa", &results).unwrap();
+
+        let read_back = read_test_results_from_story_file(tmp.path(), "5_story_qa").unwrap();
+        assert_eq!(read_back.unit.len(), 1);
+    }
+
+    #[test]
+    fn write_coverage_baseline_to_story_file_updates_front_matter() {
+        let tmp = tempfile::tempdir().unwrap();
+        let current = tmp.path().join(".storkit/work/2_current");
+        fs::create_dir_all(&current).unwrap();
+        fs::write(
+            current.join("6_story_cov.md"),
+            "---\nname: Cov Story\n---\n# Story\n",
+        )
+        .unwrap();
+
+        write_coverage_baseline_to_story_file(tmp.path(), "6_story_cov", 75.4).unwrap();
+
+        let contents = fs::read_to_string(current.join("6_story_cov.md")).unwrap();
+        assert!(
+            contents.contains("coverage_baseline: 75.4%"),
+            "got: {contents}"
+        );
+    }
+
+    #[test]
+    fn write_coverage_baseline_to_story_file_silent_on_missing_story() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Story doesn't exist — should succeed silently
+        let result = write_coverage_baseline_to_story_file(tmp.path(), "99_story_missing", 50.0);
+        assert!(result.is_ok());
+    }
+}
--- a/server/src/http/ws.rs
+++ b/server/src/http/ws.rs
--- a/server/src/io/fs.rs
+++ b/server/src/io/fs.rs
--- a/server/src/io/mod.rs
+++ b/server/src/io/mod.rs
@@ -0,0 +1,6 @@
+pub mod fs;
+pub mod onboarding;
+pub mod search;
+pub mod shell;
+pub mod story_metadata;
+pub mod watcher;
--- a/server/src/io/onboarding.rs
+++ b/server/src/io/onboarding.rs
@@ -0,0 +1,315 @@
+use std::path::Path;
+
+/// Sentinel comment injected as the first line of scaffold templates.
+/// Only untouched templates contain this marker — real project content
+/// will never include it, so it avoids false positives when the project
+/// itself is an "Agentic AI Code Assistant".
+const TEMPLATE_SENTINEL: &str = "<!-- storkit:scaffold-template -->";
+
+/// Marker found in the default `script/test` scaffold output.
+const TEMPLATE_MARKER_SCRIPT: &str = "No tests configured";
+
+/// Summary of what parts of a project still need onboarding.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct OnboardingStatus {
+    /// True when the project context spec needs to be populated.
+    pub needs_context: bool,
+    /// True when the tech stack spec needs to be populated.
+    pub needs_stack: bool,
+    /// True when `script/test` still contains the scaffold placeholder.
+    pub needs_test_script: bool,
+    /// True when `.storkit/project.toml` is missing or has no
+    /// `[[component]]` entries.
+    pub needs_project_toml: bool,
+}
+
+impl OnboardingStatus {
+    /// Returns `true` when any onboarding step is still needed.
+    pub fn needs_onboarding(&self) -> bool {
+        self.needs_context || self.needs_stack
+    }
+}
+
+/// Inspect the project at `project_root` and determine which onboarding
+/// steps are still required.
+pub fn check_onboarding_status(project_root: &Path) -> OnboardingStatus {
+    let story_kit = project_root.join(".storkit");
+
+    OnboardingStatus {
+        needs_context: is_template_or_missing(
+            &story_kit.join("specs").join("00_CONTEXT.md"),
+            TEMPLATE_SENTINEL,
+        ),
+        needs_stack: is_template_or_missing(
+            &story_kit.join("specs").join("tech").join("STACK.md"),
+            TEMPLATE_SENTINEL,
+        ),
+        needs_test_script: is_template_or_missing(
+            &project_root.join("script").join("test"),
+            TEMPLATE_MARKER_SCRIPT,
+        ),
+        needs_project_toml: needs_project_toml(&story_kit),
+    }
+}
+
+/// Returns `true` when the file is missing, empty, or contains the
+/// given scaffold marker string.
+fn is_template_or_missing(path: &Path, marker: &str) -> bool {
+    match std::fs::read_to_string(path) {
+        Ok(content) => content.trim().is_empty() || content.contains(marker),
+        Err(_) => true,
+    }
+}
+
+/// Returns `true` when `project.toml` is missing or has no
+/// `[[component]]` entries.
+fn needs_project_toml(story_kit: &Path) -> bool {
+    let toml_path = story_kit.join("project.toml");
+    match std::fs::read_to_string(toml_path) {
+        Ok(content) => !content.contains("[[component]]"),
+        Err(_) => true,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::TempDir;
+
+    fn setup_project(dir: &TempDir) -> std::path::PathBuf {
+        let root = dir.path().to_path_buf();
+        let sk = root.join(".storkit");
+        fs::create_dir_all(sk.join("specs").join("tech")).unwrap();
+        fs::create_dir_all(root.join("script")).unwrap();
+        root
+    }
+
+    // ── needs_onboarding ──────────────────────────────────────────
+
+    #[test]
+    fn needs_onboarding_true_when_no_files_exist() {
+        let dir = TempDir::new().unwrap();
+        let root = dir.path().to_path_buf();
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_onboarding());
+        assert!(status.needs_context);
+        assert!(status.needs_stack);
+        assert!(status.needs_test_script);
+        assert!(status.needs_project_toml);
+    }
+
+    #[test]
+    fn needs_onboarding_true_when_specs_contain_scaffold_sentinel() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        // Write content that includes the scaffold sentinel
+        fs::write(
+            root.join(".storkit/specs/00_CONTEXT.md"),
+            "<!-- storkit:scaffold-template -->\n# Project Context\nPlaceholder...",
+        )
+        .unwrap();
+        fs::write(
+            root.join(".storkit/specs/tech/STACK.md"),
+            "<!-- storkit:scaffold-template -->\n# Tech Stack\nPlaceholder...",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_context);
+        assert!(status.needs_stack);
+        assert!(status.needs_onboarding());
+    }
+
+    #[test]
+    fn needs_onboarding_false_when_content_mentions_agentic_but_no_sentinel() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        // Real project content that happens to mention "Agentic AI Code Assistant"
+        // but does NOT contain the scaffold sentinel — should NOT trigger onboarding.
+        fs::write(
+            root.join(".storkit/specs/00_CONTEXT.md"),
+            "# Project Context\nTo build a standalone Agentic AI Code Assistant application.",
+        )
+        .unwrap();
+        fs::write(
+            root.join(".storkit/specs/tech/STACK.md"),
+            "# Tech Stack\nThis is an Agentic Code Assistant binary.",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(!status.needs_context);
+        assert!(!status.needs_stack);
+        assert!(!status.needs_onboarding());
+    }
+
+    #[test]
+    fn needs_onboarding_false_when_specs_have_custom_content() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(
+            root.join(".storkit/specs/00_CONTEXT.md"),
+            "# My Project\n\nThis is an e-commerce platform for selling widgets.",
+        )
+        .unwrap();
+        fs::write(
+            root.join(".storkit/specs/tech/STACK.md"),
+            "# Tech Stack\n\n## Backend: Python + FastAPI\n## Frontend: React + TypeScript",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(!status.needs_context);
+        assert!(!status.needs_stack);
+        assert!(!status.needs_onboarding());
+    }
+
+    #[test]
+    fn needs_onboarding_true_when_specs_are_empty() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(root.join(".storkit/specs/00_CONTEXT.md"), "  \n").unwrap();
+        fs::write(root.join(".storkit/specs/tech/STACK.md"), "").unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_context);
+        assert!(status.needs_stack);
+    }
+
+    // ── needs_test_script ─────────────────────────────────────────
+
+    #[test]
+    fn needs_test_script_true_when_placeholder() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(
+            root.join("script/test"),
+            "#!/usr/bin/env bash\nset -euo pipefail\necho \"No tests configured\"\n",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_test_script);
+    }
+
+    #[test]
+    fn needs_test_script_false_when_customised() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(
+            root.join("script/test"),
+            "#!/usr/bin/env bash\nset -euo pipefail\ncargo test\n",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(!status.needs_test_script);
+    }
+
+    // ── needs_project_toml ────────────────────────────────────────
+
+    #[test]
+    fn needs_project_toml_true_when_missing() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_project_toml);
+    }
+
+    #[test]
+    fn needs_project_toml_true_when_no_components() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(root.join(".storkit/project.toml"), "# empty config\n").unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_project_toml);
+    }
+
+    #[test]
+    fn needs_project_toml_false_when_has_components() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        fs::write(
+            root.join(".storkit/project.toml"),
+            "[[component]]\nname = \"app\"\npath = \".\"\nsetup = [\"cargo check\"]\n",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(!status.needs_project_toml);
+    }
+
+    // ── CLAUDE.md is not an onboarding step ──────────────────────
+
+    #[test]
+    fn onboarding_status_does_not_check_claude_md() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        // Write real content for the required onboarding files
+        fs::write(
+            root.join(".storkit/specs/00_CONTEXT.md"),
+            "# My Project\n\nReal project context.",
+        )
+        .unwrap();
+        fs::write(
+            root.join(".storkit/specs/tech/STACK.md"),
+            "# My Stack\n\nReal stack content.",
+        )
+        .unwrap();
+
+        // CLAUDE.md is absent — should NOT affect onboarding result
+        assert!(!root.join("CLAUDE.md").exists());
+
+        let status = check_onboarding_status(&root);
+        assert!(
+            !status.needs_context,
+            "needs_context should be false with real content"
+        );
+        assert!(
+            !status.needs_stack,
+            "needs_stack should be false with real content"
+        );
+        assert!(
+            !status.needs_onboarding(),
+            "needs_onboarding() should be false regardless of CLAUDE.md presence"
+        );
+    }
+
+    // ── partial onboarding ────────────────────────────────────────
+
+    #[test]
+    fn needs_onboarding_true_when_only_context_is_template() {
+        let dir = TempDir::new().unwrap();
+        let root = setup_project(&dir);
+
+        // Context still has sentinel
+        fs::write(
+            root.join(".storkit/specs/00_CONTEXT.md"),
+            "<!-- storkit:scaffold-template -->\n# Project Context\nPlaceholder...",
+        )
+        .unwrap();
+        // Stack is customised (no sentinel)
+        fs::write(
+            root.join(".storkit/specs/tech/STACK.md"),
+            "# My Stack\nRuby on Rails + PostgreSQL",
+        )
+        .unwrap();
+
+        let status = check_onboarding_status(&root);
+        assert!(status.needs_context);
+        assert!(!status.needs_stack);
+        assert!(status.needs_onboarding());
+    }
+}
--- a/server/src/io/search.rs
+++ b/server/src/io/search.rs
@@ -0,0 +1,218 @@
+use crate::slog;
+use crate::state::SessionState;
+use ignore::WalkBuilder;
+use serde::Serialize;
+use std::fs;
+use std::path::PathBuf;
+
+#[derive(Serialize, Debug, poem_openapi::Object)]
+pub struct SearchResult {
+    pub path: String,
+    pub matches: usize,
+}
+
+fn get_project_root(state: &SessionState) -> Result<PathBuf, String> {
+    state.get_project_root()
+}
+
+pub async fn search_files(
+    query: String,
+    state: &SessionState,
+) -> Result<Vec<SearchResult>, String> {
+    let root = get_project_root(state)?;
+    search_files_impl(query, root).await
+}
+
+pub async fn search_files_impl(query: String, root: PathBuf) -> Result<Vec<SearchResult>, String> {
+    let root_clone = root.clone();
+
+    let results = tokio::task::spawn_blocking(move || {
+        let mut matches = Vec::new();
+        let walker = WalkBuilder::new(&root_clone).git_ignore(true).build();
+
+        for result in walker {
+            match result {
+                Ok(entry) => {
+                    if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
+                        continue;
+                    }
+
+                    let path = entry.path();
+                    if let Ok(content) = fs::read_to_string(path)
+                        && content.contains(&query)
+                    {
+                        let relative = path
+                            .strip_prefix(&root_clone)
+                            .unwrap_or(path)
+                            .to_string_lossy()
+                            .to_string();
+
+                        matches.push(SearchResult {
+                            path: relative,
+                            matches: 1,
+                        });
+                    }
+                }
+                Err(err) => slog!("Error walking dir: {}", err),
+            }
+        }
+
+        matches
+    })
+    .await
+    .map_err(|e| format!("Search task failed: {e}"))?;
+
+    Ok(results)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::TempDir;
+
+    fn setup_project(files: &[(&str, &str)]) -> TempDir {
+        let dir = TempDir::new().unwrap();
+        for (path, content) in files {
+            let full = dir.path().join(path);
+            if let Some(parent) = full.parent() {
+                fs::create_dir_all(parent).unwrap();
+            }
+            fs::write(full, content).unwrap();
+        }
+        dir
+    }
+
+    #[tokio::test]
+    async fn finds_files_matching_query() {
+        let dir = setup_project(&[
+            ("hello.txt", "hello world"),
+            ("goodbye.txt", "goodbye world"),
+        ]);
+
+        let results = search_files_impl("hello".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].path, "hello.txt");
+    }
+
+    #[tokio::test]
+    async fn returns_empty_for_no_matches() {
+        let dir = setup_project(&[("file.txt", "some content")]);
+
+        let results = search_files_impl("nonexistent".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert!(results.is_empty());
+    }
+
+    #[tokio::test]
+    async fn searches_nested_directories() {
+        let dir = setup_project(&[
+            ("top.txt", "needle"),
+            ("sub/deep.txt", "needle in haystack"),
+            ("sub/other.txt", "no match here"),
+        ]);
+
+        let results = search_files_impl("needle".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 2);
+        let paths: Vec<&str> = results.iter().map(|r| r.path.as_str()).collect();
+        assert!(paths.contains(&"top.txt"));
+        assert!(paths.contains(&"sub/deep.txt"));
+    }
+
+    #[tokio::test]
+    async fn skips_directories_only_matches_files() {
+        let dir = setup_project(&[("sub/file.txt", "content")]);
+
+        let results = search_files_impl("content".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].path, "sub/file.txt");
+    }
+
+    #[tokio::test]
+    async fn respects_gitignore() {
+        let dir = setup_project(&[
+            (".gitignore", "ignored/\n"),
+            ("kept.txt", "search term"),
+            ("ignored/hidden.txt", "search term"),
+        ]);
+
+        // Initialize a git repo so .gitignore is respected
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+
+        let results = search_files_impl("search term".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].path, "kept.txt");
+    }
+
+    #[tokio::test]
+    async fn search_files_with_session_state() {
+        let dir = setup_project(&[("found.txt", "target_text")]);
+        let state = SessionState::default();
+        *state.project_root.lock().unwrap() = Some(dir.path().to_path_buf());
+
+        let results = search_files("target_text".to_string(), &state).await.unwrap();
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].path, "found.txt");
+    }
+
+    #[tokio::test]
+    async fn search_files_errors_without_project_root() {
+        let state = SessionState::default();
+
+        let result = search_files("query".to_string(), &state).await;
+
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("No project"));
+    }
+
+    #[test]
+    fn search_result_serializes_and_debugs() {
+        let sr = SearchResult {
+            path: "src/main.rs".to_string(),
+            matches: 3,
+        };
+        let json = serde_json::to_string(&sr).unwrap();
+        assert!(json.contains("src/main.rs"));
+        assert!(json.contains("3"));
+
+        let debug = format!("{sr:?}");
+        assert!(debug.contains("SearchResult"));
+        assert!(debug.contains("src/main.rs"));
+    }
+
+    #[tokio::test]
+    async fn skips_binary_files() {
+        let dir = TempDir::new().unwrap();
+        // Write a file with invalid UTF-8 bytes
+        let binary_path = dir.path().join("binary.bin");
+        fs::write(&binary_path, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
+        // Write a valid text file with the search term
+        fs::write(dir.path().join("text.txt"), "findme").unwrap();
+
+        let results = search_files_impl("findme".to_string(), dir.path().to_path_buf())
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].path, "text.txt");
+    }
+}
--- a/server/src/io/shell.rs
+++ b/server/src/io/shell.rs
@@ -0,0 +1,189 @@
+use crate::state::SessionState;
+use serde::Serialize;
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Helper to get the root path (cloned) without joining
+fn get_project_root(state: &SessionState) -> Result<PathBuf, String> {
+    state.get_project_root()
+}
+
+#[derive(Serialize, Debug, poem_openapi::Object)]
+pub struct CommandOutput {
+    pub stdout: String,
+    pub stderr: String,
+    pub exit_code: i32,
+}
+
+/// Execute shell command logic (pure function for testing)
+async fn exec_shell_impl(
+    command: String,
+    args: Vec<String>,
+    root: PathBuf,
+) -> Result<CommandOutput, String> {
+    // Security Allowlist
+    let allowed_commands = [
+        "git", "cargo", "npm", "yarn", "pnpm", "node", "bun", "ls", "find", "grep", "mkdir", "rm",
+        "mv", "cp", "touch", "rustc", "rustfmt",
+    ];
+
+    if !allowed_commands.contains(&command.as_str()) {
+        return Err(format!("Command '{}' is not in the allowlist.", command));
+    }
+
+    let output = tokio::task::spawn_blocking(move || {
+        Command::new(&command)
+            .args(&args)
+            .current_dir(root)
+            .output()
+    })
+    .await
+    .map_err(|e| format!("Task join error: {}", e))?
+    .map_err(|e| format!("Failed to execute command: {}", e))?;
+
+    Ok(CommandOutput {
+        stdout: String::from_utf8_lossy(&output.stdout).to_string(),
+        stderr: String::from_utf8_lossy(&output.stderr).to_string(),
+        exit_code: output.status.code().unwrap_or(-1),
+    })
+}
+
+pub async fn exec_shell(
+    command: String,
+    args: Vec<String>,
+    state: &SessionState,
+) -> Result<CommandOutput, String> {
+    let root = get_project_root(state)?;
+    exec_shell_impl(command, args, root).await
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[tokio::test]
+    async fn exec_shell_impl_rejects_disallowed_command() {
+        let dir = tempdir().unwrap();
+        let result = exec_shell_impl(
+            "curl".to_string(),
+            vec!["https://example.com".to_string()],
+            dir.path().to_path_buf(),
+        )
+        .await;
+
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("not in the allowlist"));
+    }
+
+    #[tokio::test]
+    async fn exec_shell_impl_runs_allowed_command() {
+        let dir = tempdir().unwrap();
+        let result = exec_shell_impl(
+            "ls".to_string(),
+            Vec::new(),
+            dir.path().to_path_buf(),
+        )
+        .await;
+
+        assert!(result.is_ok());
+        let output = result.unwrap();
+        assert_eq!(output.exit_code, 0);
+    }
+
+    #[tokio::test]
+    async fn exec_shell_impl_captures_stdout() {
+        let dir = tempdir().unwrap();
+        std::fs::write(dir.path().join("hello.txt"), "").unwrap();
+
+        let result = exec_shell_impl(
+            "ls".to_string(),
+            Vec::new(),
+            dir.path().to_path_buf(),
+        )
+        .await
+        .unwrap();
+
+        assert!(result.stdout.contains("hello.txt"));
+    }
+
+    #[tokio::test]
+    async fn exec_shell_impl_returns_nonzero_exit_code() {
+        let dir = tempdir().unwrap();
+        let result = exec_shell_impl(
+            "ls".to_string(),
+            vec!["nonexistent_file_xyz".to_string()],
+            dir.path().to_path_buf(),
+        )
+        .await
+        .unwrap();
+
+        assert_ne!(result.exit_code, 0);
+        assert!(!result.stderr.is_empty());
+    }
+
+    #[tokio::test]
+    async fn exec_shell_delegates_to_impl_via_state() {
+        let dir = tempdir().unwrap();
+        std::fs::write(dir.path().join("marker.txt"), "hello").unwrap();
+
+        let state = SessionState::default();
+        *state.project_root.lock().unwrap() = Some(dir.path().to_path_buf());
+
+        let result = exec_shell("ls".to_string(), Vec::new(), &state)
+            .await
+            .unwrap();
+
+        assert_eq!(result.exit_code, 0);
+        assert!(result.stdout.contains("marker.txt"));
+    }
+
+    #[tokio::test]
+    async fn exec_shell_errors_when_no_project_root() {
+        let state = SessionState::default();
+
+        let result = exec_shell("ls".to_string(), Vec::new(), &state).await;
+
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("No project"));
+    }
+
+    #[tokio::test]
+    async fn exec_shell_impl_errors_on_nonexistent_cwd() {
+        let result = exec_shell_impl(
+            "ls".to_string(),
+            Vec::new(),
+            PathBuf::from("/nonexistent_dir_that_does_not_exist_xyz"),
+        )
+        .await;
+
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("Failed to execute command"));
+    }
+
+    #[test]
+    fn command_output_serializes_to_json() {
+        let output = CommandOutput {
+            stdout: "hello".to_string(),
+            stderr: "".to_string(),
+            exit_code: 0,
+        };
+
+        let json = serde_json::to_string(&output).unwrap();
+        assert!(json.contains("\"stdout\":\"hello\""));
+        assert!(json.contains("\"exit_code\":0"));
+    }
+
+    #[test]
+    fn command_output_debug_format() {
+        let output = CommandOutput {
+            stdout: "out".to_string(),
+            stderr: "err".to_string(),
+            exit_code: 1,
+        };
+
+        let debug = format!("{:?}", output);
+        assert!(debug.contains("CommandOutput"));
+        assert!(debug.contains("out"));
+    }
+}
--- a/server/src/io/story_metadata.rs
+++ b/server/src/io/story_metadata.rs
@@ -0,0 +1,542 @@
+use serde::Deserialize;
+use std::fs;
+use std::path::Path;
+
+/// QA mode for a story: determines how the pipeline handles post-coder review.
+///
+/// - `Server` — skip the QA agent; rely on server gate checks (clippy + tests).
+///   If gates pass, advance straight to merge.
+/// - `Agent` — spin up a QA agent (Claude session) to review code and run gates.
+/// - `Human` — hold in QA for human approval after server gates pass.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum QaMode {
+    Server,
+    Agent,
+    Human,
+}
+
+impl QaMode {
+    /// Parse a string into a `QaMode`. Returns `None` for unrecognised values.
+    pub fn from_str(s: &str) -> Option<Self> {
+        match s.trim().to_lowercase().as_str() {
+            "server" => Some(Self::Server),
+            "agent" => Some(Self::Agent),
+            "human" => Some(Self::Human),
+            _ => None,
+        }
+    }
+
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Server => "server",
+            Self::Agent => "agent",
+            Self::Human => "human",
+        }
+    }
+}
+
+impl std::fmt::Display for QaMode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub struct StoryMetadata {
+    pub name: Option<String>,
+    pub coverage_baseline: Option<String>,
+    pub merge_failure: Option<String>,
+    pub agent: Option<String>,
+    pub review_hold: Option<bool>,
+    pub qa: Option<QaMode>,
+    /// Number of times this story has been retried at its current pipeline stage.
+    pub retry_count: Option<u32>,
+    /// When `true`, auto-assign will skip this story (retry limit exceeded).
+    pub blocked: Option<bool>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum StoryMetaError {
+    MissingFrontMatter,
+    InvalidFrontMatter(String),
+}
+
+impl std::fmt::Display for StoryMetaError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            StoryMetaError::MissingFrontMatter => write!(f, "Missing front matter"),
+            StoryMetaError::InvalidFrontMatter(msg) => write!(f, "Invalid front matter: {msg}"),
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct FrontMatter {
+    name: Option<String>,
+    coverage_baseline: Option<String>,
+    merge_failure: Option<String>,
+    agent: Option<String>,
+    review_hold: Option<bool>,
+    /// Configurable QA mode field: "human", "server", or "agent".
+    qa: Option<String>,
+    /// Number of times this story has been retried at its current pipeline stage.
+    retry_count: Option<u32>,
+    /// When `true`, auto-assign will skip this story (retry limit exceeded).
+    blocked: Option<bool>,
+}
+
+pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaError> {
+    let mut lines = contents.lines();
+
+    let first = lines.next().unwrap_or_default().trim();
+    if first != "---" {
+        return Err(StoryMetaError::MissingFrontMatter);
+    }
+
+    let mut front_lines = Vec::new();
+    for line in &mut lines {
+        let trimmed = line.trim();
+        if trimmed == "---" {
+            let raw = front_lines.join("\n");
+            let front: FrontMatter = serde_yaml::from_str(&raw)
+                .map_err(|e| StoryMetaError::InvalidFrontMatter(e.to_string()))?;
+            return Ok(build_metadata(front));
+        }
+        front_lines.push(line);
+    }
+
+    Err(StoryMetaError::InvalidFrontMatter(
+        "Missing closing front matter delimiter".to_string(),
+    ))
+}
+
+fn build_metadata(front: FrontMatter) -> StoryMetadata {
+    let qa = front.qa.as_deref().and_then(QaMode::from_str);
+
+    StoryMetadata {
+        name: front.name,
+        coverage_baseline: front.coverage_baseline,
+        merge_failure: front.merge_failure,
+        agent: front.agent,
+        review_hold: front.review_hold,
+        qa,
+        retry_count: front.retry_count,
+        blocked: front.blocked,
+    }
+}
+
+/// Write or update a `coverage_baseline:` field in the YAML front matter of a story file.
+///
+/// If front matter is present, adds or replaces `coverage_baseline:` before the closing `---`.
+/// If no front matter is present, this is a no-op (returns Ok).
+pub fn write_coverage_baseline(path: &Path, coverage_pct: f64) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let updated = set_front_matter_field(&contents, "coverage_baseline", &format!("{coverage_pct:.1}%"));
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Write or update a `merge_failure:` field in the YAML front matter of a story file.
+///
+/// The reason is stored as a quoted YAML string so that colons, hashes, and newlines
+/// in the failure message do not break front-matter parsing.
+/// If no front matter is present, this is a no-op (returns Ok).
+pub fn write_merge_failure(path: &Path, reason: &str) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    // Produce a YAML-safe inline quoted string: collapse newlines, escape inner quotes.
+    let escaped = reason.replace('"', "\\\"").replace('\n', " ").replace('\r', "");
+    let yaml_value = format!("\"{escaped}\"");
+
+    let updated = set_front_matter_field(&contents, "merge_failure", &yaml_value);
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Write `review_hold: true` to the YAML front matter of a story file.
+///
+/// Used to mark spikes that have passed QA and are waiting for human review.
+pub fn write_review_hold(path: &Path) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+    let updated = set_front_matter_field(&contents, "review_hold", "true");
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Remove a key from the YAML front matter of a story file on disk.
+///
+/// If front matter is present and contains the key, the line is removed.
+/// If no front matter or key is not found, the file is left unchanged.
+pub fn clear_front_matter_field(path: &Path, key: &str) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+    let updated = remove_front_matter_field(&contents, key);
+    if updated != contents {
+        fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    }
+    Ok(())
+}
+
+/// Remove a key: value line from the YAML front matter of a markdown string.
+///
+/// If no front matter (opening `---`) is found or the key is absent, returns content unchanged.
+fn remove_front_matter_field(contents: &str, key: &str) -> String {
+    let mut lines: Vec<String> = contents.lines().map(String::from).collect();
+    if lines.is_empty() || lines[0].trim() != "---" {
+        return contents.to_string();
+    }
+
+    let close_idx = match lines[1..].iter().position(|l| l.trim() == "---") {
+        Some(i) => i + 1,
+        None => return contents.to_string(),
+    };
+
+    let key_prefix = format!("{key}:");
+    if let Some(idx) = lines[1..close_idx]
+        .iter()
+        .position(|l| l.trim_start().starts_with(&key_prefix))
+        .map(|i| i + 1)
+    {
+        lines.remove(idx);
+    } else {
+        return contents.to_string();
+    }
+
+    let mut result = lines.join("\n");
+    if contents.ends_with('\n') {
+        result.push('\n');
+    }
+    result
+}
+
+/// Insert or update a key: value pair in the YAML front matter of a markdown string.
+///
+/// If no front matter (opening `---`) is found, returns the content unchanged.
+pub fn set_front_matter_field(contents: &str, key: &str, value: &str) -> String {
+    let mut lines: Vec<String> = contents.lines().map(String::from).collect();
+    if lines.is_empty() || lines[0].trim() != "---" {
+        return contents.to_string();
+    }
+
+    // Find closing --- (search from index 1)
+    let close_idx = match lines[1..].iter().position(|l| l.trim() == "---") {
+        Some(i) => i + 1,
+        None => return contents.to_string(),
+    };
+
+    let key_prefix = format!("{key}:");
+    let existing_idx = lines[1..close_idx]
+        .iter()
+        .position(|l| l.trim_start().starts_with(&key_prefix))
+        .map(|i| i + 1);
+
+    let new_line = format!("{key}: {value}");
+    if let Some(idx) = existing_idx {
+        lines[idx] = new_line;
+    } else {
+        lines.insert(close_idx, new_line);
+    }
+
+    let mut result = lines.join("\n");
+    if contents.ends_with('\n') {
+        result.push('\n');
+    }
+    result
+}
+
+/// Increment the `retry_count` field in the story file's front matter.
+///
+/// Reads the current value (defaulting to 0), increments by 1, and writes back.
+/// Returns the new retry count.
+pub fn increment_retry_count(path: &Path) -> Result<u32, String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let current = parse_front_matter(&contents)
+        .ok()
+        .and_then(|m| m.retry_count)
+        .unwrap_or(0);
+    let new_count = current + 1;
+
+    let updated = set_front_matter_field(&contents, "retry_count", &new_count.to_string());
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(new_count)
+}
+
+/// Write `blocked: true` to the YAML front matter of a story file.
+///
+/// Used to mark stories that have exceeded the retry limit and should not
+/// be auto-assigned again.
+pub fn write_blocked(path: &Path) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+    let updated = set_front_matter_field(&contents, "blocked", "true");
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Append rejection notes to a story file body.
+///
+/// Adds a `## QA Rejection Notes` section at the end of the file so the coder
+/// agent can see what needs fixing.
+pub fn write_rejection_notes(path: &Path, notes: &str) -> Result<(), String> {
+    let contents =
+        fs::read_to_string(path).map_err(|e| format!("Failed to read story file: {e}"))?;
+
+    let section = format!("\n\n## QA Rejection Notes\n\n{notes}\n");
+    let updated = format!("{contents}{section}");
+    fs::write(path, &updated).map_err(|e| format!("Failed to write story file: {e}"))?;
+    Ok(())
+}
+
+/// Resolve the effective QA mode for a story file.
+///
+/// Reads the `qa` front matter field. If absent, falls back to `default`.
+/// Spikes are **not** handled here — the caller is responsible for overriding
+/// to `Human` for spikes.
+pub fn resolve_qa_mode(path: &Path, default: QaMode) -> QaMode {
+    let contents = match fs::read_to_string(path) {
+        Ok(c) => c,
+        Err(_) => return default,
+    };
+    match parse_front_matter(&contents) {
+        Ok(meta) => meta.qa.unwrap_or(default),
+        Err(_) => default,
+    }
+}
+
+pub fn parse_unchecked_todos(contents: &str) -> Vec<String> {
+    contents
+        .lines()
+        .filter_map(|line| {
+            let trimmed = line.trim();
+            trimmed
+                .strip_prefix("- [ ] ")
+                .map(|text| text.to_string())
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_front_matter_metadata() {
+        let input = r#"---
+name: Establish the TDD Workflow and Gates
+workflow: tdd
+---
+# Story 26
+"#;
+
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.name.as_deref(), Some("Establish the TDD Workflow and Gates"));
+        assert_eq!(meta.coverage_baseline, None);
+    }
+
+    #[test]
+    fn parses_coverage_baseline_from_front_matter() {
+        let input = "---\nname: Test Story\ncoverage_baseline: 78.5%\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.coverage_baseline.as_deref(), Some("78.5%"));
+    }
+
+    #[test]
+    fn set_front_matter_field_inserts_new_key() {
+        let input = "---\nname: My Story\n---\n# Body\n";
+        let output = set_front_matter_field(input, "coverage_baseline", "55.0%");
+        assert!(output.contains("coverage_baseline: 55.0%"));
+        assert!(output.contains("name: My Story"));
+        assert!(output.ends_with('\n'));
+    }
+
+    #[test]
+    fn set_front_matter_field_updates_existing_key() {
+        let input = "---\nname: My Story\ncoverage_baseline: 40.0%\n---\n# Body\n";
+        let output = set_front_matter_field(input, "coverage_baseline", "55.0%");
+        assert!(output.contains("coverage_baseline: 55.0%"));
+        assert!(!output.contains("40.0%"));
+    }
+
+    #[test]
+    fn set_front_matter_field_no_op_without_front_matter() {
+        let input = "# No front matter\n";
+        let output = set_front_matter_field(input, "coverage_baseline", "55.0%");
+        assert_eq!(output, input);
+    }
+
+    #[test]
+    fn write_coverage_baseline_updates_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("story.md");
+        std::fs::write(&path, "---\nname: Test\n---\n# Story\n").unwrap();
+        write_coverage_baseline(&path, 82.3).unwrap();
+        let contents = std::fs::read_to_string(&path).unwrap();
+        assert!(contents.contains("coverage_baseline: 82.3%"));
+    }
+
+    #[test]
+    fn rejects_missing_front_matter() {
+        let input = "# Story 26\n";
+        assert_eq!(
+            parse_front_matter(input),
+            Err(StoryMetaError::MissingFrontMatter)
+        );
+    }
+
+    #[test]
+    fn rejects_unclosed_front_matter() {
+        let input = "---\nname: Test\n";
+        assert!(matches!(
+            parse_front_matter(input),
+            Err(StoryMetaError::InvalidFrontMatter(_))
+        ));
+    }
+
+    #[test]
+    fn remove_front_matter_field_removes_key() {
+        let input = "---\nname: My Story\nmerge_failure: \"something broke\"\n---\n# Body\n";
+        let output = remove_front_matter_field(input, "merge_failure");
+        assert!(!output.contains("merge_failure"));
+        assert!(output.contains("name: My Story"));
+        assert!(output.ends_with('\n'));
+    }
+
+    #[test]
+    fn remove_front_matter_field_no_op_when_absent() {
+        let input = "---\nname: My Story\n---\n# Body\n";
+        let output = remove_front_matter_field(input, "merge_failure");
+        assert_eq!(output, input);
+    }
+
+    #[test]
+    fn remove_front_matter_field_no_op_without_front_matter() {
+        let input = "# No front matter\n";
+        let output = remove_front_matter_field(input, "merge_failure");
+        assert_eq!(output, input);
+    }
+
+    #[test]
+    fn clear_front_matter_field_updates_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("story.md");
+        std::fs::write(&path, "---\nname: Test\nmerge_failure: \"bad\"\n---\n# Story\n").unwrap();
+        clear_front_matter_field(&path, "merge_failure").unwrap();
+        let contents = std::fs::read_to_string(&path).unwrap();
+        assert!(!contents.contains("merge_failure"));
+        assert!(contents.contains("name: Test"));
+    }
+
+    #[test]
+    fn parse_unchecked_todos_mixed() {
+        let input = "## AC\n- [ ] First thing\n- [x] Done thing\n- [ ] Second thing\n";
+        assert_eq!(
+            parse_unchecked_todos(input),
+            vec!["First thing", "Second thing"]
+        );
+    }
+
+    #[test]
+    fn parse_unchecked_todos_all_checked() {
+        let input = "- [x] Done\n- [x] Also done\n";
+        assert!(parse_unchecked_todos(input).is_empty());
+    }
+
+    #[test]
+    fn parse_unchecked_todos_no_checkboxes() {
+        let input = "# Story\nSome text\n- A bullet\n";
+        assert!(parse_unchecked_todos(input).is_empty());
+    }
+
+    #[test]
+    fn parse_unchecked_todos_leading_whitespace() {
+        let input = "  - [ ] Indented item\n";
+        assert_eq!(parse_unchecked_todos(input), vec!["Indented item"]);
+    }
+
+    #[test]
+    fn parses_review_hold_from_front_matter() {
+        let input = "---\nname: Spike\nreview_hold: true\n---\n# Spike\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.review_hold, Some(true));
+    }
+
+    #[test]
+    fn review_hold_defaults_to_none() {
+        let input = "---\nname: Story\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.review_hold, None);
+    }
+
+    #[test]
+    fn write_review_hold_sets_field() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("spike.md");
+        std::fs::write(&path, "---\nname: My Spike\n---\n# Spike\n").unwrap();
+        write_review_hold(&path).unwrap();
+        let contents = std::fs::read_to_string(&path).unwrap();
+        assert!(contents.contains("review_hold: true"));
+        assert!(contents.contains("name: My Spike"));
+    }
+
+    #[test]
+    fn parses_qa_mode_from_front_matter() {
+        let input = "---\nname: Story\nqa: server\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.qa, Some(QaMode::Server));
+
+        let input = "---\nname: Story\nqa: agent\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.qa, Some(QaMode::Agent));
+
+        let input = "---\nname: Story\nqa: human\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.qa, Some(QaMode::Human));
+    }
+
+    #[test]
+    fn qa_mode_defaults_to_none() {
+        let input = "---\nname: Story\n---\n# Story\n";
+        let meta = parse_front_matter(input).expect("front matter");
+        assert_eq!(meta.qa, None);
+    }
+
+    #[test]
+    fn resolve_qa_mode_uses_file_value() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("story.md");
+        std::fs::write(&path, "---\nname: Test\nqa: human\n---\n# Story\n").unwrap();
+        assert_eq!(resolve_qa_mode(&path, QaMode::Server), QaMode::Human);
+    }
+
+    #[test]
+    fn resolve_qa_mode_falls_back_to_default() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("story.md");
+        std::fs::write(&path, "---\nname: Test\n---\n# Story\n").unwrap();
+        assert_eq!(resolve_qa_mode(&path, QaMode::Server), QaMode::Server);
+        assert_eq!(resolve_qa_mode(&path, QaMode::Agent), QaMode::Agent);
+    }
+
+    #[test]
+    fn resolve_qa_mode_missing_file_uses_default() {
+        let path = std::path::Path::new("/nonexistent/story.md");
+        assert_eq!(resolve_qa_mode(path, QaMode::Server), QaMode::Server);
+    }
+
+    #[test]
+    fn write_rejection_notes_appends_section() {
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("story.md");
+        std::fs::write(&path, "---\nname: Test\n---\n# Story\n").unwrap();
+        write_rejection_notes(&path, "Button color is wrong").unwrap();
+        let contents = std::fs::read_to_string(&path).unwrap();
+        assert!(contents.contains("## QA Rejection Notes"));
+        assert!(contents.contains("Button color is wrong"));
+    }
+}
--- a/server/src/io/watcher.rs
+++ b/server/src/io/watcher.rs
--- a/server/src/llm/chat.rs
+++ b/server/src/llm/chat.rs
--- a/server/src/llm/mod.rs
+++ b/server/src/llm/mod.rs
@@ -0,0 +1,4 @@
+pub mod chat;
+pub mod prompts;
+pub mod providers;
+pub mod types;
--- a/server/src/llm/prompts.rs
+++ b/server/src/llm/prompts.rs
@@ -0,0 +1,163 @@
+pub const SYSTEM_PROMPT: &str = r#"You are an AI Agent with direct access to the user's filesystem and development environment.
+
+CRITICAL INSTRUCTIONS:
+1.  **Distinguish Between Examples and Implementation:**
+    - If the user asks to "show", "give me an example", "how would I", or "what does X look like" → Respond with code in the chat
+    - If the user asks to "create", "add", "implement", "write", "fix", "modify", or "update" → Use `write_file` tool
+2.  **When Implementing:** Use the `write_file` tool to write actual files to disk
+3.  **When Teaching/Showing:** You CAN use markdown code blocks to demonstrate examples or explain concepts
+4.  **Context Matters:** If discussing a file that exists in the project, use tools. If showing generic examples, use code blocks.
+
+YOUR CAPABILITIES:
+You have the following tools available:
+- `read_file(path)` - Read the content of any file in the project
+- `write_file(path, content)` - Write or overwrite a file with new content
+- `list_directory(path)` - List files and directories
+- `search_files(query)` - Search for text patterns across all files
+- `exec_shell(command, args)` - Execute shell commands (git, cargo, npm, etc.)
+
+YOUR WORKFLOW:
+When the user requests a feature or change:
+1.  **Understand:** Read `.storkit/README.md` if you haven't already to understand the development process
+2.  **Explore:** Use `read_file` and `list_directory` to understand the current codebase structure
+3.  **Implement:** Use `write_file` to create or modify files directly
+4.  **Verify:** Use `exec_shell` to run tests, linters, or build commands to verify your changes work
+5.  **Report:** Tell the user what you did (past tense), not what they should do
+
+CRITICAL RULES:
+- **Read Before Write:** ALWAYS read files before modifying them. The `write_file` tool OVERWRITES the entire file.
+- **Complete Files Only:** When using `write_file`, output the COMPLETE file content, including all imports, functions, and unchanged code. Never write partial diffs or use placeholders like "// ... rest of code".
+- **Be Direct:** Don't announce your actions ("I will now..."). Just execute the tools immediately.
+- **Take Initiative:** If you need information, use tools to get it. Don't ask the user for things you can discover yourself.
+
+EXAMPLES OF CORRECT BEHAVIOR:
+
+Example 1 - User asks for an EXAMPLE (show in chat):
+User: "Show me a Java hello world"
+You (correct): "Here's a simple Java hello world program:
+```java
+public class HelloWorld {
+    public static void main(String[] args) {
+        System.out.println("Hello, World!");
+    }
+}
+```"
+
+Example 2 - User asks to IMPLEMENT (use tools):
+User: "Add error handling to the login function in auth.rs"
+You (correct): [Call read_file("src/auth.rs"), analyze it, then call write_file("src/auth.rs", <complete file with error handling>), then call exec_shell("cargo", ["check"])]
+You (correct response): "I've added error handling to the login function using Result<T, E> and added proper error propagation. The code compiles successfully."
+
+Example 3 - User asks to CREATE (use tools):
+User: "Create a new component called Button.tsx in the components folder"
+You (correct): [Call read_file("src/components/SomeExisting.tsx") to understand the project's component style, then call write_file("src/components/Button.tsx", <complete component code>)]
+You (correct response): "I've created Button.tsx with TypeScript interfaces and following the existing component patterns in your project."
+
+Example 4 - User asks to FIX (use tools):
+User: "The calculation in utils.js is wrong"
+You (correct): [Call read_file("src/utils.js"), identify the bug, call write_file("src/utils.js", <complete corrected file>), call exec_shell("npm", ["test"])]
+You (correct response): "I've fixed the calculation error in utils.js. The formula now correctly handles edge cases and all tests pass."
+
+EXAMPLES OF INCORRECT BEHAVIOR (DO NOT DO THIS):
+
+Example 1 - Writing a file when user asks for an example:
+User: "Show me a React component"
+You (WRONG): [Calls write_file("Component.tsx", ...)]
+You (CORRECT): Show the code in a markdown code block in the chat
+
+Example 2 - Suggesting code when user asks to implement:
+User: "Add error handling to the login function"
+You (WRONG): "Here's how you can add error handling: ```rust fn login() -> Result<User, LoginError> { ... } ``` Add this to your auth.rs file."
+You (CORRECT): [Use read_file then write_file to actually implement it]
+
+Example 3 - Writing partial code:
+User: "Update the API endpoint"
+You (WRONG): [Calls write_file with content like "// ... existing imports\n\nfn new_endpoint() { }\n\n// ... rest of file"]
+You (CORRECT): Read the file first, then write the COMPLETE file with all content
+
+Example 4 - Asking for information you can discover:
+User: "Add a new route to the app"
+You (WRONG): "What file contains your routes?"
+You (CORRECT): [Call search_files("route") or list_directory("src") to find the routing file yourself]
+
+REMEMBER:
+- **Teaching vs Implementing:** Show examples in chat, implement changes with tools
+- **Keywords matter:** "show/example" = chat, "create/add/fix" = tools
+- **Complete files:** Always write the COMPLETE file content when using write_file
+- **Verify your work:** Use exec_shell to run tests/checks after implementing changes
+- You have the power to both teach AND implement - use the right mode for the situation
+
+Remember: You are an autonomous agent that can both explain concepts and take action. Choose appropriately based on the user's request.
+"#;
+
+pub const ONBOARDING_PROMPT: &str = r#"ONBOARDING MODE ACTIVE — This is a newly scaffolded project. The spec files still contain placeholder content and must be replaced with real project information before any stories can be written.
+
+Guide the user through each step below. Ask ONE category of questions at a time — do not overwhelm the user with everything at once.
+
+## Step 1: Project Context
+Ask the user:
+- What is this project? What does it do?
+- Who are the target users?
+- What are the core features or goals?
+
+Then use `write_file` to write `.storkit/specs/00_CONTEXT.md` with:
+- **High-Level Goal** — a clear, concise summary of what the project does
+- **Core Features** — 3-5 bullet points
+- **Domain Definition** — key terms and roles
+- **Glossary** — project-specific terminology
+
+## Step 2: Tech Stack
+Ask the user:
+- What programming language(s)?
+- What framework(s) or libraries?
+- What build tool(s)?
+- What test runner(s)? (e.g. cargo test, pytest, jest, pnpm test)
+- What linter(s)? (e.g. clippy, eslint, biome, ruff)
+
+Then use `write_file` to write `.storkit/specs/tech/STACK.md` with:
+- **Overview** of the architecture
+- **Core Stack** — languages, frameworks, build tools
+- **Coding Standards** — formatting, linting, quality gates
+- **Libraries (Approved)** — key dependencies
+
+## Step 3: Test Script
+Based on the tech stack answers, use `write_file` to write `script/test` — a bash script that invokes the project's actual test runner. Examples:
+- Rust: `cargo test`
+- Python: `pytest`
+- Node/TypeScript: `pnpm test`
+- Go: `go test ./...`
+- Multi-component: run each component's tests sequentially
+
+The script must start with `#!/usr/bin/env bash` and `set -euo pipefail`.
+
+## Step 4: Project Configuration
+The scaffold has written `.storkit/project.toml` with example `[[component]]` sections. You must replace these examples with real definitions that match the project's actual tech stack.
+
+First, inspect the project structure to identify the tech stack:
+- Use `list_directory(".")` to see top-level files and directories
+- Look for tech stack markers: `Cargo.toml` (Rust/Cargo), `package.json` (Node/frontend), `pyproject.toml` or `requirements.txt` (Python), `go.mod` (Go), `Gemfile` (Ruby)
+- Check subdirectories like `frontend/`, `backend/`, `app/`, `web/` for nested stacks
+- If you find a `package.json`, check whether `pnpm-lock.yaml`, `yarn.lock`, or `package-lock.json` exists to determine the package manager
+
+Then use `read_file(".storkit/project.toml")` to see the current content, keeping the `[[agent]]` sections intact.
+
+Finally, use `write_file` to rewrite `.storkit/project.toml` with real `[[component]]` entries. Each component needs:
+- `name` — component identifier (e.g. "backend", "frontend", "app")
+- `path` — relative path from project root (use "." for root, "frontend" for a frontend subdirectory)
+- `setup` — list of setup commands that install dependencies and verify the build (e.g. ["pnpm install"], ["cargo check"])
+- `teardown` — list of cleanup commands (usually [])
+
+Preserve all `[[agent]]` entries from the existing file. Only replace the `[[component]]` sections.
+
+## Step 5: Commit & Finish
+After writing all files:
+1. Use `exec_shell` to run: `git`, `["add", "-A"]`
+2. Use `exec_shell` to run: `git`, `["commit", "-m", "docs: populate project specs and configure tooling"]`
+3. Tell the user: "Your project is set up! You're ready to write Story #1. Just tell me what you'd like to build."
+
+## Rules
+- Be conversational and helpful
+- After each file write, briefly confirm what you wrote
+- Make specs specific to the user's project — never leave scaffold placeholders
+- Do NOT skip steps or combine multiple steps into one question
+"#;
--- a/server/src/llm/providers/anthropic.rs
+++ b/server/src/llm/providers/anthropic.rs
@@ -0,0 +1,868 @@
+use crate::llm::types::{
+    CompletionResponse, FunctionCall, Message, Role, ToolCall, ToolDefinition,
+};
+use futures::StreamExt;
+use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+use tokio::sync::watch::Receiver;
+
+const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
+const ANTHROPIC_VERSION: &str = "2023-06-01";
+
+pub struct AnthropicProvider {
+    api_key: String,
+    client: reqwest::Client,
+    api_url: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct AnthropicMessage {
+    role: String, // "user" or "assistant"
+    content: AnthropicContent,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(untagged)]
+enum AnthropicContent {
+    Text(String),
+    Blocks(Vec<AnthropicContentBlock>),
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(tag = "type")]
+enum AnthropicContentBlock {
+    #[serde(rename = "text")]
+    Text { text: String },
+    #[serde(rename = "tool_use")]
+    ToolUse {
+        id: String,
+        name: String,
+        input: serde_json::Value,
+    },
+    #[serde(rename = "tool_result")]
+    ToolResult {
+        tool_use_id: String,
+        content: String,
+    },
+}
+
+#[derive(Debug, Serialize)]
+struct AnthropicTool {
+    name: String,
+    description: String,
+    input_schema: serde_json::Value,
+}
+
+#[derive(Debug, Deserialize)]
+struct StreamEvent {
+    #[serde(rename = "type")]
+    event_type: String,
+    #[serde(flatten)]
+    data: serde_json::Value,
+}
+
+impl AnthropicProvider {
+    pub fn new(api_key: String) -> Self {
+        Self {
+            api_key,
+            client: reqwest::Client::new(),
+            api_url: ANTHROPIC_API_URL.to_string(),
+        }
+    }
+
+    #[cfg(test)]
+    fn new_with_url(api_key: String, api_url: String) -> Self {
+        Self {
+            api_key,
+            client: reqwest::Client::new(),
+            api_url,
+        }
+    }
+
+    fn convert_tools(tools: &[ToolDefinition]) -> Vec<AnthropicTool> {
+        tools
+            .iter()
+            .map(|tool| AnthropicTool {
+                name: tool.function.name.clone(),
+                description: tool.function.description.clone(),
+                input_schema: tool.function.parameters.clone(),
+            })
+            .collect()
+    }
+
+    fn convert_messages(messages: &[Message]) -> Vec<AnthropicMessage> {
+        let mut anthropic_messages: Vec<AnthropicMessage> = Vec::new();
+
+        for msg in messages {
+            match msg.role {
+                Role::System => {
+                    continue;
+                }
+                Role::User => {
+                    anthropic_messages.push(AnthropicMessage {
+                        role: "user".to_string(),
+                        content: AnthropicContent::Text(msg.content.clone()),
+                    });
+                }
+                Role::Assistant => {
+                    if let Some(tool_calls) = &msg.tool_calls {
+                        let mut blocks = Vec::new();
+
+                        if !msg.content.is_empty() {
+                            blocks.push(AnthropicContentBlock::Text {
+                                text: msg.content.clone(),
+                            });
+                        }
+
+                        for call in tool_calls {
+                            let input: serde_json::Value =
+                                serde_json::from_str(&call.function.arguments).unwrap_or(json!({}));
+
+                            blocks.push(AnthropicContentBlock::ToolUse {
+                                id: call
+                                    .id
+                                    .clone()
+                                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
+                                name: call.function.name.clone(),
+                                input,
+                            });
+                        }
+
+                        anthropic_messages.push(AnthropicMessage {
+                            role: "assistant".to_string(),
+                            content: AnthropicContent::Blocks(blocks),
+                        });
+                    } else {
+                        anthropic_messages.push(AnthropicMessage {
+                            role: "assistant".to_string(),
+                            content: AnthropicContent::Text(msg.content.clone()),
+                        });
+                    }
+                }
+                Role::Tool => {
+                    let tool_use_id = msg.tool_call_id.clone().unwrap_or_default();
+                    anthropic_messages.push(AnthropicMessage {
+                        role: "user".to_string(),
+                        content: AnthropicContent::Blocks(vec![
+                            AnthropicContentBlock::ToolResult {
+                                tool_use_id,
+                                content: msg.content.clone(),
+                            },
+                        ]),
+                    });
+                }
+            }
+        }
+
+        anthropic_messages
+    }
+
+    fn extract_system_prompt(messages: &[Message]) -> String {
+        messages
+            .iter()
+            .filter(|m| matches!(m.role, Role::System))
+            .map(|m| m.content.as_str())
+            .collect::<Vec<_>>()
+            .join("\n\n")
+    }
+
+    pub async fn chat_stream<F, A>(
+        &self,
+        model: &str,
+        messages: &[Message],
+        tools: &[ToolDefinition],
+        cancel_rx: &mut Receiver<bool>,
+        mut on_token: F,
+        mut on_activity: A,
+    ) -> Result<CompletionResponse, String>
+    where
+        F: FnMut(&str),
+        A: FnMut(&str),
+    {
+        let anthropic_messages = Self::convert_messages(messages);
+        let anthropic_tools = Self::convert_tools(tools);
+        let system_prompt = Self::extract_system_prompt(messages);
+
+        let mut request_body = json!({
+            "model": model,
+            "max_tokens": 4096,
+            "messages": anthropic_messages,
+            "stream": true,
+        });
+
+        if !system_prompt.is_empty() {
+            request_body["system"] = json!(system_prompt);
+        }
+
+        if !anthropic_tools.is_empty() {
+            request_body["tools"] = json!(anthropic_tools);
+        }
+
+        let mut headers = HeaderMap::new();
+        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+        headers.insert(
+            "x-api-key",
+            HeaderValue::from_str(&self.api_key).map_err(|e| e.to_string())?,
+        );
+        headers.insert(
+            "anthropic-version",
+            HeaderValue::from_static(ANTHROPIC_VERSION),
+        );
+
+        let response = self
+            .client
+            .post(&self.api_url)
+            .headers(headers)
+            .json(&request_body)
+            .send()
+            .await
+            .map_err(|e| format!("Failed to send request to Anthropic: {e}"))?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let error_text = response
+                .text()
+                .await
+                .unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(format!("Anthropic API error {status}: {error_text}"));
+        }
+
+        let mut stream = response.bytes_stream();
+        let mut accumulated_text = String::new();
+        let mut tool_calls: Vec<ToolCall> = Vec::new();
+        let mut current_tool_use: Option<(String, String, String)> = None;
+
+        loop {
+            let chunk = tokio::select! {
+                result = stream.next() => {
+                    match result {
+                        Some(c) => c,
+                        None => break,
+                    }
+                }
+                _ = cancel_rx.changed() => {
+                    if *cancel_rx.borrow() {
+                        return Err("Chat cancelled by user".to_string());
+                    }
+                    continue;
+                }
+            };
+
+            let bytes = chunk.map_err(|e| format!("Stream error: {e}"))?;
+            let text = String::from_utf8_lossy(&bytes);
+
+            for line in text.lines() {
+                if let Some(json_str) = line.strip_prefix("data: ") {
+                    if json_str == "[DONE]" {
+                        break;
+                    }
+
+                    let event: StreamEvent = match serde_json::from_str(json_str) {
+                        Ok(e) => e,
+                        Err(_) => continue,
+                    };
+
+                    match event.event_type.as_str() {
+                        "content_block_start" => {
+                            if let Some(content_block) = event.data.get("content_block")
+                                && content_block.get("type") == Some(&json!("tool_use"))
+                            {
+                                let id = content_block["id"].as_str().unwrap_or("").to_string();
+                                let name = content_block["name"].as_str().unwrap_or("").to_string();
+                                on_activity(&name);
+                                current_tool_use = Some((id, name, String::new()));
+                            }
+                        }
+                        "content_block_delta" => {
+                            if let Some(delta) = event.data.get("delta") {
+                                if delta.get("type") == Some(&json!("text_delta")) {
+                                    if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
+                                        accumulated_text.push_str(text);
+                                        on_token(text);
+                                    }
+                                } else if delta.get("type") == Some(&json!("input_json_delta"))
+                                    && let Some((_, _, input_json)) = &mut current_tool_use
+                                    && let Some(partial) =
+                                        delta.get("partial_json").and_then(|p| p.as_str())
+                                {
+                                    input_json.push_str(partial);
+                                }
+                            }
+                        }
+                        "content_block_stop" => {
+                            if let Some((id, name, input_json)) = current_tool_use.take() {
+                                tool_calls.push(ToolCall {
+                                    id: Some(id),
+                                    kind: "function".to_string(),
+                                    function: FunctionCall {
+                                        name,
+                                        arguments: input_json,
+                                    },
+                                });
+                            }
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        Ok(CompletionResponse {
+            content: if accumulated_text.is_empty() {
+                None
+            } else {
+                Some(accumulated_text)
+            },
+            tool_calls: if tool_calls.is_empty() {
+                None
+            } else {
+                Some(tool_calls)
+            },
+            session_id: None,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{AnthropicContent, AnthropicContentBlock, AnthropicProvider};
+    use crate::llm::types::{
+        FunctionCall, Message, Role, ToolCall, ToolDefinition, ToolFunctionDefinition,
+    };
+    use serde_json::json;
+
+    fn user_msg(content: &str) -> Message {
+        Message {
+            role: Role::User,
+            content: content.to_string(),
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+
+    fn system_msg(content: &str) -> Message {
+        Message {
+            role: Role::System,
+            content: content.to_string(),
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+
+    fn assistant_msg(content: &str) -> Message {
+        Message {
+            role: Role::Assistant,
+            content: content.to_string(),
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+
+    fn make_tool_def(name: &str) -> ToolDefinition {
+        ToolDefinition {
+            kind: "function".to_string(),
+            function: ToolFunctionDefinition {
+                name: name.to_string(),
+                description: format!("{name} description"),
+                parameters: json!({"type": "object", "properties": {}}),
+            },
+        }
+    }
+
+    // ── convert_tools ────────────────────────────────────────────────────────
+
+    #[test]
+    fn test_convert_tools_empty() {
+        let result = AnthropicProvider::convert_tools(&[]);
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn test_convert_tools_single() {
+        let tool = make_tool_def("search_files");
+        let result = AnthropicProvider::convert_tools(&[tool]);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].name, "search_files");
+        assert_eq!(result[0].description, "search_files description");
+        assert_eq!(
+            result[0].input_schema,
+            json!({"type": "object", "properties": {}})
+        );
+    }
+
+    #[test]
+    fn test_convert_tools_multiple() {
+        let tools = vec![make_tool_def("read_file"), make_tool_def("write_file")];
+        let result = AnthropicProvider::convert_tools(&tools);
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0].name, "read_file");
+        assert_eq!(result[1].name, "write_file");
+    }
+
+    // ── convert_messages ─────────────────────────────────────────────────────
+
+    #[test]
+    fn test_convert_messages_user() {
+        let msgs = vec![user_msg("Hello")];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].role, "user");
+        match &result[0].content {
+            AnthropicContent::Text(t) => assert_eq!(t, "Hello"),
+            _ => panic!("Expected text content"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_system_skipped() {
+        let msgs = vec![system_msg("You are helpful"), user_msg("Hi")];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].role, "user");
+    }
+
+    #[test]
+    fn test_convert_messages_assistant_text() {
+        let msgs = vec![assistant_msg("I can help with that")];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].role, "assistant");
+        match &result[0].content {
+            AnthropicContent::Text(t) => assert_eq!(t, "I can help with that"),
+            _ => panic!("Expected text content"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_assistant_with_tool_calls_no_content() {
+        let msgs = vec![Message {
+            role: Role::Assistant,
+            content: String::new(),
+            tool_calls: Some(vec![ToolCall {
+                id: Some("toolu_abc".to_string()),
+                kind: "function".to_string(),
+                function: FunctionCall {
+                    name: "search_files".to_string(),
+                    arguments: r#"{"pattern": "*.rs"}"#.to_string(),
+                },
+            }]),
+            tool_call_id: None,
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].role, "assistant");
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => {
+                assert_eq!(blocks.len(), 1);
+                match &blocks[0] {
+                    AnthropicContentBlock::ToolUse { id, name, .. } => {
+                        assert_eq!(id, "toolu_abc");
+                        assert_eq!(name, "search_files");
+                    }
+                    _ => panic!("Expected ToolUse block"),
+                }
+            }
+            _ => panic!("Expected blocks content"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_assistant_with_tool_calls_and_content() {
+        let msgs = vec![Message {
+            role: Role::Assistant,
+            content: "Let me search for that".to_string(),
+            tool_calls: Some(vec![ToolCall {
+                id: Some("toolu_xyz".to_string()),
+                kind: "function".to_string(),
+                function: FunctionCall {
+                    name: "read_file".to_string(),
+                    arguments: r#"{"path": "main.rs"}"#.to_string(),
+                },
+            }]),
+            tool_call_id: None,
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => {
+                assert_eq!(blocks.len(), 2);
+                match &blocks[0] {
+                    AnthropicContentBlock::Text { text } => {
+                        assert_eq!(text, "Let me search for that");
+                    }
+                    _ => panic!("Expected Text block first"),
+                }
+                match &blocks[1] {
+                    AnthropicContentBlock::ToolUse { id, name, .. } => {
+                        assert_eq!(id, "toolu_xyz");
+                        assert_eq!(name, "read_file");
+                    }
+                    _ => panic!("Expected ToolUse block second"),
+                }
+            }
+            _ => panic!("Expected blocks content"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_assistant_tool_call_invalid_json_args() {
+        // Invalid JSON args fall back to {}
+        let msgs = vec![Message {
+            role: Role::Assistant,
+            content: String::new(),
+            tool_calls: Some(vec![ToolCall {
+                id: None,
+                kind: "function".to_string(),
+                function: FunctionCall {
+                    name: "my_tool".to_string(),
+                    arguments: "not valid json".to_string(),
+                },
+            }]),
+            tool_call_id: None,
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => match &blocks[0] {
+                AnthropicContentBlock::ToolUse { input, .. } => {
+                    assert_eq!(*input, json!({}));
+                }
+                _ => panic!("Expected ToolUse block"),
+            },
+            _ => panic!("Expected blocks"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_assistant_tool_call_no_id_generates_uuid() {
+        let msgs = vec![Message {
+            role: Role::Assistant,
+            content: String::new(),
+            tool_calls: Some(vec![ToolCall {
+                id: None, // no id provided
+                kind: "function".to_string(),
+                function: FunctionCall {
+                    name: "my_tool".to_string(),
+                    arguments: "{}".to_string(),
+                },
+            }]),
+            tool_call_id: None,
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => match &blocks[0] {
+                AnthropicContentBlock::ToolUse { id, .. } => {
+                    assert!(!id.is_empty(), "Should have generated a UUID");
+                }
+                _ => panic!("Expected ToolUse block"),
+            },
+            _ => panic!("Expected blocks"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_tool_role() {
+        let msgs = vec![Message {
+            role: Role::Tool,
+            content: "file content here".to_string(),
+            tool_calls: None,
+            tool_call_id: Some("toolu_123".to_string()),
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].role, "user");
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => {
+                assert_eq!(blocks.len(), 1);
+                match &blocks[0] {
+                    AnthropicContentBlock::ToolResult {
+                        tool_use_id,
+                        content,
+                    } => {
+                        assert_eq!(tool_use_id, "toolu_123");
+                        assert_eq!(content, "file content here");
+                    }
+                    _ => panic!("Expected ToolResult block"),
+                }
+            }
+            _ => panic!("Expected blocks content"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_tool_role_no_id_defaults_empty() {
+        let msgs = vec![Message {
+            role: Role::Tool,
+            content: "result".to_string(),
+            tool_calls: None,
+            tool_call_id: None,
+        }];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        match &result[0].content {
+            AnthropicContent::Blocks(blocks) => match &blocks[0] {
+                AnthropicContentBlock::ToolResult { tool_use_id, .. } => {
+                    assert_eq!(tool_use_id, "");
+                }
+                _ => panic!("Expected ToolResult block"),
+            },
+            _ => panic!("Expected blocks"),
+        }
+    }
+
+    #[test]
+    fn test_convert_messages_mixed_roles() {
+        let msgs = vec![
+            system_msg("Be helpful"),
+            user_msg("What is the time?"),
+            assistant_msg("I can check that."),
+        ];
+        let result = AnthropicProvider::convert_messages(&msgs);
+        // System is skipped
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0].role, "user");
+        assert_eq!(result[1].role, "assistant");
+    }
+
+    // ── extract_system_prompt ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_extract_system_prompt_no_messages() {
+        let msgs: Vec<Message> = vec![];
+        let prompt = AnthropicProvider::extract_system_prompt(&msgs);
+        assert!(prompt.is_empty());
+    }
+
+    #[test]
+    fn test_extract_system_prompt_no_system_messages() {
+        let msgs = vec![user_msg("Hello"), assistant_msg("Hi there")];
+        let prompt = AnthropicProvider::extract_system_prompt(&msgs);
+        assert!(prompt.is_empty());
+    }
+
+    #[test]
+    fn test_extract_system_prompt_single() {
+        let msgs = vec![system_msg("You are a helpful assistant"), user_msg("Hi")];
+        let prompt = AnthropicProvider::extract_system_prompt(&msgs);
+        assert_eq!(prompt, "You are a helpful assistant");
+    }
+
+    #[test]
+    fn test_extract_system_prompt_multiple_joined() {
+        let msgs = vec![
+            system_msg("First instruction"),
+            system_msg("Second instruction"),
+            user_msg("Hello"),
+        ];
+        let prompt = AnthropicProvider::extract_system_prompt(&msgs);
+        assert_eq!(prompt, "First instruction\n\nSecond instruction");
+    }
+
+    // ── chat_stream (HTTP mocked) ─────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn test_chat_stream_text_response() {
+        let mut server = mockito::Server::new_async().await;
+
+        let delta1 = json!({
+            "type": "content_block_delta",
+            "delta": {"type": "text_delta", "text": "Hello"}
+        });
+        let delta2 = json!({
+            "type": "content_block_delta",
+            "delta": {"type": "text_delta", "text": " world"}
+        });
+        let body = format!("data: {delta1}\ndata: {delta2}\ndata: [DONE]\n");
+
+        let _m = server
+            .mock("POST", "/v1/messages")
+            .with_status(200)
+            .with_header("content-type", "text/event-stream")
+            .with_body(body)
+            .create_async()
+            .await;
+
+        let provider = AnthropicProvider::new_with_url(
+            "test-key".to_string(),
+            format!("{}/v1/messages", server.url()),
+        );
+        let (_tx, mut cancel_rx) = tokio::sync::watch::channel(false);
+        let mut tokens = Vec::<String>::new();
+
+        let result = provider
+            .chat_stream(
+                "claude-3-5-sonnet-20241022",
+                &[user_msg("Hello")],
+                &[],
+                &mut cancel_rx,
+                |t| tokens.push(t.to_string()),
+                |_| {},
+            )
+            .await;
+
+        assert!(result.is_ok());
+        let response = result.unwrap();
+        assert_eq!(response.content, Some("Hello world".to_string()));
+        assert!(response.tool_calls.is_none());
+        assert_eq!(tokens, vec!["Hello", " world"]);
+    }
+
+    #[tokio::test]
+    async fn test_chat_stream_error_response() {
+        let mut server = mockito::Server::new_async().await;
+
+        let _m = server
+            .mock("POST", "/v1/messages")
+            .with_status(401)
+            .with_body(r#"{"error":{"type":"authentication_error","message":"Invalid API key"}}"#)
+            .create_async()
+            .await;
+
+        let provider = AnthropicProvider::new_with_url(
+            "bad-key".to_string(),
+            format!("{}/v1/messages", server.url()),
+        );
+        let (_tx, mut cancel_rx) = tokio::sync::watch::channel(false);
+
+        let result = provider
+            .chat_stream(
+                "claude-3-5-sonnet-20241022",
+                &[user_msg("Hello")],
+                &[],
+                &mut cancel_rx,
+                |_| {},
+                |_| {},
+            )
+            .await;
+
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("401"));
+    }
+
+    #[tokio::test]
+    async fn test_chat_stream_tool_use_response() {
+        let mut server = mockito::Server::new_async().await;
+
+        let start_event = json!({
+            "type": "content_block_start",
+            "content_block": {"type": "tool_use", "id": "toolu_abc", "name": "search_files"}
+        });
+        let delta_event = json!({
+            "type": "content_block_delta",
+            "delta": {"type": "input_json_delta", "partial_json": "{}"}
+        });
+        let stop_event = json!({"type": "content_block_stop"});
+        let body = format!(
+            "data: {start_event}\ndata: {delta_event}\ndata: {stop_event}\ndata: [DONE]\n"
+        );
+
+        let _m = server
+            .mock("POST", "/v1/messages")
+            .with_status(200)
+            .with_header("content-type", "text/event-stream")
+            .with_body(body)
+            .create_async()
+            .await;
+
+        let provider = AnthropicProvider::new_with_url(
+            "test-key".to_string(),
+            format!("{}/v1/messages", server.url()),
+        );
+        let (_tx, mut cancel_rx) = tokio::sync::watch::channel(false);
+        let mut activities = Vec::<String>::new();
+
+        let result = provider
+            .chat_stream(
+                "claude-3-5-sonnet-20241022",
+                &[user_msg("Find Rust files")],
+                &[make_tool_def("search_files")],
+                &mut cancel_rx,
+                |_| {},
+                |a| activities.push(a.to_string()),
+            )
+            .await;
+
+        assert!(result.is_ok());
+        let response = result.unwrap();
+        assert!(response.content.is_none());
+        let tool_calls = response.tool_calls.expect("Expected tool calls");
+        assert_eq!(tool_calls.len(), 1);
+        assert_eq!(tool_calls[0].id, Some("toolu_abc".to_string()));
+        assert_eq!(tool_calls[0].function.name, "search_files");
+        assert_eq!(activities, vec!["search_files"]);
+    }
+
+    #[tokio::test]
+    async fn test_chat_stream_includes_system_prompt() {
+        let mut server = mockito::Server::new_async().await;
+
+        let delta = json!({
+            "type": "content_block_delta",
+            "delta": {"type": "text_delta", "text": "ok"}
+        });
+        let body = format!("data: {delta}\ndata: [DONE]\n");
+
+        let _m = server
+            .mock("POST", "/v1/messages")
+            .with_status(200)
+            .with_header("content-type", "text/event-stream")
+            .with_body(body)
+            .create_async()
+            .await;
+
+        let provider = AnthropicProvider::new_with_url(
+            "test-key".to_string(),
+            format!("{}/v1/messages", server.url()),
+        );
+        let (_tx, mut cancel_rx) = tokio::sync::watch::channel(false);
+        let messages = vec![system_msg("Be concise"), user_msg("Hello")];
+
+        let result = provider
+            .chat_stream(
+                "claude-3-5-sonnet-20241022",
+                &messages,
+                &[],
+                &mut cancel_rx,
+                |_| {},
+                |_| {},
+            )
+            .await;
+
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap().content, Some("ok".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_chat_stream_empty_response_gives_none_content() {
+        let mut server = mockito::Server::new_async().await;
+
+        let _m = server
+            .mock("POST", "/v1/messages")
+            .with_status(200)
+            .with_header("content-type", "text/event-stream")
+            .with_body("data: [DONE]\n")
+            .create_async()
+            .await;
+
+        let provider = AnthropicProvider::new_with_url(
+            "test-key".to_string(),
+            format!("{}/v1/messages", server.url()),
+        );
+        let (_tx, mut cancel_rx) = tokio::sync::watch::channel(false);
+
+        let result = provider
+            .chat_stream(
+                "claude-3-5-sonnet-20241022",
+                &[user_msg("Hello")],
+                &[],
+                &mut cancel_rx,
+                |_| {},
+                |_| {},
+            )
+            .await;
+
+        assert!(result.is_ok());
+        let response = result.unwrap();
+        assert!(response.content.is_none());
+        assert!(response.tool_calls.is_none());
+    }
+}
--- a/server/src/llm/providers/claude_code.rs
+++ b/server/src/llm/providers/claude_code.rs
--- a/server/src/llm/providers/mod.rs
+++ b/server/src/llm/providers/mod.rs
@@ -0,0 +1,3 @@
+pub mod anthropic;
+pub mod claude_code;
+pub mod ollama;
--- a/server/src/llm/providers/ollama.rs
+++ b/server/src/llm/providers/ollama.rs
@@ -0,0 +1,267 @@
+use crate::llm::types::{
+    CompletionResponse, FunctionCall, Message, ModelProvider, Role, ToolCall, ToolDefinition,
+};
+use async_trait::async_trait;
+use futures::StreamExt;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+pub struct OllamaProvider {
+    base_url: String,
+}
+
+impl OllamaProvider {
+    pub fn new(base_url: String) -> Self {
+        Self { base_url }
+    }
+
+    pub async fn get_models(base_url: &str) -> Result<Vec<String>, String> {
+        let client = reqwest::Client::new();
+        let url = format!("{}/api/tags", base_url.trim_end_matches('/'));
+
+        let res = client
+            .get(&url)
+            .send()
+            .await
+            .map_err(|e| format!("Request failed: {}", e))?;
+
+        if !res.status().is_success() {
+            let status = res.status();
+            let text = res.text().await.unwrap_or_default();
+            return Err(format!("Ollama API error {}: {}", status, text));
+        }
+
+        let body: OllamaTagsResponse = res
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse response: {}", e))?;
+
+        Ok(body.models.into_iter().map(|m| m.name).collect())
+    }
+
+    /// Streaming chat that calls `on_token` for each token chunk.
+    pub async fn chat_stream<F>(
+        &self,
+        model: &str,
+        messages: &[Message],
+        tools: &[ToolDefinition],
+        cancel_rx: &mut tokio::sync::watch::Receiver<bool>,
+        mut on_token: F,
+    ) -> Result<CompletionResponse, String>
+    where
+        F: FnMut(&str) + Send,
+    {
+        let client = reqwest::Client::new();
+        let url = format!("{}/api/chat", self.base_url.trim_end_matches('/'));
+
+        let ollama_messages: Vec<OllamaRequestMessage> = messages
+            .iter()
+            .map(|m| {
+                let tool_calls = m.tool_calls.as_ref().map(|calls| {
+                    calls
+                        .iter()
+                        .map(|tc| {
+                            let args_val: Value = serde_json::from_str(&tc.function.arguments)
+                                .unwrap_or(Value::String(tc.function.arguments.clone()));
+
+                            OllamaRequestToolCall {
+                                kind: tc.kind.clone(),
+                                function: OllamaRequestFunctionCall {
+                                    name: tc.function.name.clone(),
+                                    arguments: args_val,
+                                },
+                            }
+                        })
+                        .collect()
+                });
+
+                OllamaRequestMessage {
+                    role: m.role.clone(),
+                    content: m.content.clone(),
+                    tool_calls,
+                    tool_call_id: m.tool_call_id.clone(),
+                }
+            })
+            .collect();
+
+        let request_body = OllamaRequest {
+            model,
+            messages: ollama_messages,
+            stream: true,
+            tools,
+        };
+
+        let res = client
+            .post(&url)
+            .json(&request_body)
+            .send()
+            .await
+            .map_err(|e| format!("Request failed: {}", e))?;
+
+        if !res.status().is_success() {
+            let status = res.status();
+            let text = res.text().await.unwrap_or_default();
+            return Err(format!("Ollama API error {}: {}", status, text));
+        }
+
+        let mut stream = res.bytes_stream();
+        let mut buffer = String::new();
+        let mut accumulated_content = String::new();
+        let mut final_tool_calls: Option<Vec<ToolCall>> = None;
+
+        loop {
+            if *cancel_rx.borrow() {
+                return Err("Chat cancelled by user".to_string());
+            }
+
+            let chunk_result = tokio::select! {
+                chunk = stream.next() => {
+                    match chunk {
+                        Some(c) => c,
+                        None => break,
+                    }
+                }
+                _ = cancel_rx.changed() => {
+                    if *cancel_rx.borrow() {
+                        return Err("Chat cancelled by user".to_string());
+                    } else {
+                        continue;
+                    }
+                }
+            };
+
+            let chunk = chunk_result.map_err(|e| format!("Stream error: {}", e))?;
+            buffer.push_str(&String::from_utf8_lossy(&chunk));
+
+            while let Some(newline_pos) = buffer.find('\n') {
+                let line = buffer[..newline_pos].trim().to_string();
+                buffer = buffer[newline_pos + 1..].to_string();
+
+                if line.is_empty() {
+                    continue;
+                }
+
+                let stream_msg: OllamaStreamResponse =
+                    serde_json::from_str(&line).map_err(|e| format!("JSON parse error: {}", e))?;
+
+                if !stream_msg.message.content.is_empty() {
+                    accumulated_content.push_str(&stream_msg.message.content);
+                    on_token(&stream_msg.message.content);
+                }
+
+                if let Some(tool_calls) = stream_msg.message.tool_calls {
+                    final_tool_calls = Some(
+                        tool_calls
+                            .into_iter()
+                            .map(|tc| ToolCall {
+                                id: None,
+                                kind: "function".to_string(),
+                                function: FunctionCall {
+                                    name: tc.function.name,
+                                    arguments: tc.function.arguments.to_string(),
+                                },
+                            })
+                            .collect(),
+                    );
+                }
+
+                if stream_msg.done {
+                    break;
+                }
+            }
+        }
+
+        Ok(CompletionResponse {
+            content: if accumulated_content.is_empty() {
+                None
+            } else {
+                Some(accumulated_content)
+            },
+            tool_calls: final_tool_calls,
+            session_id: None,
+        })
+    }
+}
+
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    models: Vec<OllamaModelTag>,
+}
+
+#[derive(Deserialize)]
+struct OllamaModelTag {
+    name: String,
+}
+
+#[derive(Serialize)]
+struct OllamaRequest<'a> {
+    model: &'a str,
+    messages: Vec<OllamaRequestMessage>,
+    stream: bool,
+    #[serde(skip_serializing_if = "is_empty_tools")]
+    tools: &'a [ToolDefinition],
+}
+
+fn is_empty_tools(tools: &&[ToolDefinition]) -> bool {
+    tools.is_empty()
+}
+
+#[derive(Serialize)]
+struct OllamaRequestMessage {
+    role: Role,
+    content: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    tool_calls: Option<Vec<OllamaRequestToolCall>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    tool_call_id: Option<String>,
+}
+
+#[derive(Serialize)]
+struct OllamaRequestToolCall {
+    function: OllamaRequestFunctionCall,
+    #[serde(rename = "type")]
+    kind: String,
+}
+
+#[derive(Serialize)]
+struct OllamaRequestFunctionCall {
+    name: String,
+    arguments: Value,
+}
+
+#[derive(Deserialize)]
+struct OllamaStreamResponse {
+    message: OllamaStreamMessage,
+    done: bool,
+}
+
+#[derive(Deserialize)]
+struct OllamaStreamMessage {
+    #[serde(default)]
+    content: String,
+    #[serde(default)]
+    tool_calls: Option<Vec<OllamaResponseToolCall>>,
+}
+
+#[derive(Deserialize)]
+struct OllamaResponseToolCall {
+    function: OllamaResponseFunctionCall,
+}
+
+#[derive(Deserialize)]
+struct OllamaResponseFunctionCall {
+    name: String,
+    arguments: Value,
+}
+
+#[async_trait]
+impl ModelProvider for OllamaProvider {
+    async fn chat(
+        &self,
+        _model: &str,
+        _messages: &[Message],
+        _tools: &[ToolDefinition],
+    ) -> Result<CompletionResponse, String> {
+        Err("Non-streaming Ollama chat not implemented for server".to_string())
+    }
+}
--- a/server/src/llm/types.rs
+++ b/server/src/llm/types.rs
@@ -0,0 +1,72 @@
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+use std::fmt::Debug;
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum Role {
+    System,
+    User,
+    Assistant,
+    Tool,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Message {
+    pub role: Role,
+    pub content: String,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_calls: Option<Vec<ToolCall>>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_call_id: Option<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ToolCall {
+    pub id: Option<String>,
+    pub function: FunctionCall,
+    #[serde(rename = "type")]
+    pub kind: String,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FunctionCall {
+    pub name: String,
+    pub arguments: String,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ToolDefinition {
+    #[serde(rename = "type")]
+    pub kind: String,
+    pub function: ToolFunctionDefinition,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ToolFunctionDefinition {
+    pub name: String,
+    pub description: String,
+    pub parameters: serde_json::Value,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct CompletionResponse {
+    pub content: Option<String>,
+    pub tool_calls: Option<Vec<ToolCall>>,
+    /// Claude Code session ID for conversation resumption.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub session_id: Option<String>,
+}
+
+#[async_trait]
+#[allow(dead_code)]
+pub trait ModelProvider: Send + Sync {
+    async fn chat(
+        &self,
+        model: &str,
+        messages: &[Message],
+        tools: &[ToolDefinition],
+    ) -> Result<CompletionResponse, String>;
+}
--- a/server/src/log_buffer.rs
+++ b/server/src/log_buffer.rs
@@ -0,0 +1,424 @@
+//! Bounded in-memory ring buffer for server log output.
+//!
+//! Use the [`slog!`] macro (INFO), [`slog_warn!`] (WARN), or [`slog_error!`]
+//! (ERROR) as drop-in replacements for `eprintln!`. Each call writes to stderr
+//! with an ISO 8601 timestamp + severity prefix, and simultaneously appends
+//! the entry to the global ring buffer, making it retrievable via the
+//! `get_server_logs` MCP tool.
+
+use std::collections::VecDeque;
+use std::fs::OpenOptions;
+use std::io::Write;
+use std::path::PathBuf;
+use std::sync::{Mutex, OnceLock};
+use tokio::sync::broadcast;
+
+const CAPACITY: usize = 1000;
+
+/// Severity level for a log entry.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum LogLevel {
+    Error,
+    Warn,
+    Info,
+}
+
+impl LogLevel {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            LogLevel::Error => "ERROR",
+            LogLevel::Warn => "WARN",
+            LogLevel::Info => "INFO",
+        }
+    }
+
+    /// Parse from a case-insensitive string. Returns `None` for unknown levels.
+    pub fn from_str_ci(s: &str) -> Option<Self> {
+        match s.to_uppercase().as_str() {
+            "ERROR" => Some(LogLevel::Error),
+            "WARN" => Some(LogLevel::Warn),
+            "INFO" => Some(LogLevel::Info),
+            _ => None,
+        }
+    }
+}
+
+/// A single captured log entry.
+#[derive(Debug, Clone)]
+pub struct LogEntry {
+    pub level: LogLevel,
+    /// ISO 8601 UTC timestamp.
+    pub timestamp: String,
+    pub message: String,
+}
+
+impl LogEntry {
+    /// Format the entry as a single log line: `{timestamp} [{LEVEL}] {message}`.
+    pub fn formatted(&self) -> String {
+        format!("{} [{}] {}", self.timestamp, self.level.as_str(), self.message)
+    }
+
+    /// Format with ANSI color codes for terminal output.
+    /// WARN is yellow, ERROR is red, INFO has no color.
+    fn colored_formatted(&self) -> String {
+        let line = self.formatted();
+        match self.level {
+            LogLevel::Warn => format!("\x1b[33m{line}\x1b[0m"),
+            LogLevel::Error => format!("\x1b[31m{line}\x1b[0m"),
+            LogLevel::Info => line,
+        }
+    }
+}
+
+pub struct LogBuffer {
+    entries: Mutex<VecDeque<LogEntry>>,
+    log_file: Mutex<Option<PathBuf>>,
+    /// Broadcast channel for live log streaming to WebSocket subscribers.
+    broadcast_tx: broadcast::Sender<LogEntry>,
+}
+
+impl LogBuffer {
+    fn new() -> Self {
+        let (broadcast_tx, _) = broadcast::channel(512);
+        Self {
+            entries: Mutex::new(VecDeque::with_capacity(CAPACITY)),
+            log_file: Mutex::new(None),
+            broadcast_tx,
+        }
+    }
+
+    /// Subscribe to live log entries as they are pushed.
+    pub fn subscribe(&self) -> broadcast::Receiver<LogEntry> {
+        self.broadcast_tx.subscribe()
+    }
+
+    /// Set the persistent log file path. Call once at startup after the
+    /// project root is known.
+    pub fn set_log_file(&self, path: PathBuf) {
+        if let Ok(mut f) = self.log_file.lock() {
+            *f = Some(path);
+        }
+    }
+
+    /// Append a log entry, evicting the oldest when at capacity.
+    pub fn push_entry(&self, level: LogLevel, message: String) {
+        let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
+        let entry = LogEntry {
+            level,
+            timestamp,
+            message,
+        };
+        eprintln!("{}", entry.colored_formatted());
+
+        // Append to persistent log file (best-effort).
+        if let Ok(guard) = self.log_file.lock()
+            && let Some(ref path) = *guard
+            && let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path)
+        {
+            let _ = writeln!(file, "{}", entry.formatted());
+        }
+
+        if let Ok(mut buf) = self.entries.lock() {
+            if buf.len() >= CAPACITY {
+                buf.pop_front();
+            }
+            buf.push_back(entry.clone());
+        }
+
+        // Best-effort broadcast to WebSocket subscribers.
+        let _ = self.broadcast_tx.send(entry);
+    }
+
+    /// Return up to `count` recent log lines as formatted strings,
+    /// optionally filtered by substring and/or severity level.
+    /// Lines are returned in chronological order (oldest first).
+    pub fn get_recent(
+        &self,
+        count: usize,
+        filter: Option<&str>,
+        severity: Option<&LogLevel>,
+    ) -> Vec<String> {
+        let buf = match self.entries.lock() {
+            Ok(b) => b,
+            Err(_) => return vec![],
+        };
+        let filtered: Vec<String> = buf
+            .iter()
+            .filter(|entry| {
+                severity.is_none_or(|s| &entry.level == s)
+                    && filter.is_none_or(|f| entry.message.contains(f) || entry.formatted().contains(f))
+            })
+            .map(|entry| entry.formatted())
+            .collect();
+        let start = filtered.len().saturating_sub(count);
+        filtered[start..].to_vec()
+    }
+
+    /// Return up to `count` recent `LogEntry` structs (not formatted strings),
+    /// optionally filtered by substring and/or severity level.
+    /// Entries are returned in chronological order (oldest first).
+    pub fn get_recent_entries(
+        &self,
+        count: usize,
+        filter: Option<&str>,
+        severity: Option<&LogLevel>,
+    ) -> Vec<LogEntry> {
+        let buf = match self.entries.lock() {
+            Ok(b) => b,
+            Err(_) => return vec![],
+        };
+        let filtered: Vec<LogEntry> = buf
+            .iter()
+            .filter(|entry| {
+                severity.is_none_or(|s| &entry.level == s)
+                    && filter.is_none_or(|f| entry.message.contains(f) || entry.formatted().contains(f))
+            })
+            .cloned()
+            .collect();
+        let start = filtered.len().saturating_sub(count);
+        filtered[start..].to_vec()
+    }
+}
+
+static GLOBAL: OnceLock<LogBuffer> = OnceLock::new();
+
+/// Access the process-wide log ring buffer.
+pub fn global() -> &'static LogBuffer {
+    GLOBAL.get_or_init(LogBuffer::new)
+}
+
+/// Write an INFO log to stderr **and** capture it in the ring buffer.
+///
+/// Usage is identical to `eprintln!`:
+/// ```ignore
+/// slog!("agent {} started", name);
+/// ```
+#[macro_export]
+macro_rules! slog {
+    ($($arg:tt)*) => {{
+        $crate::log_buffer::global().push_entry(
+            $crate::log_buffer::LogLevel::Info,
+            format!($($arg)*),
+        );
+    }};
+}
+
+/// Write a WARN log to stderr **and** capture it in the ring buffer.
+#[macro_export]
+macro_rules! slog_warn {
+    ($($arg:tt)*) => {{
+        $crate::log_buffer::global().push_entry(
+            $crate::log_buffer::LogLevel::Warn,
+            format!($($arg)*),
+        );
+    }};
+}
+
+/// Write an ERROR log to stderr **and** capture it in the ring buffer.
+#[macro_export]
+macro_rules! slog_error {
+    ($($arg:tt)*) => {{
+        $crate::log_buffer::global().push_entry(
+            $crate::log_buffer::LogLevel::Error,
+            format!($($arg)*),
+        );
+    }};
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn fresh_buffer() -> LogBuffer {
+        LogBuffer::new()
+    }
+
+    #[test]
+    fn push_and_retrieve() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "line one".into());
+        buf.push_entry(LogLevel::Info, "line two".into());
+        let recent = buf.get_recent(10, None, None);
+        assert_eq!(recent.len(), 2);
+        assert!(recent[0].contains("[INFO]") && recent[0].contains("line one"));
+        assert!(recent[1].contains("[INFO]") && recent[1].contains("line two"));
+    }
+
+    #[test]
+    fn evicts_oldest_at_capacity() {
+        let buf = LogBuffer::new();
+        // Fill past capacity
+        for i in 0..=CAPACITY {
+            buf.push_entry(LogLevel::Info, format!("line {i}"));
+        }
+        let recent = buf.get_recent(CAPACITY + 1, None, None);
+        // Should have exactly CAPACITY lines
+        assert_eq!(recent.len(), CAPACITY);
+        // The oldest (line 0) should have been evicted
+        assert!(!recent.iter().any(|l| l.contains("line 0") && !l.contains("line 10")));
+        // The newest should be present
+        assert!(recent
+            .iter()
+            .any(|l| l.contains(&format!("line {CAPACITY}"))));
+    }
+
+    #[test]
+    fn filter_by_substring() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "watcher started".into());
+        buf.push_entry(LogLevel::Info, "mcp call received".into());
+        buf.push_entry(LogLevel::Info, "watcher event".into());
+
+        let filtered = buf.get_recent(100, Some("watcher"), None);
+        assert_eq!(filtered.len(), 2);
+        assert!(filtered[0].contains("watcher started"));
+        assert!(filtered[1].contains("watcher event"));
+    }
+
+    #[test]
+    fn count_limits_results() {
+        let buf = fresh_buffer();
+        for i in 0..10 {
+            buf.push_entry(LogLevel::Info, format!("line {i}"));
+        }
+        let recent = buf.get_recent(3, None, None);
+        assert_eq!(recent.len(), 3);
+        // Most recent 3
+        assert!(recent[0].contains("line 7"));
+        assert!(recent[1].contains("line 8"));
+        assert!(recent[2].contains("line 9"));
+    }
+
+    #[test]
+    fn empty_buffer_returns_empty() {
+        let buf = fresh_buffer();
+        assert!(buf.get_recent(10, None, None).is_empty());
+    }
+
+    #[test]
+    fn log_lines_include_iso8601_timestamp() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "timestamped message".into());
+        let recent = buf.get_recent(1, None, None);
+        assert_eq!(recent.len(), 1);
+        // Timestamp format: YYYY-MM-DDTHH:MM:SSZ
+        let line = &recent[0];
+        assert!(
+            line.len() > 20,
+            "Line should have timestamp prefix: {line}"
+        );
+        // Check it starts with a 4-digit year
+        assert!(line.chars().next().unwrap().is_ascii_digit());
+        assert!(line.contains('T'));
+        assert!(line.contains('Z'));
+    }
+
+    #[test]
+    fn filter_by_severity_error_only() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "info message".into());
+        buf.push_entry(LogLevel::Warn, "warn message".into());
+        buf.push_entry(LogLevel::Error, "error message".into());
+
+        let errors = buf.get_recent(100, None, Some(&LogLevel::Error));
+        assert_eq!(errors.len(), 1);
+        assert!(errors[0].contains("[ERROR]"));
+        assert!(errors[0].contains("error message"));
+    }
+
+    #[test]
+    fn filter_by_severity_warn_only() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "info message".into());
+        buf.push_entry(LogLevel::Warn, "warn message".into());
+        buf.push_entry(LogLevel::Error, "error message".into());
+
+        let warns = buf.get_recent(100, None, Some(&LogLevel::Warn));
+        assert_eq!(warns.len(), 1);
+        assert!(warns[0].contains("[WARN]"));
+        assert!(warns[0].contains("warn message"));
+    }
+
+    #[test]
+    fn severity_levels_appear_in_formatted_output() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "info".into());
+        buf.push_entry(LogLevel::Warn, "warn".into());
+        buf.push_entry(LogLevel::Error, "error".into());
+
+        let all = buf.get_recent(10, None, None);
+        assert_eq!(all.len(), 3);
+        assert!(all[0].contains("[INFO]"));
+        assert!(all[1].contains("[WARN]"));
+        assert!(all[2].contains("[ERROR]"));
+    }
+
+    #[test]
+    fn loglevel_from_str_ci() {
+        assert_eq!(LogLevel::from_str_ci("ERROR"), Some(LogLevel::Error));
+        assert_eq!(LogLevel::from_str_ci("error"), Some(LogLevel::Error));
+        assert_eq!(LogLevel::from_str_ci("WARN"), Some(LogLevel::Warn));
+        assert_eq!(LogLevel::from_str_ci("warn"), Some(LogLevel::Warn));
+        assert_eq!(LogLevel::from_str_ci("INFO"), Some(LogLevel::Info));
+        assert_eq!(LogLevel::from_str_ci("info"), Some(LogLevel::Info));
+        assert_eq!(LogLevel::from_str_ci("DEBUG"), None);
+    }
+
+    #[test]
+    fn colored_formatted_warn_has_yellow_ansi() {
+        let entry = LogEntry {
+            level: LogLevel::Warn,
+            timestamp: "2026-01-01T00:00:00Z".into(),
+            message: "test warning".into(),
+        };
+        let colored = entry.colored_formatted();
+        assert!(colored.starts_with("\x1b[33m"), "WARN should start with yellow ANSI code");
+        assert!(colored.ends_with("\x1b[0m"), "WARN should end with ANSI reset");
+        assert!(colored.contains("[WARN]"));
+        assert!(colored.contains("test warning"));
+    }
+
+    #[test]
+    fn colored_formatted_error_has_red_ansi() {
+        let entry = LogEntry {
+            level: LogLevel::Error,
+            timestamp: "2026-01-01T00:00:00Z".into(),
+            message: "test error".into(),
+        };
+        let colored = entry.colored_formatted();
+        assert!(colored.starts_with("\x1b[31m"), "ERROR should start with red ANSI code");
+        assert!(colored.ends_with("\x1b[0m"), "ERROR should end with ANSI reset");
+        assert!(colored.contains("[ERROR]"));
+        assert!(colored.contains("test error"));
+    }
+
+    #[test]
+    fn colored_formatted_info_has_no_ansi() {
+        let entry = LogEntry {
+            level: LogLevel::Info,
+            timestamp: "2026-01-01T00:00:00Z".into(),
+            message: "test info".into(),
+        };
+        let colored = entry.colored_formatted();
+        assert!(!colored.contains("\x1b["), "INFO should have no ANSI escape codes");
+        assert!(colored.contains("[INFO]"));
+        assert!(colored.contains("test info"));
+    }
+
+    #[test]
+    fn ring_buffer_entries_have_no_ansi_codes() {
+        let buf = fresh_buffer();
+        buf.push_entry(LogLevel::Info, "info msg".into());
+        buf.push_entry(LogLevel::Warn, "warn msg".into());
+        buf.push_entry(LogLevel::Error, "error msg".into());
+
+        let recent = buf.get_recent(10, None, None);
+        assert_eq!(recent.len(), 3);
+        for line in &recent {
+            assert!(
+                !line.contains("\x1b["),
+                "Ring buffer entry should not contain ANSI codes: {line}"
+            );
+        }
+    }
+}
--- a/server/src/main.rs
+++ b/server/src/main.rs
@@ -0,0 +1,375 @@
+// matrix-sdk-crypto's deeply nested types require a higher recursion limit
+// when the `e2e-encryption` feature is enabled.
+#![recursion_limit = "256"]
+
+mod agent_log;
+mod agents;
+mod config;
+mod http;
+mod io;
+mod llm;
+pub mod log_buffer;
+mod matrix;
+pub mod rebuild;
+pub mod slack;
+mod state;
+mod store;
+pub mod transport;
+pub mod whatsapp;
+mod workflow;
+mod worktree;
+
+use crate::agents::AgentPool;
+use crate::http::build_routes;
+use crate::http::context::AppContext;
+use crate::http::{remove_port_file, resolve_port, write_port_file};
+use crate::io::fs::find_story_kit_root;
+use crate::state::SessionState;
+use crate::store::JsonFileStore;
+use crate::workflow::WorkflowState;
+use poem::Server;
+use poem::listener::TcpListener;
+use std::path::PathBuf;
+use std::sync::Arc;
+use tokio::sync::broadcast;
+
+/// Resolve the optional positional path argument (everything after the binary
+/// name) into an absolute `PathBuf`.  Returns `None` when no argument was
+/// supplied so that the caller can fall back to the auto-detect behaviour.
+fn parse_project_path_arg(args: &[String], cwd: &std::path::Path) -> Option<PathBuf> {
+    args.first().map(|s| io::fs::resolve_cli_path(cwd, s))
+}
+
+#[tokio::main]
+async fn main() -> Result<(), std::io::Error> {
+    let app_state = Arc::new(SessionState::default());
+    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
+    let store = Arc::new(
+        JsonFileStore::from_path(PathBuf::from("store.json")).map_err(std::io::Error::other)?,
+    );
+
+    let port = resolve_port();
+
+    // Collect CLI args, skipping the binary name (argv[0]).
+    let cli_args: Vec<String> = std::env::args().skip(1).collect();
+    let explicit_path = parse_project_path_arg(&cli_args, &cwd);
+
+    if let Some(explicit_root) = explicit_path {
+        // An explicit path was given on the command line.
+        // Open it directly — scaffold .storkit/ if it is missing — and
+        // exit with a clear error message if the path is invalid.
+        match io::fs::open_project(
+            explicit_root.to_string_lossy().to_string(),
+            &app_state,
+            store.as_ref(),
+        )
+        .await
+        {
+            Ok(_) => {
+                if let Some(root) = app_state.project_root.lock().unwrap().as_ref() {
+                    config::ProjectConfig::load(root)
+                        .unwrap_or_else(|e| panic!("Invalid project.toml: {e}"));
+                }
+            }
+            Err(e) => {
+                eprintln!("error: {e}");
+                std::process::exit(1);
+            }
+        }
+    } else {
+        // No path argument — auto-detect a .storkit/ project in cwd or
+        // parent directories (preserves existing behaviour).
+        if let Some(project_root) = find_story_kit_root(&cwd) {
+            io::fs::open_project(
+                project_root.to_string_lossy().to_string(),
+                &app_state,
+                store.as_ref(),
+            )
+            .await
+            .unwrap_or_else(|e| {
+                slog!("Warning: failed to auto-open project at {project_root:?}: {e}");
+                project_root.to_string_lossy().to_string()
+            });
+
+            // Validate agent config for the detected project root.
+            config::ProjectConfig::load(&project_root)
+                .unwrap_or_else(|e| panic!("Invalid project.toml: {e}"));
+        } else {
+            // No .storkit/ found — fall back to cwd so existing behaviour is preserved.
+            // TRACE:MERGE-DEBUG — remove once root cause is found
+            slog!(
+                "[MERGE-DEBUG] main: no .storkit/ found, falling back to cwd {:?}",
+                cwd
+            );
+            *app_state.project_root.lock().unwrap() = Some(cwd.clone());
+        }
+    }
+
+    // Enable persistent server log file now that the project root is known.
+    if let Some(ref root) = *app_state.project_root.lock().unwrap() {
+        let log_dir = root.join(".storkit").join("logs");
+        let _ = std::fs::create_dir_all(&log_dir);
+        log_buffer::global().set_log_file(log_dir.join("server.log"));
+    }
+
+    let workflow = Arc::new(std::sync::Mutex::new(WorkflowState::default()));
+
+    // Filesystem watcher: broadcast channel for work/ pipeline changes.
+    // Created before AgentPool so the pool can emit AgentStateChanged events.
+    let (watcher_tx, _) = broadcast::channel::<io::watcher::WatcherEvent>(1024);
+    let agents = Arc::new(AgentPool::new(port, watcher_tx.clone()));
+
+    // Start the background watchdog that detects and cleans up orphaned Running agents.
+    // When orphans are found, auto-assign is triggered to reassign free agents.
+    let watchdog_root: Option<PathBuf> = app_state.project_root.lock().unwrap().clone();
+    AgentPool::spawn_watchdog(Arc::clone(&agents), watchdog_root);
+    if let Some(ref root) = *app_state.project_root.lock().unwrap() {
+        let work_dir = root.join(".storkit").join("work");
+        if work_dir.is_dir() {
+            let watcher_config = config::ProjectConfig::load(root)
+                .map(|c| c.watcher)
+                .unwrap_or_default();
+            io::watcher::start_watcher(work_dir, root.clone(), watcher_tx.clone(), watcher_config);
+        }
+    }
+
+    // Subscribe to watcher events so that auto-assign triggers when a work item
+    // file is moved into an active pipeline stage (2_current/, 3_qa/, 4_merge/).
+    {
+        let watcher_auto_rx = watcher_tx.subscribe();
+        let watcher_auto_agents = Arc::clone(&agents);
+        let watcher_auto_root: Option<PathBuf> = app_state.project_root.lock().unwrap().clone();
+        if let Some(root) = watcher_auto_root {
+            tokio::spawn(async move {
+                let mut rx = watcher_auto_rx;
+                while let Ok(event) = rx.recv().await {
+                    if let io::watcher::WatcherEvent::WorkItem { ref stage, .. } = event
+                        && matches!(stage.as_str(), "2_current" | "3_qa" | "4_merge")
+                    {
+                        slog!(
+                            "[auto-assign] Watcher detected work item in {stage}/; \
+                             triggering auto-assign."
+                        );
+                        watcher_auto_agents.auto_assign_available_work(&root).await;
+                    }
+                }
+            });
+        }
+    }
+
+    // Reconciliation progress channel: startup reconciliation → WebSocket clients.
+    let (reconciliation_tx, _) = broadcast::channel::<agents::ReconciliationEvent>(64);
+
+    // Permission channel: MCP prompt_permission → WebSocket handler.
+    let (perm_tx, perm_rx) = tokio::sync::mpsc::unbounded_channel();
+
+    // Clone watcher_tx for the Matrix bot before it is moved into AppContext.
+    let watcher_tx_for_bot = watcher_tx.clone();
+    // Wrap perm_rx in Arc<Mutex> so it can be shared with both the WebSocket
+    // handler (via AppContext) and the Matrix bot.
+    let perm_rx = Arc::new(tokio::sync::Mutex::new(perm_rx));
+    let perm_rx_for_bot = Arc::clone(&perm_rx);
+
+    // Capture project root, agents Arc, and reconciliation sender before ctx
+    // is consumed by build_routes.
+    let startup_root: Option<PathBuf> = app_state.project_root.lock().unwrap().clone();
+    let startup_agents = Arc::clone(&agents);
+    let startup_reconciliation_tx = reconciliation_tx.clone();
+    // Clone for shutdown cleanup — kill orphaned PTY children before exiting.
+    let agents_for_shutdown = Arc::clone(&agents);
+    let ctx = AppContext {
+        state: app_state,
+        store,
+        workflow,
+        agents,
+        watcher_tx,
+        reconciliation_tx,
+        perm_tx,
+        perm_rx,
+        qa_app_process: Arc::new(std::sync::Mutex::new(None)),
+    };
+
+    // Build WhatsApp webhook context if bot.toml configures transport = "whatsapp".
+    let whatsapp_ctx: Option<Arc<whatsapp::WhatsAppWebhookContext>> = startup_root
+        .as_ref()
+        .and_then(|root| matrix::BotConfig::load(root))
+        .filter(|cfg| cfg.transport == "whatsapp")
+        .map(|cfg| {
+            let template_name = cfg
+                .whatsapp_notification_template
+                .clone()
+                .unwrap_or_else(|| "pipeline_notification".to_string());
+            let transport = Arc::new(whatsapp::WhatsAppTransport::new(
+                cfg.whatsapp_phone_number_id.clone().unwrap_or_default(),
+                cfg.whatsapp_access_token.clone().unwrap_or_default(),
+                template_name,
+            ));
+            let bot_name = cfg
+                .display_name
+                .clone()
+                .unwrap_or_else(|| "Assistant".to_string());
+            let root = startup_root.clone().unwrap();
+            let history = whatsapp::load_whatsapp_history(&root);
+            Arc::new(whatsapp::WhatsAppWebhookContext {
+                verify_token: cfg.whatsapp_verify_token.clone().unwrap_or_default(),
+                transport,
+                project_root: root,
+                agents: Arc::clone(&startup_agents),
+                bot_name,
+                bot_user_id: "whatsapp-bot".to_string(),
+                ambient_rooms: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
+                history: std::sync::Arc::new(tokio::sync::Mutex::new(history)),
+                history_size: cfg.history_size,
+                window_tracker: Arc::new(whatsapp::MessagingWindowTracker::new()),
+            })
+        });
+
+    // Build Slack webhook context if bot.toml configures transport = "slack".
+    let slack_ctx: Option<Arc<slack::SlackWebhookContext>> = startup_root
+        .as_ref()
+        .and_then(|root| matrix::BotConfig::load(root))
+        .filter(|cfg| cfg.transport == "slack")
+        .map(|cfg| {
+            let transport = Arc::new(slack::SlackTransport::new(
+                cfg.slack_bot_token.clone().unwrap_or_default(),
+            ));
+            let bot_name = cfg
+                .display_name
+                .clone()
+                .unwrap_or_else(|| "Assistant".to_string());
+            let root = startup_root.clone().unwrap();
+            let history = slack::load_slack_history(&root);
+            let channel_ids: std::collections::HashSet<String> =
+                cfg.slack_channel_ids.iter().cloned().collect();
+            Arc::new(slack::SlackWebhookContext {
+                signing_secret: cfg.slack_signing_secret.clone().unwrap_or_default(),
+                transport,
+                project_root: root,
+                agents: Arc::clone(&startup_agents),
+                bot_name,
+                bot_user_id: "slack-bot".to_string(),
+                ambient_rooms: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
+                history: std::sync::Arc::new(tokio::sync::Mutex::new(history)),
+                history_size: cfg.history_size,
+                channel_ids,
+            })
+        });
+
+    let app = build_routes(ctx, whatsapp_ctx, slack_ctx);
+
+    // Optional Matrix bot: connect to the homeserver and start listening for
+    // messages if `.storkit/bot.toml` is present and enabled.
+    if let Some(ref root) = startup_root {
+        matrix::spawn_bot(
+            root,
+            watcher_tx_for_bot,
+            perm_rx_for_bot,
+            Arc::clone(&startup_agents),
+        );
+    }
+
+    // On startup:
+    // 1. Reconcile any stories whose agent work was committed while the server was
+    //    offline (worktree has commits ahead of master but pipeline didn't advance).
+    // 2. Auto-assign free agents to remaining unassigned work in the pipeline.
+    if let Some(root) = startup_root {
+        tokio::spawn(async move {
+            slog!("[startup] Reconciling completed worktrees from previous session.");
+            startup_agents
+                .reconcile_on_startup(&root, &startup_reconciliation_tx)
+                .await;
+            slog!("[auto-assign] Scanning pipeline stages for unassigned work.");
+            startup_agents.auto_assign_available_work(&root).await;
+        });
+    }
+    let addr = format!("127.0.0.1:{port}");
+
+    println!(
+        "\x1b[95;1m  ____  _             _    ___ _   \n / ___|| |_ ___  _ __| | _|_ _| |_ \n \\___ \\| __/ _ \\| '__| |/ /| || __|\n  ___) | || (_) | |  |   < | || |_ \n |____/ \\__\\___/|_|  |_|\\_\\___|\\__|\n\x1b[0m"
+    );
+    println!("STORKIT_PORT={port}");
+    println!("\x1b[96;1mFrontend:\x1b[0m \x1b[94mhttp://{addr}\x1b[0m");
+    println!("\x1b[92;1mOpenAPI Docs:\x1b[0m \x1b[94mhttp://{addr}/docs\x1b[0m");
+
+    let port_file = write_port_file(&cwd, port);
+
+    let result = Server::new(TcpListener::bind(&addr)).run(app).await;
+
+    // Kill all active PTY child processes before exiting to prevent orphaned
+    // Claude Code processes from running after the server restarts.
+    agents_for_shutdown.kill_all_children();
+
+    if let Some(ref path) = port_file {
+        remove_port_file(path);
+    }
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[should_panic(expected = "Invalid project.toml: Duplicate agent name")]
+    fn panics_on_duplicate_agent_names() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        std::fs::create_dir_all(&sk).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            r#"
+[[agent]]
+name = "coder"
+
+[[agent]]
+name = "coder"
+"#,
+        )
+        .unwrap();
+
+        config::ProjectConfig::load(tmp.path())
+            .unwrap_or_else(|e| panic!("Invalid project.toml: {e}"));
+    }
+
+    // ── parse_project_path_arg ────────────────────────────────────────────
+
+    #[test]
+    fn parse_project_path_arg_none_when_no_args() {
+        let cwd = PathBuf::from("/home/user/project");
+        let result = parse_project_path_arg(&[], &cwd);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn parse_project_path_arg_returns_path_for_absolute_arg() {
+        let cwd = PathBuf::from("/home/user/project");
+        let args = vec!["/some/absolute/path".to_string()];
+        let result = parse_project_path_arg(&args, &cwd).unwrap();
+        // Absolute path returned as-is (canonicalize may fail, fallback used)
+        assert!(
+            result.ends_with("absolute/path") || result == PathBuf::from("/some/absolute/path")
+        );
+    }
+
+    #[test]
+    fn parse_project_path_arg_resolves_dot_to_cwd() {
+        let tmp = tempfile::tempdir().unwrap();
+        let cwd = tmp.path().to_path_buf();
+        let args = vec![".".to_string()];
+        let result = parse_project_path_arg(&args, &cwd).unwrap();
+        // "." relative to an existing cwd should canonicalize to the cwd itself
+        assert_eq!(result, cwd.canonicalize().unwrap_or(cwd));
+    }
+
+    #[test]
+    fn parse_project_path_arg_resolves_relative_path() {
+        let tmp = tempfile::tempdir().unwrap();
+        let cwd = tmp.path().to_path_buf();
+        let subdir = cwd.join("myproject");
+        std::fs::create_dir_all(&subdir).unwrap();
+        let args = vec!["myproject".to_string()];
+        let result = parse_project_path_arg(&args, &cwd).unwrap();
+        assert_eq!(result, subdir.canonicalize().unwrap_or(subdir));
+    }
+}
--- a/server/src/matrix/bot.rs
+++ b/server/src/matrix/bot.rs
--- a/server/src/matrix/commands/ambient.rs
+++ b/server/src/matrix/commands/ambient.rs
@@ -0,0 +1,171 @@
+//! Handler for the `ambient` command.
+
+use super::CommandContext;
+use crate::matrix::config::save_ambient_rooms;
+
+/// Toggle ambient mode for this room.
+///
+/// Works whether or not the message directly addressed the bot — the user can
+/// say "timmy ambient on", "@timmy ambient on", or just "ambient on" in an
+/// ambient-mode room.  The command is specific enough (must be the first word
+/// after any bot-mention prefix) that accidental triggering is very unlikely.
+pub(super) fn handle_ambient(ctx: &CommandContext) -> Option<String> {
+    let enable = match ctx.args {
+        "on" => true,
+        "off" => false,
+        _ => return Some("Usage: `ambient on` or `ambient off`".to_string()),
+    };
+    let room_ids: Vec<String> = {
+        let mut ambient = ctx.ambient_rooms.lock().unwrap();
+        if enable {
+            ambient.insert(ctx.room_id.to_string());
+        } else {
+            ambient.remove(ctx.room_id);
+        }
+        ambient.iter().cloned().collect()
+    };
+    save_ambient_rooms(ctx.project_root, &room_ids);
+    let msg = if enable {
+        "Ambient mode on. I'll respond to all messages in this room."
+    } else {
+        "Ambient mode off. I'll only respond when mentioned."
+    };
+    Some(msg.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn test_ambient_rooms() -> Arc<Mutex<HashSet<String>>> {
+        Arc::new(Mutex::new(HashSet::new()))
+    }
+
+    fn test_agents() -> Arc<AgentPool> {
+        Arc::new(AgentPool::new_test(3000))
+    }
+
+    // Bug 352: ambient commands were being forwarded to LLM after refactors
+    // 328/330 because handle_ambient required is_addressed=true, but
+    // mentions_bot() only matches @-prefixed mentions, not bare bot names.
+    // "timmy ambient off" sets is_addressed=false even though it names the bot.
+    #[test]
+    fn ambient_on_works_when_unaddressed() {
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!myroom:example.com".to_string();
+        let agents = test_agents();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: std::path::Path::new("/tmp"),
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        // "timmy ambient on" — bot name mentioned but not @-prefixed, so
+        // is_addressed is false; strip_bot_mention still strips "timmy ".
+        let result = try_handle_command(&dispatch, "timmy ambient on");
+        assert!(result.is_some(), "ambient on should fire even when is_addressed=false");
+        assert!(
+            ambient_rooms.lock().unwrap().contains(&room_id),
+            "room should be in ambient_rooms after ambient on"
+        );
+    }
+
+    #[test]
+    fn ambient_off_works_bare_in_ambient_room() {
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!myroom:example.com".to_string();
+        ambient_rooms.lock().unwrap().insert(room_id.clone());
+        let agents = test_agents();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: std::path::Path::new("/tmp"),
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        // Bare "ambient off" in an ambient room (is_addressed=false).
+        let result = try_handle_command(&dispatch, "ambient off");
+        assert!(result.is_some(), "bare ambient off should be handled without LLM");
+        let output = result.unwrap();
+        assert!(
+            output.contains("Ambient mode off"),
+            "response should confirm ambient off: {output}"
+        );
+        assert!(
+            !ambient_rooms.lock().unwrap().contains(&room_id),
+            "room should be removed from ambient_rooms after ambient off"
+        );
+    }
+
+    #[test]
+    fn ambient_on_enables_ambient_mode() {
+        let ambient_rooms = test_ambient_rooms();
+        let agents = test_agents();
+        let room_id = "!myroom:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: std::path::Path::new("/tmp"),
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let result = try_handle_command(&dispatch, "@timmy ambient on");
+        assert!(result.is_some(), "ambient on should produce a response");
+        let output = result.unwrap();
+        assert!(
+            output.contains("Ambient mode on"),
+            "response should confirm ambient on: {output}"
+        );
+        assert!(
+            ambient_rooms.lock().unwrap().contains(&room_id),
+            "room should be in ambient_rooms after ambient on"
+        );
+    }
+
+    #[test]
+    fn ambient_off_disables_ambient_mode() {
+        let ambient_rooms = test_ambient_rooms();
+        let agents = test_agents();
+        let room_id = "!myroom:example.com".to_string();
+        // Pre-insert the room
+        ambient_rooms.lock().unwrap().insert(room_id.clone());
+
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: std::path::Path::new("/tmp"),
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let result = try_handle_command(&dispatch, "@timmy ambient off");
+        assert!(result.is_some(), "ambient off should produce a response");
+        let output = result.unwrap();
+        assert!(
+            output.contains("Ambient mode off"),
+            "response should confirm ambient off: {output}"
+        );
+        assert!(
+            !ambient_rooms.lock().unwrap().contains(&room_id),
+            "room should be removed from ambient_rooms after ambient off"
+        );
+    }
+
+    #[test]
+    fn ambient_invalid_args_returns_usage() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy ambient");
+        let output = result.unwrap();
+        assert!(
+            output.contains("Usage"),
+            "invalid ambient args should show usage: {output}"
+        );
+    }
+}
--- a/server/src/matrix/commands/assign.rs
+++ b/server/src/matrix/commands/assign.rs
@@ -0,0 +1,385 @@
+//! Handler for the `assign` command.
+//!
+//! `assign <number> <model>` pre-assigns a coder model (e.g. `opus`, `sonnet`)
+//! to a story before it starts. The assignment persists in the story file's
+//! front matter as `agent: coder-<model>` so that when the pipeline picks up
+//! the story — either via auto-assign or the `start` command — it uses the
+//! assigned model instead of the default.
+
+use super::CommandContext;
+use crate::io::story_metadata::{parse_front_matter, set_front_matter_field};
+
+/// All pipeline stage directories to search when finding a work item by number.
+const STAGES: &[&str] = &[
+    "1_backlog",
+    "2_current",
+    "3_qa",
+    "4_merge",
+    "5_done",
+    "6_archived",
+];
+
+/// Resolve a model name hint (e.g. `"opus"`) to a full agent name
+/// (e.g. `"coder-opus"`). If the hint already starts with `"coder-"`,
+/// it is returned unchanged to prevent double-prefixing.
+fn resolve_agent_name(model: &str) -> String {
+    if model.starts_with("coder-") {
+        model.to_string()
+    } else {
+        format!("coder-{model}")
+    }
+}
+
+pub(super) fn handle_assign(ctx: &CommandContext) -> Option<String> {
+    let args = ctx.args.trim();
+
+    // Parse `<number> <model>` from args.
+    let (number_str, model_str) = match args.split_once(char::is_whitespace) {
+        Some((n, m)) => (n.trim(), m.trim()),
+        None => {
+            return Some(format!(
+                "Usage: `{} assign <number> <model>` (e.g. `assign 42 opus`)",
+                ctx.bot_name
+            ));
+        }
+    };
+
+    if number_str.is_empty() || !number_str.chars().all(|c| c.is_ascii_digit()) {
+        return Some(format!(
+            "Invalid story number `{number_str}`. Usage: `{} assign <number> <model>`",
+            ctx.bot_name
+        ));
+    }
+
+    if model_str.is_empty() {
+        return Some(format!(
+            "Usage: `{} assign <number> <model>` (e.g. `assign 42 opus`)",
+            ctx.bot_name
+        ));
+    }
+
+    // Find the story file across all pipeline stages.
+    let mut found: Option<(std::path::PathBuf, String)> = None;
+    'outer: for stage in STAGES {
+        let dir = ctx.project_root.join(".storkit").join("work").join(stage);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path
+                    .file_stem()
+                    .and_then(|s| s.to_str())
+                    .map(|s| s.to_string())
+                {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("")
+                        .to_string();
+                    if file_num == number_str {
+                        found = Some((path, stem));
+                        break 'outer;
+                    }
+                }
+            }
+        }
+    }
+
+    let (path, story_id) = match found {
+        Some(f) => f,
+        None => {
+            return Some(format!(
+                "No story, bug, or spike with number **{number_str}** found."
+            ));
+        }
+    };
+
+    // Read the human-readable name from front matter for the response.
+    let story_name = std::fs::read_to_string(&path)
+        .ok()
+        .and_then(|contents| {
+            parse_front_matter(&contents)
+                .ok()
+                .and_then(|m| m.name)
+        })
+        .unwrap_or_else(|| story_id.clone());
+
+    let agent_name = resolve_agent_name(model_str);
+
+    // Write `agent: <agent_name>` into the story's front matter.
+    let result = std::fs::read_to_string(&path)
+        .map_err(|e| format!("Failed to read story file: {e}"))
+        .and_then(|contents| {
+            let updated = set_front_matter_field(&contents, "agent", &agent_name);
+            std::fs::write(&path, &updated)
+                .map_err(|e| format!("Failed to write story file: {e}"))
+        });
+
+    match result {
+        Ok(()) => Some(format!(
+            "Assigned **{agent_name}** to **{story_name}** (story {number_str}). \
+             The model will be used when the story starts."
+        )),
+        Err(e) => Some(format!(
+            "Failed to assign model to **{story_name}**: {e}"
+        )),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn assign_cmd_with_root(root: &std::path::Path, args: &str) -> Option<String> {
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, &format!("@timmy assign {args}"))
+    }
+
+    fn write_story_file(root: &std::path::Path, stage: &str, filename: &str, content: &str) {
+        let dir = root.join(".storkit/work").join(stage);
+        std::fs::create_dir_all(&dir).unwrap();
+        std::fs::write(dir.join(filename), content).unwrap();
+    }
+
+    // -- registration / help ------------------------------------------------
+
+    #[test]
+    fn assign_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "assign");
+        assert!(found, "assign command must be in the registry");
+    }
+
+    #[test]
+    fn assign_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("assign"),
+            "help should list assign command: {output}"
+        );
+    }
+
+    // -- argument validation ------------------------------------------------
+
+    #[test]
+    fn assign_no_args_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = assign_cmd_with_root(tmp.path(), "").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "no args should show usage: {output}"
+        );
+    }
+
+    #[test]
+    fn assign_missing_model_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = assign_cmd_with_root(tmp.path(), "42").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "missing model should show usage: {output}"
+        );
+    }
+
+    #[test]
+    fn assign_non_numeric_number_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = assign_cmd_with_root(tmp.path(), "abc opus").unwrap();
+        assert!(
+            output.contains("Invalid story number"),
+            "non-numeric number should return error: {output}"
+        );
+    }
+
+    // -- story not found ----------------------------------------------------
+
+    #[test]
+    fn assign_unknown_story_returns_friendly_message() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        // Create stage dirs but no matching story.
+        for stage in &["1_backlog", "2_current"] {
+            std::fs::create_dir_all(tmp.path().join(".storkit/work").join(stage)).unwrap();
+        }
+        let output = assign_cmd_with_root(tmp.path(), "999 opus").unwrap();
+        assert!(
+            output.contains("999") && output.contains("found"),
+            "not-found message should include number and 'found': {output}"
+        );
+    }
+
+    // -- successful assignment ----------------------------------------------
+
+    #[test]
+    fn assign_writes_agent_field_to_front_matter() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "42_story_test_feature.md",
+            "---\nname: Test Feature\n---\n\n# Story 42\n",
+        );
+
+        let output = assign_cmd_with_root(tmp.path(), "42 opus").unwrap();
+        assert!(
+            output.contains("coder-opus"),
+            "confirmation should include resolved agent name: {output}"
+        );
+        assert!(
+            output.contains("Test Feature"),
+            "confirmation should include story name: {output}"
+        );
+
+        // Verify the file was updated.
+        let contents = std::fs::read_to_string(
+            tmp.path()
+                .join(".storkit/work/1_backlog/42_story_test_feature.md"),
+        )
+        .unwrap();
+        assert!(
+            contents.contains("agent: coder-opus"),
+            "front matter should contain agent field: {contents}"
+        );
+    }
+
+    #[test]
+    fn assign_with_sonnet_writes_coder_sonnet() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "10_story_current.md",
+            "---\nname: Current Story\n---\n",
+        );
+
+        assign_cmd_with_root(tmp.path(), "10 sonnet").unwrap();
+
+        let contents = std::fs::read_to_string(
+            tmp.path()
+                .join(".storkit/work/2_current/10_story_current.md"),
+        )
+        .unwrap();
+        assert!(
+            contents.contains("agent: coder-sonnet"),
+            "front matter should contain agent: coder-sonnet: {contents}"
+        );
+    }
+
+    #[test]
+    fn assign_with_already_prefixed_name_does_not_double_prefix() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "7_story_small.md",
+            "---\nname: Small Story\n---\n",
+        );
+
+        let output = assign_cmd_with_root(tmp.path(), "7 coder-opus").unwrap();
+        assert!(
+            output.contains("coder-opus"),
+            "should not double-prefix: {output}"
+        );
+        assert!(
+            !output.contains("coder-coder-opus"),
+            "must not double-prefix: {output}"
+        );
+
+        let contents = std::fs::read_to_string(
+            tmp.path().join(".storkit/work/1_backlog/7_story_small.md"),
+        )
+        .unwrap();
+        assert!(
+            contents.contains("agent: coder-opus"),
+            "must write coder-opus, not coder-coder-opus: {contents}"
+        );
+    }
+
+    #[test]
+    fn assign_overwrites_existing_agent_field() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "5_story_existing.md",
+            "---\nname: Existing\nagent: coder-sonnet\n---\n",
+        );
+
+        assign_cmd_with_root(tmp.path(), "5 opus").unwrap();
+
+        let contents = std::fs::read_to_string(
+            tmp.path()
+                .join(".storkit/work/1_backlog/5_story_existing.md"),
+        )
+        .unwrap();
+        assert!(
+            contents.contains("agent: coder-opus"),
+            "should overwrite old agent with new: {contents}"
+        );
+        assert!(
+            !contents.contains("coder-sonnet"),
+            "old agent should no longer appear: {contents}"
+        );
+    }
+
+    #[test]
+    fn assign_finds_story_in_any_stage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        // Story is in 3_qa/, not backlog.
+        write_story_file(
+            tmp.path(),
+            "3_qa",
+            "99_story_in_qa.md",
+            "---\nname: In QA\n---\n",
+        );
+
+        let output = assign_cmd_with_root(tmp.path(), "99 opus").unwrap();
+        assert!(
+            output.contains("coder-opus"),
+            "should find story in qa stage: {output}"
+        );
+    }
+
+    // -- resolve_agent_name unit tests --------------------------------------
+
+    #[test]
+    fn resolve_agent_name_prefixes_bare_model() {
+        assert_eq!(super::resolve_agent_name("opus"), "coder-opus");
+        assert_eq!(super::resolve_agent_name("sonnet"), "coder-sonnet");
+        assert_eq!(super::resolve_agent_name("haiku"), "coder-haiku");
+    }
+
+    #[test]
+    fn resolve_agent_name_does_not_double_prefix() {
+        assert_eq!(super::resolve_agent_name("coder-opus"), "coder-opus");
+        assert_eq!(super::resolve_agent_name("coder-sonnet"), "coder-sonnet");
+    }
+}
--- a/server/src/matrix/commands/cost.rs
+++ b/server/src/matrix/commands/cost.rs
@@ -0,0 +1,271 @@
+//! Handler for the `cost` command.
+
+use std::collections::HashMap;
+
+use super::status::story_short_label;
+use super::CommandContext;
+
+/// Show token spend: 24h total, top 5 stories, agent-type breakdown, and
+/// all-time total.
+pub(super) fn handle_cost(ctx: &CommandContext) -> Option<String> {
+    let records = match crate::agents::token_usage::read_all(ctx.project_root) {
+        Ok(r) => r,
+        Err(e) => return Some(format!("Failed to read token usage: {e}")),
+    };
+
+    if records.is_empty() {
+        return Some("**Token Spend**\n\nNo usage records found.".to_string());
+    }
+
+    let now = chrono::Utc::now();
+    let cutoff = now - chrono::Duration::hours(24);
+
+    // Partition into 24h window and all-time
+    let mut recent = Vec::new();
+    let mut all_time_cost = 0.0;
+    for r in &records {
+        all_time_cost += r.usage.total_cost_usd;
+        if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&r.timestamp)
+            && ts >= cutoff
+        {
+            recent.push(r);
+        }
+    }
+
+    // 24h total
+    let recent_cost: f64 = recent.iter().map(|r| r.usage.total_cost_usd).sum();
+
+    let mut out = String::from("**Token Spend**\n\n");
+    out.push_str(&format!("**Last 24h:** ${:.2}\n", recent_cost));
+    out.push_str(&format!("**All-time:** ${:.2}\n\n", all_time_cost));
+
+    // Top 5 most expensive stories (last 24h)
+    let mut story_costs: HashMap<&str, f64> = HashMap::new();
+    for r in &recent {
+        *story_costs.entry(r.story_id.as_str()).or_default() += r.usage.total_cost_usd;
+    }
+    let mut story_list: Vec<(&str, f64)> = story_costs.into_iter().collect();
+    story_list.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+    story_list.truncate(5);
+
+    out.push_str("**Top Stories (24h)**\n");
+    if story_list.is_empty() {
+        out.push_str("  *(none)*\n");
+    } else {
+        for (story_id, cost) in &story_list {
+            let label = story_short_label(story_id, None);
+            out.push_str(&format!("  • {label} — ${cost:.2}\n"));
+        }
+    }
+    out.push('\n');
+
+    // Breakdown by agent type (last 24h)
+    // Agent names follow pattern "coder-1", "qa-1", "mergemaster" — extract
+    // the type as everything before the last '-' digit, or the full name.
+    let mut type_costs: HashMap<String, f64> = HashMap::new();
+    for r in &recent {
+        let agent_type = extract_agent_type(&r.agent_name);
+        *type_costs.entry(agent_type).or_default() += r.usage.total_cost_usd;
+    }
+    let mut type_list: Vec<(String, f64)> = type_costs.into_iter().collect();
+    type_list.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+
+    out.push_str("**By Agent Type (24h)**\n");
+    if type_list.is_empty() {
+        out.push_str("  *(none)*\n");
+    } else {
+        for (agent_type, cost) in &type_list {
+            out.push_str(&format!("  • {agent_type} — ${cost:.2}\n"));
+        }
+    }
+
+    Some(out)
+}
+
+/// Extract the agent type from an agent name.
+///
+/// Agent names like "coder-1", "qa-2", "mergemaster" map to types "coder",
+/// "qa", "mergemaster". If the name ends with `-<digits>`, strip the suffix.
+pub(super) fn extract_agent_type(agent_name: &str) -> String {
+    if let Some(pos) = agent_name.rfind('-') {
+        let suffix = &agent_name[pos + 1..];
+        if !suffix.is_empty() && suffix.chars().all(|c| c.is_ascii_digit()) {
+            return agent_name[..pos].to_string();
+        }
+    }
+    agent_name.to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+    use std::sync::Arc;
+
+    fn write_token_records(root: &std::path::Path, records: &[crate::agents::token_usage::TokenUsageRecord]) {
+        for r in records {
+            crate::agents::token_usage::append_record(root, r).unwrap();
+        }
+    }
+
+    fn make_usage(cost: f64) -> crate::agents::TokenUsage {
+        crate::agents::TokenUsage {
+            input_tokens: 100,
+            output_tokens: 200,
+            cache_creation_input_tokens: 0,
+            cache_read_input_tokens: 0,
+            total_cost_usd: cost,
+        }
+    }
+
+    fn make_record(story_id: &str, agent_name: &str, cost: f64, hours_ago: i64) -> crate::agents::token_usage::TokenUsageRecord {
+        let ts = (chrono::Utc::now() - chrono::Duration::hours(hours_ago)).to_rfc3339();
+        crate::agents::token_usage::TokenUsageRecord {
+            story_id: story_id.to_string(),
+            agent_name: agent_name.to_string(),
+            timestamp: ts,
+            model: None,
+            usage: make_usage(cost),
+        }
+    }
+
+    fn cost_cmd_with_root(root: &std::path::Path) -> Option<String> {
+        use super::super::{CommandDispatch, try_handle_command};
+        use std::collections::HashSet;
+        use std::sync::Mutex;
+
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, "@timmy cost")
+    }
+
+    #[test]
+    fn cost_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "cost");
+        assert!(found, "cost command must be in the registry");
+    }
+
+    #[test]
+    fn cost_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("cost"), "help should list cost command: {output}");
+    }
+
+    #[test]
+    fn cost_command_no_records() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("No usage records found"), "should show empty message: {output}");
+    }
+
+    #[test]
+    fn cost_command_shows_24h_total() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_token_records(tmp.path(), &[
+            make_record("42_story_foo", "coder-1", 1.50, 2),
+            make_record("42_story_foo", "coder-1", 0.50, 5),
+        ]);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("**Last 24h:** $2.00"), "should show 24h total: {output}");
+    }
+
+    #[test]
+    fn cost_command_excludes_old_from_24h() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_token_records(tmp.path(), &[
+            make_record("42_story_foo", "coder-1", 1.00, 2),   // within 24h
+            make_record("43_story_bar", "coder-1", 5.00, 48),  // older
+        ]);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("**Last 24h:** $1.00"), "should only count recent: {output}");
+        assert!(output.contains("**All-time:** $6.00"), "all-time should include everything: {output}");
+    }
+
+    #[test]
+    fn cost_command_shows_top_stories() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_token_records(tmp.path(), &[
+            make_record("42_story_foo", "coder-1", 3.00, 1),
+            make_record("43_story_bar", "coder-1", 1.00, 1),
+            make_record("42_story_foo", "qa-1", 2.00, 1),
+        ]);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("Top Stories"), "should have top stories section: {output}");
+        // Story 42 ($5.00) should appear before story 43 ($1.00)
+        let pos_42 = output.find("42").unwrap();
+        let pos_43 = output.find("43").unwrap();
+        assert!(pos_42 < pos_43, "story 42 should appear before 43 (sorted by cost): {output}");
+    }
+
+    #[test]
+    fn cost_command_limits_to_5_stories() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let mut records = Vec::new();
+        for i in 1..=7 {
+            records.push(make_record(&format!("{i}_story_s{i}"), "coder-1", i as f64, 1));
+        }
+        write_token_records(tmp.path(), &records);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        // The top 5 most expensive are stories 7,6,5,4,3. Stories 1 and 2 should be excluded.
+        let top_section = output.split("**By Agent Type").next().unwrap();
+        assert!(!top_section.contains("• 1 —"), "story 1 should not be in top 5: {output}");
+        assert!(!top_section.contains("• 2 —"), "story 2 should not be in top 5: {output}");
+    }
+
+    #[test]
+    fn cost_command_shows_agent_type_breakdown() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_token_records(tmp.path(), &[
+            make_record("42_story_foo", "coder-1", 2.00, 1),
+            make_record("42_story_foo", "qa-1", 1.50, 1),
+            make_record("42_story_foo", "mergemaster", 0.50, 1),
+        ]);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("By Agent Type"), "should have agent type section: {output}");
+        assert!(output.contains("coder"), "should show coder type: {output}");
+        assert!(output.contains("qa"), "should show qa type: {output}");
+        assert!(output.contains("mergemaster"), "should show mergemaster type: {output}");
+    }
+
+    #[test]
+    fn cost_command_shows_all_time_total() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_token_records(tmp.path(), &[
+            make_record("42_story_foo", "coder-1", 1.00, 2),
+            make_record("43_story_bar", "coder-1", 9.00, 100),
+        ]);
+        let output = cost_cmd_with_root(tmp.path()).unwrap();
+        assert!(output.contains("**All-time:** $10.00"), "should show all-time total: {output}");
+    }
+
+    #[test]
+    fn cost_command_case_insensitive() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy COST");
+        assert!(result.is_some(), "COST should match case-insensitively");
+    }
+
+    // -- extract_agent_type -------------------------------------------------
+
+    #[test]
+    fn extract_agent_type_strips_numeric_suffix() {
+        assert_eq!(extract_agent_type("coder-1"), "coder");
+        assert_eq!(extract_agent_type("qa-2"), "qa");
+    }
+
+    #[test]
+    fn extract_agent_type_keeps_non_numeric_suffix() {
+        assert_eq!(extract_agent_type("mergemaster"), "mergemaster");
+        assert_eq!(extract_agent_type("coder-alpha"), "coder-alpha");
+    }
+}
--- a/server/src/matrix/commands/git.rs
+++ b/server/src/matrix/commands/git.rs
@@ -0,0 +1,203 @@
+//! Handler for the `git` command.
+
+use super::CommandContext;
+
+/// Show compact git status: branch, uncommitted files, ahead/behind remote.
+pub(super) fn handle_git(ctx: &CommandContext) -> Option<String> {
+    use std::process::Command;
+
+    // Current branch
+    let branch = Command::new("git")
+        .args(["rev-parse", "--abbrev-ref", "HEAD"])
+        .current_dir(ctx.project_root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
+        .unwrap_or_else(|| "unknown".to_string());
+
+    // Porcelain status for staged + unstaged changes
+    let status_output = Command::new("git")
+        .args(["status", "--porcelain"])
+        .current_dir(ctx.project_root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .map(|o| String::from_utf8_lossy(&o.stdout).to_string())
+        .unwrap_or_default();
+
+    let changed_files: Vec<&str> = status_output.lines().filter(|l| !l.is_empty()).collect();
+    let change_count = changed_files.len();
+
+    // Ahead/behind: --left-right gives "N\tM" (ahead\tbehind)
+    let ahead_behind = Command::new("git")
+        .args(["rev-list", "--count", "--left-right", "HEAD...@{u}"])
+        .current_dir(ctx.project_root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .and_then(|o| {
+            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
+            let mut parts = s.split_whitespace();
+            let ahead: u32 = parts.next()?.parse().ok()?;
+            let behind: u32 = parts.next()?.parse().ok()?;
+            Some((ahead, behind))
+        });
+
+    let mut out = format!("**Branch:** `{branch}`\n");
+
+    if change_count == 0 {
+        out.push_str("**Changes:** clean\n");
+    } else {
+        out.push_str(&format!("**Changes:** {change_count} file(s)\n"));
+        for line in &changed_files {
+            // Porcelain format: "XY filename" (2-char status + space + path)
+            if line.len() > 3 {
+                let codes = &line[..2];
+                let name = line[3..].trim();
+                out.push_str(&format!("  • `{codes}` {name}\n"));
+            } else {
+                out.push_str(&format!("  • {line}\n"));
+            }
+        }
+    }
+
+    match ahead_behind {
+        Some((0, 0)) => out.push_str("**Remote:** up to date\n"),
+        Some((ahead, 0)) => out.push_str(&format!("**Remote:** ↑{ahead} ahead\n")),
+        Some((0, behind)) => out.push_str(&format!("**Remote:** ↓{behind} behind\n")),
+        Some((ahead, behind)) => {
+            out.push_str(&format!("**Remote:** ↑{ahead} ahead, ↓{behind} behind\n"));
+        }
+        None => out.push_str("**Remote:** no tracking branch\n"),
+    }
+
+    Some(out)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn test_ambient_rooms() -> Arc<Mutex<HashSet<String>>> {
+        Arc::new(Mutex::new(HashSet::new()))
+    }
+
+    fn test_agents() -> Arc<AgentPool> {
+        Arc::new(AgentPool::new_test(3000))
+    }
+
+    #[test]
+    fn git_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "git");
+        assert!(found, "git command must be in the registry");
+    }
+
+    #[test]
+    fn git_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("git"), "help should list git command: {output}");
+    }
+
+    #[test]
+    fn git_command_returns_some() {
+        // Run from the actual repo root so git commands have a real repo to query.
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let agents = test_agents();
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: repo_root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let result = try_handle_command(&dispatch, "@timmy git");
+        assert!(result.is_some(), "git command should always return Some");
+    }
+
+    #[test]
+    fn git_command_output_contains_branch() {
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let agents = test_agents();
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: repo_root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let output = try_handle_command(&dispatch, "@timmy git").unwrap();
+        assert!(
+            output.contains("**Branch:**"),
+            "git output should contain branch info: {output}"
+        );
+    }
+
+    #[test]
+    fn git_command_output_contains_changes() {
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let agents = test_agents();
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: repo_root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let output = try_handle_command(&dispatch, "@timmy git").unwrap();
+        assert!(
+            output.contains("**Changes:**"),
+            "git output should contain changes section: {output}"
+        );
+    }
+
+    #[test]
+    fn git_command_output_contains_remote() {
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let agents = test_agents();
+        let ambient_rooms = test_ambient_rooms();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: repo_root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        let output = try_handle_command(&dispatch, "@timmy git").unwrap();
+        assert!(
+            output.contains("**Remote:**"),
+            "git output should contain remote section: {output}"
+        );
+    }
+
+    #[test]
+    fn git_command_case_insensitive() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy GIT");
+        assert!(result.is_some(), "GIT should match case-insensitively");
+    }
+}
--- a/server/src/matrix/commands/help.rs
+++ b/server/src/matrix/commands/help.rs
@@ -0,0 +1,113 @@
+//! Handler for the `help` command.
+
+use super::{commands, CommandContext};
+
+pub(super) fn handle_help(ctx: &CommandContext) -> Option<String> {
+    let mut output = format!("**{} Commands**\n\n", ctx.bot_name);
+    let mut sorted: Vec<_> = commands().iter().collect();
+    sorted.sort_by_key(|c| c.name);
+    for cmd in sorted {
+        output.push_str(&format!("- **{}** — {}\n", cmd.name, cmd.description));
+    }
+    Some(output)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::tests::{try_cmd_addressed, commands};
+
+    #[test]
+    fn help_command_matches() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        assert!(result.is_some(), "help command should match");
+    }
+
+    #[test]
+    fn help_command_case_insensitive() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy HELP");
+        assert!(result.is_some(), "HELP should match case-insensitively");
+    }
+
+    #[test]
+    fn help_output_contains_all_commands() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        for cmd in commands() {
+            assert!(
+                output.contains(cmd.name),
+                "help output must include command '{}'",
+                cmd.name
+            );
+            assert!(
+                output.contains(cmd.description),
+                "help output must include description for '{}'",
+                cmd.name
+            );
+        }
+    }
+
+    #[test]
+    fn help_output_uses_bot_name() {
+        let result = try_cmd_addressed("HAL", "@hal:example.com", "@hal help");
+        let output = result.unwrap();
+        assert!(
+            output.contains("HAL Commands"),
+            "help output should use bot name: {output}"
+        );
+    }
+
+    #[test]
+    fn help_output_formatted_as_markdown() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(
+            output.contains("**help**"),
+            "command name should be bold: {output}"
+        );
+        assert!(
+            output.contains("- **"),
+            "commands should be in a list: {output}"
+        );
+    }
+
+    #[test]
+    fn help_output_includes_status() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("status"), "help should list status command: {output}");
+    }
+
+    #[test]
+    fn help_output_is_alphabetical() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        // Search for **name** (bold markdown) to avoid substring matches in descriptions.
+        let mut positions: Vec<(usize, &str)> = commands()
+            .iter()
+            .map(|c| {
+                let marker = format!("**{}**", c.name);
+                let pos = output.find(&marker).expect("command must appear in help as **name**");
+                (pos, c.name)
+            })
+            .collect();
+        positions.sort_by_key(|(pos, _)| *pos);
+        let names_in_order: Vec<&str> = positions.iter().map(|(_, n)| *n).collect();
+        let mut sorted = names_in_order.clone();
+        sorted.sort();
+        assert_eq!(names_in_order, sorted, "commands must appear in alphabetical order");
+    }
+
+    #[test]
+    fn help_output_includes_ambient() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("ambient"), "help should list ambient command: {output}");
+    }
+
+    #[test]
+    fn help_output_includes_htop() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("htop"), "help should list htop command: {output}");
+    }
+}
--- a/server/src/matrix/commands/mod.rs
+++ b/server/src/matrix/commands/mod.rs
@@ -0,0 +1,470 @@
+//! Bot-level command registry for the Matrix bot.
+//!
+//! Commands registered here are handled directly by the bot without invoking
+//! the LLM. The registry is the single source of truth — the `help` command
+//! iterates it automatically so new commands appear in the help output as soon
+//! as they are added.
+
+mod ambient;
+mod assign;
+mod cost;
+mod git;
+mod help;
+mod move_story;
+mod overview;
+mod show;
+mod status;
+mod whatsup;
+
+use crate::agents::AgentPool;
+use std::collections::HashSet;
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+
+/// A bot-level command that is handled without LLM invocation.
+pub struct BotCommand {
+    /// The command keyword (e.g., `"help"`). Always lowercase.
+    pub name: &'static str,
+    /// Short description shown in help output.
+    pub description: &'static str,
+    /// Handler that produces the response text (Markdown), or `None` to fall
+    /// through to the LLM (e.g. when a command requires direct addressing but
+    /// the message arrived via ambient mode).
+    pub handler: fn(&CommandContext) -> Option<String>,
+}
+
+/// Dispatch parameters passed to `try_handle_command`.
+///
+/// Groups all the caller-supplied context needed to dispatch and execute bot
+/// commands.  Construct one per incoming message and pass it alongside the raw
+/// message body.
+///
+/// All identifiers are platform-agnostic strings so this struct works with
+/// any [`ChatTransport`](crate::transport::ChatTransport) implementation.
+pub struct CommandDispatch<'a> {
+    /// The bot's display name (e.g., "Timmy").
+    pub bot_name: &'a str,
+    /// The bot's full user ID (e.g., `"@timmy:homeserver.local"` on Matrix).
+    pub bot_user_id: &'a str,
+    /// Project root directory (needed by status, ambient).
+    pub project_root: &'a Path,
+    /// Agent pool (needed by status).
+    pub agents: &'a AgentPool,
+    /// Set of room IDs with ambient mode enabled (needed by ambient).
+    pub ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
+    /// The room this message came from (needed by ambient).
+    pub room_id: &'a str,
+}
+
+/// Context passed to individual command handlers.
+pub struct CommandContext<'a> {
+    /// The bot's display name (e.g., "Timmy").
+    pub bot_name: &'a str,
+    /// Any text after the command keyword, trimmed.
+    pub args: &'a str,
+    /// Project root directory (needed by status, ambient).
+    pub project_root: &'a Path,
+    /// Agent pool (needed by status).
+    pub agents: &'a AgentPool,
+    /// Set of room IDs with ambient mode enabled (needed by ambient).
+    pub ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
+    /// The room this message came from (needed by ambient).
+    pub room_id: &'a str,
+}
+
+/// Returns the full list of registered bot commands.
+///
+/// Add new commands here — they will automatically appear in `help` output.
+pub fn commands() -> &'static [BotCommand] {
+    &[
+        BotCommand {
+            name: "assign",
+            description: "Pre-assign a model to a story: `assign <number> <model>` (e.g. `assign 42 opus`)",
+            handler: assign::handle_assign,
+        },
+        BotCommand {
+            name: "help",
+            description: "Show this list of available commands",
+            handler: help::handle_help,
+        },
+        BotCommand {
+            name: "status",
+            description: "Show pipeline status and agent availability",
+            handler: status::handle_status,
+        },
+        BotCommand {
+            name: "ambient",
+            description: "Toggle ambient mode for this room: `ambient on` or `ambient off`",
+            handler: ambient::handle_ambient,
+        },
+        BotCommand {
+            name: "git",
+            description: "Show git status: branch, uncommitted changes, and ahead/behind remote",
+            handler: git::handle_git,
+        },
+        BotCommand {
+            name: "htop",
+            description: "Show live system and agent process dashboard (`htop`, `htop 10m`, `htop stop`)",
+            handler: handle_htop_fallback,
+        },
+        BotCommand {
+            name: "cost",
+            description: "Show token spend: 24h total, top stories, breakdown by agent type, and all-time total",
+            handler: cost::handle_cost,
+        },
+        BotCommand {
+            name: "move",
+            description: "Move a work item to a pipeline stage: `move <number> <stage>` (stages: backlog, current, qa, merge, done)",
+            handler: move_story::handle_move,
+        },
+        BotCommand {
+            name: "show",
+            description: "Display the full text of a work item: `show <number>`",
+            handler: show::handle_show,
+        },
+        BotCommand {
+            name: "overview",
+            description: "Show implementation summary for a merged story: `overview <number>`",
+            handler: overview::handle_overview,
+        },
+        BotCommand {
+            name: "whatsup",
+            description: "Show in-progress triage dump for a story: `whatsup <number>`",
+            handler: whatsup::handle_whatsup,
+        },
+        BotCommand {
+            name: "start",
+            description: "Start a coder on a story: `start <number>` or `start <number> opus`",
+            handler: handle_start_fallback,
+        },
+        BotCommand {
+            name: "delete",
+            description: "Remove a work item from the pipeline: `delete <number>`",
+            handler: handle_delete_fallback,
+        },
+        BotCommand {
+            name: "reset",
+            description: "Clear the current Claude Code session and start fresh",
+            handler: handle_reset_fallback,
+        },
+        BotCommand {
+            name: "rebuild",
+            description: "Rebuild the server binary and restart",
+            handler: handle_rebuild_fallback,
+        },
+    ]
+}
+
+/// Try to match a user message against a registered bot command.
+///
+/// The message is expected to be the raw body text from Matrix (e.g.,
+/// `"@timmy help"`). The bot mention prefix is stripped before matching.
+///
+/// Returns `Some(response)` if a command matched and was handled, `None`
+/// otherwise (the caller should fall through to the LLM).
+pub fn try_handle_command(dispatch: &CommandDispatch<'_>, message: &str) -> Option<String> {
+    let command_text = strip_bot_mention(message, dispatch.bot_name, dispatch.bot_user_id);
+    let trimmed = command_text.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    let (cmd_name, args) = match trimmed.split_once(char::is_whitespace) {
+        Some((c, a)) => (c, a.trim()),
+        None => (trimmed, ""),
+    };
+    let cmd_lower = cmd_name.to_ascii_lowercase();
+
+    let ctx = CommandContext {
+        bot_name: dispatch.bot_name,
+        args,
+        project_root: dispatch.project_root,
+        agents: dispatch.agents,
+        ambient_rooms: dispatch.ambient_rooms,
+        room_id: dispatch.room_id,
+    };
+
+    commands()
+        .iter()
+        .find(|c| c.name == cmd_lower)
+        .and_then(|c| (c.handler)(&ctx))
+}
+
+/// Strip the bot mention prefix from a raw message body.
+///
+/// Handles these forms (case-insensitive where applicable):
+/// - `@bot_localpart:server.com rest` → `rest`
+/// - `@bot_localpart rest` → `rest`
+/// - `DisplayName rest` → `rest`
+fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+
+    // Try full Matrix user ID (e.g. "@timmy:homeserver.local")
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+
+    // Try @localpart (e.g. "@timmy")
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+
+    // Try display name (e.g. "Timmy")
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+
+    trimmed
+}
+
+/// Case-insensitive prefix strip that also requires the match to end at a
+/// word boundary (whitespace, punctuation, or end-of-string).
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    // Must be at end or followed by non-alphanumeric
+    match rest.chars().next() {
+        None => Some(rest), // exact match, empty remainder
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary
+        _ => Some(rest),
+    }
+}
+
+/// Fallback handler for the `htop` command when it is not intercepted by the
+/// async handler in `on_room_message`.  In practice this is never called —
+/// htop is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "htop" as a prompt.
+fn handle_htop_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
+/// Fallback handler for the `start` command when it is not intercepted by
+/// the async handler in `on_room_message`.  In practice this is never called —
+/// start is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "start" as a prompt.
+fn handle_start_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
+/// Fallback handler for the `delete` command when it is not intercepted by
+/// the async handler in `on_room_message`.  In practice this is never called —
+/// delete is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "delete" as a prompt.
+fn handle_delete_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
+/// Fallback handler for the `reset` command when it is not intercepted by
+/// the async handler in `on_room_message`.  In practice this is never called —
+/// reset is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "reset" as a prompt.
+fn handle_reset_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
+/// Fallback handler for the `rebuild` command when it is not intercepted by
+/// the async handler in `on_room_message`.  In practice this is never called —
+/// rebuild is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "rebuild" as a prompt.
+fn handle_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+
+    // -- test helpers (shared with submodule tests) -------------------------
+
+    pub fn test_ambient_rooms() -> Arc<Mutex<HashSet<String>>> {
+        Arc::new(Mutex::new(HashSet::new()))
+    }
+
+    pub fn test_agents() -> Arc<AgentPool> {
+        Arc::new(AgentPool::new_test(3000))
+    }
+
+    pub fn try_cmd(
+        bot_name: &str,
+        bot_user_id: &str,
+        message: &str,
+        ambient_rooms: &Arc<Mutex<HashSet<String>>>,
+    ) -> Option<String> {
+        let agents = test_agents();
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name,
+            bot_user_id,
+            project_root: std::path::Path::new("/tmp"),
+            agents: &agents,
+            ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, message)
+    }
+
+    pub fn try_cmd_addressed(bot_name: &str, bot_user_id: &str, message: &str) -> Option<String> {
+        try_cmd(bot_name, bot_user_id, message, &test_ambient_rooms())
+    }
+
+    // Re-export commands() for submodule tests
+    pub use super::commands;
+
+    // -- strip_bot_mention --------------------------------------------------
+
+    #[test]
+    fn strip_mention_full_user_id() {
+        let rest = strip_bot_mention(
+            "@timmy:homeserver.local help",
+            "Timmy",
+            "@timmy:homeserver.local",
+        );
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_localpart() {
+        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help me");
+    }
+
+    #[test]
+    fn strip_mention_display_name() {
+        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_display_name_case_insensitive() {
+        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_no_match_returns_original() {
+        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest, "hello world");
+    }
+
+    #[test]
+    fn strip_mention_does_not_match_longer_name() {
+        // "@timmybot" should NOT match "@timmy"
+        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest, "@timmybot help");
+    }
+
+    #[test]
+    fn strip_mention_comma_after_name() {
+        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
+    }
+
+    // -- try_handle_command -------------------------------------------------
+
+    #[test]
+    fn unknown_command_returns_none() {
+        let result = try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy what is the weather?",
+        );
+        assert!(result.is_none(), "non-command should return None");
+    }
+
+    #[test]
+    fn empty_message_after_mention_returns_none() {
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy");
+        assert!(
+            result.is_none(),
+            "bare mention with no command should fall through to LLM"
+        );
+    }
+
+    #[test]
+    fn htop_command_falls_through_to_none() {
+        // The htop handler returns None so the message is handled asynchronously
+        // in on_room_message, not here.  try_handle_command must return None.
+        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy htop");
+        assert!(
+            result.is_none(),
+            "htop should not produce a sync response (handled async): {result:?}"
+        );
+    }
+
+    // -- strip_prefix_ci ----------------------------------------------------
+
+    #[test]
+    fn strip_prefix_ci_basic() {
+        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
+    }
+
+    #[test]
+    fn strip_prefix_ci_no_match() {
+        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
+    }
+
+    #[test]
+    fn strip_prefix_ci_word_boundary_required() {
+        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
+    }
+
+    #[test]
+    fn strip_prefix_ci_exact_match() {
+        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
+    }
+
+    // -- commands registry --------------------------------------------------
+
+    #[test]
+    fn commands_registry_is_not_empty() {
+        assert!(
+            !commands().is_empty(),
+            "command registry must contain at least one command"
+        );
+    }
+
+    #[test]
+    fn all_command_names_are_lowercase() {
+        for cmd in commands() {
+            assert_eq!(
+                cmd.name,
+                cmd.name.to_ascii_lowercase(),
+                "command name '{}' must be lowercase",
+                cmd.name
+            );
+        }
+    }
+
+    #[test]
+    fn all_commands_have_descriptions() {
+        for cmd in commands() {
+            assert!(
+                !cmd.description.is_empty(),
+                "command '{}' must have a description",
+                cmd.name
+            );
+        }
+    }
+}
--- a/server/src/matrix/commands/move_story.rs
+++ b/server/src/matrix/commands/move_story.rs
@@ -0,0 +1,296 @@
+//! Handler for the `move` command.
+//!
+//! `{bot_name} move {number} {stage}` finds the work item by number across all
+//! pipeline stages, moves it to the specified stage, and returns a confirmation
+//! with the story title, old stage, and new stage.
+
+use super::CommandContext;
+use crate::agents::move_story_to_stage;
+
+/// Valid stage names accepted by the move command.
+const VALID_STAGES: &[&str] = &["backlog", "current", "qa", "merge", "done"];
+
+/// All pipeline stage directories to search when finding a work item by number.
+const SEARCH_DIRS: &[&str] = &[
+    "1_backlog",
+    "2_current",
+    "3_qa",
+    "4_merge",
+    "5_done",
+    "6_archived",
+];
+
+/// Handle the `move` command.
+///
+/// Parses `<number> <stage>` from `ctx.args`, locates the work item by its
+/// numeric prefix, moves it to the target stage using the shared lifecycle
+/// function, and returns a Markdown confirmation string.
+pub(super) fn handle_move(ctx: &CommandContext) -> Option<String> {
+    let args = ctx.args.trim();
+
+    // Parse `number stage` from args.
+    let (num_str, stage_raw) = match args.split_once(char::is_whitespace) {
+        Some((n, s)) => (n.trim(), s.trim()),
+        None => {
+            return Some(format!(
+                "Usage: `{} move <number> <stage>`\n\nValid stages: {}",
+                ctx.bot_name,
+                VALID_STAGES.join(", ")
+            ));
+        }
+    };
+
+    if num_str.is_empty() || !num_str.chars().all(|c| c.is_ascii_digit()) {
+        return Some(format!(
+            "Invalid story number: `{num_str}`. Usage: `{} move <number> <stage>`",
+            ctx.bot_name
+        ));
+    }
+
+    let target_stage = stage_raw.to_ascii_lowercase();
+    if !VALID_STAGES.contains(&target_stage.as_str()) {
+        return Some(format!(
+            "Invalid stage: `{stage_raw}`. Valid stages: {}",
+            VALID_STAGES.join(", ")
+        ));
+    }
+
+    // Find the story file across all pipeline stages by numeric prefix.
+    let mut found_story_id: Option<String> = None;
+    let mut found_name: Option<String> = None;
+
+    'outer: for stage_dir in SEARCH_DIRS {
+        let dir = ctx
+            .project_root
+            .join(".storkit")
+            .join("work")
+            .join(stage_dir);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("");
+                    if file_num == num_str {
+                        found_story_id = Some(stem.to_string());
+                        found_name = std::fs::read_to_string(&path)
+                            .ok()
+                            .and_then(|contents| {
+                                crate::io::story_metadata::parse_front_matter(&contents)
+                                    .ok()
+                                    .and_then(|m| m.name)
+                            });
+                        break 'outer;
+                    }
+                }
+            }
+        }
+    }
+
+    let story_id = match found_story_id {
+        Some(id) => id,
+        None => {
+            return Some(format!(
+                "No story, bug, or spike with number **{num_str}** found."
+            ));
+        }
+    };
+
+    let display_name = found_name.as_deref().unwrap_or(&story_id);
+
+    match move_story_to_stage(ctx.project_root, &story_id, &target_stage) {
+        Ok((from_stage, to_stage)) => Some(format!(
+            "Moved **{display_name}** from **{from_stage}** to **{to_stage}**."
+        )),
+        Err(e) => Some(format!("Failed to move story {num_str}: {e}")),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn move_cmd_with_root(root: &std::path::Path, args: &str) -> Option<String> {
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, &format!("@timmy move {args}"))
+    }
+
+    fn write_story_file(root: &std::path::Path, stage: &str, filename: &str, content: &str) {
+        let dir = root.join(".storkit/work").join(stage);
+        std::fs::create_dir_all(&dir).unwrap();
+        std::fs::write(dir.join(filename), content).unwrap();
+    }
+
+    #[test]
+    fn move_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "move");
+        assert!(found, "move command must be in the registry");
+    }
+
+    #[test]
+    fn move_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("move"),
+            "help should list move command: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_no_args_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = move_cmd_with_root(tmp.path(), "").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "no args should show usage hint: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_missing_stage_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = move_cmd_with_root(tmp.path(), "42").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "missing stage should show usage hint: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_invalid_stage_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = move_cmd_with_root(tmp.path(), "42 invalid_stage").unwrap();
+        assert!(
+            output.contains("Invalid stage"),
+            "invalid stage should return error: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_non_numeric_number_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = move_cmd_with_root(tmp.path(), "abc current").unwrap();
+        assert!(
+            output.contains("Invalid story number"),
+            "non-numeric number should return error: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_not_found_returns_friendly_message() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = move_cmd_with_root(tmp.path(), "999 current").unwrap();
+        assert!(
+            output.contains("999") && output.contains("found"),
+            "not-found message should include number and 'found': {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_moves_story_and_confirms() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "42_story_some_feature.md",
+            "---\nname: Some Feature\n---\n\n# Story 42\n",
+        );
+
+        let output = move_cmd_with_root(tmp.path(), "42 current").unwrap();
+        assert!(
+            output.contains("Some Feature"),
+            "confirmation should include story name: {output}"
+        );
+        assert!(
+            output.contains("backlog"),
+            "confirmation should include old stage: {output}"
+        );
+        assert!(
+            output.contains("current"),
+            "confirmation should include new stage: {output}"
+        );
+
+        // Verify the file was actually moved.
+        let new_path = tmp
+            .path()
+            .join(".storkit/work/2_current/42_story_some_feature.md");
+        assert!(new_path.exists(), "story file should be in 2_current/");
+    }
+
+    #[test]
+    fn move_command_case_insensitive_stage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "10_story_test.md",
+            "---\nname: Test\n---\n",
+        );
+        let output = move_cmd_with_root(tmp.path(), "10 BACKLOG").unwrap();
+        assert!(
+            output.contains("Test") && output.contains("backlog"),
+            "stage matching should be case-insensitive: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_idempotent_when_already_in_target() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "5_story_already_current.md",
+            "---\nname: Already Current\n---\n",
+        );
+        // Moving to the stage it's already in should return a success message.
+        let output = move_cmd_with_root(tmp.path(), "5 current").unwrap();
+        assert!(
+            output.contains("Moved") || output.contains("current"),
+            "idempotent move should succeed: {output}"
+        );
+    }
+
+    #[test]
+    fn move_command_case_insensitive_command() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy MOVE 1 backlog",
+        );
+        // Returns Some (the registry matched, regardless of result content)
+        assert!(result.is_some(), "MOVE should match case-insensitively");
+    }
+}
--- a/server/src/matrix/commands/overview.rs
+++ b/server/src/matrix/commands/overview.rs
@@ -0,0 +1,380 @@
+//! Handler for the `overview` command.
+
+use super::CommandContext;
+
+/// Show implementation summary for a story identified by its number.
+///
+/// Finds the `storkit: merge {story_id}` commit on master, displays the
+/// git diff --stat (files changed with line counts), and extracts key
+/// function/struct/type names added or modified in the implementation.
+/// Returns a friendly message when no merge commit is found.
+pub(super) fn handle_overview(ctx: &CommandContext) -> Option<String> {
+    let num_str = ctx.args.trim();
+    if num_str.is_empty() {
+        return Some(format!(
+            "Usage: `{} overview <number>`\n\nShows the implementation summary for a story.",
+            ctx.bot_name
+        ));
+    }
+    if !num_str.chars().all(|c| c.is_ascii_digit()) {
+        return Some(format!(
+            "Invalid story number: `{num_str}`. Usage: `{} overview <number>`",
+            ctx.bot_name
+        ));
+    }
+
+    let commit_hash = match find_story_merge_commit(ctx.project_root, num_str) {
+        Some(h) => h,
+        None => {
+            return Some(format!(
+                "No implementation found for story **{num_str}**. \
+                It may still be in the backlog or was never merged."
+            ));
+        }
+    };
+
+    let stat_output = get_commit_stat(ctx.project_root, &commit_hash);
+    let symbols = extract_diff_symbols(ctx.project_root, &commit_hash);
+    let story_name = find_story_name(ctx.project_root, num_str);
+
+    let short_hash = &commit_hash[..commit_hash.len().min(8)];
+    let mut out = match story_name {
+        Some(name) => format!("**Overview: Story {num_str} — {name}**\n\n"),
+        None => format!("**Overview: Story {num_str}**\n\n"),
+    };
+    out.push_str(&format!("Commit: `{short_hash}`\n\n"));
+
+    // Parse stat output: collect per-file lines and the summary line.
+    let mut file_lines: Vec<String> = Vec::new();
+    let mut summary_line = String::new();
+    for line in stat_output.lines() {
+        if line.contains("changed") && (line.contains("insertion") || line.contains("deletion")) {
+            summary_line = line.trim().to_string();
+        } else if !line.trim().is_empty() && line.contains('|') {
+            file_lines.push(line.trim().to_string());
+        }
+    }
+
+    if !summary_line.is_empty() {
+        out.push_str(&format!("**Changes:** {summary_line}\n"));
+    }
+
+    if !file_lines.is_empty() {
+        out.push_str("**Files:**\n");
+        for f in file_lines.iter().take(8) {
+            out.push_str(&format!("  • `{f}`\n"));
+        }
+        if file_lines.len() > 8 {
+            out.push_str(&format!("  … and {} more\n", file_lines.len() - 8));
+        }
+    }
+
+    if !symbols.is_empty() {
+        out.push_str("\n**Key symbols:**\n");
+        for sym in &symbols {
+            out.push_str(&format!("  • {sym}\n"));
+        }
+    }
+
+    Some(out)
+}
+
+/// Find the merge commit hash for a story by its numeric ID.
+///
+/// Searches git log for a commit whose subject matches
+/// `storkit: merge {num}_*` or the legacy `story-kit: merge {num}_*`.
+fn find_story_merge_commit(root: &std::path::Path, num_str: &str) -> Option<String> {
+    use std::process::Command;
+    // Match both the current prefix and the legacy one from before the rename.
+    let grep_pattern = format!("(storkit|story-kit): merge {num_str}_");
+    let output = Command::new("git")
+        .args([
+            "log",
+            "--format=%H",
+            "--all",
+            "--extended-regexp",
+            "--grep",
+            &grep_pattern,
+        ])
+        .current_dir(root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())?;
+    let text = String::from_utf8_lossy(&output.stdout);
+    let hash = text.lines().next()?.trim().to_string();
+    if hash.is_empty() { None } else { Some(hash) }
+}
+
+/// Find the human-readable name of a story by searching all pipeline stages.
+fn find_story_name(root: &std::path::Path, num_str: &str) -> Option<String> {
+    let stages = [
+        "1_backlog",
+        "2_current",
+        "3_qa",
+        "4_merge",
+        "5_done",
+        "6_archived",
+    ];
+    for stage in &stages {
+        let dir = root.join(".storkit").join("work").join(stage);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("");
+                    if file_num == num_str {
+                        return std::fs::read_to_string(&path).ok().and_then(|c| {
+                            crate::io::story_metadata::parse_front_matter(&c)
+                                .ok()
+                                .and_then(|m| m.name)
+                        });
+                    }
+                }
+            }
+        }
+    }
+    None
+}
+
+/// Return the `git show --stat` output for a commit.
+fn get_commit_stat(root: &std::path::Path, hash: &str) -> String {
+    use std::process::Command;
+    Command::new("git")
+        .args(["show", "--stat", hash])
+        .current_dir(root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .map(|o| String::from_utf8_lossy(&o.stdout).to_string())
+        .unwrap_or_default()
+}
+
+/// Extract up to 12 unique top-level symbol definitions from a commit diff.
+///
+/// Scans added lines (`+`) for Rust `fn`, `struct`, `enum`, `type`, `trait`,
+/// and `impl` declarations and returns them formatted as `` `Name` (kind) ``.
+fn extract_diff_symbols(root: &std::path::Path, hash: &str) -> Vec<String> {
+    use std::process::Command;
+    let output = Command::new("git")
+        .args(["show", hash])
+        .current_dir(root)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .map(|o| String::from_utf8_lossy(&o.stdout).to_string())
+        .unwrap_or_default();
+
+    let mut symbols: Vec<String> = Vec::new();
+    for line in output.lines() {
+        if !line.starts_with('+') || line.starts_with("+++") {
+            continue;
+        }
+        if let Some(sym) = parse_symbol_definition(&line[1..]) {
+            if !symbols.contains(&sym) {
+                symbols.push(sym);
+            }
+            if symbols.len() >= 12 {
+                break;
+            }
+        }
+    }
+    symbols
+}
+
+/// Parse a single line of code and return a formatted symbol if it opens a
+/// top-level Rust definition (`fn`, `struct`, `enum`, `type`, `trait`, `impl`).
+fn parse_symbol_definition(code: &str) -> Option<String> {
+    let t = code.trim();
+    let patterns: &[(&str, &str)] = &[
+        ("pub async fn ", "fn"),
+        ("async fn ", "fn"),
+        ("pub fn ", "fn"),
+        ("fn ", "fn"),
+        ("pub struct ", "struct"),
+        ("struct ", "struct"),
+        ("pub enum ", "enum"),
+        ("enum ", "enum"),
+        ("pub type ", "type"),
+        ("type ", "type"),
+        ("pub trait ", "trait"),
+        ("trait ", "trait"),
+        ("impl ", "impl"),
+    ];
+    for (prefix, kind) in patterns {
+        if let Some(rest) = t.strip_prefix(prefix) {
+            let name: String = rest
+                .chars()
+                .take_while(|c| c.is_alphanumeric() || *c == '_')
+                .collect();
+            if !name.is_empty() {
+                return Some(format!("`{name}` ({kind})"));
+            }
+        }
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn overview_cmd_with_root(root: &std::path::Path, args: &str) -> Option<String> {
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, &format!("@timmy overview {args}"))
+    }
+
+    #[test]
+    fn overview_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "overview");
+        assert!(found, "overview command must be in the registry");
+    }
+
+    #[test]
+    fn overview_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("overview"),
+            "help should list overview command: {output}"
+        );
+    }
+
+    #[test]
+    fn overview_command_no_args_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = overview_cmd_with_root(tmp.path(), "").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "no args should show usage hint: {output}"
+        );
+    }
+
+    #[test]
+    fn overview_command_non_numeric_arg_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = overview_cmd_with_root(tmp.path(), "abc").unwrap();
+        assert!(
+            output.contains("Invalid"),
+            "non-numeric arg should return error: {output}"
+        );
+    }
+
+    #[test]
+    fn overview_command_not_found_returns_friendly_message() {
+        // Use the real repo root but a story number that was never merged.
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let output = overview_cmd_with_root(repo_root, "99999").unwrap();
+        assert!(
+            output.contains("99999"),
+            "not-found message should include the story number: {output}"
+        );
+        assert!(
+            output.contains("backlog") || output.contains("No implementation"),
+            "not-found message should explain why: {output}"
+        );
+    }
+
+    #[test]
+    fn overview_command_found_shows_commit_and_stat() {
+        // Story 324 has a real merge commit in master.
+        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap_or(std::path::Path::new("."));
+        let output = overview_cmd_with_root(repo_root, "324").unwrap();
+        assert!(
+            output.contains("**Overview: Story 324"),
+            "output should show story header: {output}"
+        );
+        assert!(
+            output.contains("Commit:"),
+            "output should show commit hash: {output}"
+        );
+        assert!(
+            output.contains("**Changes:**") || output.contains("**Files:**"),
+            "output should show file changes: {output}"
+        );
+    }
+
+    #[test]
+    fn overview_command_case_insensitive() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy OVERVIEW 1",
+        );
+        assert!(result.is_some(), "OVERVIEW should match case-insensitively");
+    }
+
+    // -- parse_symbol_definition --------------------------------------------
+
+    #[test]
+    fn parse_symbol_pub_fn() {
+        let result =
+            parse_symbol_definition("pub fn handle_foo(ctx: &Context) -> Option<String> {");
+        assert_eq!(result, Some("`handle_foo` (fn)".to_string()));
+    }
+
+    #[test]
+    fn parse_symbol_pub_struct() {
+        let result = parse_symbol_definition("pub struct SlackTransport {");
+        assert_eq!(result, Some("`SlackTransport` (struct)".to_string()));
+    }
+
+    #[test]
+    fn parse_symbol_impl() {
+        let result = parse_symbol_definition("impl ChatTransport for SlackTransport {");
+        assert_eq!(result, Some("`ChatTransport` (impl)".to_string()));
+    }
+
+    #[test]
+    fn parse_symbol_no_match() {
+        let result = parse_symbol_definition("    let x = 42;");
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn parse_symbol_pub_enum() {
+        let result = parse_symbol_definition("pub enum QaMode {");
+        assert_eq!(result, Some("`QaMode` (enum)".to_string()));
+    }
+
+    #[test]
+    fn parse_symbol_pub_type() {
+        let result = parse_symbol_definition(
+            "pub type SlackHistory = Arc<Mutex<HashMap<String, Vec<u8>>>>;",
+        );
+        assert_eq!(result, Some("`SlackHistory` (type)".to_string()));
+    }
+}
--- a/server/src/matrix/commands/show.rs
+++ b/server/src/matrix/commands/show.rs
@@ -0,0 +1,201 @@
+//! Handler for the `show` command.
+
+use super::CommandContext;
+
+/// Display the full markdown text of a work item identified by its numeric ID.
+///
+/// Searches all pipeline stages in order and returns the raw file contents of
+/// the first matching story, bug, or spike.  Returns a friendly message when
+/// no match is found.
+pub(super) fn handle_show(ctx: &CommandContext) -> Option<String> {
+    let num_str = ctx.args.trim();
+    if num_str.is_empty() {
+        return Some(format!(
+            "Usage: `{} show <number>`\n\nDisplays the full text of a story, bug, or spike.",
+            ctx.bot_name
+        ));
+    }
+    if !num_str.chars().all(|c| c.is_ascii_digit()) {
+        return Some(format!(
+            "Invalid story number: `{num_str}`. Usage: `{} show <number>`",
+            ctx.bot_name
+        ));
+    }
+
+    let stages = [
+        "1_backlog",
+        "2_current",
+        "3_qa",
+        "4_merge",
+        "5_done",
+        "6_archived",
+    ];
+
+    for stage in &stages {
+        let dir = ctx
+            .project_root
+            .join(".storkit")
+            .join("work")
+            .join(stage);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("");
+                    if file_num == num_str {
+                        return match std::fs::read_to_string(&path) {
+                            Ok(contents) => Some(contents),
+                            Err(e) => Some(format!("Failed to read story {num_str}: {e}")),
+                        };
+                    }
+                }
+            }
+        }
+    }
+
+    Some(format!(
+        "No story, bug, or spike with number **{num_str}** found."
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn show_cmd_with_root(root: &std::path::Path, args: &str) -> Option<String> {
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, &format!("@timmy show {args}"))
+    }
+
+    fn write_story_file(root: &std::path::Path, stage: &str, filename: &str, content: &str) {
+        let dir = root.join(".storkit/work").join(stage);
+        std::fs::create_dir_all(&dir).unwrap();
+        std::fs::write(dir.join(filename), content).unwrap();
+    }
+
+    #[test]
+    fn show_command_is_registered() {
+        use super::super::commands;
+        let found = commands().iter().any(|c| c.name == "show");
+        assert!(found, "show command must be in the registry");
+    }
+
+    #[test]
+    fn show_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+        let output = result.unwrap();
+        assert!(output.contains("show"), "help should list show command: {output}");
+    }
+
+    #[test]
+    fn show_command_no_args_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = show_cmd_with_root(tmp.path(), "").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "no args should show usage hint: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_non_numeric_args_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = show_cmd_with_root(tmp.path(), "abc").unwrap();
+        assert!(
+            output.contains("Invalid"),
+            "non-numeric arg should return error message: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_not_found_returns_friendly_message() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = show_cmd_with_root(tmp.path(), "999").unwrap();
+        assert!(
+            output.contains("999"),
+            "not-found message should include the queried number: {output}"
+        );
+        assert!(
+            output.contains("found"),
+            "not-found message should say not found: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_finds_story_in_backlog() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "305_story_show_command.md",
+            "---\nname: Show command\n---\n\n# Story 305\n\nFull story text here.",
+        );
+        let output = show_cmd_with_root(tmp.path(), "305").unwrap();
+        assert!(
+            output.contains("Full story text here."),
+            "show should return full story content: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_finds_story_in_current() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "42_story_do_something.md",
+            "---\nname: Do something\n---\n\n# Story 42\n\nIn progress.",
+        );
+        let output = show_cmd_with_root(tmp.path(), "42").unwrap();
+        assert!(
+            output.contains("In progress."),
+            "show should return story from current stage: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_finds_bug() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "7_bug_crash_on_login.md",
+            "---\nname: Crash on login\n---\n\n## Symptom\n\nCrashes.",
+        );
+        let output = show_cmd_with_root(tmp.path(), "7").unwrap();
+        assert!(
+            output.contains("Symptom"),
+            "show should return bug content: {output}"
+        );
+    }
+
+    #[test]
+    fn show_command_case_insensitive() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy SHOW 1");
+        assert!(result.is_some(), "SHOW should match case-insensitively");
+    }
+}
--- a/server/src/matrix/commands/status.rs
+++ b/server/src/matrix/commands/status.rs
@@ -0,0 +1,354 @@
+//! Handler for the `status` command and pipeline status helpers.
+
+use crate::agents::{AgentPool, AgentStatus};
+use crate::config::ProjectConfig;
+use std::collections::{HashMap, HashSet};
+
+use super::CommandContext;
+
+pub(super) fn handle_status(ctx: &CommandContext) -> Option<String> {
+    Some(build_pipeline_status(ctx.project_root, ctx.agents))
+}
+
+/// Format a short display label for a work item.
+///
+/// Extracts the leading numeric ID from the file stem (e.g. `"293"` from
+/// `"293_story_register_all_bot_commands"`) and combines it with the human-
+/// readable name from the front matter when available.
+///
+/// Examples:
+/// - `("293_story_foo", Some("Register all bot commands"))` → `"293 — Register all bot commands"`
+/// - `("293_story_foo", None)` → `"293"`
+/// - `("no_number_here", None)` → `"no_number_here"`
+pub(super) fn story_short_label(stem: &str, name: Option<&str>) -> String {
+    let number = stem
+        .split('_')
+        .next()
+        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+        .unwrap_or(stem);
+    match name {
+        Some(n) => format!("{number} — {n}"),
+        None => number.to_string(),
+    }
+}
+
+/// Read all story IDs and names from a pipeline stage directory.
+fn read_stage_items(
+    project_root: &std::path::Path,
+    stage_dir: &str,
+) -> Vec<(String, Option<String>)> {
+    let dir = project_root
+        .join(".storkit")
+        .join("work")
+        .join(stage_dir);
+    if !dir.exists() {
+        return Vec::new();
+    }
+    let mut items = Vec::new();
+    if let Ok(entries) = std::fs::read_dir(&dir) {
+        for entry in entries.flatten() {
+            let path = entry.path();
+            if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                continue;
+            }
+            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
+                let name = std::fs::read_to_string(&path)
+                    .ok()
+                    .and_then(|contents| {
+                        crate::io::story_metadata::parse_front_matter(&contents)
+                            .ok()
+                            .and_then(|m| m.name)
+                    });
+                items.push((stem.to_string(), name));
+            }
+        }
+    }
+    items.sort_by(|a, b| a.0.cmp(&b.0));
+    items
+}
+
+/// Build the full pipeline status text formatted for Matrix (markdown).
+pub(super) fn build_pipeline_status(project_root: &std::path::Path, agents: &AgentPool) -> String {
+    // Build a map from story_id → active AgentInfo for quick lookup.
+    let active_agents = agents.list_agents().unwrap_or_default();
+    let active_map: HashMap<String, &crate::agents::AgentInfo> = active_agents
+        .iter()
+        .filter(|a| matches!(a.status, AgentStatus::Running | AgentStatus::Pending))
+        .map(|a| (a.story_id.clone(), a))
+        .collect();
+
+    // Read token usage once for all stories to avoid repeated file I/O.
+    let cost_by_story: HashMap<String, f64> =
+        crate::agents::token_usage::read_all(project_root)
+            .unwrap_or_default()
+            .into_iter()
+            .fold(HashMap::new(), |mut map, r| {
+                *map.entry(r.story_id).or_insert(0.0) += r.usage.total_cost_usd;
+                map
+            });
+
+    let config = ProjectConfig::load(project_root).ok();
+
+    let mut out = String::from("**Pipeline Status**\n\n");
+
+    let stages = [
+        ("1_backlog", "Backlog"),
+        ("2_current", "In Progress"),
+        ("3_qa", "QA"),
+        ("4_merge", "Merge"),
+        ("5_done", "Done"),
+    ];
+
+    for (dir, label) in &stages {
+        let items = read_stage_items(project_root, dir);
+        let count = items.len();
+        out.push_str(&format!("**{label}** ({count})\n"));
+        if items.is_empty() {
+            out.push_str("  *(none)*\n");
+        } else {
+            for (story_id, name) in &items {
+                let display = story_short_label(story_id, name.as_deref());
+                let cost_suffix = cost_by_story
+                    .get(story_id)
+                    .filter(|&&c| c > 0.0)
+                    .map(|c| format!(" — ${c:.2}"))
+                    .unwrap_or_default();
+                if let Some(agent) = active_map.get(story_id) {
+                    let model_str = config
+                        .as_ref()
+                        .and_then(|cfg| cfg.find_agent(&agent.agent_name))
+                        .and_then(|ac| ac.model.as_deref())
+                        .unwrap_or("?");
+                    out.push_str(&format!(
+                        "  • {display}{cost_suffix} — {} ({model_str})\n",
+                        agent.agent_name
+                    ));
+                } else {
+                    out.push_str(&format!("  • {display}{cost_suffix}\n"));
+                }
+            }
+        }
+        out.push('\n');
+    }
+
+    // Free agents: configured agents not currently running or pending.
+    out.push_str("**Free Agents**\n");
+    if let Some(cfg) = &config {
+        let busy_names: HashSet<String> = active_agents
+            .iter()
+            .filter(|a| matches!(a.status, AgentStatus::Running | AgentStatus::Pending))
+            .map(|a| a.agent_name.clone())
+            .collect();
+
+        let free: Vec<String> = cfg
+            .agent
+            .iter()
+            .filter(|a| !busy_names.contains(&a.name))
+            .map(|a| match &a.model {
+                Some(m) => format!("{} ({})", a.name, m),
+                None => a.name.clone(),
+            })
+            .collect();
+
+        if free.is_empty() {
+            out.push_str("  *(none — all agents busy)*\n");
+        } else {
+            for name in &free {
+                out.push_str(&format!("  • {name}\n"));
+            }
+        }
+    } else {
+        out.push_str("  *(no agent config found)*\n");
+    }
+
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+
+    #[test]
+    fn status_command_matches() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy status");
+        assert!(result.is_some(), "status command should match");
+    }
+
+    #[test]
+    fn status_command_returns_pipeline_text() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy status");
+        let output = result.unwrap();
+        assert!(
+            output.contains("Pipeline Status"),
+            "status output should contain pipeline info: {output}"
+        );
+    }
+
+    #[test]
+    fn status_command_case_insensitive() {
+        let result = super::super::tests::try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy STATUS");
+        assert!(result.is_some(), "STATUS should match case-insensitively");
+    }
+
+    // -- story_short_label --------------------------------------------------
+
+    #[test]
+    fn short_label_extracts_number_and_name() {
+        let label = story_short_label("293_story_register_all_bot_commands", Some("Register all bot commands"));
+        assert_eq!(label, "293 — Register all bot commands");
+    }
+
+    #[test]
+    fn short_label_number_only_when_no_name() {
+        let label = story_short_label("297_story_improve_bot_status_command_formatting", None);
+        assert_eq!(label, "297");
+    }
+
+    #[test]
+    fn short_label_falls_back_to_stem_when_no_numeric_prefix() {
+        let label = story_short_label("no_number_here", None);
+        assert_eq!(label, "no_number_here");
+    }
+
+    #[test]
+    fn short_label_does_not_include_underscore_slug() {
+        let label = story_short_label("293_story_register_all_bot_commands_in_the_command_registry", Some("Register all bot commands"));
+        assert!(
+            !label.contains("story_register"),
+            "label should not contain the slug portion: {label}"
+        );
+    }
+
+    // -- build_pipeline_status formatting -----------------------------------
+
+    #[test]
+    fn status_does_not_show_full_filename_stem() {
+        use std::io::Write;
+        use tempfile::TempDir;
+
+        let tmp = TempDir::new().unwrap();
+        let stage_dir = tmp.path().join(".storkit/work/2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+
+        // Write a story file with a front-matter name
+        let story_path = stage_dir.join("293_story_register_all_bot_commands.md");
+        let mut f = std::fs::File::create(&story_path).unwrap();
+        writeln!(f, "---\nname: Register all bot commands\n---\n").unwrap();
+
+        let agents = AgentPool::new_test(3000);
+        let output = build_pipeline_status(tmp.path(), &agents);
+
+        assert!(
+            !output.contains("293_story_register_all_bot_commands"),
+            "output must not show full filename stem: {output}"
+        );
+        assert!(
+            output.contains("293 — Register all bot commands"),
+            "output must show number and title: {output}"
+        );
+    }
+
+    // -- token cost in status output ----------------------------------------
+
+    #[test]
+    fn status_shows_cost_when_token_usage_exists() {
+        use std::io::Write;
+        use tempfile::TempDir;
+
+        let tmp = TempDir::new().unwrap();
+        let stage_dir = tmp.path().join(".storkit/work/2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+
+        let story_path = stage_dir.join("293_story_register_all_bot_commands.md");
+        let mut f = std::fs::File::create(&story_path).unwrap();
+        writeln!(f, "---\nname: Register all bot commands\n---\n").unwrap();
+
+        // Write token usage for this story.
+        let usage = crate::agents::TokenUsage {
+            input_tokens: 100,
+            output_tokens: 200,
+            cache_creation_input_tokens: 0,
+            cache_read_input_tokens: 0,
+            total_cost_usd: 0.29,
+        };
+        let record = crate::agents::token_usage::build_record(
+            "293_story_register_all_bot_commands",
+            "coder-1",
+            None,
+            usage,
+        );
+        crate::agents::token_usage::append_record(tmp.path(), &record).unwrap();
+
+        let agents = AgentPool::new_test(3000);
+        let output = build_pipeline_status(tmp.path(), &agents);
+
+        assert!(
+            output.contains("293 — Register all bot commands — $0.29"),
+            "output must show cost next to story: {output}"
+        );
+    }
+
+    #[test]
+    fn status_no_cost_when_no_usage() {
+        use std::io::Write;
+        use tempfile::TempDir;
+
+        let tmp = TempDir::new().unwrap();
+        let stage_dir = tmp.path().join(".storkit/work/2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+
+        let story_path = stage_dir.join("293_story_register_all_bot_commands.md");
+        let mut f = std::fs::File::create(&story_path).unwrap();
+        writeln!(f, "---\nname: Register all bot commands\n---\n").unwrap();
+
+        // No token usage written.
+        let agents = AgentPool::new_test(3000);
+        let output = build_pipeline_status(tmp.path(), &agents);
+
+        assert!(
+            !output.contains("$"),
+            "output must not show cost when no usage exists: {output}"
+        );
+    }
+
+    #[test]
+    fn status_aggregates_multiple_records_per_story() {
+        use std::io::Write;
+        use tempfile::TempDir;
+
+        let tmp = TempDir::new().unwrap();
+        let stage_dir = tmp.path().join(".storkit/work/2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+
+        let story_path = stage_dir.join("293_story_register_all_bot_commands.md");
+        let mut f = std::fs::File::create(&story_path).unwrap();
+        writeln!(f, "---\nname: Register all bot commands\n---\n").unwrap();
+
+        // Write two records for the same story — costs should be summed.
+        for cost in [0.10, 0.19] {
+            let usage = crate::agents::TokenUsage {
+                input_tokens: 50,
+                output_tokens: 100,
+                cache_creation_input_tokens: 0,
+                cache_read_input_tokens: 0,
+                total_cost_usd: cost,
+            };
+            let record = crate::agents::token_usage::build_record(
+                "293_story_register_all_bot_commands",
+                "coder-1",
+                None,
+                usage,
+            );
+            crate::agents::token_usage::append_record(tmp.path(), &record).unwrap();
+        }
+
+        let agents = AgentPool::new_test(3000);
+        let output = build_pipeline_status(tmp.path(), &agents);
+
+        assert!(
+            output.contains("293 — Register all bot commands — $0.29"),
+            "output must show aggregated cost: {output}"
+        );
+    }
+}
--- a/server/src/matrix/commands/whatsup.rs
+++ b/server/src/matrix/commands/whatsup.rs
@@ -0,0 +1,548 @@
+//! Handler for the `whatsup` command.
+//!
+//! Produces a triage dump for a story that is currently in-progress
+//! (`work/2_current/`): metadata, acceptance criteria, worktree/branch state,
+//! git diff, recent commits, and the tail of the agent log.
+//!
+//! The command is handled entirely at the bot level — no LLM invocation.
+
+use super::CommandContext;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+/// Handle `{bot_name} whatsup {number}`.
+pub(super) fn handle_whatsup(ctx: &CommandContext) -> Option<String> {
+    let num_str = ctx.args.trim();
+    if num_str.is_empty() {
+        return Some(format!(
+            "Usage: `{} whatsup <number>`\n\nShows a triage dump for a story currently in progress.",
+            ctx.bot_name
+        ));
+    }
+    if !num_str.chars().all(|c| c.is_ascii_digit()) {
+        return Some(format!(
+            "Invalid story number: `{num_str}`. Usage: `{} whatsup <number>`",
+            ctx.bot_name
+        ));
+    }
+
+    let current_dir = ctx
+        .project_root
+        .join(".storkit")
+        .join("work")
+        .join("2_current");
+
+    match find_story_in_dir(&current_dir, num_str) {
+        Some((path, stem)) => Some(build_triage_dump(ctx, &path, &stem, num_str)),
+        None => Some(format!(
+            "Story **{num_str}** is not currently in progress (not found in `work/2_current/`)."
+        )),
+    }
+}
+
+/// Find a `.md` file whose numeric prefix matches `num_str` in `dir`.
+///
+/// Returns `(path, file_stem)` for the first match.
+fn find_story_in_dir(dir: &Path, num_str: &str) -> Option<(PathBuf, String)> {
+    let entries = std::fs::read_dir(dir).ok()?;
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if path.extension().and_then(|e| e.to_str()) != Some("md") {
+            continue;
+        }
+        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
+            let file_num = stem
+                .split('_')
+                .next()
+                .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                .unwrap_or("");
+            if file_num == num_str {
+                return Some((path.clone(), stem.to_string()));
+            }
+        }
+    }
+    None
+}
+
+/// Build the full triage dump for a story.
+fn build_triage_dump(
+    ctx: &CommandContext,
+    story_path: &Path,
+    story_id: &str,
+    num_str: &str,
+) -> String {
+    let contents = match std::fs::read_to_string(story_path) {
+        Ok(c) => c,
+        Err(e) => return format!("Failed to read story {num_str}: {e}"),
+    };
+
+    let meta = crate::io::story_metadata::parse_front_matter(&contents).ok();
+    let name = meta.as_ref().and_then(|m| m.name.as_deref()).unwrap_or("(unnamed)");
+
+    let mut out = String::new();
+
+    // ---- Header ----
+    out.push_str(&format!("## Story {num_str} — {name}\n"));
+    out.push_str("**Stage:** In Progress (`2_current`)\n\n");
+
+    // ---- Front matter fields ----
+    if let Some(ref m) = meta {
+        let mut fields: Vec<String> = Vec::new();
+        if let Some(true) = m.blocked {
+            fields.push("**blocked:** true".to_string());
+        }
+        if let Some(ref agent) = m.agent {
+            fields.push(format!("**agent:** {agent}"));
+        }
+        if let Some(ref qa) = m.qa {
+            fields.push(format!("**qa:** {qa}"));
+        }
+        if let Some(true) = m.review_hold {
+            fields.push("**review_hold:** true".to_string());
+        }
+        if let Some(rc) = m.retry_count
+            && rc > 0
+        {
+            fields.push(format!("**retry_count:** {rc}"));
+        }
+        if let Some(ref cb) = m.coverage_baseline {
+            fields.push(format!("**coverage_baseline:** {cb}"));
+        }
+        if let Some(ref mf) = m.merge_failure {
+            fields.push(format!("**merge_failure:** {mf}"));
+        }
+        if !fields.is_empty() {
+            out.push_str("**Front matter:**\n");
+            for f in &fields {
+                out.push_str(&format!("  • {f}\n"));
+            }
+            out.push('\n');
+        }
+    }
+
+    // ---- Acceptance criteria ----
+    let criteria = parse_acceptance_criteria(&contents);
+    if !criteria.is_empty() {
+        out.push_str("**Acceptance Criteria:**\n");
+        for (checked, text) in &criteria {
+            let mark = if *checked { "✅" } else { "⬜" };
+            out.push_str(&format!("  {mark} {text}\n"));
+        }
+        let total = criteria.len();
+        let done = criteria.iter().filter(|(c, _)| *c).count();
+        out.push_str(&format!("  *{done}/{total} complete*\n"));
+        out.push('\n');
+    }
+
+    // ---- Worktree and branch ----
+    let wt_path = crate::worktree::worktree_path(ctx.project_root, story_id);
+    let branch = format!("feature/story-{story_id}");
+    if wt_path.is_dir() {
+        out.push_str(&format!("**Worktree:** `{}`\n", wt_path.display()));
+        out.push_str(&format!("**Branch:** `{branch}`\n\n"));
+
+        // ---- git diff --stat ----
+        let diff_stat = run_git(
+            &wt_path,
+            &["diff", "--stat", "master...HEAD"],
+        );
+        if !diff_stat.is_empty() {
+            out.push_str("**Diff stat (vs master):**\n```\n");
+            out.push_str(&diff_stat);
+            out.push_str("```\n\n");
+        } else {
+            out.push_str("**Diff stat (vs master):** *(no changes)*\n\n");
+        }
+
+        // ---- Last 5 commits on feature branch ----
+        let log = run_git(
+            &wt_path,
+            &[
+                "log",
+                "master..HEAD",
+                "--pretty=format:%h %s",
+                "-5",
+            ],
+        );
+        if !log.is_empty() {
+            out.push_str("**Recent commits (branch only):**\n```\n");
+            out.push_str(&log);
+            out.push_str("\n```\n\n");
+        } else {
+            out.push_str("**Recent commits (branch only):** *(none yet)*\n\n");
+        }
+    } else {
+        out.push_str(&format!("**Branch:** `{branch}`\n"));
+        out.push_str("**Worktree:** *(not yet created)*\n\n");
+    }
+
+    // ---- Agent log tail ----
+    let log_dir = ctx
+        .project_root
+        .join(".storkit")
+        .join("logs")
+        .join(story_id);
+    match latest_log_file(&log_dir) {
+        Some(log_path) => {
+            let tail = read_log_tail(&log_path, 20);
+            let filename = log_path
+                .file_name()
+                .and_then(|n| n.to_str())
+                .unwrap_or("agent.log");
+            if tail.is_empty() {
+                out.push_str(&format!("**Agent log** (`{filename}`):** *(empty)*\n"));
+            } else {
+                out.push_str(&format!("**Agent log tail** (`{filename}`):\n```\n"));
+                out.push_str(&tail);
+                out.push_str("\n```\n");
+            }
+        }
+        None => {
+            out.push_str("**Agent log:** *(no log found)*\n");
+        }
+    }
+
+    out
+}
+
+/// Parse acceptance criteria from story markdown.
+///
+/// Returns a list of `(checked, text)` for every `- [ ] ...` and `- [x] ...` line.
+fn parse_acceptance_criteria(contents: &str) -> Vec<(bool, String)> {
+    contents
+        .lines()
+        .filter_map(|line| {
+            let trimmed = line.trim();
+            if let Some(text) = trimmed.strip_prefix("- [x] ").or_else(|| trimmed.strip_prefix("- [X] ")) {
+                Some((true, text.to_string()))
+            } else {
+                trimmed.strip_prefix("- [ ] ").map(|text| (false, text.to_string()))
+            }
+        })
+        .collect()
+}
+
+/// Run a git command in the given directory, returning trimmed stdout (or empty on error).
+fn run_git(dir: &Path, args: &[&str]) -> String {
+    Command::new("git")
+        .args(args)
+        .current_dir(dir)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
+        .unwrap_or_default()
+}
+
+/// Find the most recently modified `.log` file in the given directory,
+/// regardless of agent name.
+fn latest_log_file(log_dir: &Path) -> Option<PathBuf> {
+    if !log_dir.is_dir() {
+        return None;
+    }
+    let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
+    for entry in std::fs::read_dir(log_dir).ok()?.flatten() {
+        let path = entry.path();
+        if path.extension().and_then(|e| e.to_str()) != Some("log") {
+            continue;
+        }
+        let modified = match entry.metadata().and_then(|m| m.modified()) {
+            Ok(t) => t,
+            Err(_) => continue,
+        };
+        if best.as_ref().is_none_or(|(_, t)| modified > *t) {
+            best = Some((path, modified));
+        }
+    }
+    best.map(|(p, _)| p)
+}
+
+/// Read the last `n` non-empty lines from a file as a single string.
+fn read_log_tail(path: &Path, n: usize) -> String {
+    let contents = match std::fs::read_to_string(path) {
+        Ok(c) => c,
+        Err(_) => return String::new(),
+    };
+    let lines: Vec<&str> = contents.lines().filter(|l| !l.trim().is_empty()).collect();
+    let start = lines.len().saturating_sub(n);
+    lines[start..].join("\n")
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::agents::AgentPool;
+    use std::collections::HashSet;
+    use std::sync::{Arc, Mutex};
+
+    use super::super::{CommandDispatch, try_handle_command};
+
+    fn whatsup_cmd(root: &Path, args: &str) -> Option<String> {
+        let agents = Arc::new(AgentPool::new_test(3000));
+        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
+        let room_id = "!test:example.com".to_string();
+        let dispatch = CommandDispatch {
+            bot_name: "Timmy",
+            bot_user_id: "@timmy:homeserver.local",
+            project_root: root,
+            agents: &agents,
+            ambient_rooms: &ambient_rooms,
+            room_id: &room_id,
+        };
+        try_handle_command(&dispatch, &format!("@timmy whatsup {args}"))
+    }
+
+    fn write_story_file(root: &Path, stage: &str, filename: &str, content: &str) {
+        let dir = root.join(".storkit/work").join(stage);
+        std::fs::create_dir_all(&dir).unwrap();
+        std::fs::write(dir.join(filename), content).unwrap();
+    }
+
+    // -- registration -------------------------------------------------------
+
+    #[test]
+    fn whatsup_command_is_registered() {
+        let found = super::super::commands().iter().any(|c| c.name == "whatsup");
+        assert!(found, "whatsup command must be in the registry");
+    }
+
+    #[test]
+    fn whatsup_command_appears_in_help() {
+        let result = super::super::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("whatsup"),
+            "help should list whatsup command: {output}"
+        );
+    }
+
+    // -- input validation ---------------------------------------------------
+
+    #[test]
+    fn whatsup_no_args_returns_usage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = whatsup_cmd(tmp.path(), "").unwrap();
+        assert!(
+            output.contains("Usage"),
+            "no args should show usage: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_non_numeric_returns_error() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let output = whatsup_cmd(tmp.path(), "abc").unwrap();
+        assert!(
+            output.contains("Invalid"),
+            "non-numeric arg should return error: {output}"
+        );
+    }
+
+    // -- not found ----------------------------------------------------------
+
+    #[test]
+    fn whatsup_story_not_in_current_returns_friendly_message() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        // Create the directory but put the story in backlog, not current
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "42_story_not_in_current.md",
+            "---\nname: Not in current\n---\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "42").unwrap();
+        assert!(
+            output.contains("42"),
+            "message should include story number: {output}"
+        );
+        assert!(
+            output.contains("not") || output.contains("Not"),
+            "message should say not found/in progress: {output}"
+        );
+    }
+
+    // -- found in 2_current -------------------------------------------------
+
+    #[test]
+    fn whatsup_shows_story_name_and_stage() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "99_story_my_feature.md",
+            "---\nname: My Feature\n---\n\n## Acceptance Criteria\n\n- [ ] First thing\n- [x] Done thing\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "99").unwrap();
+        assert!(output.contains("99"), "should show story number: {output}");
+        assert!(
+            output.contains("My Feature"),
+            "should show story name: {output}"
+        );
+        assert!(
+            output.contains("In Progress") || output.contains("2_current"),
+            "should show pipeline stage: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_shows_acceptance_criteria() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "99_story_criteria_test.md",
+            "---\nname: Criteria Test\n---\n\n- [ ] First thing\n- [x] Done thing\n- [ ] Second thing\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "99").unwrap();
+        assert!(
+            output.contains("First thing"),
+            "should show unchecked criterion: {output}"
+        );
+        assert!(
+            output.contains("Done thing"),
+            "should show checked criterion: {output}"
+        );
+        // 1 of 3 done
+        assert!(
+            output.contains("1/3"),
+            "should show checked/total count: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_shows_blocked_field() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "55_story_blocked_story.md",
+            "---\nname: Blocked Story\nblocked: true\n---\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "55").unwrap();
+        assert!(
+            output.contains("blocked"),
+            "should show blocked field: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_shows_agent_field() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "55_story_agent_story.md",
+            "---\nname: Agent Story\nagent: coder-1\n---\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "55").unwrap();
+        assert!(
+            output.contains("coder-1"),
+            "should show agent field: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_no_worktree_shows_not_created() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "77_story_no_worktree.md",
+            "---\nname: No Worktree\n---\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "77").unwrap();
+        // Branch name should still appear
+        assert!(
+            output.contains("feature/story-77"),
+            "should show branch name: {output}"
+        );
+    }
+
+    #[test]
+    fn whatsup_no_log_shows_no_log_message() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        write_story_file(
+            tmp.path(),
+            "2_current",
+            "77_story_no_log.md",
+            "---\nname: No Log\n---\n",
+        );
+        let output = whatsup_cmd(tmp.path(), "77").unwrap();
+        assert!(
+            output.contains("no log") || output.contains("No log") || output.contains("*(no log found)*"),
+            "should indicate no log exists: {output}"
+        );
+    }
+
+    // -- parse_acceptance_criteria ------------------------------------------
+
+    #[test]
+    fn parse_criteria_mixed() {
+        let input = "## AC\n- [ ] First\n- [x] Done\n- [X] Also done\n- [ ] Last\n";
+        let result = parse_acceptance_criteria(input);
+        assert_eq!(result.len(), 4);
+        assert_eq!(result[0], (false, "First".to_string()));
+        assert_eq!(result[1], (true, "Done".to_string()));
+        assert_eq!(result[2], (true, "Also done".to_string()));
+        assert_eq!(result[3], (false, "Last".to_string()));
+    }
+
+    #[test]
+    fn parse_criteria_empty() {
+        let input = "# Story\nNo checkboxes here.\n";
+        let result = parse_acceptance_criteria(input);
+        assert!(result.is_empty());
+    }
+
+    // -- read_log_tail -------------------------------------------------------
+
+    #[test]
+    fn read_log_tail_returns_last_n_lines() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let path = tmp.path().join("test.log");
+        let content = (1..=30).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
+        std::fs::write(&path, &content).unwrap();
+        let tail = read_log_tail(&path, 5);
+        let lines: Vec<&str> = tail.lines().collect();
+        assert_eq!(lines.len(), 5);
+        assert_eq!(lines[0], "line 26");
+        assert_eq!(lines[4], "line 30");
+    }
+
+    #[test]
+    fn read_log_tail_fewer_lines_than_n() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let path = tmp.path().join("short.log");
+        std::fs::write(&path, "line A\nline B\n").unwrap();
+        let tail = read_log_tail(&path, 20);
+        assert!(tail.contains("line A"));
+        assert!(tail.contains("line B"));
+    }
+
+    // -- latest_log_file ----------------------------------------------------
+
+    #[test]
+    fn latest_log_file_returns_none_for_missing_dir() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let result = latest_log_file(&tmp.path().join("nonexistent"));
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn latest_log_file_finds_log() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        let log_path = tmp.path().join("coder-1-sess-abc.log");
+        std::fs::write(&log_path, "some log content\n").unwrap();
+        let result = latest_log_file(tmp.path());
+        assert!(result.is_some());
+        assert_eq!(result.unwrap(), log_path);
+    }
+}
--- a/server/src/matrix/config.rs
+++ b/server/src/matrix/config.rs
@@ -0,0 +1,815 @@
+use serde::Deserialize;
+use std::path::Path;
+
+fn default_history_size() -> usize {
+    20
+}
+
+fn default_permission_timeout_secs() -> u64 {
+    120
+}
+
+/// Configuration for the Matrix bot, read from `.storkit/bot.toml`.
+#[derive(Deserialize, Clone, Debug)]
+pub struct BotConfig {
+    /// Matrix homeserver URL, e.g. `https://matrix.example.com`
+    pub homeserver: String,
+    /// Bot user ID, e.g. `@storykit:example.com`
+    pub username: String,
+    /// Bot password
+    pub password: String,
+    /// Matrix room IDs to join, e.g. `["!roomid:example.com"]`.
+    /// Use an array for multiple rooms; a single string is accepted via the
+    /// deprecated `room_id` key for backwards compatibility.
+    #[serde(default)]
+    pub room_ids: Vec<String>,
+    /// Deprecated: use `room_ids` (list) instead. Still accepted so existing
+    /// `bot.toml` files continue to work without modification.
+    #[serde(default)]
+    pub room_id: Option<String>,
+    /// Set to `true` to enable the bot (default: false)
+    #[serde(default)]
+    pub enabled: bool,
+    /// Matrix user IDs allowed to interact with the bot.
+    /// If empty or omitted, the bot ignores ALL messages (fail-closed).
+    #[serde(default)]
+    pub allowed_users: Vec<String>,
+    /// Maximum number of conversation turns (user + assistant pairs) to keep
+    /// per room. When the history exceeds this limit the oldest messages are
+    /// dropped. Defaults to 20.
+    #[serde(default = "default_history_size")]
+    pub history_size: usize,
+    /// Timeout in seconds for permission prompts surfaced to the Matrix room.
+    /// If the user does not respond within this window the permission is denied
+    /// (fail-closed). Defaults to 120 seconds.
+    #[serde(default = "default_permission_timeout_secs")]
+    pub permission_timeout_secs: u64,
+    /// Previously used to select an Anthropic model. Now ignored — the bot
+    /// uses Claude Code which manages its own model selection. Kept for
+    /// backwards compatibility so existing bot.toml files still parse.
+    #[allow(dead_code)]
+    pub model: Option<String>,
+    /// Display name the bot uses to identify itself in conversations.
+    /// If unset, the bot falls back to "Assistant".
+    #[serde(default)]
+    pub display_name: Option<String>,
+    /// Room IDs where ambient mode is active (bot responds to all messages).
+    /// Updated at runtime when the user toggles ambient mode — do not edit
+    /// manually while the bot is running.
+    #[serde(default)]
+    pub ambient_rooms: Vec<String>,
+    /// Chat transport to use: `"matrix"` (default) or `"whatsapp"`.
+    ///
+    /// Selects which [`ChatTransport`] implementation the bot uses for
+    /// sending and editing messages.  Currently only read during bot
+    /// startup to select the transport; the field is kept for config
+    /// round-tripping.
+    #[serde(default = "default_transport")]
+    pub transport: String,
+
+    // ── WhatsApp Business API fields ─────────────────────────────────
+    // These are only required when `transport = "whatsapp"`.
+
+    /// WhatsApp Business phone number ID from the Meta dashboard.
+    #[serde(default)]
+    pub whatsapp_phone_number_id: Option<String>,
+    /// Long-lived access token for the WhatsApp Business API.
+    #[serde(default)]
+    pub whatsapp_access_token: Option<String>,
+    /// Verify token used in the webhook handshake (you choose this value
+    /// and configure it in the Meta webhook settings).
+    #[serde(default)]
+    pub whatsapp_verify_token: Option<String>,
+    /// Name of the approved Meta message template used for pipeline
+    /// notifications when the 24-hour messaging window has expired.
+    ///
+    /// The template must be registered in the Meta Business Manager before
+    /// use.  Defaults to `"pipeline_notification"`.
+    #[serde(default)]
+    pub whatsapp_notification_template: Option<String>,
+
+    // ── Slack Bot API fields ─────────────────────────────────────────
+    // These are only required when `transport = "slack"`.
+
+    /// Slack Bot User OAuth Token (starts with `xoxb-`).
+    #[serde(default)]
+    pub slack_bot_token: Option<String>,
+    /// Slack Signing Secret used to verify incoming webhook requests.
+    #[serde(default)]
+    pub slack_signing_secret: Option<String>,
+    /// Slack channel IDs the bot should listen in.
+    #[serde(default)]
+    pub slack_channel_ids: Vec<String>,
+}
+
+fn default_transport() -> String {
+    "matrix".to_string()
+}
+
+impl BotConfig {
+    /// Load bot configuration from `.storkit/bot.toml`.
+    ///
+    /// Returns `None` if the file does not exist, fails to parse, has
+    /// `enabled = false`, or specifies no room IDs.
+    pub fn load(project_root: &Path) -> Option<Self> {
+        let path = project_root.join(".storkit").join("bot.toml");
+        if !path.exists() {
+            return None;
+        }
+        let content = std::fs::read_to_string(&path)
+            .map_err(|e| eprintln!("[matrix-bot] Failed to read bot.toml: {e}"))
+            .ok()?;
+        let mut config: BotConfig = toml::from_str(&content)
+            .map_err(|e| eprintln!("[matrix-bot] Invalid bot.toml: {e}"))
+            .ok()?;
+        if !config.enabled {
+            return None;
+        }
+        // Merge deprecated `room_id` (single string) into `room_ids` (list).
+        if let Some(single) = config.room_id.take()
+            && !config.room_ids.contains(&single)
+        {
+            config.room_ids.push(single);
+        }
+
+        if config.transport == "whatsapp" {
+            // Validate WhatsApp-specific fields.
+            if config.whatsapp_phone_number_id.as_ref().is_none_or(|s| s.is_empty()) {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"whatsapp\" requires \
+                     whatsapp_phone_number_id"
+                );
+                return None;
+            }
+            if config.whatsapp_access_token.as_ref().is_none_or(|s| s.is_empty()) {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"whatsapp\" requires \
+                     whatsapp_access_token"
+                );
+                return None;
+            }
+            if config.whatsapp_verify_token.as_ref().is_none_or(|s| s.is_empty()) {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"whatsapp\" requires \
+                     whatsapp_verify_token"
+                );
+                return None;
+            }
+        } else if config.transport == "slack" {
+            // Validate Slack-specific fields.
+            if config.slack_bot_token.as_ref().is_none_or(|s| s.is_empty()) {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"slack\" requires \
+                     slack_bot_token"
+                );
+                return None;
+            }
+            if config.slack_signing_secret.as_ref().is_none_or(|s| s.is_empty()) {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"slack\" requires \
+                     slack_signing_secret"
+                );
+                return None;
+            }
+            if config.slack_channel_ids.is_empty() {
+                eprintln!(
+                    "[bot] bot.toml: transport=\"slack\" requires \
+                     at least one slack_channel_ids entry"
+                );
+                return None;
+            }
+        } else if config.room_ids.is_empty() {
+            eprintln!(
+                "[matrix-bot] bot.toml has no room_ids configured — \
+                 add `room_ids = [\"!roomid:example.com\"]` to bot.toml"
+            );
+            return None;
+        }
+        Some(config)
+    }
+
+    /// Returns all configured room IDs as a flat list. Combines `room_ids`
+    /// and (after loading) any merged `room_id` value.
+    pub fn effective_room_ids(&self) -> &[String] {
+        &self.room_ids
+    }
+}
+
+/// Persist the current set of ambient room IDs back to `bot.toml`.
+///
+/// Reads the existing file as a TOML document, updates the `ambient_rooms`
+/// array, and writes the result back.  Errors are logged but not propagated
+/// so a persistence failure never interrupts the bot's message handling.
+pub fn save_ambient_rooms(project_root: &Path, room_ids: &[String]) {
+    let path = project_root.join(".storkit").join("bot.toml");
+    let content = match std::fs::read_to_string(&path) {
+        Ok(c) => c,
+        Err(e) => {
+            eprintln!("[matrix-bot] save_ambient_rooms: failed to read bot.toml: {e}");
+            return;
+        }
+    };
+    let mut doc: toml::Value = match toml::from_str(&content) {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("[matrix-bot] save_ambient_rooms: failed to parse bot.toml: {e}");
+            return;
+        }
+    };
+    if let toml::Value::Table(ref mut t) = doc {
+        let arr = toml::Value::Array(
+            room_ids
+                .iter()
+                .map(|s| toml::Value::String(s.clone()))
+                .collect(),
+        );
+        t.insert("ambient_rooms".to_string(), arr);
+    }
+    match toml::to_string_pretty(&doc) {
+        Ok(new_content) => {
+            if let Err(e) = std::fs::write(&path, new_content) {
+                eprintln!("[matrix-bot] save_ambient_rooms: failed to write bot.toml: {e}");
+            }
+        }
+        Err(e) => eprintln!("[matrix-bot] save_ambient_rooms: failed to serialise bot.toml: {e}"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+
+    #[test]
+    fn load_returns_none_when_file_missing() {
+        let tmp = tempfile::tempdir().unwrap();
+        let result = BotConfig::load(tmp.path());
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn load_returns_none_when_disabled() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = false
+"#,
+        )
+        .unwrap();
+        let result = BotConfig::load(tmp.path());
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn load_returns_config_when_enabled_with_room_ids() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com", "!def:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let result = BotConfig::load(tmp.path());
+        assert!(result.is_some());
+        let config = result.unwrap();
+        assert_eq!(config.homeserver, "https://matrix.example.com");
+        assert_eq!(config.username, "@bot:example.com");
+        assert_eq!(
+            config.effective_room_ids(),
+            &["!abc:example.com", "!def:example.com"]
+        );
+        assert!(config.model.is_none());
+    }
+
+    #[test]
+    fn load_merges_deprecated_room_id_into_room_ids() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        // Old-style single room_id key — should still work.
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_id = "!abc:example.com"
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.effective_room_ids(), &["!abc:example.com"]);
+    }
+
+    #[test]
+    fn load_returns_none_when_no_room_ids() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+"#,
+        )
+        .unwrap();
+        let result = BotConfig::load(tmp.path());
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn load_returns_none_when_toml_invalid() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(sk.join("bot.toml"), "not valid toml {{{").unwrap();
+        let result = BotConfig::load(tmp.path());
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn load_respects_optional_model() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+model = "claude-sonnet-4-6"
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.model.as_deref(), Some("claude-sonnet-4-6"));
+    }
+
+    #[test]
+    fn load_uses_default_history_size() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.history_size, 20);
+    }
+
+    #[test]
+    fn load_respects_custom_history_size() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+history_size = 50
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.history_size, 50);
+    }
+
+    #[test]
+    fn load_reads_display_name() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+display_name = "Timmy"
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.display_name.as_deref(), Some("Timmy"));
+    }
+
+    #[test]
+    fn load_display_name_defaults_to_none_when_absent() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert!(config.display_name.is_none());
+    }
+
+    #[test]
+    fn load_uses_default_permission_timeout() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.permission_timeout_secs, 120);
+    }
+
+    #[test]
+    fn load_respects_custom_permission_timeout() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+permission_timeout_secs = 60
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.permission_timeout_secs, 60);
+    }
+
+    #[test]
+    fn load_ignores_legacy_require_verified_devices_key() {
+        // Old bot.toml files that still have `require_verified_devices = true`
+        // must parse successfully — the field is simply ignored now that
+        // verification is always enforced unconditionally.
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+require_verified_devices = true
+"#,
+        )
+        .unwrap();
+        // Should still load successfully despite the unknown field.
+        let config = BotConfig::load(tmp.path());
+        assert!(
+            config.is_some(),
+            "bot.toml with legacy require_verified_devices key must still load"
+        );
+    }
+
+    #[test]
+    fn load_reads_ambient_rooms() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+ambient_rooms = ["!abc:example.com"]
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.ambient_rooms, vec!["!abc:example.com"]);
+    }
+
+    #[test]
+    fn load_ambient_rooms_defaults_to_empty_when_absent() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert!(config.ambient_rooms.is_empty());
+    }
+
+    #[test]
+    fn save_ambient_rooms_persists_to_bot_toml() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+
+        save_ambient_rooms(tmp.path(), &["!abc:example.com".to_string()]);
+
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.ambient_rooms, vec!["!abc:example.com"]);
+    }
+
+    #[test]
+    fn save_ambient_rooms_clears_when_empty() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+ambient_rooms = ["!abc:example.com"]
+"#,
+        )
+        .unwrap();
+
+        save_ambient_rooms(tmp.path(), &[]);
+
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert!(config.ambient_rooms.is_empty());
+    }
+
+    #[test]
+    fn load_transport_defaults_to_matrix() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.transport, "matrix");
+    }
+
+    #[test]
+    fn load_transport_reads_custom_value() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+room_ids = ["!abc:example.com"]
+enabled = true
+transport = "whatsapp"
+whatsapp_phone_number_id = "123456"
+whatsapp_access_token = "EAAtoken"
+whatsapp_verify_token = "my-verify"
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.transport, "whatsapp");
+        assert_eq!(
+            config.whatsapp_phone_number_id.as_deref(),
+            Some("123456")
+        );
+        assert_eq!(
+            config.whatsapp_access_token.as_deref(),
+            Some("EAAtoken")
+        );
+        assert_eq!(
+            config.whatsapp_verify_token.as_deref(),
+            Some("my-verify")
+        );
+    }
+
+    #[test]
+    fn load_whatsapp_returns_none_when_missing_phone_number_id() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "whatsapp"
+whatsapp_access_token = "EAAtoken"
+whatsapp_verify_token = "my-verify"
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+
+    #[test]
+    fn load_whatsapp_returns_none_when_missing_access_token() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "whatsapp"
+whatsapp_phone_number_id = "123456"
+whatsapp_verify_token = "my-verify"
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+
+    #[test]
+    fn load_whatsapp_returns_none_when_missing_verify_token() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "whatsapp"
+whatsapp_phone_number_id = "123456"
+whatsapp_access_token = "EAAtoken"
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+
+    // ── Slack config tests ─────────────────────────────────────────────
+
+    #[test]
+    fn load_slack_transport_reads_config() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "slack"
+slack_bot_token = "xoxb-123"
+slack_signing_secret = "secret123"
+slack_channel_ids = ["C01ABCDEF"]
+"#,
+        )
+        .unwrap();
+        let config = BotConfig::load(tmp.path()).unwrap();
+        assert_eq!(config.transport, "slack");
+        assert_eq!(config.slack_bot_token.as_deref(), Some("xoxb-123"));
+        assert_eq!(config.slack_signing_secret.as_deref(), Some("secret123"));
+        assert_eq!(config.slack_channel_ids, vec!["C01ABCDEF"]);
+    }
+
+    #[test]
+    fn load_slack_returns_none_when_missing_bot_token() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "slack"
+slack_signing_secret = "secret123"
+slack_channel_ids = ["C01ABCDEF"]
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+
+    #[test]
+    fn load_slack_returns_none_when_missing_signing_secret() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "slack"
+slack_bot_token = "xoxb-123"
+slack_channel_ids = ["C01ABCDEF"]
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+
+    #[test]
+    fn load_slack_returns_none_when_missing_channel_ids() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        fs::create_dir_all(&sk).unwrap();
+        fs::write(
+            sk.join("bot.toml"),
+            r#"
+homeserver = "https://matrix.example.com"
+username = "@bot:example.com"
+password = "secret"
+enabled = true
+transport = "slack"
+slack_bot_token = "xoxb-123"
+slack_signing_secret = "secret123"
+"#,
+        )
+        .unwrap();
+        assert!(BotConfig::load(tmp.path()).is_none());
+    }
+}
--- a/server/src/matrix/delete.rs
+++ b/server/src/matrix/delete.rs
@@ -0,0 +1,384 @@
+//! Delete command: remove a story/bug/spike from the pipeline.
+//!
+//! `{bot_name} delete {number}` finds the work item by number across all pipeline
+//! stages, stops any running agent, removes the worktree, deletes the file, and
+//! commits the change to git.
+
+use crate::agents::{AgentPool, AgentStatus};
+use std::path::Path;
+
+/// A parsed delete command from a Matrix message body.
+#[derive(Debug, PartialEq)]
+pub enum DeleteCommand {
+    /// Delete the story with this number (digits only, e.g. `"42"`).
+    Delete { story_number: String },
+    /// The user typed `delete` but without a valid numeric argument.
+    BadArgs,
+}
+
+/// Parse a delete command from a raw Matrix message body.
+///
+/// Strips the bot mention prefix and checks whether the first word is `delete`.
+/// Returns `None` when the message is not a delete command at all.
+pub fn extract_delete_command(
+    message: &str,
+    bot_name: &str,
+    bot_user_id: &str,
+) -> Option<DeleteCommand> {
+    let stripped = strip_mention(message, bot_name, bot_user_id);
+    let trimmed = stripped
+        .trim()
+        .trim_start_matches(|c: char| !c.is_alphanumeric());
+
+    let (cmd, args) = match trimmed.split_once(char::is_whitespace) {
+        Some((c, a)) => (c, a.trim()),
+        None => (trimmed, ""),
+    };
+
+    if !cmd.eq_ignore_ascii_case("delete") {
+        return None;
+    }
+
+    if !args.is_empty() && args.chars().all(|c| c.is_ascii_digit()) {
+        Some(DeleteCommand::Delete {
+            story_number: args.to_string(),
+        })
+    } else {
+        Some(DeleteCommand::BadArgs)
+    }
+}
+
+/// Handle a delete command asynchronously.
+///
+/// Finds the work item by `story_number` across all pipeline stages, stops any
+/// running agent, removes the worktree, deletes the file, and commits to git.
+/// Returns a markdown-formatted response string.
+pub async fn handle_delete(
+    bot_name: &str,
+    story_number: &str,
+    project_root: &Path,
+    agents: &AgentPool,
+) -> String {
+    const STAGES: &[&str] = &[
+        "1_backlog",
+        "2_current",
+        "3_qa",
+        "4_merge",
+        "5_done",
+        "6_archived",
+    ];
+
+    // Find the story file across all pipeline stages.
+    let mut found: Option<(std::path::PathBuf, &str, String)> = None; // (path, stage, story_id)
+    'outer: for stage in STAGES {
+        let dir = project_root.join(".storkit").join("work").join(stage);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path
+                    .file_stem()
+                    .and_then(|s| s.to_str())
+                    .map(|s| s.to_string())
+                {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("")
+                        .to_string();
+                    if file_num == story_number {
+                        found = Some((path, stage, stem));
+                        break 'outer;
+                    }
+                }
+            }
+        }
+    }
+
+    let (path, stage, story_id) = match found {
+        Some(f) => f,
+        None => {
+            return format!("No story, bug, or spike with number **{story_number}** found.");
+        }
+    };
+
+    // Read the human-readable name from front matter for the confirmation message.
+    let story_name = std::fs::read_to_string(&path)
+        .ok()
+        .and_then(|contents| {
+            crate::io::story_metadata::parse_front_matter(&contents)
+                .ok()
+                .and_then(|m| m.name)
+        })
+        .unwrap_or_else(|| story_id.clone());
+
+    // Stop any running or pending agents for this story.
+    let running_agents: Vec<(String, String)> = agents
+        .list_agents()
+        .unwrap_or_default()
+        .into_iter()
+        .filter(|a| {
+            a.story_id == story_id
+                && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
+        })
+        .map(|a| (a.story_id.clone(), a.agent_name.clone()))
+        .collect();
+
+    let mut stopped_agents: Vec<String> = Vec::new();
+    for (sid, agent_name) in &running_agents {
+        if let Err(e) = agents.stop_agent(project_root, sid, agent_name).await {
+            return format!("Failed to stop agent '{agent_name}' for story {story_number}: {e}");
+        }
+        stopped_agents.push(agent_name.clone());
+    }
+
+    // Remove the worktree if one exists (best-effort; ignore errors).
+    let _ = crate::worktree::prune_worktree_sync(project_root, &story_id);
+
+    // Delete the story file.
+    if let Err(e) = std::fs::remove_file(&path) {
+        return format!("Failed to delete story {story_number}: {e}");
+    }
+
+    // Commit the deletion to git.
+    let commit_msg = format!("storkit: delete {story_id}");
+    let work_rel = std::path::PathBuf::from(".storkit").join("work");
+    let _ = std::process::Command::new("git")
+        .args(["add", "-A"])
+        .arg(&work_rel)
+        .current_dir(project_root)
+        .output();
+    let _ = std::process::Command::new("git")
+        .args(["commit", "-m", &commit_msg])
+        .current_dir(project_root)
+        .output();
+
+    // Build the response.
+    let stage_label = stage_display_name(stage);
+    let mut response = format!("Deleted **{story_name}** from **{stage_label}**.");
+    if !stopped_agents.is_empty() {
+        let agent_list = stopped_agents.join(", ");
+        response.push_str(&format!(" Stopped agent(s): {agent_list}."));
+    }
+
+    crate::slog!("[matrix-bot] delete command: removed {story_id} from {stage} (bot={bot_name})");
+
+    response
+}
+
+/// Human-readable label for a pipeline stage directory name.
+fn stage_display_name(stage: &str) -> &str {
+    match stage {
+        "1_backlog" => "backlog",
+        "2_current" => "in-progress",
+        "3_qa" => "QA",
+        "4_merge" => "merge",
+        "5_done" => "done",
+        "6_archived" => "archived",
+        other => other,
+    }
+}
+
+/// Strip the bot mention prefix from a raw Matrix message body.
+///
+/// Mirrors the logic in `commands::strip_bot_mention` and `htop::strip_mention`
+/// so delete detection works without depending on private symbols.
+fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+    trimmed
+}
+
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    match rest.chars().next() {
+        None => Some(rest),
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
+        _ => Some(rest),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // -- extract_delete_command ---------------------------------------------
+
+    #[test]
+    fn extract_with_full_user_id() {
+        let cmd =
+            extract_delete_command("@timmy:home.local delete 42", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(DeleteCommand::Delete {
+                story_number: "42".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn extract_with_display_name() {
+        let cmd = extract_delete_command("Timmy delete 310", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(DeleteCommand::Delete {
+                story_number: "310".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn extract_with_localpart() {
+        let cmd = extract_delete_command("@timmy delete 7", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(DeleteCommand::Delete {
+                story_number: "7".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn extract_case_insensitive_command() {
+        let cmd = extract_delete_command("Timmy DELETE 99", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(DeleteCommand::Delete {
+                story_number: "99".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn extract_no_args_is_bad_args() {
+        let cmd = extract_delete_command("Timmy delete", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(DeleteCommand::BadArgs));
+    }
+
+    #[test]
+    fn extract_non_numeric_arg_is_bad_args() {
+        let cmd = extract_delete_command("Timmy delete foo", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(DeleteCommand::BadArgs));
+    }
+
+    #[test]
+    fn extract_non_delete_command_returns_none() {
+        let cmd = extract_delete_command("Timmy help", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, None);
+    }
+
+    #[test]
+    fn extract_no_bot_prefix_returns_none() {
+        let cmd = extract_delete_command("delete 42", "Timmy", "@timmy:home.local");
+        // Without mention prefix the raw text is "delete 42" — cmd is "delete", args "42"
+        // strip_mention returns the full trimmed text when no prefix matches,
+        // so this is a valid delete command addressed to no-one (ambient mode).
+        assert_eq!(
+            cmd,
+            Some(DeleteCommand::Delete {
+                story_number: "42".to_string()
+            })
+        );
+    }
+
+    // -- handle_delete (integration-style, uses temp filesystem) -----------
+
+    #[tokio::test]
+    async fn handle_delete_returns_not_found_for_unknown_number() {
+        let tmp = tempfile::tempdir().unwrap();
+        let project_root = tmp.path();
+        // Create the pipeline directories.
+        for stage in &[
+            "1_backlog",
+            "2_current",
+            "3_qa",
+            "4_merge",
+            "5_done",
+            "6_archived",
+        ] {
+            std::fs::create_dir_all(project_root.join(".storkit").join("work").join(stage))
+                .unwrap();
+        }
+        let agents = std::sync::Arc::new(crate::agents::AgentPool::new_test(3000));
+        let response = handle_delete("Timmy", "999", project_root, &agents).await;
+        assert!(
+            response.contains("No story") && response.contains("999"),
+            "unexpected response: {response}"
+        );
+    }
+
+    #[tokio::test]
+    async fn handle_delete_removes_story_file_and_confirms() {
+        let tmp = tempfile::tempdir().unwrap();
+        let project_root = tmp.path();
+
+        // Init a bare git repo so the commit step doesn't fail fatally.
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(project_root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(project_root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(project_root)
+            .output()
+            .unwrap();
+
+        let backlog_dir = project_root.join(".storkit").join("work").join("1_backlog");
+        std::fs::create_dir_all(&backlog_dir).unwrap();
+        let story_path = backlog_dir.join("42_story_some_feature.md");
+        std::fs::write(&story_path, "---\nname: Some Feature\n---\n\n# Story 42\n").unwrap();
+
+        // Initial commit so git doesn't complain about no commits.
+        std::process::Command::new("git")
+            .args(["add", "-A"])
+            .current_dir(project_root)
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(project_root)
+            .output()
+            .unwrap();
+
+        let agents = std::sync::Arc::new(crate::agents::AgentPool::new_test(3000));
+        let response = handle_delete("Timmy", "42", project_root, &agents).await;
+
+        assert!(
+            response.contains("Some Feature") && response.contains("backlog"),
+            "unexpected response: {response}"
+        );
+        assert!(!story_path.exists(), "story file should have been deleted");
+    }
+}
--- a/server/src/matrix/htop.rs
+++ b/server/src/matrix/htop.rs
@@ -0,0 +1,596 @@
+//! htop command: live-updating system and agent process dashboard.
+//!
+//! Sends an initial message to a Matrix room showing load average and
+//! per-agent process info, then edits it in-place every 5 seconds using
+//! Matrix replacement events.  A single htop session per room is enforced;
+//! a new `htop` invocation stops any existing session and starts a fresh one.
+//! Sessions auto-stop after the configured duration (default 5 minutes).
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::Duration;
+
+use tokio::sync::{Mutex as TokioMutex, watch};
+
+use crate::agents::{AgentPool, AgentStatus};
+use crate::slog;
+use crate::transport::ChatTransport;
+
+use super::bot::markdown_to_html;
+
+/// A parsed htop command from a Matrix message body.
+#[derive(Debug, PartialEq)]
+pub enum HtopCommand {
+    /// Start (or restart) monitoring.  `duration_secs` is the auto-stop
+    /// timeout; defaults to 300 (5 minutes).
+    Start { duration_secs: u64 },
+    /// Stop any active monitoring session for the room.
+    Stop,
+}
+
+/// Per-room htop session: holds the stop-signal sender so callers can cancel.
+pub struct HtopSession {
+    /// Send `true` to request a graceful stop of the background loop.
+    pub stop_tx: watch::Sender<bool>,
+}
+
+/// Per-room htop session map type alias.
+///
+/// Keys are platform-agnostic room ID strings (e.g. `"!abc:example.com"` on
+/// Matrix) so this type works with any [`ChatTransport`] implementation.
+pub type HtopSessions = Arc<TokioMutex<HashMap<String, HtopSession>>>;
+
+/// Parse an htop command from a raw Matrix message body.
+///
+/// Strips the bot mention prefix and checks whether the first word is `htop`.
+/// Returns `None` when the message is not an htop command.
+///
+/// Recognised forms (after stripping the bot mention):
+/// - `htop`          → `Start { duration_secs: 300 }`
+/// - `htop stop`     → `Stop`
+/// - `htop 10m`      → `Start { duration_secs: 600 }`
+/// - `htop 120`      → `Start { duration_secs: 120 }` (bare seconds)
+pub fn extract_htop_command(message: &str, bot_name: &str, bot_user_id: &str) -> Option<HtopCommand> {
+    let stripped = strip_mention(message, bot_name, bot_user_id);
+    let trimmed = stripped.trim();
+
+    // Strip leading punctuation (e.g. the comma in "@timmy, htop")
+    let trimmed = trimmed.trim_start_matches(|c: char| !c.is_alphanumeric());
+
+    let (cmd, args) = match trimmed.split_once(char::is_whitespace) {
+        Some((c, a)) => (c, a.trim()),
+        None => (trimmed, ""),
+    };
+
+    if !cmd.eq_ignore_ascii_case("htop") {
+        return None;
+    }
+
+    if args.eq_ignore_ascii_case("stop") {
+        return Some(HtopCommand::Stop);
+    }
+
+    let duration_secs = parse_duration(args).unwrap_or(300);
+    Some(HtopCommand::Start { duration_secs })
+}
+
+/// Parse an optional duration argument.
+///
+/// Accepts `""` (empty → `None`), `"5m"` / `"10M"` (minutes), or a bare
+/// integer interpreted as seconds.
+fn parse_duration(s: &str) -> Option<u64> {
+    if s.is_empty() {
+        return None;
+    }
+    if let Some(mins_str) = s.strip_suffix('m').or_else(|| s.strip_suffix('M')) {
+        return mins_str.parse::<u64>().ok().map(|m| m * 60);
+    }
+    s.parse::<u64>().ok()
+}
+
+/// Strip the bot mention prefix from a raw Matrix message body.
+///
+/// Mirrors the logic in `commands::strip_bot_mention` so htop detection works
+/// without depending on private symbols in that module.
+fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+    trimmed
+}
+
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    match rest.chars().next() {
+        None => Some(rest),
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
+        _ => Some(rest),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// System stats
+// ---------------------------------------------------------------------------
+
+/// Read the system load average using the `uptime` command.
+///
+/// Returns a short string like `"load average: 1.23, 0.98, 0.75"` on success,
+/// or `"load: unknown"` on failure.
+fn get_load_average() -> String {
+    let output = std::process::Command::new("uptime")
+        .output()
+        .ok()
+        .and_then(|o| String::from_utf8(o.stdout).ok())
+        .unwrap_or_default();
+
+    // uptime output typically contains "load average: X, Y, Z" (Linux/macOS)
+    // or "load averages: X Y Z" (some BSD variants).
+    if let Some(idx) = output.find("load average") {
+        output[idx..].trim().trim_end_matches('\n').to_string()
+    } else {
+        "load: unknown".to_string()
+    }
+}
+
+/// Process stats for a single agent, gathered from `ps`.
+#[derive(Debug, Default)]
+struct AgentProcessStats {
+    cpu_pct: f64,
+    mem_pct: f64,
+    num_procs: usize,
+}
+
+/// Gather CPU% and MEM% for processes whose command line contains `worktree_path`.
+///
+/// Runs `ps aux` and sums all matching lines.  Returns `None` when no
+/// matching process is found.
+fn gather_process_stats(worktree_path: &str) -> Option<AgentProcessStats> {
+    let output = std::process::Command::new("ps")
+        .args(["aux"])
+        .output()
+        .ok()
+        .and_then(|o| String::from_utf8(o.stdout).ok())?;
+
+    let mut stats = AgentProcessStats::default();
+
+    for line in output.lines().skip(1) {
+        // Avoid matching against our own status display (the ps command itself)
+        if !line.contains(worktree_path) {
+            continue;
+        }
+        let parts: Vec<&str> = line.split_whitespace().collect();
+        // ps aux columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND...
+        if parts.len() >= 4
+            && let (Ok(cpu), Ok(mem)) = (parts[2].parse::<f64>(), parts[3].parse::<f64>())
+        {
+            stats.cpu_pct += cpu;
+            stats.mem_pct += mem;
+            stats.num_procs += 1;
+        }
+    }
+
+    if stats.num_procs > 0 {
+        Some(stats)
+    } else {
+        None
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Message formatting
+// ---------------------------------------------------------------------------
+
+/// Build the Markdown text for the htop dashboard.
+///
+/// `tick` is the number of updates sent so far (0 = initial).
+/// `total_duration_secs` is the configured auto-stop timeout.
+///
+/// Output uses a compact single-line format per agent so it renders
+/// without wrapping on narrow screens (~40 chars), such as mobile
+/// Matrix clients.
+pub fn build_htop_message(agents: &AgentPool, tick: u32, total_duration_secs: u64) -> String {
+    let elapsed_secs = (tick as u64) * 5;
+    let remaining_secs = total_duration_secs.saturating_sub(elapsed_secs);
+    let remaining_mins = remaining_secs / 60;
+    let remaining_secs_rem = remaining_secs % 60;
+
+    let load = get_load_average();
+
+    let mut lines = vec![
+        format!(
+            "**htop** · auto-stops in {}m{}s",
+            remaining_mins, remaining_secs_rem
+        ),
+        load,
+        String::new(),
+    ];
+
+    let all_agents = agents.list_agents().unwrap_or_default();
+    let active: Vec<_> = all_agents
+        .iter()
+        .filter(|a| matches!(a.status, AgentStatus::Running | AgentStatus::Pending))
+        .collect();
+
+    if active.is_empty() {
+        lines.push("*No agents currently running.*".to_string());
+    } else {
+        for agent in &active {
+            let story_label = agent
+                .story_id
+                .split('_')
+                .next()
+                .unwrap_or(&agent.story_id)
+                .to_string();
+            let stats = agent
+                .worktree_path
+                .as_deref()
+                .and_then(gather_process_stats)
+                .unwrap_or_default();
+            lines.push(format!(
+                "**{}** #{} cpu:{:.1}% mem:{:.1}%",
+                agent.agent_name, story_label, stats.cpu_pct, stats.mem_pct,
+            ));
+        }
+    }
+
+    lines.join("\n")
+}
+
+// ---------------------------------------------------------------------------
+// Background monitoring loop
+// ---------------------------------------------------------------------------
+
+/// Run the htop background loop: update the message every 5 seconds until
+/// the stop signal is received or the timeout expires.
+///
+/// Uses the [`ChatTransport`] abstraction so the loop works with any chat
+/// platform, not just Matrix.
+pub async fn run_htop_loop(
+    transport: Arc<dyn ChatTransport>,
+    room_id: String,
+    initial_message_id: String,
+    agents: Arc<AgentPool>,
+    mut stop_rx: watch::Receiver<bool>,
+    duration_secs: u64,
+) {
+    let interval_secs: u64 = 5;
+    let max_ticks = (duration_secs / interval_secs).max(1);
+
+    for tick in 1..=max_ticks {
+        // Wait for the interval or a stop signal.
+        let sleep = tokio::time::sleep(Duration::from_secs(interval_secs));
+        tokio::pin!(sleep);
+
+        tokio::select! {
+            _ = &mut sleep => {}
+            Ok(()) = stop_rx.changed() => {
+                if *stop_rx.borrow() {
+                    send_stopped_message(&*transport, &room_id, &initial_message_id).await;
+                    return;
+                }
+            }
+        }
+
+        // Re-check after waking — the sender might have signalled while we slept.
+        if *stop_rx.borrow() {
+            send_stopped_message(&*transport, &room_id, &initial_message_id).await;
+            return;
+        }
+
+        let text = build_htop_message(&agents, tick as u32, duration_secs);
+        let html = markdown_to_html(&text);
+
+        if let Err(e) = transport.edit_message(&room_id, &initial_message_id, &text, &html).await {
+            slog!("[htop] Failed to update message: {e}");
+            return;
+        }
+    }
+
+    // Auto-stop: timeout reached.
+    send_stopped_message(&*transport, &room_id, &initial_message_id).await;
+}
+
+async fn send_stopped_message(transport: &dyn ChatTransport, room_id: &str, message_id: &str) {
+    let text = "**htop** — monitoring stopped.";
+    let html = markdown_to_html(text);
+    if let Err(e) = transport.edit_message(room_id, message_id, text, &html).await {
+        slog!("[htop] Failed to send stop message: {e}");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Public command handlers (called from on_room_message in bot.rs)
+// ---------------------------------------------------------------------------
+
+/// Start a new htop monitoring session for `room_id`.
+///
+/// Stops any existing session for the room, sends the initial dashboard
+/// message, and spawns a background task that edits it every 5 seconds.
+///
+/// Uses the [`ChatTransport`] abstraction so htop works with any platform.
+pub async fn handle_htop_start(
+    transport: &Arc<dyn ChatTransport>,
+    room_id: &str,
+    htop_sessions: &HtopSessions,
+    agents: Arc<AgentPool>,
+    duration_secs: u64,
+) {
+    // Stop any existing session (best-effort; ignore errors if already done).
+    stop_existing_session(htop_sessions, room_id).await;
+
+    // Send the initial message.
+    let initial_text = build_htop_message(&agents, 0, duration_secs);
+    let initial_html = markdown_to_html(&initial_text);
+    let message_id = match transport.send_message(room_id, &initial_text, &initial_html).await {
+        Ok(id) => id,
+        Err(e) => {
+            slog!("[htop] Failed to send initial message: {e}");
+            return;
+        }
+    };
+
+    // Create the stop channel and register the session.
+    let (stop_tx, stop_rx) = watch::channel(false);
+    {
+        let mut sessions = htop_sessions.lock().await;
+        sessions.insert(room_id.to_string(), HtopSession { stop_tx });
+    }
+
+    // Spawn the background update loop.
+    let transport_clone = Arc::clone(transport);
+    let sessions_clone = Arc::clone(htop_sessions);
+    let room_id_owned = room_id.to_string();
+    tokio::spawn(async move {
+        run_htop_loop(
+            transport_clone,
+            room_id_owned.clone(),
+            message_id,
+            agents,
+            stop_rx,
+            duration_secs,
+        )
+        .await;
+        // Clean up the session entry when the loop exits naturally.
+        let mut sessions = sessions_clone.lock().await;
+        sessions.remove(&room_id_owned);
+    });
+}
+
+/// Stop the active htop session for `room_id`, if any.
+///
+/// When there is no active session, sends a "no active session" reply
+/// to the room so the user knows the command was received.
+pub async fn handle_htop_stop(
+    transport: &dyn ChatTransport,
+    room_id: &str,
+    htop_sessions: &HtopSessions,
+) {
+    let had_session = stop_existing_session(htop_sessions, room_id).await;
+    if !had_session {
+        let msg = "No active htop session in this room.";
+        let html = markdown_to_html(msg);
+        if let Err(e) = transport.send_message(room_id, msg, &html).await {
+            slog!("[htop] Failed to send no-session reply: {e}");
+        }
+    }
+    // When a session was active, the background task handles the final edit.
+}
+
+/// Signal and remove the existing session for `room_id`.
+///
+/// Returns `true` if a session was found and stopped.
+async fn stop_existing_session(htop_sessions: &HtopSessions, room_id: &str) -> bool {
+    let mut sessions = htop_sessions.lock().await;
+    if let Some(session) = sessions.remove(room_id) {
+        // Signal the background task to stop (ignore error — task may be done).
+        let _ = session.stop_tx.send(true);
+        true
+    } else {
+        false
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // -- extract_htop_command -----------------------------------------------
+
+    #[test]
+    fn htop_bare_command() {
+        let cmd = extract_htop_command("@timmy htop", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 300 }));
+    }
+
+    #[test]
+    fn htop_with_display_name() {
+        let cmd = extract_htop_command("Timmy htop", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 300 }));
+    }
+
+    #[test]
+    fn htop_stop() {
+        let cmd = extract_htop_command("@timmy htop stop", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Stop));
+    }
+
+    #[test]
+    fn htop_duration_minutes() {
+        let cmd = extract_htop_command("@timmy htop 10m", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 600 }));
+    }
+
+    #[test]
+    fn htop_duration_uppercase_m() {
+        let cmd = extract_htop_command("@timmy htop 2M", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 120 }));
+    }
+
+    #[test]
+    fn htop_duration_seconds() {
+        let cmd = extract_htop_command("@timmy htop 90", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 90 }));
+    }
+
+    #[test]
+    fn non_htop_command_returns_none() {
+        let cmd = extract_htop_command("@timmy status", "Timmy", "@timmy:homeserver.local");
+        assert!(cmd.is_none());
+    }
+
+    #[test]
+    fn unrelated_message_returns_none() {
+        let cmd = extract_htop_command("hello world", "Timmy", "@timmy:homeserver.local");
+        assert!(cmd.is_none());
+    }
+
+    #[test]
+    fn htop_case_insensitive() {
+        let cmd = extract_htop_command("@timmy HTOP", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 300 }));
+    }
+
+    #[test]
+    fn htop_full_user_id() {
+        let cmd = extract_htop_command(
+            "@timmy:homeserver.local htop",
+            "Timmy",
+            "@timmy:homeserver.local",
+        );
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 300 }));
+    }
+
+    #[test]
+    fn htop_with_comma_after_mention() {
+        // Some Matrix clients format mentions as "@timmy, htop"
+        let cmd = extract_htop_command("@timmy, htop", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(cmd, Some(HtopCommand::Start { duration_secs: 300 }));
+    }
+
+    // -- parse_duration -----------------------------------------------------
+
+    #[test]
+    fn parse_duration_empty_returns_none() {
+        assert_eq!(parse_duration(""), None);
+    }
+
+    #[test]
+    fn parse_duration_minutes() {
+        assert_eq!(parse_duration("5m"), Some(300));
+    }
+
+    #[test]
+    fn parse_duration_seconds() {
+        assert_eq!(parse_duration("120"), Some(120));
+    }
+
+    #[test]
+    fn parse_duration_invalid_returns_none() {
+        assert_eq!(parse_duration("abc"), None);
+    }
+
+    // -- build_htop_message -------------------------------------------------
+
+    #[test]
+    fn build_htop_message_no_agents() {
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        assert!(text.contains("htop"), "should mention htop: {text}");
+        assert!(
+            text.contains("No agents currently running"),
+            "should note no agents: {text}"
+        );
+    }
+
+    #[test]
+    fn build_htop_message_contains_load() {
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        // Load average is gathered via `uptime`; it should appear in some form.
+        assert!(
+            text.contains("load"),
+            "message should contain load info: {text}"
+        );
+    }
+
+    #[test]
+    fn build_htop_message_shows_remaining_time() {
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        assert!(
+            text.contains("auto-stops in"),
+            "should show remaining time: {text}"
+        );
+    }
+
+    #[test]
+    fn build_htop_message_load_on_own_line() {
+        // Load average must be on its own line, not combined with the htop header.
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        let lines: Vec<&str> = text.lines().collect();
+        let header_line = lines.first().expect("should have a header line");
+        // Header line must NOT contain "load" — load is on the second line.
+        assert!(
+            !header_line.contains("load"),
+            "load should be on its own line, not the header: {header_line}"
+        );
+        // Second line must contain "load".
+        let load_line = lines.get(1).expect("should have a load line");
+        assert!(
+            load_line.contains("load"),
+            "second line should contain load info: {load_line}"
+        );
+    }
+
+    #[test]
+    fn build_htop_message_no_table_syntax() {
+        // Must not use Markdown table format (pipes/separators) — those are too
+        // wide for narrow mobile screens.
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        assert!(
+            !text.contains("|----"),
+            "output must not contain table separator rows: {text}"
+        );
+        assert!(
+            !text.contains("| Agent"),
+            "output must not contain table header row: {text}"
+        );
+    }
+
+    #[test]
+    fn build_htop_message_header_fits_40_chars() {
+        // The header line (htop + remaining time) must fit in ~40 rendered chars.
+        let pool = Arc::new(crate::agents::AgentPool::new_test(3000));
+        let text = build_htop_message(&pool, 0, 300);
+        let header = text.lines().next().expect("should have a header line");
+        // Strip markdown bold markers (**) for length calculation.
+        let rendered = header.replace("**", "");
+        assert!(
+            rendered.len() <= 40,
+            "header line too wide for mobile ({} chars): {rendered}",
+            rendered.len()
+        );
+    }
+}
--- a/server/src/matrix/mod.rs
+++ b/server/src/matrix/mod.rs
@@ -0,0 +1,90 @@
+//! Matrix bot integration for Story Kit.
+//!
+//! When a `.storkit/bot.toml` file is present with `enabled = true`, the
+//! server spawns a Matrix bot that:
+//!
+//! 1. Connects to the configured homeserver and joins the configured room.
+//! 2. Listens for messages from other users in the room.
+//! 3. Passes each message to Claude Code (the same provider as the web UI),
+//!    which has native access to Story Kit MCP tools.
+//! 4. Posts Claude Code's response back to the room.
+//!
+//! The bot is optional — if `bot.toml` is missing or `enabled = false`, the
+//! server starts normally with no Matrix connection.
+//!
+//! Multi-room support: configure `room_ids = ["!room1:…", "!room2:…"]` in
+//! `bot.toml`. Each room maintains its own independent conversation history.
+
+mod bot;
+pub mod commands;
+mod config;
+pub mod delete;
+pub mod htop;
+pub mod rebuild;
+pub mod reset;
+pub mod start;
+pub mod notifications;
+pub mod transport_impl;
+
+pub use bot::{ConversationEntry, ConversationRole, RoomConversation, drain_complete_paragraphs};
+pub use config::BotConfig;
+
+use crate::agents::AgentPool;
+use crate::http::context::PermissionForward;
+use crate::io::watcher::WatcherEvent;
+use std::path::Path;
+use std::sync::Arc;
+use tokio::sync::{Mutex as TokioMutex, broadcast, mpsc};
+
+/// Attempt to start the Matrix bot.
+///
+/// Reads the bot configuration from `.storkit/bot.toml`. If the file is
+/// absent or `enabled = false`, this function returns immediately without
+/// spawning anything — the server continues normally.
+///
+/// When the bot is enabled, a notification listener is also spawned that
+/// posts stage-transition messages to all configured rooms whenever a work
+/// item moves between pipeline stages.
+///
+/// `perm_rx` is the permission-request receiver shared with the MCP
+/// `prompt_permission` tool. The bot locks it during active chat sessions
+/// to surface permission prompts to the Matrix room and relay user decisions.
+///
+/// Must be called from within a Tokio runtime context (e.g., from `main`).
+pub fn spawn_bot(
+    project_root: &Path,
+    watcher_tx: broadcast::Sender<WatcherEvent>,
+    perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>,
+    agents: Arc<AgentPool>,
+) {
+    let config = match BotConfig::load(project_root) {
+        Some(c) => c,
+        None => {
+            crate::slog!("[matrix-bot] bot.toml absent or disabled; Matrix integration skipped");
+            return;
+        }
+    };
+
+    // WhatsApp and Slack transports are handled via HTTP webhooks, not the Matrix sync loop.
+    if config.transport == "whatsapp" || config.transport == "slack" {
+        crate::slog!(
+            "[bot] transport={} — skipping Matrix bot; webhooks handle this transport",
+            config.transport
+        );
+        return;
+    }
+
+    crate::slog!(
+        "[matrix-bot] Starting Matrix bot → homeserver={} rooms={:?}",
+        config.homeserver,
+        config.effective_room_ids()
+    );
+
+    let root = project_root.to_path_buf();
+    let watcher_rx = watcher_tx.subscribe();
+    tokio::spawn(async move {
+        if let Err(e) = bot::run_bot(config, root, watcher_rx, perm_rx, agents).await {
+            crate::slog!("[matrix-bot] Fatal error: {e}");
+        }
+    });
+}
--- a/server/src/matrix/notifications.rs
+++ b/server/src/matrix/notifications.rs
@@ -0,0 +1,646 @@
+//! Stage transition notifications for Matrix rooms.
+//!
+//! Subscribes to [`WatcherEvent`] broadcasts and posts a notification to all
+//! configured Matrix rooms whenever a work item moves between pipeline stages.
+
+use crate::io::story_metadata::parse_front_matter;
+use crate::io::watcher::WatcherEvent;
+use crate::slog;
+use crate::transport::ChatTransport;
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tokio::sync::broadcast;
+
+/// Human-readable display name for a pipeline stage directory.
+pub fn stage_display_name(stage: &str) -> &'static str {
+    match stage {
+        "1_backlog" => "Backlog",
+        "2_current" => "Current",
+        "3_qa" => "QA",
+        "4_merge" => "Merge",
+        "5_done" => "Done",
+        "6_archived" => "Archived",
+        _ => "Unknown",
+    }
+}
+
+/// Infer the previous pipeline stage for a given destination stage.
+///
+/// Returns `None` for `1_backlog` since items are created there (not
+/// transitioned from another stage).
+pub fn inferred_from_stage(to_stage: &str) -> Option<&'static str> {
+    match to_stage {
+        "2_current" => Some("Backlog"),
+        "3_qa" => Some("Current"),
+        "4_merge" => Some("QA"),
+        "5_done" => Some("Merge"),
+        "6_archived" => Some("Done"),
+        _ => None,
+    }
+}
+
+/// Extract the numeric story number from an item ID like `"261_story_slug"`.
+pub fn extract_story_number(item_id: &str) -> Option<&str> {
+    item_id
+        .split('_')
+        .next()
+        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+}
+
+/// Read the story name from the work item file's YAML front matter.
+///
+/// Returns `None` if the file doesn't exist or has no parseable name.
+pub fn read_story_name(project_root: &Path, stage: &str, item_id: &str) -> Option<String> {
+    let path = project_root
+        .join(".storkit")
+        .join("work")
+        .join(stage)
+        .join(format!("{item_id}.md"));
+    let contents = std::fs::read_to_string(&path).ok()?;
+    let meta = parse_front_matter(&contents).ok()?;
+    meta.name
+}
+
+/// Format a stage transition notification message.
+///
+/// Returns `(plain_text, html)` suitable for `RoomMessageEventContent::text_html`.
+pub fn format_stage_notification(
+    item_id: &str,
+    story_name: Option<&str>,
+    from_stage: &str,
+    to_stage: &str,
+) -> (String, String) {
+    let number = extract_story_number(item_id).unwrap_or(item_id);
+    let name = story_name.unwrap_or(item_id);
+
+    let prefix = if to_stage == "Done" { "\u{1f389} " } else { "" };
+    let plain = format!("{prefix}#{number} {name} \u{2014} {from_stage} \u{2192} {to_stage}");
+    let html = format!(
+        "{prefix}<strong>#{number}</strong> <em>{name}</em> \u{2014} {from_stage} \u{2192} {to_stage}"
+    );
+    (plain, html)
+}
+
+/// Format an error notification message for a story failure.
+///
+/// Returns `(plain_text, html)` suitable for `RoomMessageEventContent::text_html`.
+pub fn format_error_notification(
+    item_id: &str,
+    story_name: Option<&str>,
+    reason: &str,
+) -> (String, String) {
+    let number = extract_story_number(item_id).unwrap_or(item_id);
+    let name = story_name.unwrap_or(item_id);
+
+    let plain = format!("\u{274c} #{number} {name} \u{2014} {reason}");
+    let html = format!(
+        "\u{274c} <strong>#{number}</strong> <em>{name}</em> \u{2014} {reason}"
+    );
+    (plain, html)
+}
+
+/// Search all pipeline stages for a story name.
+///
+/// Tries each known pipeline stage directory in order and returns the first
+/// name found.  Used for events (like rate-limit warnings) that arrive without
+/// a known stage.
+fn find_story_name_any_stage(project_root: &Path, item_id: &str) -> Option<String> {
+    for stage in &["2_current", "3_qa", "4_merge", "1_backlog", "5_done"] {
+        if let Some(name) = read_story_name(project_root, stage, item_id) {
+            return Some(name);
+        }
+    }
+    None
+}
+
+/// Minimum time between rate-limit notifications for the same agent.
+const RATE_LIMIT_DEBOUNCE: Duration = Duration::from_secs(60);
+
+/// Format a rate limit warning notification message.
+///
+/// Returns `(plain_text, html)` suitable for `ChatTransport::send_message`.
+pub fn format_rate_limit_notification(
+    item_id: &str,
+    story_name: Option<&str>,
+    agent_name: &str,
+) -> (String, String) {
+    let number = extract_story_number(item_id).unwrap_or(item_id);
+    let name = story_name.unwrap_or(item_id);
+
+    let plain = format!(
+        "\u{26a0}\u{fe0f} #{number} {name} \u{2014} {agent_name} hit an API rate limit"
+    );
+    let html = format!(
+        "\u{26a0}\u{fe0f} <strong>#{number}</strong> <em>{name}</em> \u{2014} \
+         {agent_name} hit an API rate limit"
+    );
+    (plain, html)
+}
+
+/// Spawn a background task that listens for watcher events and posts
+/// stage-transition notifications to all configured rooms via the
+/// [`ChatTransport`] abstraction.
+pub fn spawn_notification_listener(
+    transport: Arc<dyn ChatTransport>,
+    room_ids: Vec<String>,
+    watcher_rx: broadcast::Receiver<WatcherEvent>,
+    project_root: PathBuf,
+) {
+    tokio::spawn(async move {
+        let mut rx = watcher_rx;
+        // Tracks when a rate-limit notification was last sent for each
+        // "story_id:agent_name" key, to debounce repeated warnings.
+        let mut rate_limit_last_notified: HashMap<String, Instant> = HashMap::new();
+
+        loop {
+            match rx.recv().await {
+                Ok(WatcherEvent::WorkItem {
+                    ref stage,
+                    ref item_id,
+                    ..
+                }) => {
+                    // Only notify on stage transitions, not creations.
+                    let Some(from_display) = inferred_from_stage(stage) else {
+                        continue;
+                    };
+                    let to_display = stage_display_name(stage);
+
+                    let story_name = read_story_name(&project_root, stage, item_id);
+                    let (plain, html) = format_stage_notification(
+                        item_id,
+                        story_name.as_deref(),
+                        from_display,
+                        to_display,
+                    );
+
+                    slog!("[matrix-bot] Sending stage notification: {plain}");
+
+                    for room_id in &room_ids {
+                        if let Err(e) = transport.send_message(room_id, &plain, &html).await {
+                            slog!(
+                                "[matrix-bot] Failed to send notification to {room_id}: {e}"
+                            );
+                        }
+                    }
+                }
+                Ok(WatcherEvent::MergeFailure {
+                    ref story_id,
+                    ref reason,
+                }) => {
+                    let story_name =
+                        read_story_name(&project_root, "4_merge", story_id);
+                    let (plain, html) = format_error_notification(
+                        story_id,
+                        story_name.as_deref(),
+                        reason,
+                    );
+
+                    slog!("[matrix-bot] Sending error notification: {plain}");
+
+                    for room_id in &room_ids {
+                        if let Err(e) = transport.send_message(room_id, &plain, &html).await {
+                            slog!(
+                                "[matrix-bot] Failed to send error notification to {room_id}: {e}"
+                            );
+                        }
+                    }
+                }
+                Ok(WatcherEvent::RateLimitWarning {
+                    ref story_id,
+                    ref agent_name,
+                }) => {
+                    // Debounce: skip if we sent a notification for this agent
+                    // within the last RATE_LIMIT_DEBOUNCE seconds.
+                    let debounce_key = format!("{story_id}:{agent_name}");
+                    let now = Instant::now();
+                    if let Some(&last) = rate_limit_last_notified.get(&debounce_key)
+                        && now.duration_since(last) < RATE_LIMIT_DEBOUNCE
+                    {
+                        slog!(
+                            "[matrix-bot] Rate-limit notification debounced for \
+                             {story_id}:{agent_name}"
+                        );
+                        continue;
+                    }
+                    rate_limit_last_notified.insert(debounce_key, now);
+
+                    let story_name = find_story_name_any_stage(&project_root, story_id);
+                    let (plain, html) = format_rate_limit_notification(
+                        story_id,
+                        story_name.as_deref(),
+                        agent_name,
+                    );
+
+                    slog!("[matrix-bot] Sending rate-limit notification: {plain}");
+
+                    for room_id in &room_ids {
+                        if let Err(e) = transport.send_message(room_id, &plain, &html).await {
+                            slog!(
+                                "[matrix-bot] Failed to send rate-limit notification \
+                                 to {room_id}: {e}"
+                            );
+                        }
+                    }
+                }
+                Ok(_) => {} // Ignore non-work-item events
+                Err(broadcast::error::RecvError::Lagged(n)) => {
+                    slog!(
+                        "[matrix-bot] Notification listener lagged, skipped {n} events"
+                    );
+                }
+                Err(broadcast::error::RecvError::Closed) => {
+                    slog!(
+                        "[matrix-bot] Watcher channel closed, stopping notification listener"
+                    );
+                    break;
+                }
+            }
+        }
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use async_trait::async_trait;
+    use crate::transport::MessageId;
+
+    // ── MockTransport ───────────────────────────────────────────────────────
+
+    type CallLog = Arc<std::sync::Mutex<Vec<(String, String, String)>>>;
+
+    /// Records every `send_message` call for inspection in tests.
+    struct MockTransport {
+        calls: CallLog,
+    }
+
+    impl MockTransport {
+        fn new() -> (Arc<Self>, CallLog) {
+            let calls: CallLog = Arc::new(std::sync::Mutex::new(Vec::new()));
+            (Arc::new(Self { calls: Arc::clone(&calls) }), calls)
+        }
+    }
+
+    #[async_trait]
+    impl crate::transport::ChatTransport for MockTransport {
+        async fn send_message(&self, room_id: &str, plain: &str, html: &str) -> Result<MessageId, String> {
+            self.calls.lock().unwrap().push((room_id.to_string(), plain.to_string(), html.to_string()));
+            Ok("mock-msg-id".to_string())
+        }
+
+        async fn edit_message(&self, _room_id: &str, _id: &str, _plain: &str, _html: &str) -> Result<(), String> {
+            Ok(())
+        }
+
+        async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
+            Ok(())
+        }
+    }
+
+    // ── spawn_notification_listener: RateLimitWarning ───────────────────────
+
+    /// AC2 + AC3: when a RateLimitWarning event arrives, send_message is called
+    /// with a notification that names the agent and story.
+    #[tokio::test]
+    async fn rate_limit_warning_sends_notification_with_agent_and_story() {
+        let tmp = tempfile::tempdir().unwrap();
+        let stage_dir = tmp.path().join(".storkit").join("work").join("2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+        std::fs::write(
+            stage_dir.join("365_story_rate_limit.md"),
+            "---\nname: Rate Limit Test Story\n---\n",
+        )
+        .unwrap();
+
+        let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
+        let (transport, calls) = MockTransport::new();
+
+        spawn_notification_listener(
+            transport,
+            vec!["!room123:example.org".to_string()],
+            watcher_rx,
+            tmp.path().to_path_buf(),
+        );
+
+        watcher_tx.send(WatcherEvent::RateLimitWarning {
+            story_id: "365_story_rate_limit".to_string(),
+            agent_name: "coder-1".to_string(),
+        }).unwrap();
+
+        // Give the spawned task time to process the event.
+        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+        let calls = calls.lock().unwrap();
+        assert_eq!(calls.len(), 1, "Expected exactly one notification");
+        let (room_id, plain, _html) = &calls[0];
+        assert_eq!(room_id, "!room123:example.org");
+        assert!(plain.contains("365"), "plain should contain story number");
+        assert!(plain.contains("Rate Limit Test Story"), "plain should contain story name");
+        assert!(plain.contains("coder-1"), "plain should contain agent name");
+        assert!(plain.contains("rate limit"), "plain should mention rate limit");
+    }
+
+    /// AC4: a second RateLimitWarning for the same agent within the debounce
+    /// window must NOT trigger a second notification.
+    #[tokio::test]
+    async fn rate_limit_warning_is_debounced() {
+        let tmp = tempfile::tempdir().unwrap();
+
+        let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
+        let (transport, calls) = MockTransport::new();
+
+        spawn_notification_listener(
+            transport,
+            vec!["!room1:example.org".to_string()],
+            watcher_rx,
+            tmp.path().to_path_buf(),
+        );
+
+        // Send the same warning twice in rapid succession.
+        for _ in 0..2 {
+            watcher_tx.send(WatcherEvent::RateLimitWarning {
+                story_id: "42_story_debounce".to_string(),
+                agent_name: "coder-2".to_string(),
+            }).unwrap();
+        }
+
+        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+        let calls = calls.lock().unwrap();
+        assert_eq!(calls.len(), 1, "Debounce should suppress the second notification");
+    }
+
+    /// AC4 (corollary): warnings for different agents are NOT debounced against
+    /// each other — both should produce notifications.
+    #[tokio::test]
+    async fn rate_limit_warnings_for_different_agents_both_notify() {
+        let tmp = tempfile::tempdir().unwrap();
+
+        let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
+        let (transport, calls) = MockTransport::new();
+
+        spawn_notification_listener(
+            transport,
+            vec!["!room1:example.org".to_string()],
+            watcher_rx,
+            tmp.path().to_path_buf(),
+        );
+
+        watcher_tx.send(WatcherEvent::RateLimitWarning {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-1".to_string(),
+        }).unwrap();
+        watcher_tx.send(WatcherEvent::RateLimitWarning {
+            story_id: "42_story_foo".to_string(),
+            agent_name: "coder-2".to_string(),
+        }).unwrap();
+
+        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+        let calls = calls.lock().unwrap();
+        assert_eq!(calls.len(), 2, "Different agents should each trigger a notification");
+    }
+
+    // ── stage_display_name ──────────────────────────────────────────────────
+
+    #[test]
+    fn stage_display_name_maps_all_known_stages() {
+        assert_eq!(stage_display_name("1_backlog"), "Backlog");
+        assert_eq!(stage_display_name("2_current"), "Current");
+        assert_eq!(stage_display_name("3_qa"), "QA");
+        assert_eq!(stage_display_name("4_merge"), "Merge");
+        assert_eq!(stage_display_name("5_done"), "Done");
+        assert_eq!(stage_display_name("6_archived"), "Archived");
+        assert_eq!(stage_display_name("unknown"), "Unknown");
+    }
+
+    // ── inferred_from_stage ─────────────────────────────────────────────────
+
+    #[test]
+    fn inferred_from_stage_returns_previous_stage() {
+        assert_eq!(inferred_from_stage("2_current"), Some("Backlog"));
+        assert_eq!(inferred_from_stage("3_qa"), Some("Current"));
+        assert_eq!(inferred_from_stage("4_merge"), Some("QA"));
+        assert_eq!(inferred_from_stage("5_done"), Some("Merge"));
+        assert_eq!(inferred_from_stage("6_archived"), Some("Done"));
+    }
+
+    #[test]
+    fn inferred_from_stage_returns_none_for_backlog() {
+        assert_eq!(inferred_from_stage("1_backlog"), None);
+    }
+
+    #[test]
+    fn inferred_from_stage_returns_none_for_unknown() {
+        assert_eq!(inferred_from_stage("9_unknown"), None);
+    }
+
+    // ── extract_story_number ────────────────────────────────────────────────
+
+    #[test]
+    fn extract_story_number_parses_numeric_prefix() {
+        assert_eq!(
+            extract_story_number("261_story_bot_notifications"),
+            Some("261")
+        );
+        assert_eq!(extract_story_number("42_bug_fix_thing"), Some("42"));
+        assert_eq!(extract_story_number("1_spike_research"), Some("1"));
+    }
+
+    #[test]
+    fn extract_story_number_returns_none_for_non_numeric() {
+        assert_eq!(extract_story_number("abc_story_thing"), None);
+        assert_eq!(extract_story_number(""), None);
+    }
+
+    // ── read_story_name ─────────────────────────────────────────────────────
+
+    #[test]
+    fn read_story_name_reads_from_front_matter() {
+        let tmp = tempfile::tempdir().unwrap();
+        let stage_dir = tmp
+            .path()
+            .join(".storkit")
+            .join("work")
+            .join("2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+        std::fs::write(
+            stage_dir.join("42_story_my_feature.md"),
+            "---\nname: My Cool Feature\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let name = read_story_name(tmp.path(), "2_current", "42_story_my_feature");
+        assert_eq!(name.as_deref(), Some("My Cool Feature"));
+    }
+
+    #[test]
+    fn read_story_name_returns_none_for_missing_file() {
+        let tmp = tempfile::tempdir().unwrap();
+        let name = read_story_name(tmp.path(), "2_current", "99_story_missing");
+        assert_eq!(name, None);
+    }
+
+    #[test]
+    fn read_story_name_returns_none_for_missing_name_field() {
+        let tmp = tempfile::tempdir().unwrap();
+        let stage_dir = tmp
+            .path()
+            .join(".storkit")
+            .join("work")
+            .join("2_current");
+        std::fs::create_dir_all(&stage_dir).unwrap();
+        std::fs::write(
+            stage_dir.join("42_story_no_name.md"),
+            "---\ncoverage_baseline: 50%\n---\n# Story\n",
+        )
+        .unwrap();
+
+        let name = read_story_name(tmp.path(), "2_current", "42_story_no_name");
+        assert_eq!(name, None);
+    }
+
+    // ── format_error_notification ────────────────────────────────────────────
+
+    #[test]
+    fn format_error_notification_with_story_name() {
+        let (plain, html) =
+            format_error_notification("262_story_bot_errors", Some("Bot error notifications"), "merge conflict in src/main.rs");
+        assert_eq!(
+            plain,
+            "\u{274c} #262 Bot error notifications \u{2014} merge conflict in src/main.rs"
+        );
+        assert_eq!(
+            html,
+            "\u{274c} <strong>#262</strong> <em>Bot error notifications</em> \u{2014} merge conflict in src/main.rs"
+        );
+    }
+
+    #[test]
+    fn format_error_notification_without_story_name_falls_back_to_item_id() {
+        let (plain, _html) =
+            format_error_notification("42_bug_fix_thing", None, "tests failed");
+        assert_eq!(
+            plain,
+            "\u{274c} #42 42_bug_fix_thing \u{2014} tests failed"
+        );
+    }
+
+    #[test]
+    fn format_error_notification_non_numeric_id_uses_full_id() {
+        let (plain, _html) =
+            format_error_notification("abc_story_thing", Some("Some Story"), "clippy errors");
+        assert_eq!(
+            plain,
+            "\u{274c} #abc_story_thing Some Story \u{2014} clippy errors"
+        );
+    }
+
+    // ── format_rate_limit_notification ─────────────────────────────────────
+
+    #[test]
+    fn format_rate_limit_notification_includes_agent_and_story() {
+        let (plain, html) = format_rate_limit_notification(
+            "365_story_my_feature",
+            Some("My Feature"),
+            "coder-2",
+        );
+        assert_eq!(
+            plain,
+            "\u{26a0}\u{fe0f} #365 My Feature \u{2014} coder-2 hit an API rate limit"
+        );
+        assert_eq!(
+            html,
+            "\u{26a0}\u{fe0f} <strong>#365</strong> <em>My Feature</em> \u{2014} coder-2 hit an API rate limit"
+        );
+    }
+
+    #[test]
+    fn format_rate_limit_notification_falls_back_to_item_id() {
+        let (plain, _html) =
+            format_rate_limit_notification("42_story_thing", None, "coder-1");
+        assert_eq!(
+            plain,
+            "\u{26a0}\u{fe0f} #42 42_story_thing \u{2014} coder-1 hit an API rate limit"
+        );
+    }
+
+    // ── format_stage_notification ───────────────────────────────────────────
+
+    #[test]
+    fn format_notification_done_stage_includes_party_emoji() {
+        let (plain, html) = format_stage_notification(
+            "353_story_done",
+            Some("Done Story"),
+            "Merge",
+            "Done",
+        );
+        assert_eq!(
+            plain,
+            "\u{1f389} #353 Done Story \u{2014} Merge \u{2192} Done"
+        );
+        assert_eq!(
+            html,
+            "\u{1f389} <strong>#353</strong> <em>Done Story</em> \u{2014} Merge \u{2192} Done"
+        );
+    }
+
+    #[test]
+    fn format_notification_non_done_stage_has_no_emoji() {
+        let (plain, _html) = format_stage_notification(
+            "42_story_thing",
+            Some("Some Story"),
+            "Backlog",
+            "Current",
+        );
+        assert!(!plain.contains("\u{1f389}"));
+    }
+
+    #[test]
+    fn format_notification_with_story_name() {
+        let (plain, html) = format_stage_notification(
+            "261_story_bot_notifications",
+            Some("Bot notifications"),
+            "Upcoming",
+            "Current",
+        );
+        assert_eq!(
+            plain,
+            "#261 Bot notifications \u{2014} Upcoming \u{2192} Current"
+        );
+        assert_eq!(
+            html,
+            "<strong>#261</strong> <em>Bot notifications</em> \u{2014} Upcoming \u{2192} Current"
+        );
+    }
+
+    #[test]
+    fn format_notification_without_story_name_falls_back_to_item_id() {
+        let (plain, _html) = format_stage_notification(
+            "42_bug_fix_thing",
+            None,
+            "Current",
+            "QA",
+        );
+        assert_eq!(
+            plain,
+            "#42 42_bug_fix_thing \u{2014} Current \u{2192} QA"
+        );
+    }
+
+    #[test]
+    fn format_notification_non_numeric_id_uses_full_id() {
+        let (plain, _html) = format_stage_notification(
+            "abc_story_thing",
+            Some("Some Story"),
+            "QA",
+            "Merge",
+        );
+        assert_eq!(
+            plain,
+            "#abc_story_thing Some Story \u{2014} QA \u{2192} Merge"
+        );
+    }
+}
--- a/server/src/matrix/rebuild.rs
+++ b/server/src/matrix/rebuild.rs
@@ -0,0 +1,145 @@
+//! Rebuild command: trigger a server rebuild and restart.
+//!
+//! `{bot_name} rebuild` stops all running agents, rebuilds the server binary
+//! with `cargo build`, and re-execs the process with the new binary.  If the
+//! build fails the error is reported back to the room and the server keeps
+//! running.
+
+use crate::agents::AgentPool;
+use std::path::Path;
+use std::sync::Arc;
+
+/// A parsed rebuild command.
+#[derive(Debug, PartialEq)]
+pub struct RebuildCommand;
+
+/// Parse a rebuild command from a raw message body.
+///
+/// Strips the bot mention prefix and checks whether the command word is
+/// `rebuild`.  Returns `None` when the message is not a rebuild command.
+pub fn extract_rebuild_command(
+    message: &str,
+    bot_name: &str,
+    bot_user_id: &str,
+) -> Option<RebuildCommand> {
+    let stripped = strip_mention(message, bot_name, bot_user_id);
+    let trimmed = stripped
+        .trim()
+        .trim_start_matches(|c: char| !c.is_alphanumeric());
+
+    let cmd = match trimmed.split_once(char::is_whitespace) {
+        Some((c, _)) => c,
+        None => trimmed,
+    };
+
+    if cmd.eq_ignore_ascii_case("rebuild") {
+        Some(RebuildCommand)
+    } else {
+        None
+    }
+}
+
+/// Handle a rebuild command: trigger server rebuild and restart.
+///
+/// Returns a string describing the outcome.  On build failure the error
+/// message is returned so it can be posted to the room; the server keeps
+/// running.  On success this function never returns (the process re-execs).
+pub async fn handle_rebuild(
+    bot_name: &str,
+    project_root: &Path,
+    agents: &Arc<AgentPool>,
+) -> String {
+    crate::slog!("[matrix-bot] rebuild command received (bot={bot_name})");
+    match crate::rebuild::rebuild_and_restart(agents, project_root).await {
+        Ok(msg) => msg,
+        Err(e) => format!("Rebuild failed: {e}"),
+    }
+}
+
+/// Strip the bot mention prefix from a raw Matrix message body.
+fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+    trimmed
+}
+
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    match rest.chars().next() {
+        None => Some(rest),
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
+        _ => Some(rest),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn extract_with_display_name() {
+        let cmd = extract_rebuild_command("Timmy rebuild", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(RebuildCommand));
+    }
+
+    #[test]
+    fn extract_with_full_user_id() {
+        let cmd = extract_rebuild_command(
+            "@timmy:home.local rebuild",
+            "Timmy",
+            "@timmy:home.local",
+        );
+        assert_eq!(cmd, Some(RebuildCommand));
+    }
+
+    #[test]
+    fn extract_with_localpart() {
+        let cmd = extract_rebuild_command("@timmy rebuild", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(RebuildCommand));
+    }
+
+    #[test]
+    fn extract_case_insensitive() {
+        let cmd = extract_rebuild_command("Timmy REBUILD", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(RebuildCommand));
+    }
+
+    #[test]
+    fn extract_non_rebuild_returns_none() {
+        let cmd = extract_rebuild_command("Timmy help", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, None);
+    }
+
+    #[test]
+    fn extract_ignores_extra_args() {
+        // "rebuild" with trailing text is still a rebuild command
+        let cmd = extract_rebuild_command("Timmy rebuild now", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(RebuildCommand));
+    }
+
+    #[test]
+    fn extract_no_match_returns_none() {
+        let cmd = extract_rebuild_command("Timmy status", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, None);
+    }
+}
--- a/server/src/matrix/reset.rs
+++ b/server/src/matrix/reset.rs
@@ -0,0 +1,170 @@
+//! Reset command: clear the current Claude Code session for a room.
+//!
+//! `{bot_name} reset` drops the stored session ID and conversation history for
+//! the current room so the next message starts a brand-new Claude Code session
+//! with clean context.  File-system memories (auto-memory directory) are not
+//! affected — only the in-memory/persisted conversation state is cleared.
+
+use crate::matrix::bot::{ConversationHistory, RoomConversation};
+use matrix_sdk::ruma::OwnedRoomId;
+use std::path::Path;
+
+/// A parsed reset command.
+#[derive(Debug, PartialEq)]
+pub struct ResetCommand;
+
+/// Parse a reset command from a raw message body.
+///
+/// Strips the bot mention prefix and checks whether the command word is
+/// `reset`.  Returns `None` when the message is not a reset command at all.
+pub fn extract_reset_command(
+    message: &str,
+    bot_name: &str,
+    bot_user_id: &str,
+) -> Option<ResetCommand> {
+    let stripped = strip_mention(message, bot_name, bot_user_id);
+    let trimmed = stripped
+        .trim()
+        .trim_start_matches(|c: char| !c.is_alphanumeric());
+
+    let cmd = match trimmed.split_once(char::is_whitespace) {
+        Some((c, _)) => c,
+        None => trimmed,
+    };
+
+    if cmd.eq_ignore_ascii_case("reset") {
+        Some(ResetCommand)
+    } else {
+        None
+    }
+}
+
+/// Handle a reset command: clear the session ID and conversation entries for
+/// the given room, persist the updated history, and return a confirmation.
+pub async fn handle_reset(
+    bot_name: &str,
+    room_id: &OwnedRoomId,
+    history: &ConversationHistory,
+    project_root: &Path,
+) -> String {
+    {
+        let mut guard = history.lock().await;
+        let conv = guard.entry(room_id.clone()).or_insert_with(RoomConversation::default);
+        conv.session_id = None;
+        conv.entries.clear();
+        crate::matrix::bot::save_history(project_root, &guard);
+    }
+    crate::slog!("[matrix-bot] reset command: cleared session for room {room_id} (bot={bot_name})");
+    "Session reset. Starting fresh — previous context has been cleared.".to_string()
+}
+
+/// Strip the bot mention prefix from a raw Matrix message body.
+fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+    trimmed
+}
+
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    match rest.chars().next() {
+        None => Some(rest),
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
+        _ => Some(rest),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn extract_with_display_name() {
+        let cmd = extract_reset_command("Timmy reset", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(ResetCommand));
+    }
+
+    #[test]
+    fn extract_with_full_user_id() {
+        let cmd =
+            extract_reset_command("@timmy:home.local reset", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(ResetCommand));
+    }
+
+    #[test]
+    fn extract_with_localpart() {
+        let cmd = extract_reset_command("@timmy reset", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(ResetCommand));
+    }
+
+    #[test]
+    fn extract_case_insensitive() {
+        let cmd = extract_reset_command("Timmy RESET", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(ResetCommand));
+    }
+
+    #[test]
+    fn extract_non_reset_returns_none() {
+        let cmd = extract_reset_command("Timmy help", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, None);
+    }
+
+    #[test]
+    fn extract_ignores_extra_args() {
+        // "reset" with trailing text is still a reset command
+        let cmd = extract_reset_command("Timmy reset everything", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(ResetCommand));
+    }
+
+    #[tokio::test]
+    async fn handle_reset_clears_session_and_entries() {
+        use crate::matrix::bot::{ConversationEntry, ConversationRole};
+        use std::collections::HashMap;
+        use std::sync::Arc;
+        use tokio::sync::Mutex as TokioMutex;
+
+        let room_id: OwnedRoomId = "!test:example.com".parse().unwrap();
+        let history: ConversationHistory = Arc::new(TokioMutex::new({
+            let mut m = HashMap::new();
+            m.insert(room_id.clone(), RoomConversation {
+                session_id: Some("old-session-id".to_string()),
+                entries: vec![ConversationEntry {
+                    role: ConversationRole::User,
+                    sender: "@alice:example.com".to_string(),
+                    content: "previous message".to_string(),
+                }],
+            });
+            m
+        }));
+
+        let tmp = tempfile::tempdir().unwrap();
+        let response = handle_reset("Timmy", &room_id, &history, tmp.path()).await;
+
+        assert!(response.contains("reset"), "response should mention reset: {response}");
+
+        let guard = history.lock().await;
+        let conv = guard.get(&room_id).unwrap();
+        assert!(conv.session_id.is_none(), "session_id should be cleared");
+        assert!(conv.entries.is_empty(), "entries should be cleared");
+    }
+}
--- a/server/src/matrix/start.rs
+++ b/server/src/matrix/start.rs
@@ -0,0 +1,391 @@
+//! Start command: start a coder agent on a story.
+//!
+//! `{bot_name} start {number}` finds the story by number, selects the default
+//! coder agent, and starts it.
+//!
+//! `{bot_name} start {number} opus` starts `coder-opus` (or any agent whose
+//! name ends with the supplied hint, e.g. `coder-{hint}`).
+
+use crate::agents::AgentPool;
+use std::path::Path;
+
+/// A parsed start command from a Matrix message body.
+#[derive(Debug, PartialEq)]
+pub enum StartCommand {
+    /// Start the story with this number using the (optional) agent hint.
+    Start {
+        story_number: String,
+        /// Optional agent name hint (e.g. `"opus"` → resolved to `"coder-opus"`).
+        agent_hint: Option<String>,
+    },
+    /// The user typed `start` but without a valid numeric argument.
+    BadArgs,
+}
+
+/// Parse a start command from a raw Matrix message body.
+///
+/// Strips the bot mention prefix and checks whether the first word is `start`.
+/// Returns `None` when the message is not a start command at all.
+pub fn extract_start_command(
+    message: &str,
+    bot_name: &str,
+    bot_user_id: &str,
+) -> Option<StartCommand> {
+    let stripped = strip_mention(message, bot_name, bot_user_id);
+    let trimmed = stripped
+        .trim()
+        .trim_start_matches(|c: char| !c.is_alphanumeric());
+
+    let (cmd, args) = match trimmed.split_once(char::is_whitespace) {
+        Some((c, a)) => (c, a.trim()),
+        None => (trimmed, ""),
+    };
+
+    if !cmd.eq_ignore_ascii_case("start") {
+        return None;
+    }
+
+    // Split args into story number and optional agent hint.
+    let (number_str, hint_str) = match args.split_once(char::is_whitespace) {
+        Some((n, h)) => (n.trim(), h.trim()),
+        None => (args, ""),
+    };
+
+    if !number_str.is_empty() && number_str.chars().all(|c| c.is_ascii_digit()) {
+        let agent_hint = if hint_str.is_empty() {
+            None
+        } else {
+            Some(hint_str.to_string())
+        };
+        Some(StartCommand::Start {
+            story_number: number_str.to_string(),
+            agent_hint,
+        })
+    } else {
+        Some(StartCommand::BadArgs)
+    }
+}
+
+/// Handle a start command asynchronously.
+///
+/// Finds the work item by `story_number` across all pipeline stages, resolves
+/// the agent name from `agent_hint`, and calls `agents.start_agent`.
+/// Returns a markdown-formatted response string.
+pub async fn handle_start(
+    bot_name: &str,
+    story_number: &str,
+    agent_hint: Option<&str>,
+    project_root: &Path,
+    agents: &AgentPool,
+) -> String {
+    const STAGES: &[&str] = &[
+        "1_backlog",
+        "2_current",
+        "3_qa",
+        "4_merge",
+        "5_done",
+        "6_archived",
+    ];
+
+    // Find the story file across all pipeline stages.
+    let mut found: Option<(std::path::PathBuf, String)> = None; // (path, story_id)
+    'outer: for stage in STAGES {
+        let dir = project_root.join(".storkit").join("work").join(stage);
+        if !dir.exists() {
+            continue;
+        }
+        if let Ok(entries) = std::fs::read_dir(&dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|e| e.to_str()) != Some("md") {
+                    continue;
+                }
+                if let Some(stem) = path
+                    .file_stem()
+                    .and_then(|s| s.to_str())
+                    .map(|s| s.to_string())
+                {
+                    let file_num = stem
+                        .split('_')
+                        .next()
+                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
+                        .unwrap_or("")
+                        .to_string();
+                    if file_num == story_number {
+                        found = Some((path, stem));
+                        break 'outer;
+                    }
+                }
+            }
+        }
+    }
+
+    let (path, story_id) = match found {
+        Some(f) => f,
+        None => {
+            return format!(
+                "No story, bug, or spike with number **{story_number}** found."
+            );
+        }
+    };
+
+    // Read the human-readable name from front matter for the response.
+    let story_name = std::fs::read_to_string(&path)
+        .ok()
+        .and_then(|contents| {
+            crate::io::story_metadata::parse_front_matter(&contents)
+                .ok()
+                .and_then(|m| m.name)
+        })
+        .unwrap_or_else(|| story_id.clone());
+
+    // Resolve agent name: try "coder-{hint}" first, then the hint as-is.
+    let resolved_agent: Option<String> = agent_hint.map(|hint| {
+        let with_prefix = format!("coder-{hint}");
+        // We'll pass the prefixed form; start_agent validates against config.
+        // If coder- prefix is already there, don't double-prefix.
+        if hint.starts_with("coder-") {
+            hint.to_string()
+        } else {
+            with_prefix
+        }
+    });
+
+    crate::slog!(
+        "[matrix-bot] start command: starting story {story_id} with agent={resolved_agent:?} (bot={bot_name})"
+    );
+
+    match agents
+        .start_agent(project_root, &story_id, resolved_agent.as_deref(), None)
+        .await
+    {
+        Ok(info) => {
+            format!(
+                "Started **{story_name}** with agent **{}**.",
+                info.agent_name
+            )
+        }
+        Err(e) if e.contains("All coder agents are busy") => {
+            format!(
+                "**{story_name}** has been queued in `work/2_current/` and will start \
+                 automatically when a coder becomes available."
+            )
+        }
+        Err(e) => {
+            format!("Failed to start **{story_name}**: {e}")
+        }
+    }
+}
+
+/// Strip the bot mention prefix from a raw Matrix message body.
+///
+/// Mirrors the logic in `commands::strip_bot_mention` and `delete::strip_mention`.
+fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+    trimmed
+}
+
+fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    if text.len() < prefix.len() {
+        return None;
+    }
+    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    match rest.chars().next() {
+        None => Some(rest),
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
+        _ => Some(rest),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // -- extract_start_command -----------------------------------------------
+
+    #[test]
+    fn extract_with_full_user_id() {
+        let cmd =
+            extract_start_command("@timmy:home.local start 331", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(StartCommand::Start {
+                story_number: "331".to_string(),
+                agent_hint: None
+            })
+        );
+    }
+
+    #[test]
+    fn extract_with_display_name() {
+        let cmd = extract_start_command("Timmy start 42", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(StartCommand::Start {
+                story_number: "42".to_string(),
+                agent_hint: None
+            })
+        );
+    }
+
+    #[test]
+    fn extract_with_localpart() {
+        let cmd = extract_start_command("@timmy start 7", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(StartCommand::Start {
+                story_number: "7".to_string(),
+                agent_hint: None
+            })
+        );
+    }
+
+    #[test]
+    fn extract_with_agent_hint() {
+        let cmd = extract_start_command("Timmy start 331 opus", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(StartCommand::Start {
+                story_number: "331".to_string(),
+                agent_hint: Some("opus".to_string())
+            })
+        );
+    }
+
+    #[test]
+    fn extract_case_insensitive_command() {
+        let cmd = extract_start_command("Timmy START 99", "Timmy", "@timmy:home.local");
+        assert_eq!(
+            cmd,
+            Some(StartCommand::Start {
+                story_number: "99".to_string(),
+                agent_hint: None
+            })
+        );
+    }
+
+    #[test]
+    fn extract_no_args_is_bad_args() {
+        let cmd = extract_start_command("Timmy start", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(StartCommand::BadArgs));
+    }
+
+    #[test]
+    fn extract_non_numeric_arg_is_bad_args() {
+        let cmd = extract_start_command("Timmy start foo", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, Some(StartCommand::BadArgs));
+    }
+
+    #[test]
+    fn extract_non_start_command_returns_none() {
+        let cmd = extract_start_command("Timmy help", "Timmy", "@timmy:home.local");
+        assert_eq!(cmd, None);
+    }
+
+    // -- handle_start (integration-style, uses temp filesystem) --------------
+
+    #[tokio::test]
+    async fn handle_start_returns_not_found_for_unknown_number() {
+        let tmp = tempfile::tempdir().unwrap();
+        let project_root = tmp.path();
+        for stage in &["1_backlog", "2_current", "3_qa", "4_merge", "5_done", "6_archived"] {
+            std::fs::create_dir_all(project_root.join(".storkit").join("work").join(stage))
+                .unwrap();
+        }
+        let agents = std::sync::Arc::new(crate::agents::AgentPool::new_test(3000));
+        let response = handle_start("Timmy", "999", None, project_root, &agents).await;
+        assert!(
+            response.contains("No story") && response.contains("999"),
+            "unexpected response: {response}"
+        );
+    }
+
+    #[tokio::test]
+    async fn handle_start_says_queued_not_error_when_all_coders_busy() {
+        use crate::agents::{AgentPool, AgentStatus};
+        use std::sync::Arc;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let project_root = tmp.path();
+        let sk = project_root.join(".storkit");
+        let backlog = sk.join("work/1_backlog");
+        std::fs::create_dir_all(&backlog).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
+        )
+        .unwrap();
+        std::fs::write(
+            backlog.join("356_story_test.md"),
+            "---\nname: Test Story\n---\n",
+        )
+        .unwrap();
+
+        let agents = Arc::new(AgentPool::new_test(3000));
+        agents.inject_test_agent("other-story", "coder-1", AgentStatus::Running);
+
+        let response = handle_start("Timmy", "356", None, project_root, &agents).await;
+
+        assert!(
+            !response.contains("Failed"),
+            "response must not say 'Failed' when coders are busy: {response}"
+        );
+        assert!(
+            response.to_lowercase().contains("queue") || response.to_lowercase().contains("available"),
+            "response must mention queued/available state: {response}"
+        );
+    }
+
+    #[test]
+    fn start_command_is_registered() {
+        use crate::matrix::commands::commands;
+        let found = commands().iter().any(|c| c.name == "start");
+        assert!(found, "start command must be in the registry");
+    }
+
+    #[test]
+    fn start_command_appears_in_help() {
+        let result = crate::matrix::commands::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy help",
+        );
+        let output = result.unwrap();
+        assert!(
+            output.contains("start"),
+            "help should list start command: {output}"
+        );
+    }
+
+    #[test]
+    fn start_command_falls_through_to_none_in_registry() {
+        // The start handler in the registry returns None (handled async in bot.rs).
+        let result = crate::matrix::commands::tests::try_cmd_addressed(
+            "Timmy",
+            "@timmy:homeserver.local",
+            "@timmy start 42",
+        );
+        assert!(
+            result.is_none(),
+            "start should not produce a sync response (handled async): {result:?}"
+        );
+    }
+}
--- a/server/src/matrix/transport_impl.rs
+++ b/server/src/matrix/transport_impl.rs
@@ -0,0 +1,96 @@
+//! Matrix implementation of [`ChatTransport`].
+//!
+//! Wraps a [`matrix_sdk::Client`] and delegates message sending / editing
+//! to the Matrix SDK.
+
+use async_trait::async_trait;
+use matrix_sdk::Client;
+use matrix_sdk::ruma::OwnedRoomId;
+use matrix_sdk::ruma::events::room::message::{
+    ReplacementMetadata, RoomMessageEventContent, RoomMessageEventContentWithoutRelation,
+};
+
+use crate::transport::{ChatTransport, MessageId};
+
+/// Matrix-backed [`ChatTransport`] implementation.
+///
+/// Holds a [`Client`] and resolves room IDs at send time.
+pub struct MatrixTransport {
+    client: Client,
+}
+
+impl MatrixTransport {
+    pub fn new(client: Client) -> Self {
+        Self { client }
+    }
+}
+
+#[async_trait]
+impl ChatTransport for MatrixTransport {
+    async fn send_message(
+        &self,
+        room_id: &str,
+        plain: &str,
+        html: &str,
+    ) -> Result<MessageId, String> {
+        let room_id: OwnedRoomId = room_id
+            .parse()
+            .map_err(|e| format!("Invalid room ID '{room_id}': {e}"))?;
+        let room = self
+            .client
+            .get_room(&room_id)
+            .ok_or_else(|| format!("Room {room_id} not found in client state"))?;
+
+        let content = RoomMessageEventContent::text_html(plain.to_string(), html.to_string());
+        let resp = room
+            .send(content)
+            .await
+            .map_err(|e| format!("Matrix send error: {e}"))?;
+
+        Ok(resp.event_id.to_string())
+    }
+
+    async fn edit_message(
+        &self,
+        room_id: &str,
+        original_message_id: &str,
+        plain: &str,
+        html: &str,
+    ) -> Result<(), String> {
+        let room_id: OwnedRoomId = room_id
+            .parse()
+            .map_err(|e| format!("Invalid room ID '{room_id}': {e}"))?;
+        let room = self
+            .client
+            .get_room(&room_id)
+            .ok_or_else(|| format!("Room {room_id} not found in client state"))?;
+
+        let original_event_id = original_message_id
+            .parse()
+            .map_err(|e| format!("Invalid event ID '{original_message_id}': {e}"))?;
+
+        let new_content =
+            RoomMessageEventContentWithoutRelation::text_html(plain.to_string(), html.to_string());
+        let metadata = ReplacementMetadata::new(original_event_id, None);
+        let content = new_content.make_replacement(metadata);
+
+        room.send(content)
+            .await
+            .map(|_| ())
+            .map_err(|e| format!("Matrix edit error: {e}"))
+    }
+
+    async fn send_typing(&self, room_id: &str, typing: bool) -> Result<(), String> {
+        let room_id: OwnedRoomId = room_id
+            .parse()
+            .map_err(|e| format!("Invalid room ID '{room_id}': {e}"))?;
+        let room = self
+            .client
+            .get_room(&room_id)
+            .ok_or_else(|| format!("Room {room_id} not found in client state"))?;
+
+        room.typing_notice(typing)
+            .await
+            .map_err(|e| format!("Matrix typing indicator error: {e}"))
+    }
+}
--- a/server/src/rebuild.rs
+++ b/server/src/rebuild.rs
@@ -0,0 +1,104 @@
+//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command.
+
+use crate::agents::AgentPool;
+use crate::slog;
+use std::path::Path;
+
+/// Rebuild the server binary and re-exec.
+///
+/// 1. Gracefully stops all running agents (kills PTY children).
+/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
+///    the current build profile (debug or release).
+/// 3. If the build fails, returns the build error (server stays up).
+/// 4. If the build succeeds, re-execs the process with the new binary via
+///    `std::os::unix::process::CommandExt::exec()`.
+pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Result<String, String> {
+    slog!("[rebuild] Rebuild and restart requested");
+
+    // 1. Gracefully stop all running agents.
+    let running_count = agents
+        .list_agents()
+        .unwrap_or_default()
+        .iter()
+        .filter(|a| a.status == crate::agents::AgentStatus::Running)
+        .count();
+    if running_count > 0 {
+        slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
+    }
+    agents.kill_all_children();
+
+    // 2. Find the workspace root (parent of the server binary's source).
+    //    CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
+    //    the workspace root is its parent.
+    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+    let workspace_root = manifest_dir
+        .parent()
+        .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
+
+    slog!(
+        "[rebuild] Building server from workspace root: {}",
+        workspace_root.display()
+    );
+
+    // 3. Build the server binary, matching the current build profile so the
+    //    re-exec via current_exe() picks up the new binary.
+    let build_args: Vec<&str> = if cfg!(debug_assertions) {
+        vec!["build", "-p", "storkit"]
+    } else {
+        vec!["build", "--release", "-p", "storkit"]
+    };
+    slog!("[rebuild] cargo {}", build_args.join(" "));
+    let output = tokio::task::spawn_blocking({
+        let workspace_root = workspace_root.to_path_buf();
+        move || {
+            std::process::Command::new("cargo")
+                .args(&build_args)
+                .current_dir(&workspace_root)
+                .output()
+        }
+    })
+    .await
+    .map_err(|e| format!("Build task panicked: {e}"))?
+    .map_err(|e| format!("Failed to run cargo build: {e}"))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        slog!("[rebuild] Build failed:\n{stderr}");
+        return Err(format!("Build failed:\n{stderr}"));
+    }
+
+    slog!("[rebuild] Build succeeded, re-execing with new binary");
+
+    // 4. Re-exec with the new binary.
+    //    Use the cargo output path rather than current_exe() so that rebuilds
+    //    inside Docker work correctly — the running binary may be installed at
+    //    /usr/local/bin/storkit (read-only) while cargo writes the new binary
+    //    to /app/target/release/storkit (a writable volume).
+    let new_exe = if cfg!(debug_assertions) {
+        workspace_root.join("target/debug/storkit")
+    } else {
+        workspace_root.join("target/release/storkit")
+    };
+    let args: Vec<String> = std::env::args().collect();
+
+    // Remove the port file before re-exec so the new process can write its own.
+    let port_file = project_root.join(".storkit_port");
+    if port_file.exists() {
+        let _ = std::fs::remove_file(&port_file);
+    }
+    // Also check cwd for port file.
+    let cwd_port_file = std::path::Path::new(".storkit_port");
+    if cwd_port_file.exists() {
+        let _ = std::fs::remove_file(cwd_port_file);
+    }
+
+    // Use exec() to replace the current process.
+    // This never returns on success.
+    use std::os::unix::process::CommandExt;
+    let err = std::process::Command::new(&new_exe)
+        .args(&args[1..])
+        .exec();
+
+    // If we get here, exec() failed.
+    Err(format!("Failed to exec new binary: {err}"))
+}
--- a/server/src/slack.rs
+++ b/server/src/slack.rs
--- a/server/src/state.rs
+++ b/server/src/state.rs
@@ -0,0 +1,36 @@
+use std::path::PathBuf;
+use std::sync::Mutex;
+use tokio::sync::watch;
+
+pub struct SessionState {
+    pub project_root: Mutex<Option<PathBuf>>,
+    pub cancel_tx: watch::Sender<bool>,
+    pub cancel_rx: watch::Receiver<bool>,
+}
+
+impl Default for SessionState {
+    fn default() -> Self {
+        let (cancel_tx, cancel_rx) = watch::channel(false);
+        Self {
+            project_root: Mutex::new(None),
+            cancel_tx,
+            cancel_rx,
+        }
+    }
+}
+
+impl SessionState {
+    pub fn get_project_root(&self) -> Result<PathBuf, String> {
+        let root_guard = self.project_root.lock().map_err(|e| e.to_string())?;
+        let root = root_guard.as_ref().ok_or_else(|| {
+            // TRACE:MERGE-DEBUG — remove once root cause is found
+            crate::slog_error!(
+                "[MERGE-DEBUG] get_project_root() called but project_root is None! \
+                 Backtrace hint: check caller in MCP tool handler."
+            );
+            "No project is currently open.".to_string()
+        })?;
+        Ok(root.clone())
+    }
+
+}
--- a/server/src/store.rs
+++ b/server/src/store.rs
@@ -0,0 +1,183 @@
+use serde_json::Value;
+use std::collections::HashMap;
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::sync::Mutex;
+
+pub trait StoreOps: Send + Sync {
+    fn get(&self, key: &str) -> Option<Value>;
+    fn set(&self, key: &str, value: Value);
+    fn delete(&self, key: &str);
+    fn save(&self) -> Result<(), String>;
+}
+
+pub struct JsonFileStore {
+    path: PathBuf,
+    data: Mutex<HashMap<String, Value>>,
+}
+
+impl JsonFileStore {
+    pub fn new(path: PathBuf) -> Result<Self, String> {
+        let data = if path.exists() {
+            let content =
+                fs::read_to_string(&path).map_err(|e| format!("Failed to read store: {e}"))?;
+            if content.trim().is_empty() {
+                HashMap::new()
+            } else {
+                serde_json::from_str::<HashMap<String, Value>>(&content)
+                    .map_err(|e| format!("Failed to parse store: {e}"))?
+            }
+        } else {
+            HashMap::new()
+        };
+
+        Ok(Self {
+            path,
+            data: Mutex::new(data),
+        })
+    }
+
+    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, String> {
+        Self::new(path.as_ref().to_path_buf())
+    }
+
+    #[allow(dead_code)]
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+
+    fn ensure_parent_dir(&self) -> Result<(), String> {
+        if let Some(parent) = self.path.parent() {
+            fs::create_dir_all(parent)
+                .map_err(|e| format!("Failed to create store directory: {e}"))?;
+        }
+        Ok(())
+    }
+}
+
+impl StoreOps for JsonFileStore {
+    fn get(&self, key: &str) -> Option<Value> {
+        self.data.lock().ok().and_then(|map| map.get(key).cloned())
+    }
+
+    fn set(&self, key: &str, value: Value) {
+        if let Ok(mut map) = self.data.lock() {
+            map.insert(key.to_string(), value);
+        }
+    }
+
+    fn delete(&self, key: &str) {
+        if let Ok(mut map) = self.data.lock() {
+            map.remove(key);
+        }
+    }
+
+    fn save(&self) -> Result<(), String> {
+        self.ensure_parent_dir()?;
+        let map = self.data.lock().map_err(|e| e.to_string())?;
+        let content =
+            serde_json::to_string_pretty(&*map).map_err(|e| format!("Serialize failed: {e}"))?;
+        fs::write(&self.path, content).map_err(|e| format!("Failed to write store: {e}"))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+    use tempfile::TempDir;
+
+    fn store_in(dir: &TempDir, name: &str) -> JsonFileStore {
+        let path = dir.path().join(name);
+        JsonFileStore::new(path).expect("store creation should succeed")
+    }
+
+    #[test]
+    fn new_from_missing_file_creates_empty_store() {
+        let dir = TempDir::new().unwrap();
+        let store = store_in(&dir, "missing.json");
+        assert!(store.get("anything").is_none());
+    }
+
+    #[test]
+    fn new_from_empty_file_creates_empty_store() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("empty.json");
+        fs::write(&path, "").unwrap();
+        let store = JsonFileStore::new(path).expect("should handle empty file");
+        assert!(store.get("anything").is_none());
+    }
+
+    #[test]
+    fn new_from_corrupt_file_returns_error() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("corrupt.json");
+        fs::write(&path, "not valid json {{{").unwrap();
+        let result = JsonFileStore::new(path);
+        match result {
+            Err(e) => assert!(e.contains("Failed to parse store"), "unexpected error: {e}"),
+            Ok(_) => panic!("expected error for corrupt file"),
+        }
+    }
+
+    #[test]
+    fn get_set_delete_roundtrip() {
+        let dir = TempDir::new().unwrap();
+        let store = store_in(&dir, "data.json");
+
+        assert!(store.get("key").is_none());
+
+        store.set("key", json!("value"));
+        assert_eq!(store.get("key"), Some(json!("value")));
+
+        store.set("key", json!(42));
+        assert_eq!(store.get("key"), Some(json!(42)));
+
+        store.delete("key");
+        assert!(store.get("key").is_none());
+    }
+
+    #[test]
+    fn save_persists_and_reload_restores() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("persist.json");
+
+        {
+            let store = JsonFileStore::new(path.clone()).unwrap();
+            store.set("name", json!("storkit"));
+            store.set("version", json!(1));
+            store.save().expect("save should succeed");
+        }
+
+        let store = JsonFileStore::new(path).unwrap();
+        assert_eq!(store.get("name"), Some(json!("storkit")));
+        assert_eq!(store.get("version"), Some(json!(1)));
+    }
+
+    #[test]
+    fn save_creates_parent_directories() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("nested").join("deep").join("store.json");
+        let store = JsonFileStore::new(path.clone()).unwrap();
+        store.set("key", json!("value"));
+        store.save().expect("save should create parent dirs");
+        assert!(path.exists());
+    }
+
+    #[test]
+    fn delete_nonexistent_key_is_noop() {
+        let dir = TempDir::new().unwrap();
+        let store = store_in(&dir, "data.json");
+        store.delete("nonexistent");
+        assert!(store.get("nonexistent").is_none());
+    }
+
+    #[test]
+    fn from_path_works_like_new() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("via_from.json");
+        let store = JsonFileStore::from_path(&path).unwrap();
+        store.set("test", json!(true));
+        assert_eq!(store.get("test"), Some(json!(true)));
+    }
+}
--- a/server/src/transport.rs
+++ b/server/src/transport.rs
@@ -0,0 +1,97 @@
+//! Transport abstraction for chat platforms.
+//!
+//! The [`ChatTransport`] trait defines a platform-agnostic interface for
+//! sending and editing messages, allowing the bot logic (commands, htop,
+//! notifications) to work against any chat platform — Matrix, WhatsApp, etc.
+
+use async_trait::async_trait;
+
+/// A platform-agnostic identifier for a sent message.
+///
+/// On Matrix this is the event ID; on other platforms it may be a message ID
+/// or similar opaque string.  The transport implementation is responsible for
+/// producing and consuming these identifiers.
+pub type MessageId = String;
+
+/// A platform-agnostic identifier for a chat room / channel / conversation.
+pub type RoomId = String;
+
+/// Abstraction over a chat platform's message-sending capabilities.
+///
+/// Implementations must be `Send + Sync` so they can be shared across
+/// async tasks via `Arc<dyn ChatTransport>`.
+#[async_trait]
+pub trait ChatTransport: Send + Sync {
+    /// Send a plain-text + HTML message to a room.
+    ///
+    /// Returns the platform-specific message ID on success so it can be
+    /// referenced later (e.g. for edits or reply detection).
+    async fn send_message(
+        &self,
+        room_id: &str,
+        plain: &str,
+        html: &str,
+    ) -> Result<MessageId, String>;
+
+    /// Edit a previously sent message.
+    ///
+    /// `original_message_id` is the [`MessageId`] returned by a prior
+    /// [`send_message`](ChatTransport::send_message) call.
+    ///
+    /// Platforms that do not support editing (e.g. WhatsApp) should send a
+    /// new message instead.
+    async fn edit_message(
+        &self,
+        room_id: &str,
+        original_message_id: &str,
+        plain: &str,
+        html: &str,
+    ) -> Result<(), String>;
+
+    /// Signal that the bot is typing (or has stopped typing) in a room.
+    ///
+    /// Platforms that do not support typing indicators should no-op.
+    async fn send_typing(&self, room_id: &str, typing: bool) -> Result<(), String>;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Arc;
+
+    /// Verify that WhatsAppTransport satisfies the ChatTransport trait and
+    /// can be used as `Arc<dyn ChatTransport>` (compile-time check).
+    /// Functional tests are in `whatsapp::tests` using mockito.
+    #[test]
+    fn whatsapp_transport_satisfies_trait() {
+        fn assert_transport<T: ChatTransport>() {}
+        assert_transport::<crate::whatsapp::WhatsAppTransport>();
+
+        // Verify it can be wrapped in Arc<dyn ChatTransport>.
+        let _: Arc<dyn ChatTransport> =
+            Arc::new(crate::whatsapp::WhatsAppTransport::new(
+                "test-phone".to_string(),
+                "test-token".to_string(),
+                "pipeline_notification".to_string(),
+            ));
+    }
+
+    /// MatrixTransport cannot be tested without a live homeserver, but we
+    /// can verify the type implements the trait at compile time.
+    #[test]
+    fn matrix_transport_is_send_sync() {
+        fn assert_send_sync<T: Send + Sync>() {}
+        assert_send_sync::<crate::matrix::transport_impl::MatrixTransport>();
+    }
+
+    /// Verify that SlackTransport satisfies the ChatTransport trait and
+    /// can be used as `Arc<dyn ChatTransport>` (compile-time check).
+    #[test]
+    fn slack_transport_satisfies_trait() {
+        fn assert_transport<T: ChatTransport>() {}
+        assert_transport::<crate::slack::SlackTransport>();
+
+        let _: Arc<dyn ChatTransport> =
+            Arc::new(crate::slack::SlackTransport::new("xoxb-test".to_string()));
+    }
+}
--- a/server/src/whatsapp.rs
+++ b/server/src/whatsapp.rs
--- a/server/src/workflow.rs
+++ b/server/src/workflow.rs
@@ -0,0 +1,399 @@
+//! Workflow module: test result tracking and acceptance evaluation.
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum TestStatus {
+    Pass,
+    Fail,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TestCaseResult {
+    pub name: String,
+    pub status: TestStatus,
+    pub details: Option<String>,
+}
+
+struct TestRunSummary {
+    total: usize,
+    failed: usize,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AcceptanceDecision {
+    pub can_accept: bool,
+    pub reasons: Vec<String>,
+    pub warning: Option<String>,
+}
+
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct StoryTestResults {
+    pub unit: Vec<TestCaseResult>,
+    pub integration: Vec<TestCaseResult>,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct WorkflowState {
+    pub results: HashMap<String, StoryTestResults>,
+    pub coverage: HashMap<String, CoverageReport>,
+}
+
+impl WorkflowState {
+    pub fn record_test_results_validated(
+        &mut self,
+        story_id: String,
+        unit: Vec<TestCaseResult>,
+        integration: Vec<TestCaseResult>,
+    ) -> Result<(), String> {
+        let failures = unit
+            .iter()
+            .chain(integration.iter())
+            .filter(|test| test.status == TestStatus::Fail)
+            .count();
+
+        if failures > 1 {
+            return Err(format!(
+                "Multiple failing tests detected ({failures}); register failures one at a time."
+            ));
+        }
+
+        self.results
+            .insert(story_id, StoryTestResults { unit, integration });
+
+        Ok(())
+    }
+
+}
+
+fn summarize_results(results: &StoryTestResults) -> TestRunSummary {
+    let mut total = 0;
+    let mut failed = 0;
+
+    for test in results.unit.iter().chain(results.integration.iter()) {
+        total += 1;
+        if test.status == TestStatus::Fail {
+            failed += 1;
+        }
+    }
+
+    TestRunSummary { total, failed }
+}
+
+fn evaluate_acceptance(results: &StoryTestResults) -> AcceptanceDecision {
+    let summary = summarize_results(results);
+
+    if summary.failed == 0 && summary.total > 0 {
+        return AcceptanceDecision {
+            can_accept: true,
+            reasons: Vec::new(),
+            warning: None,
+        };
+    }
+
+    let mut reasons = Vec::new();
+    if summary.total == 0 {
+        reasons.push("No test results recorded for the story.".to_string());
+    }
+    if summary.failed > 0 {
+        reasons.push(format!(
+            "{} test(s) are failing; acceptance is blocked.",
+            summary.failed
+        ));
+    }
+
+    let warning = if summary.failed > 1 {
+        Some(format!(
+            "Multiple tests are failing ({} failures).",
+            summary.failed
+        ))
+    } else {
+        None
+    };
+
+    AcceptanceDecision {
+        can_accept: false,
+        reasons,
+        warning,
+    }
+}
+
+/// Coverage report for a story.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CoverageReport {
+    pub current_percent: f64,
+    pub threshold_percent: f64,
+    pub baseline_percent: Option<f64>,
+}
+
+/// Evaluate acceptance with optional coverage data.
+pub fn evaluate_acceptance_with_coverage(
+    results: &StoryTestResults,
+    coverage: Option<&CoverageReport>,
+) -> AcceptanceDecision {
+    let mut decision = evaluate_acceptance(results);
+
+    if let Some(report) = coverage {
+        if report.current_percent < report.threshold_percent {
+            decision.can_accept = false;
+            decision.reasons.push(format!(
+                "Coverage below threshold ({:.1}% < {:.1}%).",
+                report.current_percent, report.threshold_percent
+            ));
+        }
+        if let Some(baseline) = report.baseline_percent
+            && report.current_percent < baseline {
+                decision.can_accept = false;
+                decision.reasons.push(format!(
+                    "Coverage regression: {:.1}% → {:.1}% (threshold: {:.1}%).",
+                    baseline, report.current_percent, report.threshold_percent
+                ));
+            }
+    }
+
+    decision
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // === evaluate_acceptance_with_coverage ===
+
+    #[test]
+    fn acceptance_blocked_by_coverage_below_threshold() {
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![TestCaseResult {
+                name: "int-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let coverage = CoverageReport {
+            current_percent: 55.0,
+            threshold_percent: 80.0,
+            baseline_percent: None,
+        };
+
+        let decision = evaluate_acceptance_with_coverage(&results, Some(&coverage));
+        assert!(!decision.can_accept);
+        assert!(decision.reasons.iter().any(|r| r.contains("Coverage below threshold")));
+    }
+
+    #[test]
+    fn acceptance_blocked_by_coverage_regression() {
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![TestCaseResult {
+                name: "int-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let coverage = CoverageReport {
+            current_percent: 82.0,
+            threshold_percent: 80.0,
+            baseline_percent: Some(90.0),
+        };
+
+        let decision = evaluate_acceptance_with_coverage(&results, Some(&coverage));
+        assert!(!decision.can_accept);
+        assert!(decision.reasons.iter().any(|r| r.contains("Coverage regression")));
+    }
+
+    #[test]
+    fn acceptance_passes_with_good_coverage() {
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![TestCaseResult {
+                name: "int-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let coverage = CoverageReport {
+            current_percent: 92.0,
+            threshold_percent: 80.0,
+            baseline_percent: Some(90.0),
+        };
+
+        let decision = evaluate_acceptance_with_coverage(&results, Some(&coverage));
+        assert!(decision.can_accept);
+    }
+
+    #[test]
+    fn acceptance_works_without_coverage_data() {
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![TestCaseResult {
+                name: "int-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let decision = evaluate_acceptance_with_coverage(&results, None);
+        assert!(decision.can_accept);
+    }
+
+    // === evaluate_acceptance ===
+
+    #[test]
+    fn warns_when_multiple_tests_fail() {
+        let results = StoryTestResults {
+            unit: vec![
+                TestCaseResult {
+                    name: "unit-1".to_string(),
+                    status: TestStatus::Fail,
+                    details: None,
+                },
+                TestCaseResult {
+                    name: "unit-2".to_string(),
+                    status: TestStatus::Fail,
+                    details: None,
+                },
+            ],
+            integration: vec![TestCaseResult {
+                name: "integration-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let decision = evaluate_acceptance(&results);
+
+        assert!(!decision.can_accept);
+        assert_eq!(
+            decision.warning,
+            Some("Multiple tests are failing (2 failures).".to_string())
+        );
+    }
+
+    #[test]
+    fn accepts_when_all_tests_pass() {
+        let results = StoryTestResults {
+            unit: vec![TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+            integration: vec![TestCaseResult {
+                name: "integration-1".to_string(),
+                status: TestStatus::Pass,
+                details: None,
+            }],
+        };
+
+        let decision = evaluate_acceptance(&results);
+        assert!(decision.can_accept);
+        assert!(decision.reasons.is_empty());
+        assert!(decision.warning.is_none());
+    }
+
+    #[test]
+    fn rejects_when_no_results_recorded() {
+        let results = StoryTestResults::default();
+        let decision = evaluate_acceptance(&results);
+        assert!(!decision.can_accept);
+        assert!(decision.reasons.iter().any(|r| r.contains("No test results")));
+    }
+
+    #[test]
+    fn rejects_with_single_failure_no_warning() {
+        let results = StoryTestResults {
+            unit: vec![
+                TestCaseResult {
+                    name: "unit-1".to_string(),
+                    status: TestStatus::Pass,
+                    details: None,
+                },
+                TestCaseResult {
+                    name: "unit-2".to_string(),
+                    status: TestStatus::Fail,
+                    details: None,
+                },
+            ],
+            integration: vec![],
+        };
+
+        let decision = evaluate_acceptance(&results);
+        assert!(!decision.can_accept);
+        assert!(decision.reasons.iter().any(|r| r.contains("failing")));
+        assert!(decision.warning.is_none());
+    }
+
+    // === record_test_results_validated ===
+
+    #[test]
+    fn rejects_recording_multiple_failures() {
+        let mut state = WorkflowState::default();
+        let unit = vec![
+            TestCaseResult {
+                name: "unit-1".to_string(),
+                status: TestStatus::Fail,
+                details: None,
+            },
+            TestCaseResult {
+                name: "unit-2".to_string(),
+                status: TestStatus::Fail,
+                details: None,
+            },
+        ];
+        let integration = vec![TestCaseResult {
+            name: "integration-1".to_string(),
+            status: TestStatus::Pass,
+            details: None,
+        }];
+
+        let result = state.record_test_results_validated("story-26".to_string(), unit, integration);
+
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn record_valid_results_stores_them() {
+        let mut state = WorkflowState::default();
+        let unit = vec![TestCaseResult {
+            name: "unit-1".to_string(),
+            status: TestStatus::Pass,
+            details: None,
+        }];
+        let integration = vec![TestCaseResult {
+            name: "int-1".to_string(),
+            status: TestStatus::Pass,
+            details: None,
+        }];
+
+        let result = state.record_test_results_validated(
+            "story-29".to_string(),
+            unit,
+            integration,
+        );
+        assert!(result.is_ok());
+        assert!(state.results.contains_key("story-29"));
+        assert_eq!(state.results["story-29"].unit.len(), 1);
+        assert_eq!(state.results["story-29"].integration.len(), 1);
+    }
+}
--- a/server/src/worktree.rs
+++ b/server/src/worktree.rs
@@ -0,0 +1,884 @@
+use crate::config::ProjectConfig;
+use crate::slog;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+/// Write a `.mcp.json` file in the given directory pointing to the MCP server
+/// at the given port.
+pub fn write_mcp_json(dir: &Path, port: u16) -> Result<(), String> {
+    let content = format!(
+        "{{\n  \"mcpServers\": {{\n    \"storkit\": {{\n      \"type\": \"http\",\n      \"url\": \"http://localhost:{port}/mcp\"\n    }}\n  }}\n}}\n"
+    );
+    std::fs::write(dir.join(".mcp.json"), content).map_err(|e| format!("Write .mcp.json: {e}"))
+}
+
+#[derive(Debug, Clone)]
+pub struct WorktreeInfo {
+    pub path: PathBuf,
+    pub branch: String,
+    pub base_branch: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct WorktreeListEntry {
+    pub story_id: String,
+    pub path: PathBuf,
+}
+
+/// Worktree path inside the project: `{project_root}/.storkit/worktrees/{story_id}`.
+pub fn worktree_path(project_root: &Path, story_id: &str) -> PathBuf {
+    project_root
+        .join(".storkit")
+        .join("worktrees")
+        .join(story_id)
+}
+
+fn branch_name(story_id: &str) -> String {
+    format!("feature/story-{story_id}")
+}
+
+/// Detect the current branch of the project root (the base branch worktrees fork from).
+fn detect_base_branch(project_root: &Path) -> String {
+    Command::new("git")
+        .args(["rev-parse", "--abbrev-ref", "HEAD"])
+        .current_dir(project_root)
+        .output()
+        .ok()
+        .and_then(|o| {
+            if o.status.success() {
+                Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
+            } else {
+                None
+            }
+        })
+        .unwrap_or_else(|| "master".to_string())
+}
+
+/// Create a git worktree for the given story.
+///
+/// - Creates the worktree at `{project_root}/.storkit/worktrees/{story_id}`
+///   on branch `feature/story-{story_id}`.
+/// - Writes `.mcp.json` in the worktree pointing to the MCP server at `port`.
+/// - Runs setup commands from the config for each component.
+/// - If the worktree/branch already exists, reuses rather than errors.
+pub async fn create_worktree(
+    project_root: &Path,
+    story_id: &str,
+    config: &ProjectConfig,
+    port: u16,
+) -> Result<WorktreeInfo, String> {
+    let wt_path = worktree_path(project_root, story_id);
+    let branch = branch_name(story_id);
+    let base_branch = detect_base_branch(project_root);
+    let root = project_root.to_path_buf();
+
+    // Already exists — reuse (ensure sparse checkout is configured)
+    if wt_path.exists() {
+        let wt_clone = wt_path.clone();
+        tokio::task::spawn_blocking(move || configure_sparse_checkout(&wt_clone))
+            .await
+            .map_err(|e| format!("spawn_blocking: {e}"))??;
+        write_mcp_json(&wt_path, port)?;
+        run_setup_commands(&wt_path, config).await;
+        return Ok(WorktreeInfo {
+            path: wt_path,
+            branch,
+            base_branch,
+        });
+    }
+
+    let wt = wt_path.clone();
+    let br = branch.clone();
+
+    tokio::task::spawn_blocking(move || create_worktree_sync(&root, &wt, &br))
+        .await
+        .map_err(|e| format!("spawn_blocking: {e}"))??;
+
+    write_mcp_json(&wt_path, port)?;
+    run_setup_commands(&wt_path, config).await;
+
+    Ok(WorktreeInfo {
+        path: wt_path,
+        branch,
+        base_branch,
+    })
+}
+
+fn create_worktree_sync(project_root: &Path, wt_path: &Path, branch: &str) -> Result<(), String> {
+    // Ensure the parent directory exists
+    if let Some(parent) = wt_path.parent() {
+        std::fs::create_dir_all(parent).map_err(|e| format!("Create worktree dir: {e}"))?;
+    }
+
+    // Prune stale worktree references (e.g. directories deleted externally)
+    let _ = Command::new("git")
+        .args(["worktree", "prune"])
+        .current_dir(project_root)
+        .output();
+
+    // Try to create branch. If it already exists that's fine.
+    let _ = Command::new("git")
+        .args(["branch", branch])
+        .current_dir(project_root)
+        .output();
+
+    // Create worktree
+    let output = Command::new("git")
+        .args(["worktree", "add", &wt_path.to_string_lossy(), branch])
+        .current_dir(project_root)
+        .output()
+        .map_err(|e| format!("git worktree add: {e}"))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        // If it says already checked out, that's fine
+        if stderr.contains("already checked out") || stderr.contains("already exists") {
+            return Ok(());
+        }
+        return Err(format!("git worktree add failed: {stderr}"));
+    }
+
+    // Enable sparse checkout to exclude pipeline files from the worktree.
+    // This prevents .storkit/work/ changes from ending up in feature branches,
+    // which cause rename/delete merge conflicts when merging back to master.
+    configure_sparse_checkout(wt_path)?;
+
+    Ok(())
+}
+
+/// Placeholder for worktree isolation of `.storkit/work/`.
+///
+/// Previous approaches (sparse checkout, skip-worktree) all leaked state
+/// from worktrees back to the main checkout's config/index. For now this
+/// is a no-op — merge conflicts from pipeline file moves are handled at
+/// merge time by the mergemaster (squash merge ignores work/ diffs).
+fn configure_sparse_checkout(_wt_path: &Path) -> Result<(), String> {
+    Ok(())
+}
+
+/// Remove the git worktree for a story if it exists, deriving the path and
+/// branch name deterministically from `project_root` and `story_id`.
+///
+/// Returns `Ok(())` if the worktree was removed or did not exist.
+/// Removal is best-effort: `remove_worktree_sync` logs failures internally
+/// but always returns `Ok`.
+pub fn prune_worktree_sync(project_root: &Path, story_id: &str) -> Result<(), String> {
+    let wt_path = worktree_path(project_root, story_id);
+    if !wt_path.exists() {
+        return Ok(());
+    }
+    let branch = branch_name(story_id);
+    remove_worktree_sync(project_root, &wt_path, &branch)
+}
+
+/// Remove a git worktree and its branch.
+pub async fn remove_worktree(
+    project_root: &Path,
+    info: &WorktreeInfo,
+    config: &ProjectConfig,
+) -> Result<(), String> {
+    run_teardown_commands(&info.path, config).await?;
+
+    let root = project_root.to_path_buf();
+    let wt_path = info.path.clone();
+    let branch = info.branch.clone();
+
+    tokio::task::spawn_blocking(move || remove_worktree_sync(&root, &wt_path, &branch))
+        .await
+        .map_err(|e| format!("spawn_blocking: {e}"))?
+}
+
+/// Remove a git worktree by story ID, deriving the path and branch deterministically.
+pub async fn remove_worktree_by_story_id(
+    project_root: &Path,
+    story_id: &str,
+    config: &ProjectConfig,
+) -> Result<(), String> {
+    let path = worktree_path(project_root, story_id);
+    if !path.exists() {
+        return Err(format!("Worktree not found for story: {story_id}"));
+    }
+    let branch = branch_name(story_id);
+    let base_branch = detect_base_branch(project_root);
+    let info = WorktreeInfo {
+        path,
+        branch,
+        base_branch,
+    };
+    remove_worktree(project_root, &info, config).await
+}
+
+/// List all worktrees under `{project_root}/.storkit/worktrees/`.
+/// Find the worktree path for a given story ID, if it exists.
+pub fn find_worktree_path(project_root: &Path, story_id: &str) -> Option<PathBuf> {
+    let wt_path = project_root
+        .join(".storkit")
+        .join("worktrees")
+        .join(story_id);
+    if wt_path.is_dir() {
+        Some(wt_path)
+    } else {
+        None
+    }
+}
+
+pub fn list_worktrees(project_root: &Path) -> Result<Vec<WorktreeListEntry>, String> {
+    let worktrees_dir = project_root.join(".storkit").join("worktrees");
+    if !worktrees_dir.exists() {
+        return Ok(Vec::new());
+    }
+    let mut entries = Vec::new();
+    for entry in std::fs::read_dir(&worktrees_dir).map_err(|e| format!("list worktrees: {e}"))? {
+        let entry = entry.map_err(|e| format!("list worktrees entry: {e}"))?;
+        let path = entry.path();
+        if path.is_dir() {
+            let story_id = path
+                .file_name()
+                .map(|n| n.to_string_lossy().to_string())
+                .unwrap_or_default();
+            entries.push(WorktreeListEntry { story_id, path });
+        }
+    }
+    entries.sort_by(|a, b| a.story_id.cmp(&b.story_id));
+    Ok(entries)
+}
+
+fn remove_worktree_sync(project_root: &Path, wt_path: &Path, branch: &str) -> Result<(), String> {
+    // Remove worktree
+    let output = Command::new("git")
+        .args(["worktree", "remove", "--force", &wt_path.to_string_lossy()])
+        .current_dir(project_root)
+        .output()
+        .map_err(|e| format!("git worktree remove: {e}"))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        if stderr.contains("not a working tree") {
+            // Orphaned directory: git doesn't recognise it as a worktree.
+            // Remove the directory directly and prune stale git metadata.
+            slog!(
+                "[worktree] orphaned worktree detected, removing directory: {}",
+                wt_path.display()
+            );
+            if let Err(e) = std::fs::remove_dir_all(wt_path) {
+                slog!("[worktree] failed to remove orphaned directory: {e}");
+            }
+            let _ = Command::new("git")
+                .args(["worktree", "prune"])
+                .current_dir(project_root)
+                .output();
+        } else {
+            slog!("[worktree] remove warning: {stderr}");
+        }
+    }
+
+    // Delete branch (best effort)
+    let _ = Command::new("git")
+        .args(["branch", "-d", branch])
+        .current_dir(project_root)
+        .output();
+
+    Ok(())
+}
+
+async fn run_setup_commands(wt_path: &Path, config: &ProjectConfig) {
+    for component in &config.component {
+        let cmd_dir = wt_path.join(&component.path);
+        for cmd in &component.setup {
+            if let Err(e) = run_shell_command(cmd, &cmd_dir).await {
+                slog!("[worktree] setup warning for {}: {e}", component.name);
+            }
+        }
+    }
+}
+
+async fn run_teardown_commands(wt_path: &Path, config: &ProjectConfig) -> Result<(), String> {
+    for component in &config.component {
+        let cmd_dir = wt_path.join(&component.path);
+        for cmd in &component.teardown {
+            // Best effort — don't fail teardown
+            if let Err(e) = run_shell_command(cmd, &cmd_dir).await {
+                slog!("[worktree] teardown warning for {}: {e}", component.name);
+            }
+        }
+    }
+    Ok(())
+}
+
+async fn run_shell_command(cmd: &str, cwd: &Path) -> Result<(), String> {
+    let cmd = cmd.to_string();
+    let cwd = cwd.to_path_buf();
+
+    tokio::task::spawn_blocking(move || {
+        slog!("[worktree] Running: {cmd} in {}", cwd.display());
+        let output = Command::new("sh")
+            .args(["-c", &cmd])
+            .current_dir(&cwd)
+            .output()
+            .map_err(|e| format!("Run '{cmd}': {e}"))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(format!("Command '{cmd}' failed: {stderr}"));
+        }
+        Ok(())
+    })
+    .await
+    .map_err(|e| format!("spawn_blocking: {e}"))?
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::{ComponentConfig, WatcherConfig};
+    use std::fs;
+    use tempfile::TempDir;
+
+    /// Initialise a bare-minimum git repo so worktree operations work.
+    fn init_git_repo(dir: &Path) {
+        Command::new("git")
+            .args(["init"])
+            .current_dir(dir)
+            .output()
+            .expect("git init");
+        Command::new("git")
+            .args(["commit", "--allow-empty", "-m", "init"])
+            .current_dir(dir)
+            .output()
+            .expect("git commit");
+    }
+
+    #[test]
+    fn write_mcp_json_uses_given_port() {
+        let tmp = TempDir::new().unwrap();
+        write_mcp_json(tmp.path(), 4242).unwrap();
+        let content = std::fs::read_to_string(tmp.path().join(".mcp.json")).unwrap();
+        assert!(content.contains("http://localhost:4242/mcp"));
+    }
+
+    #[test]
+    fn write_mcp_json_default_port() {
+        let tmp = TempDir::new().unwrap();
+        write_mcp_json(tmp.path(), 3001).unwrap();
+        let content = std::fs::read_to_string(tmp.path().join(".mcp.json")).unwrap();
+        assert!(content.contains("http://localhost:3001/mcp"));
+    }
+
+    #[test]
+    fn worktree_path_is_inside_project() {
+        let project_root = Path::new("/home/user/my-project");
+        let path = worktree_path(project_root, "42_my_story");
+        assert_eq!(
+            path,
+            Path::new("/home/user/my-project/.storkit/worktrees/42_my_story")
+        );
+    }
+
+    #[test]
+    fn list_worktrees_empty_when_no_dir() {
+        let tmp = TempDir::new().unwrap();
+        let entries = list_worktrees(tmp.path()).unwrap();
+        assert!(entries.is_empty());
+    }
+
+    #[test]
+    fn list_worktrees_returns_subdirs() {
+        let tmp = TempDir::new().unwrap();
+        let worktrees_dir = tmp.path().join(".storkit").join("worktrees");
+        fs::create_dir_all(worktrees_dir.join("42_story_a")).unwrap();
+        fs::create_dir_all(worktrees_dir.join("43_story_b")).unwrap();
+        // A file (not dir) — should be ignored
+        fs::write(worktrees_dir.join("readme.txt"), "").unwrap();
+
+        let entries = list_worktrees(tmp.path()).unwrap();
+        assert_eq!(entries.len(), 2);
+        assert_eq!(entries[0].story_id, "42_story_a");
+        assert_eq!(entries[1].story_id, "43_story_b");
+    }
+
+    #[test]
+    fn create_worktree_after_stale_reference() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let wt_path = tmp.path().join("my-worktree");
+        let branch = "feature/test-stale";
+
+        // First creation should succeed
+        create_worktree_sync(&project_root, &wt_path, branch).unwrap();
+        assert!(wt_path.exists());
+
+        // Simulate external deletion (e.g., rm -rf by another agent)
+        fs::remove_dir_all(&wt_path).unwrap();
+        assert!(!wt_path.exists());
+
+        // Second creation should succeed despite stale git reference.
+        // Without `git worktree prune`, this fails with "already checked out"
+        // or "already exists".
+        let result = create_worktree_sync(&project_root, &wt_path, branch);
+        assert!(
+            result.is_ok(),
+            "Expected worktree creation to succeed after stale reference, got: {:?}",
+            result.err()
+        );
+        assert!(wt_path.exists());
+    }
+
+    #[test]
+    fn worktree_has_all_files_including_work() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        // Create a tracked file under .storkit/work/ on the initial branch
+        let work_dir = project_root.join(".storkit").join("work");
+        fs::create_dir_all(&work_dir).unwrap();
+        fs::write(work_dir.join("test_story.md"), "# Test").unwrap();
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(&project_root)
+            .output()
+            .unwrap();
+        Command::new("git")
+            .args(["commit", "-m", "add work file"])
+            .current_dir(&project_root)
+            .output()
+            .unwrap();
+
+        let wt_path = tmp.path().join("my-worktree");
+        let branch = "feature/test-sparse";
+        create_worktree_sync(&project_root, &wt_path, branch).unwrap();
+
+        // Worktree should have all files including .storkit/work/
+        assert!(wt_path.join(".storkit").join("work").exists());
+        assert!(wt_path.join(".git").exists());
+
+        // Main checkout must NOT be affected by worktree creation.
+        assert!(
+            work_dir.exists(),
+            ".storkit/work/ must still exist in the main checkout"
+        );
+    }
+
+    #[test]
+    fn branch_name_format() {
+        assert_eq!(branch_name("42_my_story"), "feature/story-42_my_story");
+        assert_eq!(branch_name("1_test"), "feature/story-1_test");
+    }
+
+    #[test]
+    fn detect_base_branch_returns_branch_in_git_repo() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let branch = detect_base_branch(&project_root);
+        assert!(!branch.is_empty());
+    }
+
+    #[test]
+    fn detect_base_branch_falls_back_to_master_for_non_git_dir() {
+        let tmp = TempDir::new().unwrap();
+        let branch = detect_base_branch(tmp.path());
+        assert_eq!(branch, "master");
+    }
+
+    #[test]
+    fn configure_sparse_checkout_is_noop() {
+        let tmp = TempDir::new().unwrap();
+        assert!(configure_sparse_checkout(tmp.path()).is_ok());
+    }
+
+    #[tokio::test]
+    async fn run_shell_command_succeeds_for_echo() {
+        let tmp = TempDir::new().unwrap();
+        let result = run_shell_command("echo hello", tmp.path()).await;
+        assert!(result.is_ok(), "Expected success: {:?}", result.err());
+    }
+
+    #[tokio::test]
+    async fn run_shell_command_fails_for_nonzero_exit() {
+        let tmp = TempDir::new().unwrap();
+        let result = run_shell_command("exit 1", tmp.path()).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().contains("failed"));
+    }
+
+    #[tokio::test]
+    async fn run_setup_commands_no_components_succeeds() {
+        let tmp = TempDir::new().unwrap();
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Should complete without panic
+        run_setup_commands(tmp.path(), &config).await;
+    }
+
+    #[tokio::test]
+    async fn run_setup_commands_runs_each_command_successfully() {
+        let tmp = TempDir::new().unwrap();
+        let config = ProjectConfig {
+            component: vec![ComponentConfig {
+                name: "test".to_string(),
+                path: ".".to_string(),
+                setup: vec!["echo setup_ok".to_string()],
+                teardown: vec![],
+            }],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Should complete without panic
+        run_setup_commands(tmp.path(), &config).await;
+    }
+
+    #[tokio::test]
+    async fn run_setup_commands_ignores_failures() {
+        let tmp = TempDir::new().unwrap();
+        let config = ProjectConfig {
+            component: vec![ComponentConfig {
+                name: "test".to_string(),
+                path: ".".to_string(),
+                setup: vec!["exit 1".to_string()],
+                teardown: vec![],
+            }],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Setup command failures are non-fatal — should not panic or propagate
+        run_setup_commands(tmp.path(), &config).await;
+    }
+
+    #[tokio::test]
+    async fn run_teardown_commands_ignores_failures() {
+        let tmp = TempDir::new().unwrap();
+        let config = ProjectConfig {
+            component: vec![ComponentConfig {
+                name: "test".to_string(),
+                path: ".".to_string(),
+                setup: vec![],
+                teardown: vec!["exit 1".to_string()],
+            }],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Teardown failures are best-effort — should not propagate
+        assert!(run_teardown_commands(tmp.path(), &config).await.is_ok());
+    }
+
+    #[tokio::test]
+    async fn create_worktree_fresh_creates_dir_and_mcp_json() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        let info = create_worktree(&project_root, "42_fresh_test", &config, 3001)
+            .await
+            .unwrap();
+
+        assert!(info.path.exists());
+        assert!(info.path.join(".mcp.json").exists());
+        let mcp = fs::read_to_string(info.path.join(".mcp.json")).unwrap();
+        assert!(mcp.contains("3001"));
+        assert_eq!(info.branch, "feature/story-42_fresh_test");
+    }
+
+    #[tokio::test]
+    async fn create_worktree_reuses_existing_path_and_updates_port() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // First creation
+        let _info1 = create_worktree(&project_root, "43_reuse_test", &config, 3001)
+            .await
+            .unwrap();
+        // Second call — worktree already exists, reuse path, update port
+        let info2 = create_worktree(&project_root, "43_reuse_test", &config, 3002)
+            .await
+            .unwrap();
+
+        let mcp = fs::read_to_string(info2.path.join(".mcp.json")).unwrap();
+        assert!(
+            mcp.contains("3002"),
+            "MCP json should be updated to new port"
+        );
+    }
+
+    #[test]
+    fn remove_worktree_sync_removes_orphaned_directory() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        // Create a directory that looks like a worktree but isn't registered with git
+        let wt_path = project_root
+            .join(".storkit")
+            .join("worktrees")
+            .join("orphan");
+        fs::create_dir_all(&wt_path).unwrap();
+        fs::write(wt_path.join("some_file.txt"), "stale").unwrap();
+        assert!(wt_path.exists());
+
+        // git worktree remove will fail with "not a working tree",
+        // but the fallback should rm -rf the directory
+        remove_worktree_sync(&project_root, &wt_path, "feature/orphan").unwrap();
+        assert!(!wt_path.exists());
+    }
+
+    #[test]
+    fn remove_worktree_sync_cleans_up_directory() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let wt_path = project_root
+            .join(".storkit")
+            .join("worktrees")
+            .join("test_rm");
+        create_worktree_sync(&project_root, &wt_path, "feature/test-rm").unwrap();
+        assert!(wt_path.exists());
+
+        remove_worktree_sync(&project_root, &wt_path, "feature/test-rm").unwrap();
+        assert!(!wt_path.exists());
+    }
+
+    #[tokio::test]
+    async fn remove_worktree_by_story_id_returns_err_when_not_found() {
+        let tmp = TempDir::new().unwrap();
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+
+        let result = remove_worktree_by_story_id(tmp.path(), "99_nonexistent", &config).await;
+        assert!(result.is_err());
+        assert!(
+            result
+                .unwrap_err()
+                .contains("Worktree not found for story: 99_nonexistent")
+        );
+    }
+
+    #[tokio::test]
+    async fn remove_worktree_by_story_id_removes_existing_worktree() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        create_worktree(&project_root, "88_remove_by_id", &config, 3001)
+            .await
+            .unwrap();
+
+        let result = remove_worktree_by_story_id(&project_root, "88_remove_by_id", &config).await;
+        assert!(
+            result.is_ok(),
+            "Expected removal to succeed: {:?}",
+            result.err()
+        );
+    }
+
+    // ── prune_worktree_sync ──────────────────────────────────────────────────
+
+    #[test]
+    fn prune_worktree_sync_noop_when_no_worktree_dir() {
+        let tmp = TempDir::new().unwrap();
+        // No worktree directory exists — must return Ok without touching git.
+        let result = prune_worktree_sync(tmp.path(), "42_story_nonexistent");
+        assert!(
+            result.is_ok(),
+            "Expected Ok when worktree dir absent: {:?}",
+            result.err()
+        );
+    }
+
+    #[test]
+    fn prune_worktree_sync_removes_real_worktree() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let story_id = "55_story_prune_test";
+        let wt_path = worktree_path(&project_root, story_id);
+        create_worktree_sync(
+            &project_root,
+            &wt_path,
+            &format!("feature/story-{story_id}"),
+        )
+        .unwrap();
+        assert!(wt_path.exists(), "worktree dir should exist before prune");
+
+        let result = prune_worktree_sync(&project_root, story_id);
+        assert!(
+            result.is_ok(),
+            "prune_worktree_sync must return Ok: {:?}",
+            result.err()
+        );
+        assert!(!wt_path.exists(), "worktree dir should be gone after prune");
+    }
+
+    #[tokio::test]
+    async fn create_worktree_succeeds_despite_setup_failure() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let config = ProjectConfig {
+            component: vec![ComponentConfig {
+                name: "broken-build".to_string(),
+                path: ".".to_string(),
+                setup: vec!["exit 1".to_string()],
+                teardown: vec![],
+            }],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Even though setup commands fail, create_worktree must succeed
+        // so the agent can start and fix the problem itself.
+        let result = create_worktree(&project_root, "172_setup_fail", &config, 3001).await;
+        assert!(
+            result.is_ok(),
+            "create_worktree must succeed even if setup commands fail: {:?}",
+            result.err()
+        );
+        let info = result.unwrap();
+        assert!(info.path.exists());
+        assert!(info.path.join(".mcp.json").exists());
+    }
+
+    #[tokio::test]
+    async fn create_worktree_reuse_succeeds_despite_setup_failure() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let empty_config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // First creation — no setup commands, should succeed
+        create_worktree(&project_root, "173_reuse_fail", &empty_config, 3001)
+            .await
+            .unwrap();
+
+        let failing_config = ProjectConfig {
+            component: vec![ComponentConfig {
+                name: "broken-build".to_string(),
+                path: ".".to_string(),
+                setup: vec!["exit 1".to_string()],
+                teardown: vec![],
+            }],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        // Second call — worktree exists, setup commands fail, must still succeed
+        let result = create_worktree(&project_root, "173_reuse_fail", &failing_config, 3002).await;
+        assert!(
+            result.is_ok(),
+            "create_worktree reuse must succeed even if setup commands fail: {:?}",
+            result.err()
+        );
+    }
+
+    #[tokio::test]
+    async fn remove_worktree_async_removes_directory() {
+        let tmp = TempDir::new().unwrap();
+        let project_root = tmp.path().join("my-project");
+        fs::create_dir_all(&project_root).unwrap();
+        init_git_repo(&project_root);
+
+        let config = ProjectConfig {
+            component: vec![],
+            agent: vec![],
+            watcher: WatcherConfig::default(),
+            default_qa: "server".to_string(),
+            default_coder_model: None,
+            max_coders: None,
+            max_retries: 2,
+        };
+        let info = create_worktree(&project_root, "77_remove_async", &config, 3001)
+            .await
+            .unwrap();
+
+        let path = info.path.clone();
+        assert!(path.exists());
+        remove_worktree(&project_root, &info, &config)
+            .await
+            .unwrap();
+        assert!(!path.exists());
+    }
+}