121 Commits

Author SHA1 Message Date
Timmy
fcc2b9c3eb Bump version to 0.5.0 2026-03-23 13:11:57 +00:00
dave
0c4239501a storkit: done 370_bug_scaffold_does_not_create_mcp_json_in_project_root 2026-03-23 13:00:46 +00:00
dave
13b6ecd958 storkit: merge 370_bug_scaffold_does_not_create_mcp_json_in_project_root 2026-03-23 13:00:43 +00:00
dave
1816a94617 storkit: merge 369_bug_cli_treats_help_and_version_as_project_paths 2026-03-23 12:55:58 +00:00
dave
56d3373e69 Revert gVisor (runsc) from Docker setup
gVisor is incompatible with OrbStack bind mounts on macOS — writes to
/mnt/mac are blocked by the gVisor filesystem sandbox. Removing
runtime: runsc from docker-compose.yml, the gVisor setup docs from
README.md, and the runsc assertion test from rebuild.rs.

The existing Docker hardening (read-only root, cap_drop ALL,
no-new-privileges, resource limits) remains in place.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-23 12:53:10 +00:00
dave
efdb0c5814 storkit: create 370_bug_scaffold_does_not_create_mcp_json_in_project_root 2026-03-23 12:43:48 +00:00
dave
b8365275d8 storkit: create 370_bug_scaffold_does_not_create_mcp_json_in_project_root 2026-03-23 12:43:15 +00:00
dave
6ddfd29927 storkit: create 369_bug_cli_treats_help_and_version_as_project_paths 2026-03-23 12:43:04 +00:00
dave
01b157a2e4 storkit: create 369_bug_cli_treats_help_and_version_as_project_paths 2026-03-23 12:42:04 +00:00
dave
99a59d7ad1 storkit: create 369_bug_cli_treats_help_and_version_as_project_paths 2026-03-23 12:41:27 +00:00
dave
eb8adb6225 storkit: create 369_bug_cli_treats_help_and_version_as_project_paths 2026-03-23 12:39:15 +00:00
dave
2262f2ca6b storkit: create 368_story_web_ui_oauth_flow_for_claude_authentication 2026-03-23 11:59:43 +00:00
dave
2bb36d0e68 storkit: accept 360_story_run_storkit_container_under_gvisor_runsc_runtime 2026-03-23 11:48:32 +00:00
dave
86102f8ad6 storkit: done 360_story_run_storkit_container_under_gvisor_runsc_runtime 2026-03-23 11:45:47 +00:00
dave
edf47601c4 storkit: merge 360_story_run_storkit_container_under_gvisor_runsc_runtime 2026-03-23 11:45:43 +00:00
dave
b606e1de92 storkit: accept 367_story_rename_bot_whatsup_command_to_status 2026-03-23 02:38:20 +00:00
dave
0d5f0de876 storkit: accept 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 23:29:13 +00:00
dave
bb41f3951c storkit: accept 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild 2026-03-22 23:11:51 +00:00
dave
e3d7931f17 storkit: done 367_story_rename_bot_whatsup_command_to_status 2026-03-22 22:40:47 +00:00
dave
87b5648123 storkit: merge 367_story_rename_bot_whatsup_command_to_status 2026-03-22 22:40:43 +00:00
dave
506bdd4df8 storkit: accept 363_story_mcp_tool_for_whatsup_story_triage 2026-03-22 21:44:39 +00:00
dave
a9bec3c29e storkit: accept 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-22 21:43:38 +00:00
dave
69936f457f storkit: done 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 19:28:48 +00:00
dave
24dd3d9fa9 storkit: merge 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 19:28:45 +00:00
dave
bc45a91b3e Fix frontend tests failing in Docker due to wrong-platform rollup binary
The bind-mounted node_modules from macOS contains darwin-arm64 native
binaries which don't work in the Linux container. Run npm install on
container startup to get the correct platform binaries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 19:28:21 +00:00
dave
db7c11508e storkit: done 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild 2026-03-22 19:11:33 +00:00
dave
47173e0d3a storkit: merge 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild 2026-03-22 19:11:29 +00:00
dave
f610ef6046 Restore codebase deleted by bad auto-commit e4227cf
Commit e4227cf (a story creation auto-commit) erroneously deleted 175
files from master's tree, likely due to a race condition between
concurrent git operations. This commit re-adds all files from the
working directory.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 19:07:07 +00:00
dave
89f776b978 storkit: create 367_story_rename_bot_whatsup_command_to_status 2026-03-22 19:01:22 +00:00
dave
e4227cf673 storkit: create 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 18:19:23 +00:00
dave
f346712dd1 storkit: create 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 18:19:23 +00:00
dave
f9419e5ea7 Fix worktree cleanup looping on orphaned directories
When git worktree remove fails with "not a working tree", fall back to
removing the directory directly and run git worktree prune to clean
stale metadata. Fixes bug 364.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 18:18:08 +00:00
dave
c32bab03a4 storkit: delete 364_bug_worktree_cleanup_loops_on_orphaned_directories 2026-03-22 18:17:43 +00:00
dave
ea23042698 storkit: create 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild 2026-03-22 18:17:04 +00:00
dave
3825b03fda storkit: create 365_story_surface_api_rate_limit_warnings_in_chat 2026-03-22 18:12:37 +00:00
dave
d6cfd18e6a storkit: create 364_bug_worktree_cleanup_loops_on_orphaned_directories 2026-03-22 18:07:37 +00:00
dave
01ac8a8345 Remove empty serve submodule reference
Blank folder, no longer needed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 18:06:23 +00:00
dave
153f8812d7 Remove obsolete TIMMY_BRIEFING.md
One-time briefing doc from spike 329, no longer needed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 18:06:15 +00:00
dave
01c7c39872 Update .ignore to use renamed storkit paths
.story_kit/ and .story_kit_port were stale references from before the
rename to storkit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 18:06:07 +00:00
dave
eec8f3ac15 storkit: delete 364_bug_test_suite_incompatible_with_hardened_docker_environment 2026-03-22 17:54:48 +00:00
dave
28626ab80a storkit: done 363_story_mcp_tool_for_whatsup_story_triage 2026-03-22 17:47:08 +00:00
dave
4262af7faa storkit: merge 363_story_mcp_tool_for_whatsup_story_triage 2026-03-22 17:47:05 +00:00
dave
628b60ad15 storkit: done 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-22 17:43:01 +00:00
dave
c504738949 storkit: merge 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-22 17:42:57 +00:00
Timmy
0d5b9724c1 Make ANTHROPIC_API_KEY optional in docker-compose
When unset, Claude Code falls back to OAuth credentials from
`claude login`, allowing agents to run on a Max subscription
instead of prepaid API credits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 17:31:09 +00:00
Timmy
b189ca845c Pre-create target dirs with storkit ownership in Dockerfile
Docker named volumes inherit directory ownership when first created.
By creating /workspace/target and /app/target as storkit-owned before
the USER directive, the volumes will be writable by the storkit user.
Without this, cargo build/test/clippy all fail with Permission Denied.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 16:59:32 +00:00
Dave
8094d32cbb revert: remove Docker workarounds now that container is fixed
Reverts workarounds added by the 361 agent when the hardened Docker
container broke the test suite:

- gates.rs: restore tempfile::tempdir() (was changed to tempdir_in
  CARGO_MANIFEST_DIR to avoid noexec /tmp; noexec is now removed)
- pool/mod.rs: restore ps -p <pid> check in process_is_running (was
  changed to /proc/<pid> existence check; procps is now installed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 14:21:34 +00:00
Dave
1c2824fa31 fix: harden Docker environment so tests pass inside container
- Add procps to runtime stage so `ps` is available for process management
- Remove noexec from /tmp and /home/storkit tmpfs mounts so test scripts
  can be executed from tempdir
- Update coder agent system_prompt to run clippy --all-targets --all-features
  matching what the server acceptance gate actually runs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 14:21:27 +00:00
Dave
af72f593e8 storkit: create 364_bug_test_suite_incompatible_with_hardened_docker_environment 2026-03-22 13:50:14 +00:00
Dave
ac8112bf0b storkit: accept 361_story_remove_deprecated_manual_qa_front_matter_field 2026-03-22 01:48:51 +00:00
Dave
9bf4b65707 storkit: accept 359_story_harden_docker_setup_for_security 2026-03-22 00:23:40 +00:00
Dave
240ebf055a storkit: accept 329_spike_evaluate_docker_orbstack_for_agent_isolation_and_resource_limiting 2026-03-22 00:22:39 +00:00
Dave
293a2fcfb6 storkit: done 361_story_remove_deprecated_manual_qa_front_matter_field 2026-03-21 21:51:31 +00:00
Dave
4ccc3d9149 storkit: merge 361_story_remove_deprecated_manual_qa_front_matter_field 2026-03-21 21:51:27 +00:00
Timmy
eef0f3ee7d Add clippy to Docker image
Acceptance gates run cargo clippy but the component wasn't installed
in the build stage. Agents were doing real work then failing every
gate check because clippy wasn't available.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 21:43:09 +00:00
Dave
9dc7c21b05 storkit: create 363_story_mcp_tool_for_whatsup_story_triage 2026-03-21 21:29:42 +00:00
Dave
76369de391 storkit: create 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-21 21:26:02 +00:00
Dave
b747cc0fab storkit: create 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-21 21:25:36 +00:00
Dave
f74a0425a9 storkit: create 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-21 21:22:52 +00:00
Dave
b0b21765d9 storkit: create 362_story_bot_whatsup_command_shows_in_progress_work_summary 2026-03-21 21:22:16 +00:00
Timmy
9075bc1a84 Fix tmpfs ownership so storkit user can write to home dir
The tmpfs at /home/storkit defaulted to root ownership (mode=755),
so Claude Code couldn't write ~/.claude.json or ~/.cache/. Set
uid=999,gid=999 to match the storkit user.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 21:19:16 +00:00
Timmy
9f873dc839 Fix Claude Code hanging in hardened container
Claude Code writes to ~/.claude.json, ~/.cache/, and ~/.npm/ which
failed silently on the read-only root filesystem. Add tmpfs at
/home/storkit so the home dir is writable (the claude-state volume
overlays on top for persistent .claude/ data).

Also fix .dockerignore: use **/target/ to match nested target dirs,
add .storkit/logs/ and **/node_modules/ to prevent multi-GB build
context transfers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 21:16:24 +00:00
Dave
3774c3dca7 storkit: done 359_story_harden_docker_setup_for_security 2026-03-21 20:57:07 +00:00
Timmy
cd095f9a99 Fix rebuild_and_restart in Docker by using cargo output path
Use the known cargo build output path instead of current_exe() when
re-execing after a rebuild. In Docker, the running binary lives at
/usr/local/bin/storkit (read-only) while cargo writes the new binary
to /app/target/release/storkit (a writable volume), so current_exe()
would just restart the old binary.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 20:42:38 +00:00
Timmy
fe0f560b58 Harden Docker container security
Run as non-root user (fixes Claude Code refusing bypassPermissions as
root, which caused all agent spawns to exit instantly with no session).
Add read-only root filesystem, drop all capabilities, set
no-new-privileges, bind port to localhost only, and require
GIT_USER_NAME/GIT_USER_EMAIL env vars at startup.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 20:33:50 +00:00
Timmy
0416bf343c storkit: delete 57_story_live_test_gate_updates 2026-03-21 20:23:45 +00:00
Timmy
c3e4f85903 storkit: done 329_spike_evaluate_docker_orbstack_for_agent_isolation_and_resource_limiting 2026-03-21 20:22:02 +00:00
Timmy
52d9d0f9ce storkit: done 329_spike_evaluate_docker_orbstack_for_agent_isolation_and_resource_limiting 2026-03-21 20:20:41 +00:00
Timmy
996ba82682 storkit: create 329_spike_evaluate_docker_orbstack_for_agent_isolation_and_resource_limiting 2026-03-21 20:19:56 +00:00
Timmy
1f4152c894 storkit: create 361_story_remove_deprecated_manual_qa_front_matter_field 2026-03-21 19:59:52 +00:00
Timmy
02b481ee4c storkit: create 359_story_harden_docker_setup_for_security 2026-03-21 19:48:44 +00:00
Timmy
9c339c118f storkit: create 359_story_harden_docker_setup_for_security 2026-03-21 19:45:26 +00:00
Timmy
4790aac286 storkit: create 359_story_harden_docker_setup_for_security and 360_story_run_storkit_container_under_gvisor_runsc_runtime 2026-03-21 19:43:48 +00:00
Dave
b2d92d6059 storkit: accept 90_story_fetch_real_context_window_size_from_anthropic_models_api 2026-03-21 15:58:15 +00:00
Dave
71887af2d3 storkit: accept 358_story_remove_makefile_and_make_script_release_the_single_entry_point_for_releases 2026-03-21 15:55:15 +00:00
Dave
5db9965962 storkit: done 358_story_remove_makefile_and_make_script_release_the_single_entry_point_for_releases 2026-03-21 12:04:11 +00:00
Dave
e109e1ba5c storkit: merge 358_story_remove_makefile_and_make_script_release_the_single_entry_point_for_releases 2026-03-21 12:04:08 +00:00
Dave
3554594d8d storkit: done 90_story_fetch_real_context_window_size_from_anthropic_models_api 2026-03-21 12:01:24 +00:00
Dave
a6c8cf0daf storkit: merge 90_story_fetch_real_context_window_size_from_anthropic_models_api 2026-03-21 12:01:21 +00:00
Dave
30a56d03e5 storkit: create 358_story_remove_makefile_and_make_script_release_the_single_entry_point_for_releases 2026-03-21 11:55:13 +00:00
Dave
4734bd943f Fixing release 2026-03-21 11:52:18 +00:00
Dave
a1dd88579b storkit: accept 344_story_chatgpt_agent_backend_via_openai_api 2026-03-21 03:40:23 +00:00
Dave
759a289894 storkit: done 344_story_chatgpt_agent_backend_via_openai_api 2026-03-20 23:52:24 +00:00
Dave
be3b5b0b60 storkit: merge 344_story_chatgpt_agent_backend_via_openai_api 2026-03-20 23:52:21 +00:00
Dave
fbf391684a storkit: create 344_story_chatgpt_agent_backend_via_openai_api 2026-03-20 23:39:34 +00:00
Dave
65546a42b7 storkit: accept 343_refactor_abstract_agent_runtime_to_support_non_claude_code_backends 2026-03-20 22:58:45 +00:00
Dave
4e014d45c3 storkit: accept 345_story_gemini_agent_backend_via_google_ai_api 2026-03-20 22:54:45 +00:00
Dave
4f39de437f storkit: done 345_story_gemini_agent_backend_via_google_ai_api 2026-03-20 22:53:44 +00:00
Dave
79ee6eb0dc storkit: merge 345_story_gemini_agent_backend_via_google_ai_api 2026-03-20 22:53:41 +00:00
Dave
c930c537bc storkit: accept 357_story_bot_assign_command_to_pre_assign_a_model_to_a_story 2026-03-20 22:41:00 +00:00
Dave
f129a38704 storkit: done 343_refactor_abstract_agent_runtime_to_support_non_claude_code_backends 2026-03-20 22:07:52 +00:00
Dave
4344081b54 storkit: merge 343_refactor_abstract_agent_runtime_to_support_non_claude_code_backends 2026-03-20 22:07:49 +00:00
Dave
52c5344ce5 storkit: accept 350_story_mcp_tool_for_code_definitions_lookup 2026-03-20 19:30:08 +00:00
Dave
35bd196790 storkit: accept 356_story_start_command_should_say_queued_not_error_when_all_coders_are_busy 2026-03-20 19:09:02 +00:00
Dave
65c8dc19d6 storkit: create 329_spike_evaluate_docker_orbstack_for_agent_isolation_and_resource_limiting 2026-03-20 19:05:18 +00:00
Dave
645a141d2d storkit: create 343_refactor_abstract_agent_runtime_to_support_non_claude_code_backends 2026-03-20 18:57:52 +00:00
Dave
11d1980920 storkit: done 357_story_bot_assign_command_to_pre_assign_a_model_to_a_story 2026-03-20 18:51:48 +00:00
Dave
83879cfa9e storkit: merge 357_story_bot_assign_command_to_pre_assign_a_model_to_a_story 2026-03-20 18:51:45 +00:00
Dave
972d8f3c12 storkit: create 357_story_bot_assign_command_to_pre_assign_a_model_to_a_story 2026-03-20 18:40:31 +00:00
Dave
4b1167025c storkit: accept 355_story_bot_rebuild_command_to_trigger_server_rebuild_and_restart 2026-03-20 16:24:54 +00:00
Dave
23eb752e3b storkit: accept 354_story_make_help_command_output_alphabetical 2026-03-20 16:22:53 +00:00
Dave
7aa1d0e322 storkit: done 356_story_start_command_should_say_queued_not_error_when_all_coders_are_busy 2026-03-20 16:04:49 +00:00
Dave
a6dcd48da9 storkit: merge 356_story_start_command_should_say_queued_not_error_when_all_coders_are_busy 2026-03-20 16:04:45 +00:00
Dave
87958b0a2a storkit: done 354_story_make_help_command_output_alphabetical 2026-03-20 15:39:35 +00:00
Dave
ea061d868d storkit: merge 354_story_make_help_command_output_alphabetical 2026-03-20 15:39:32 +00:00
Dave
6a03ca725e storkit: done 350_story_mcp_tool_for_code_definitions_lookup 2026-03-20 15:36:30 +00:00
Dave
0cd7c15227 storkit: done 355_story_bot_rebuild_command_to_trigger_server_rebuild_and_restart 2026-03-20 15:30:19 +00:00
Dave
0cb43a4de4 storkit: merge 355_story_bot_rebuild_command_to_trigger_server_rebuild_and_restart 2026-03-20 15:30:16 +00:00
Dave
cb663b620b storkit: accept 348_story_mcp_tools_for_code_search_grep_and_glob 2026-03-20 15:28:16 +00:00
Dave
0653af701c storkit: done 348_story_mcp_tools_for_code_search_grep_and_glob 2026-03-20 15:28:09 +00:00
Dave
b1a96990c4 storkit: accept 349_story_mcp_tools_for_git_operations 2026-03-20 15:21:40 +00:00
Dave
e46f855ab3 storkit: done 349_story_mcp_tools_for_git_operations 2026-03-20 15:20:39 +00:00
Dave
d838dd7127 storkit: merge 349_story_mcp_tools_for_git_operations 2026-03-20 15:20:34 +00:00
Dave
02ee48911e storkit: accept 353_story_add_party_emoji_to_done_stage_notification_messages 2026-03-20 15:18:19 +00:00
Dave
6429b20974 storkit: accept 352_bug_ambient_on_off_command_not_intercepted_by_bot_after_refactors 2026-03-20 15:16:38 +00:00
Dave
dcf0be2998 storkit: create 356_story_start_command_should_say_queued_not_error_when_all_coders_are_busy 2026-03-20 15:08:05 +00:00
Dave
efea81b487 storkit: accept 351_story_bot_reset_command_to_clear_conversation_context 2026-03-20 15:03:49 +00:00
Dave
491ca19a0b storkit: accept 347_story_mcp_tool_for_shell_command_execution 2026-03-20 13:19:25 +00:00
Dave
243b75e966 storkit: accept 346_story_mcp_tools_for_file_operations_read_write_edit_list 2026-03-20 13:18:24 +00:00
Dave
7693cc820c storkit: accept 340_story_web_ui_rebuild_and_restart_button 2026-03-20 13:04:21 +00:00
Dave
ba4af4179e storkit: accept 339_story_web_ui_agent_assignment_dropdown_on_work_items 2026-03-20 12:59:20 +00:00
86 changed files with 7155 additions and 1028 deletions

11
.dockerignore Normal file
View File

@@ -0,0 +1,11 @@
# Docker build context exclusions
**/target/
**/node_modules/
frontend/dist/
.storkit/worktrees/
.storkit/logs/
.storkit/work/6_archived/
.git/
*.swp
*.swo
.DS_Store

View File

@@ -3,6 +3,6 @@ frontend/
node_modules/ node_modules/
.claude/ .claude/
.git/ .git/
.story_kit/ .storkit/
store.json store.json
.story_kit_port .storkit_port

View File

@@ -33,7 +33,7 @@ model = "sonnet"
max_turns = 50 max_turns = 50
max_budget_usd = 5.00 max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix." prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. 
If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing." system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
[[agent]] [[agent]]
name = "coder-2" name = "coder-2"
@@ -43,7 +43,7 @@ model = "sonnet"
max_turns = 50 max_turns = 50
max_budget_usd = 5.00 max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix." prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. 
If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing." system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
[[agent]] [[agent]]
name = "coder-3" name = "coder-3"
@@ -53,7 +53,7 @@ model = "sonnet"
max_turns = 50 max_turns = 50
max_budget_usd = 5.00 max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix." prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. 
If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing." system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
[[agent]] [[agent]]
name = "qa-2" name = "qa-2"
@@ -130,7 +130,7 @@ model = "opus"
max_turns = 80 max_turns = 80
max_budget_usd = 20.00 max_budget_usd = 20.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix." prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. 
If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. You handle complex tasks requiring deep architectural understanding. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing." system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. You handle complex tasks requiring deep architectural understanding. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
[[agent]] [[agent]]
name = "qa" name = "qa"

View File

@@ -1,20 +0,0 @@
---
name: "Gate pipeline transitions on ensure_acceptance"
---
# Story 169: Gate pipeline transitions on ensure_acceptance
## User Story
As a project owner, I want story progression to be blocked unless ensure_acceptance passes, so that agents can't skip the testing workflow.
## Acceptance Criteria
- [ ] move_story_to_merge rejects stories that haven't passed ensure_acceptance
- [ ] accept_story rejects stories that haven't passed ensure_acceptance
- [ ] Rejection returns a clear error message telling the agent what's missing
- [ ] Existing passing stories (all criteria checked, tests recorded) still flow through normally
## Out of Scope
- TBD

View File

@@ -1,69 +0,0 @@
---
name: "Evaluate Docker/OrbStack for agent isolation and resource limiting"
agent: coder-opus
---
# Spike 329: Evaluate Docker/OrbStack for agent isolation and resource limiting
## Question
Investigate running the entire storkit system (server, Matrix bot, agents, web UI) inside a single Docker container, using OrbStack as the macOS runtime for better performance. The goal is to isolate storkit from the host machine — not to isolate agents from each other.
Currently storkit runs as bare processes on the host with full filesystem and network access. A single container would provide:
1. **Host isolation** — storkit can't touch anything outside the container
2. **Clean install/uninstall** — `docker run` to start, `docker rm` to remove
3. **Reproducible environment** — same container works on any machine
4. **Distributable product** — `docker pull storkit` for new users
5. **Resource limits** — cap total CPU/memory for the whole system
## Architecture
```
Docker Container (single)
├── storkit server
│ ├── Matrix bot
│ ├── WhatsApp webhook
│ ├── Slack webhook
│ ├── Web UI
│ └── MCP server
├── Agent processes (coder-1, coder-2, coder-opus, qa, mergemaster)
├── Rust toolchain + Node.js + Claude Code CLI
└── /workspace (bind-mounted project repo from host)
```
## Key questions to answer:
- **Performance**: How much slower are cargo builds inside the container on macOS? Compare Docker Desktop vs OrbStack for bind-mounted volumes.
- **Dockerfile**: What's the minimal image for the full stack? Rust toolchain + Node.js + Claude Code CLI + cargo-nextest + git.
- **Bind mounts**: The project repo is bind-mounted from the host. Any filesystem performance concerns with OrbStack?
- **Networking**: Container exposes web UI port (3000). Matrix/WhatsApp/Slack connect outbound. Any issues?
- **API key**: Pass ANTHROPIC_API_KEY as env var to the container.
- **Git**: Git operations happen inside the container on the bind-mounted repo. Commits are visible on the host immediately.
- **Cargo cache**: Use a named Docker volume for ~/.cargo/registry so dependencies persist across container restarts.
- **Claude Code state**: Where does Claude Code store its session data? Needs to persist or be in a volume.
- **OrbStack vs Docker Desktop**: Is OrbStack required for acceptable performance, or does Docker Desktop work too?
- **Server restart**: Does `rebuild_and_restart` work inside a container (re-exec with new binary)?
## Deliverable:
A proof-of-concept Dockerfile, docker-compose.yml, and a short write-up with findings and performance benchmarks.
## Hypothesis
- TBD
## Timebox
- TBD
## Investigation Plan
- TBD
## Findings
- TBD
## Recommendation
- TBD

View File

@@ -1,31 +0,0 @@
---
name: Agent Security and Sandboxing
---
# Story 34: Agent Security and Sandboxing
## User Story
**As a** supervisor orchestrating multiple autonomous agents,
**I want to** constrain what each agent can access and do,
**So that** agents can't escape their worktree, damage shared state, or perform unintended actions.
## Acceptance Criteria
- [ ] Agent creation accepts an `allowed_tools` list to restrict Claude Code tool access per agent.
- [ ] Agent creation accepts a `disallowed_tools` list as an alternative to allowlisting.
- [ ] Agents without Bash access can still perform useful coding work (Read, Edit, Write, Glob, Grep).
- [ ] Investigate replacing direct Bash/shell access with Rust-implemented tool proxies that enforce boundaries:
- Scoped `exec_shell` that only runs allowlisted commands (e.g., `cargo test`, `npm test`) within the agent's worktree.
- Scoped `read_file` / `write_file` that reject paths outside the agent's worktree root.
- Scoped `git` operations that only work within the agent's worktree.
- [ ] Evaluate `--max-turns` and `--max-budget-usd` as safety limits for runaway agents.
- [ ] Document the trust model: what the supervisor controls vs what agents can do autonomously.
## Questions to Explore
- Can we use MCP (Model Context Protocol) to expose our Rust-implemented tools to Claude Code, replacing its built-in Bash/filesystem tools with scoped versions?
- What's the right granularity for shell allowlists — command-level (`cargo test`) or pattern-level (`cargo *`)?
- Should agents have read access outside their worktree (e.g., to reference shared specs) but write access only within it?
- Is OS-level sandboxing (Docker, macOS sandbox profiles) worth the complexity for a personal tool?
## Out of Scope
- Multi-user authentication or authorization (single-user personal tool).
- Network-level isolation between agents.
- Encrypting agent communication channels (all local).

View File

@@ -0,0 +1,24 @@
---
name: "Web UI OAuth flow for Claude authentication"
---
# Story 368: Web UI OAuth flow for Claude authentication
## User Story
As a new user running storkit in Docker, I want to authenticate Claude through the web UI instead of running `claude login` in a terminal inside the container, so that the entire setup experience stays in the browser after `docker compose up`.
## Acceptance Criteria
- [ ] Backend exposes /auth/start endpoint that generates the Claude OAuth URL with redirect_uri pointing to localhost:3001
- [ ] Backend exposes /auth/callback endpoint that receives the OAuth token and stores it where Claude Code expects it
- [ ] Backend exposes /auth/status endpoint that reports whether valid Claude credentials exist
- [ ] Frontend shows a setup screen when no Claude auth is detected on first visit
- [ ] Setup screen has a 'Connect Claude Account' button that initiates the OAuth flow
- [ ] OAuth redirect returns to the web UI which confirms success and dismisses the setup screen
- [ ] Credentials are persisted in the claude-state Docker volume so they survive container restarts
- [ ] The entire flow works without any terminal interaction after docker compose up
## Out of Scope
- TBD

View File

@@ -1,18 +0,0 @@
---
name: Live Test Gate Updates
---
# Story 57: Live Test Gate Updates
## User Story
As a user, I want the Gate and Todo panels to update automatically when tests are recorded or acceptance is checked, so I can see progress without manually refreshing.
## Acceptance Criteria
- [ ] Server broadcasts a `{"type": "notification", "topic": "tests"}` event over `/ws` when tests are recorded, acceptance is checked, or coverage is collected
- [ ] GatePanel auto-refreshes its data when it receives a `tests` notification
- [ ] TodoPanel auto-refreshes its data when it receives a `tests` notification
- [ ] Manual refresh buttons continue to work
- [ ] Panels do not flicker or lose scroll position on auto-refresh
- [ ] End-to-end test: record test results via MCP, verify Gate panel updates without manual refresh

View File

@@ -0,0 +1,34 @@
---
name: "CLI treats --help and --version as project paths"
---
# Bug 369: CLI treats --help and --version as project paths
## Description
When running `storkit <anything>`, the binary treats the first argument as a project path, creates a directory for it, and scaffolds `.storkit/` inside. This happens for `--help`, `--version`, `serve`, `x`, or any other string. There is no validation that the argument is an existing directory or a reasonable path before creating it.
## How to Reproduce
1. Run `storkit --help` or `storkit serve` or `storkit x` in any directory
2. Observe that a directory with that name is created with a full `.storkit/` scaffold inside it
## Actual Result
Any argument is treated as a project path and a directory is created and scaffolded. No flags are recognised.
## Expected Result
- `storkit --help` prints usage info and exits
- `storkit --version` prints the version and exits
- `storkit <path>` only works if the path already exists as a directory
- If the path does not exist, storkit prints a clear error and exits non-zero
## Acceptance Criteria
- [ ] storkit --help prints usage information and exits with code 0
- [ ] storkit --version prints the version and exits with code 0
- [ ] storkit -h and storkit -V work as short aliases
- [ ] storkit does not create directories for any argument — the path must already exist
- [ ] If the path does not exist, storkit prints a clear error and exits non-zero
- [ ] Arguments starting with - that are not recognised produce a clear error message

View File

@@ -0,0 +1,33 @@
---
name: "Scaffold does not create .mcp.json in project root"
---
# Bug 370: Scaffold does not create .mcp.json in project root
## Description
Two related problems with project setup:
1. When the user clicks the "project setup" button in the web UI to open a new project, the scaffold does not reliably run — the `.storkit/` directory and associated files may not be created.
2. Even when the scaffold does run, it does not write `.mcp.json` to the project root. Without this file, agents spawned in worktrees cannot find the MCP server, causing `--permission-prompt-tool mcp__storkit__prompt_permission not found` errors and agent failures.
## How to Reproduce
1. Open the storkit web UI and use the project setup button to open a new project directory
2. Check whether the full scaffold was created (`.storkit/`, `CLAUDE.md`, `script/test`, etc.)
3. Check the project root for `.mcp.json`
## Actual Result
The scaffold may not run when using the UI project setup flow. When it does run, `.mcp.json` is not created in the project root. Agents fail because MCP tools are unavailable.
## Expected Result
Clicking the project setup button reliably runs the full scaffold, including `.mcp.json` pointing to the server's port.
## Acceptance Criteria
- [ ] The web UI project setup button triggers the full scaffold for new projects
- [ ] scaffold_story_kit writes .mcp.json to the project root with the server's port
- [ ] Existing .mcp.json is not overwritten if already present
- [ ] .mcp.json is included in .gitignore since the port is environment-specific

View File

@@ -0,0 +1,212 @@
---
name: "Evaluate Docker/OrbStack for agent isolation and resource limiting"
agent: "coder-opus"
---
# Spike 329: Evaluate Docker/OrbStack for agent isolation and resource limiting
## Question
Investigate running the entire storkit system (server, Matrix bot, agents, web UI) inside a single Docker container, using OrbStack as the macOS runtime for better performance. The goal is to isolate storkit from the host machine — not to isolate agents from each other.
**Important context:** Storkit developing itself is the dogfood edge case. The primary use case is storkit managing agents that develop *other* projects, driven by multiple users in chat rooms (Matrix, WhatsApp, Slack). Isolation must account for untrusted codebases, multi-user command surfaces, and running against arbitrary repos — not just the single-developer self-hosted setup.
Currently storkit runs as bare processes on the host with full filesystem and network access. A single container would provide:
1. **Host isolation** — storkit can't touch anything outside the container
2. **Clean install/uninstall** — `docker run` to start, `docker rm` to remove
3. **Reproducible environment** — same container works on any machine
4. **Distributable product** — `docker pull storkit` for new users
5. **Resource limits** — cap total CPU/memory for the whole system
## Architecture
```
Docker Container (single)
├── storkit server
│ ├── Matrix bot
│ ├── WhatsApp webhook
│ ├── Slack webhook
│ ├── Web UI
│ └── MCP server
├── Agent processes (coder-1, coder-2, coder-opus, qa, mergemaster)
├── Rust toolchain + Node.js + Claude Code CLI
└── /workspace (bind-mounted project repo from host)
```
## Key questions to answer:
- **Performance**: How much slower are cargo builds inside the container on macOS? Compare Docker Desktop vs OrbStack for bind-mounted volumes.
- **Dockerfile**: What's the minimal image for the full stack? Rust toolchain + Node.js + Claude Code CLI + cargo-nextest + git.
- **Bind mounts**: The project repo is bind-mounted from the host. Any filesystem performance concerns with OrbStack?
- **Networking**: Container exposes web UI port (3000). Matrix/WhatsApp/Slack connect outbound. Any issues?
- **API key**: Pass ANTHROPIC_API_KEY as env var to the container.
- **Git**: Git operations happen inside the container on the bind-mounted repo. Commits are visible on the host immediately.
- **Cargo cache**: Use a named Docker volume for ~/.cargo/registry so dependencies persist across container restarts.
- **Claude Code state**: Where does Claude Code store its session data? Needs to persist or be in a volume.
- **OrbStack vs Docker Desktop**: Is OrbStack required for acceptable performance, or does Docker Desktop work too?
- **Server restart**: Does `rebuild_and_restart` work inside a container (re-exec with new binary)?
## Deliverable:
A proof-of-concept Dockerfile, docker-compose.yml, and a short write-up with findings and performance benchmarks.
## Hypothesis
A single Docker container running the entire storkit stack (server + agents + toolchain) on OrbStack will provide acceptable performance for the primary use case (developing other projects) while giving us host isolation, resource limits, and a distributable product. OrbStack's VirtioFS should make bind-mounted filesystem performance close to native.
## Timebox
4 hours
## Investigation Plan
1. Audit storkit's runtime dependencies (Rust toolchain, Node.js, Claude Code CLI, cargo-nextest, git)
2. Determine where Claude Code stores session state (~/.claude)
3. Analyze how rebuild_and_restart works (exec() replacement) and whether it's container-compatible
4. Draft a multi-stage Dockerfile and docker-compose.yml
5. Document findings for each key question
6. Provide recommendation and follow-up stories
## Findings
### 1. Dockerfile: Minimal image for the full stack
**Result:** Multi-stage Dockerfile created at `docker/Dockerfile`.
The image requires these runtime components:
- **Rust 1.90+ toolchain** (~1.5 GB) — needed at runtime for `rebuild_and_restart` and agent-driven `cargo clippy`, `cargo test`, etc.
- **Node.js 22.x** (~100 MB) — needed at runtime for Claude Code CLI (npm global package)
- **Claude Code CLI** (`@anthropic-ai/claude-code`) — npm global, spawned by storkit via PTY
- **cargo-nextest** — pre-built binary, used by acceptance gates
- **git** — used extensively by agents and worktree management
- **System libs:** libssl3, ca-certificates
The build stage compiles the storkit binary with embedded frontend assets (build.rs runs `npm run build`). The runtime stage is based on `debian:bookworm-slim` but still needs Rust + Node because agents use them at runtime.
**Total estimated image size:** ~3-4 GB (dominated by the Rust toolchain). This is large but acceptable for a development tool that runs locally.
### 2. Bind mounts and filesystem performance
**OrbStack** uses Apple's VirtioFS for bind mounts, which is near-native speed. This is a significant advantage over Docker Desktop's older options:
| Runtime | Bind mount driver | Performance | Notes |
|---------|------------------|-------------|-------|
| OrbStack | VirtioFS (native) | ~95% native | Default, no config needed |
| Docker Desktop | VirtioFS | ~85-90% native | Must enable in settings (Docker Desktop 4.15+) |
| Docker Desktop | gRPC-FUSE (legacy) | ~40-60% native | Default on older versions, very slow for cargo builds |
| Docker Desktop | osxfs (deprecated) | ~30-50% native | Ancient default, unusable for Rust projects |
**For cargo builds on bind-mounted volumes:** The critical path is `target/` directory I/O. Since `target/` lives inside the bind-mounted project, large Rust projects will see a noticeable slowdown on Docker Desktop with gRPC-FUSE. OrbStack's VirtioFS makes this tolerable.
**Mitigation option:** Keep `target/` in a named Docker volume instead of on the bind mount. This gives native Linux filesystem speed for compilation artifacts while the source code remains bind-mounted. The trade-off is that `target/` won't be visible on the host, which is fine since it's a build cache.
### 3. Claude Code state persistence
Claude Code stores all state in `~/.claude/`:
- `sessions/` — conversation transcripts (used by `--resume`)
- `projects/` — per-project settings and memory
- `history.jsonl` — command history
- `session-env/` — environment snapshots
- `settings.json` — global preferences
**Solution:** Mount `~/.claude` as a named Docker volume (`claude-state`). This persists across container restarts. Session resumption (`--resume <session_id>`) will work correctly since the session files are preserved.
### 4. Networking
**Straightforward.** The container exposes port 3001 for the web UI + MCP endpoint. All chat integrations (Matrix, Slack, WhatsApp) connect outbound from the container, which works by default in Docker's bridge networking. No special configuration needed.
Port mapping: `3001:3001` in docker-compose.yml. Users access the web UI at `http://localhost:3001`.
### 5. API key handling
**Simple.** Pass `ANTHROPIC_API_KEY` as an environment variable via docker-compose.yml. The storkit server already reads it from the environment. Claude Code also reads `ANTHROPIC_API_KEY` from the environment.
### 6. Git operations on bind-mounted repos
**Works correctly.** Git operations inside the container on a bind-mounted volume are immediately visible on the host (and vice versa). The key considerations:
- **Git config:** The container runs as root, so `git config --global user.name/email` needs to be set inside the container (or mounted from host). Without this, commits have no author identity.
- **File ownership:** OrbStack maps the container's root user to the host user automatically (uid remapping). Docker Desktop does not — files created by the container may be owned by root on the host. OrbStack handles this transparently.
- **Worktrees:** `git worktree add` inside the container creates worktrees within the bind-mounted repo, which are visible on the host. This is correct behavior.
### 7. Cargo cache
**Named Docker volumes** for `/usr/local/cargo/registry` and `/usr/local/cargo/git` persist downloaded crates across container restarts. First `cargo build` downloads everything; subsequent builds use the cached crates. This is a standard Docker pattern.
### 8. OrbStack vs Docker Desktop
| Capability | OrbStack | Docker Desktop |
|-----------|----------|----------------|
| **VirtioFS (fast mounts)** | Default, always on | Must enable manually |
| **UID remapping** | Automatic (root → host user) | Manual or not available |
| **Memory usage** | ~50% less than Docker Desktop | Higher baseline overhead |
| **Startup time** | 1-2 seconds | 10-30 seconds |
| **License** | Free for personal use, paid for teams | Free for personal/small business, paid for enterprise |
| **Linux compatibility** | Full (Rosetta for x86 on ARM) | Full (QEMU for x86 on ARM) |
**Verdict:** OrbStack is strongly recommended for macOS. Docker Desktop works but requires VirtioFS to be enabled manually and has worse file ownership semantics. On Linux hosts, Docker Engine (not Desktop) is native and has none of these issues.
### 9. rebuild_and_restart inside a container
**Works with caveats.** The current implementation:
1. Runs `cargo build` from `CARGO_MANIFEST_DIR` (baked at compile time to `/app/server`)
2. Calls `exec()` to replace the process with the new binary
Inside a container, `exec()` works fine — it replaces the PID 1 process. However:
- The source tree must exist at `/app` inside the container (the path baked into the binary)
- The Rust toolchain must be available at runtime
- If the container is configured with `restart: unless-stopped`, a crash during rebuild could cause a restart loop
**The Dockerfile handles this** by copying the full source tree into `/app` in the runtime stage and including the Rust toolchain.
**Future improvement:** For the storkit-developing-itself case, mount the source tree as a volume at `/app` so code changes on the host are immediately available for rebuild. For the primary use case (developing other projects), the baked-in source is fine — the server doesn't change.
### 10. Multi-user / untrusted codebase considerations
The single-container model provides **host isolation** but no **agent-to-agent isolation**:
- All agents share the same filesystem, network, and process namespace
- A malicious codebase could interfere with other agents or the storkit server itself
- This is acceptable as a first step since the primary threat model is "storkit shouldn't wreck the host"
For true multi-tenant isolation (multiple untrusted projects), a future architecture could:
- Run one container per project (each with its own bind mount)
- Use Docker's `--read-only` with specific writable mounts
- Apply seccomp/AppArmor profiles to limit syscalls
### 11. Image distribution
The single-container approach enables simple distribution:
```
docker pull ghcr.io/crashlabs/storkit:latest
docker run -e ANTHROPIC_API_KEY=sk-ant-... -v /my/project:/workspace -p 3001:3001 storkit
```
This is a massive UX improvement over "install Rust, install Node, install Claude Code, clone the repo, cargo build, etc."
## Recommendation
**Proceed with implementation.** The single-container Docker approach is viable and solves the stated goals:
1. **Host isolation** — achieved via standard Docker containerization
2. **Clean install/uninstall** — `docker compose up` / `docker compose down -v`
3. **Reproducible environment** — Dockerfile pins all versions
4. **Distributable product** — `docker pull` for new users
5. **Resource limits** — `deploy.resources.limits` in compose
### Follow-up stories to create:
1. **Story: Implement Docker container build and CI** — Set up automated image builds, push to registry, test that the image works end-to-end with a sample project.
2. **Story: Target directory optimization** — Move `target/` to a named volume to avoid bind mount I/O overhead for cargo builds. Benchmark the improvement.
3. **Story: Git identity in container** — Configure git user.name/email inside the container (from env vars or mounted .gitconfig).
4. **Story: Per-project container isolation** — For multi-tenant deployments, run one storkit container per project with tighter security (read-only root, seccomp, no-new-privileges).
5. **Story: Health endpoint** — Add a `/health` HTTP endpoint to the storkit server for the Docker healthcheck.
### Risks and open questions:
- **Image size (~3-4 GB):** Acceptable for a dev tool but worth optimizing later. The Rust toolchain dominates.
- **Rust toolchain at runtime:** Required for rebuild_and_restart and agent cargo commands. Cannot be eliminated without changing the architecture.
- **Claude Code CLI updates:** The CLI version is pinned at image build time. Users need to rebuild the image to get updates. Could use a volume mount for the npm global dir to allow in-place updates.

View File

@@ -1,5 +1,6 @@
--- ---
name: "Abstract agent runtime to support non-Claude-Code backends" name: "Abstract agent runtime to support non-Claude-Code backends"
agent: coder-opus
--- ---
# Refactor 343: Abstract agent runtime to support non-Claude-Code backends # Refactor 343: Abstract agent runtime to support non-Claude-Code backends

View File

@@ -1,5 +1,6 @@
--- ---
name: "ChatGPT agent backend via OpenAI API" name: "ChatGPT agent backend via OpenAI API"
agent: coder-opus
--- ---
# Story 344: ChatGPT agent backend via OpenAI API # Story 344: ChatGPT agent backend via OpenAI API

View File

@@ -0,0 +1,18 @@
---
name: "Start command should say queued not error when all coders are busy"
---
# Story 356: Start command should say queued not error when all coders are busy
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] When all coders are busy, 'start' command responds with a short queued message instead of an error
- [ ] Message tone is neutral/positive, not a failure message
## Out of Scope
- TBD

View File

@@ -0,0 +1,20 @@
---
name: "Bot assign command to pre-assign a model to a story"
---
# Story 357: Bot assign command to pre-assign a model to a story
## User Story
As a user, I want to assign a specific model (e.g. opus) to a story before it starts, so that when a coder picks it up it uses the model I chose.
## Acceptance Criteria
- [ ] Bot recognizes `assign <number> <model>` command
- [ ] Assignment persists in the story file so it's used when the story starts
- [ ] Command appears in help output
- [ ] Works with available model names (e.g. opus, sonnet)
## Out of Scope
- TBD

View File

@@ -0,0 +1,20 @@
---
name: "Remove Makefile and make script/release the single entry point for releases"
---
# Story 358: Remove Makefile and make script/release the single entry point for releases
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] Makefile is deleted
- [ ] script/release requires a version argument and prints usage if missing
- [ ] script/release still builds macOS and Linux binaries, bumps versions, generates changelog, tags, and publishes to Gitea
- [ ] No dependency on make
## Out of Scope
- TBD

View File

@@ -0,0 +1,28 @@
---
name: "Harden Docker setup for security"
retry_count: 3
blocked: true
---
# Story 359: Harden Docker setup for security
## User Story
As a storkit operator, I want the Docker container to run with hardened security settings, so that a compromised agent or malicious codebase cannot escape the container or affect the host.
## Acceptance Criteria
- [ ] Container runs as a non-root user
- [ ] Root filesystem is read-only with only necessary paths writable (e.g. /tmp, cargo cache, claude state volumes)
- [ ] Linux capabilities dropped to minimum required (cap_drop: ALL, add back only what's needed)
- [ ] no-new-privileges flag is set
- [ ] Resource limits (CPU and memory) are configured in docker-compose.yml
- [ ] Outbound network access is restricted where possible
- [ ] ANTHROPIC_API_KEY is passed via Docker secrets or .env file, not hardcoded in compose
- [ ] Image passes a CVE scan with no critical vulnerabilities
- [ ] Port binding uses 127.0.0.1 instead of 0.0.0.0 (e.g. "127.0.0.1:3001:3001") so the web UI is not exposed on all interfaces
- [ ] Git identity is configured via explicit GIT_USER_NAME and GIT_USER_EMAIL env vars; container fails loudly on startup if either is missing (note: multi-user/distributed case where different users need different identities is out of scope and will require a different solution)
## Out of Scope
- TBD

View File

@@ -0,0 +1,21 @@
---
name: "Run storkit container under gVisor (runsc) runtime"
---
# Story 360: Run storkit container under gVisor (runsc) runtime
## User Story
As a storkit operator, I want the container to run under gVisor so that even if a malicious codebase escapes the container's process namespace, it cannot make raw syscalls to the host kernel.
## Acceptance Criteria
- [ ] docker-compose.yml specifies runtime: runsc
- [ ] PTY-based agent spawning (Claude Code via PTY) works correctly under runsc
- [ ] rebuild_and_restart (exec() replacement) works correctly under runsc
- [ ] Rust compilation inside the container completes successfully under runsc
- [ ] Document host setup requirement: runsc must be installed and registered in /etc/docker/daemon.json
## Out of Scope
- TBD

View File

@@ -0,0 +1,20 @@
---
name: "Remove deprecated manual_qa front matter field"
---
# Story 361: Remove deprecated manual_qa front matter field
## User Story
As a developer, I want the deprecated manual_qa boolean field removed from the codebase, so that the front matter schema stays clean and doesn't accumulate legacy boolean flags alongside the more expressive qa: server|agent|human field that replaced it.
## Acceptance Criteria
- [ ] manual_qa field is removed from the FrontMatter and StoryMetadata structs in story_metadata.rs
- [ ] Legacy mapping from manual_qa: true → qa: human is removed
- [ ] Any existing story files using manual_qa are migrated to qa: human
- [ ] Codebase compiles cleanly with no references to manual_qa remaining
## Out of Scope
- TBD

View File

@@ -0,0 +1,28 @@
---
name: "Bot whatsup command shows in-progress work summary"
---
# Story 362: Bot whatsup command shows in-progress work summary
## User Story
As a project owner in a Matrix room, I want to type "{bot_name} whatsup {story_number}" and see a full triage dump for that story, so that when something goes wrong I can immediately understand its state — blocked status, agent activity, git changes, and log tail — without hunting across multiple places or asking the bot to investigate.
## Acceptance Criteria
- [ ] '{bot_name} whatsup {number}' finds the story in work/2_current/ by story number
- [ ] Shows the story number, name, and current pipeline stage
- [ ] Shows relevant front matter fields: blocked, agent, and any other non-empty fields
- [ ] Shows which Acceptance Criteria are checked vs unchecked
- [ ] Shows active branch and worktree path if one exists
- [ ] Shows git diff --stat of changes on the branch since branching from master
- [ ] Shows last 5 commit messages on the feature branch (not master)
- [ ] Shows the last 20 lines of the agent log for this story (if a log exists)
- [ ] Returns a friendly message if the story is not found or not currently in progress
- [ ] Registered in the command registry so it appears in help output
- [ ] Handled at bot level without LLM invocation — uses git, filesystem, and log files only
## Out of Scope
- Interpreting or summarising log output with an LLM
- Showing logs from previous agent runs (only the current/most recent)

View File

@@ -0,0 +1,25 @@
---
name: "MCP tool for whatsup story triage"
---
# Story 363: MCP tool for whatsup story triage
## User Story
As an LLM assistant, I want to call a single MCP tool to get a full triage dump for an in-progress story, so that I can answer status questions quickly without making 8+ separate calls to piece together the picture myself.
## Acceptance Criteria
- [ ] 'whatsup' MCP tool accepts a story_id parameter
- [ ] Returns story front matter fields (name, blocked, agent, and any other non-empty fields)
- [ ] Returns AC checklist with checked/unchecked status
- [ ] Returns active branch and worktree path if one exists
- [ ] Returns git diff --stat of changes on the feature branch since branching from master
- [ ] Returns last 5 commit messages on the feature branch
- [ ] Returns last 20 lines of the most recent agent log for the story
- [ ] Returns a clear error if the story is not found or not in work/2_current/
- [ ] Registered and discoverable via the MCP tools/list endpoint
## Out of Scope
- TBD

View File

@@ -0,0 +1,64 @@
---
name: "Surface API rate limit warnings in chat"
---
# Story 365: Surface API rate limit warnings in chat
## User Story
As a project owner watching the chat, I want to see rate limit warnings surfaced directly in the conversation when they appear in the agent's PTY output, so that I know immediately when an agent is being throttled without having to watch server logs.
## Acceptance Criteria
- [x] Server detects rate limit warnings in pty-debug output lines
- [x] When a rate limit warning is detected, a notification is sent to the active chat (Matrix/Slack/WhatsApp)
- [x] The notification includes which agent/story triggered the rate limit
- [x] Rate limit notifications are debounced to avoid spamming the chat with repeated warnings
## Technical Context
Claude Code emits `rate_limit_event` JSON in its streaming output:
```json
{
"type": "rate_limit_event",
"rate_limit_info": {
"status": "allowed_warning",
"resetsAt": 1774443600,
"rateLimitType": "seven_day",
"utilization": 0.82,
"isUsingOverage": false,
"surpassedThreshold": 0.75
}
}
```
Key fields:
- `status`: `"allowed_warning"` when approaching limit, likely `"blocked"` or similar when hard-limited
- `rateLimitType`: e.g. `"seven_day"` rolling window
- `utilization`: 0.0–1.0 fraction of limit consumed
- `resetsAt`: Unix timestamp when the window resets
- `surpassedThreshold`: the threshold that triggered the warning (e.g. 0.75 = 75%)
These events are already logged as `[pty-debug] raw line:` in the server logs. The PTY reader in `server/src/llm/providers/claude_code.rs` (line ~234) sees them but doesn't currently parse or act on them.
## Out of Scope
- TBD
## Test Results
<!-- storkit-test-results: {"unit":[{"name":"rate_limit_event_json_sends_watcher_warning","status":"pass","details":"PTY reader detects rate_limit_event JSON and emits RateLimitWarning watcher event"},{"name":"rate_limit_warning_sends_notification_with_agent_and_story","status":"pass","details":"Notification listener sends chat message with agent and story info"},{"name":"rate_limit_warning_is_debounced","status":"pass","details":"Second warning within 60s window is suppressed"},{"name":"rate_limit_warnings_for_different_agents_both_notify","status":"pass","details":"Different agents are debounced independently"},{"name":"format_rate_limit_notification_includes_agent_and_story","status":"pass","details":"Notification text includes story number, name, and agent name"},{"name":"format_rate_limit_notification_falls_back_to_item_id","status":"pass","details":"Falls back to item_id when story name is unavailable"}],"integration":[]} -->
### Unit Tests (6 passed, 0 failed)
- ✅ rate_limit_event_json_sends_watcher_warning — PTY reader detects rate_limit_event JSON and emits RateLimitWarning watcher event
- ✅ rate_limit_warning_sends_notification_with_agent_and_story — Notification listener sends chat message with agent and story info
- ✅ rate_limit_warning_is_debounced — Second warning within 60s window is suppressed
- ✅ rate_limit_warnings_for_different_agents_both_notify — Different agents are debounced independently
- ✅ format_rate_limit_notification_includes_agent_and_story — Notification text includes story number, name, and agent name
- ✅ format_rate_limit_notification_falls_back_to_item_id — Falls back to item_id when story name is unavailable
### Integration Tests (0 passed, 0 failed)
*No integration tests recorded.*

View File

@@ -0,0 +1,20 @@
---
name: "Bot sends shutdown message on server stop or rebuild"
---
# Story 366: Bot sends shutdown message on server stop or rebuild
## User Story
As a project owner in a chat room, I want the bot to send a message when the server is shutting down (via ctrl-c or rebuild_and_restart), so that I know the bot is going offline and won't wonder why it stopped responding.
## Acceptance Criteria
- [ ] Bot sends a shutdown message to active chat channels when the server receives SIGINT/SIGTERM (ctrl-c)
- [ ] Bot sends a shutdown message before rebuild_and_restart kills the current process
- [ ] Message indicates the reason (manual stop vs rebuild)
- [ ] Message is sent best-effort — shutdown is not blocked if the message fails to send
## Out of Scope
- TBD

View File

@@ -0,0 +1,20 @@
---
name: "Rename bot whatsup command to status"
---
# Story 367: Rename bot whatsup command to status
## User Story
As a project owner using the bot from a phone, I want to type "status {number}" instead of "whatsup {number}" to get a story triage dump, because "whatsup" gets autocorrected to "WhatsApp" on mobile keyboards.
## Acceptance Criteria
- [ ] '{bot_name} status {number}' returns the same triage dump that 'whatsup' currently returns
- [ ] The 'whatsup' command is removed or aliased to 'status'
- [ ] Help output shows 'status' as the command name
- [ ] The MCP tool name (whatsup) is unaffected — this only changes the bot command
## Out of Scope
- TBD

42
Cargo.lock generated
View File

@@ -1774,9 +1774,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
[[package]] [[package]]
name = "iri-string" name = "iri-string"
version = "0.7.10" version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb"
dependencies = [ dependencies = [
"memchr", "memchr",
"serde", "serde",
@@ -1815,7 +1815,7 @@ dependencies = [
"cesu8", "cesu8",
"cfg-if", "cfg-if",
"combine", "combine",
"jni-sys", "jni-sys 0.3.1",
"log", "log",
"thiserror 1.0.69", "thiserror 1.0.69",
"walkdir", "walkdir",
@@ -1824,9 +1824,31 @@ dependencies = [
[[package]] [[package]]
name = "jni-sys" name = "jni-sys"
version = "0.3.0" version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258"
dependencies = [
"jni-sys 0.4.1",
]
[[package]]
name = "jni-sys"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2"
dependencies = [
"jni-sys-macros",
]
[[package]]
name = "jni-sys-macros"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264"
dependencies = [
"quote",
"syn 2.0.117",
]
[[package]] [[package]]
name = "jobserver" name = "jobserver"
@@ -2948,9 +2970,9 @@ dependencies = [
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.13.1" version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6" checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"memchr", "memchr",
@@ -3625,9 +3647,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
version = "0.103.9" version = "0.103.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
dependencies = [ dependencies = [
"aws-lc-rs", "aws-lc-rs",
"ring", "ring",
@@ -3994,7 +4016,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]] [[package]]
name = "storkit" name = "storkit"
version = "0.4.1" version = "0.5.0"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",

View File

@@ -1,38 +0,0 @@
.PHONY: help build-macos build-linux release
help:
@echo "Story Kit cross-platform build targets"
@echo ""
@echo " make build-macos Build native macOS release binary"
@echo " make build-linux Build static Linux x86_64 release binary (requires cross + Docker)"
@echo " make release V=x.y.z Build both targets and publish a Gitea release"
@echo ""
@echo "Prerequisites:"
@echo " build-macos: Rust stable toolchain, npm"
@echo " build-linux: cargo install cross AND Docker Desktop running"
@echo ""
@echo "Output:"
@echo " macOS : target/release/storkit"
@echo " Linux : target/x86_64-unknown-linux-musl/release/storkit"
## Build a native macOS release binary.
## The frontend is compiled by build.rs (npm run build) and embedded via rust-embed.
## Verify dynamic deps afterwards: otool -L target/release/storkit
build-macos:
cargo build --release
## Build a fully static Linux x86_64 binary using the musl libc target.
## cross (https://github.com/cross-rs/cross) handles the Docker-based cross-compilation.
## Install cross: cargo install cross
## The resulting binary has zero dynamic library dependencies (ldd reports "not a dynamic executable").
build-linux:
cross build --release --target x86_64-unknown-linux-musl
## Publish a release to Gitea with macOS and Linux binaries.
## Requires: GITEA_TOKEN env var, cross, Docker running.
## Usage: make release V=0.2.0
release:
ifndef V
$(error Usage: make release V=x.y.z)
endif
script/release $(V)

11
docker/.dockerignore Normal file
View File

@@ -0,0 +1,11 @@
# Docker build context exclusions
**/target/
**/node_modules/
frontend/dist/
.storkit/worktrees/
.storkit/logs/
.storkit/work/6_archived/
.git/
*.swp
*.swo
.DS_Store

132
docker/Dockerfile Normal file
View File

@@ -0,0 +1,132 @@
# Story Kit single-container runtime
# All components (server, agents, web UI) run inside this container.
# The target project repo is bind-mounted at /workspace.
#
# Build: docker build -t storkit -f docker/Dockerfile .
# Run: docker compose -f docker/docker-compose.yml up
#
# Tested with: OrbStack (recommended on macOS), Docker Desktop (slower bind mounts)
FROM rust:1.90-bookworm AS base

# Clippy is needed at runtime for acceptance gates (cargo clippy)
RUN rustup component add clippy

# ── System deps ──────────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    ca-certificates \
    build-essential \
    pkg-config \
    libssl-dev \
    # cargo-nextest is a pre-built binary
    && rm -rf /var/lib/apt/lists/*

# ── Node.js 22.x (matches host) ─────────────────────────────────────
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# ── cargo-nextest (test runner) ──────────────────────────────────────
RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C /usr/local/bin

# ── Claude Code CLI ──────────────────────────────────────────────────
# Claude Code is distributed as an npm global package.
# The CLI binary is `claude`.
RUN npm install -g @anthropic-ai/claude-code

# ── Working directory ────────────────────────────────────────────────
# /app holds the storkit source (copied in at build time for the binary).
# /workspace is where the target project repo gets bind-mounted at runtime.
WORKDIR /app

# ── Build the storkit server binary ─────────────────────────────────
# Copy the full project tree so `cargo build` and `npm run build` (via
# build.rs) can produce the release binary with embedded frontend assets.
COPY . .
# Build frontend deps first (better layer caching)
RUN cd frontend && npm ci
# Build the release binary (build.rs runs npm run build for the frontend)
RUN cargo build --release \
    && cp target/release/storkit /usr/local/bin/storkit

# ── Runtime stage (smaller image) ───────────────────────────────────
FROM debian:bookworm-slim AS runtime
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    ca-certificates \
    libssl3 \
    # build-essential (gcc/cc) needed at runtime for:
    # - rebuild_and_restart (cargo build --release)
    # - agent-driven cargo commands (clippy, test, build)
    build-essential \
    pkg-config \
    libssl-dev \
    # procps provides ps, needed by tests and process management
    procps \
    && rm -rf /var/lib/apt/lists/*

# Node.js in runtime
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Claude Code CLI in runtime
RUN npm install -g @anthropic-ai/claude-code

# Cargo and Rust toolchain needed at runtime for:
# - rebuild_and_restart (cargo build inside the container)
# - Agent-driven cargo commands (cargo clippy, cargo test, etc.)
COPY --from=base /usr/local/cargo /usr/local/cargo
COPY --from=base /usr/local/rustup /usr/local/rustup
ENV PATH="/usr/local/cargo/bin:${PATH}"
ENV RUSTUP_HOME="/usr/local/rustup"
ENV CARGO_HOME="/usr/local/cargo"

# cargo-nextest
COPY --from=base /usr/local/bin/cargo-nextest /usr/local/bin/cargo-nextest

# The storkit binary
COPY --from=base /usr/local/bin/storkit /usr/local/bin/storkit

# Copy the full source tree so rebuild_and_restart can do `cargo build`
# from the workspace root (CARGO_MANIFEST_DIR is baked into the binary).
# Alternative: mount the source as a volume.
COPY --from=base /app /app

# ── Non-root user ────────────────────────────────────────────────────
# Claude Code refuses --dangerously-skip-permissions (bypassPermissions)
# when running as root. Create a dedicated user so agents can launch.
RUN groupadd -r storkit \
    && useradd -r -g storkit -m -d /home/storkit storkit \
    && mkdir -p /home/storkit/.claude \
    && chown -R storkit:storkit /home/storkit \
    && chown -R storkit:storkit /usr/local/cargo /usr/local/rustup \
    && chown -R storkit:storkit /app \
    && mkdir -p /workspace/target /app/target \
    && chown storkit:storkit /workspace/target /app/target

# ── Entrypoint ───────────────────────────────────────────────────────
# Validates required env vars (GIT_USER_NAME, GIT_USER_EMAIL) and
# configures git identity before starting the server.
# COPY preserves the executable bit from the build context, so no
# chmod is needed here.
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
USER storkit
WORKDIR /workspace

# ── Ports ────────────────────────────────────────────────────────────
# Web UI + MCP server
EXPOSE 3001

# ── Volumes (defined in docker-compose.yml) ──────────────────────────
# /workspace bind mount: target project repo
# /home/storkit/.claude named volume: Claude Code sessions/state
# /usr/local/cargo/registry named volume: cargo dependency cache

# Exec-form ENTRYPOINT does no shell PATH resolution guarantees across
# runtimes — use the absolute path so startup never depends on PATH.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["storkit", "/workspace"]

118
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,118 @@
# Story Kit single-container deployment
#
# Usage:
#   # Set your API key and project path, then:
#   ANTHROPIC_API_KEY=sk-ant-... PROJECT_PATH=/path/to/your/repo \
#     docker compose -f docker/docker-compose.yml up
#
# OrbStack users: just install OrbStack and use `docker compose` normally.
# OrbStack's VirtioFS bind mount driver is significantly faster than
# Docker Desktop's default (see spike findings).
services:
  storkit:
    build:
      # Build context is the repo root; the Dockerfile lives in docker/.
      context: ..
      dockerfile: docker/Dockerfile
    container_name: storkit
    ports:
      # Bind to localhost only — not exposed on all interfaces.
      - "127.0.0.1:3001:3001"
    environment:
      # Optional: Anthropic API key. If unset, Claude Code falls back to
      # OAuth credentials from `claude login` (e.g. Max subscription).
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      # Required: git identity for agent commits. The :? form makes
      # compose fail fast with this message when the variable is unset.
      - GIT_USER_NAME=${GIT_USER_NAME:?Set GIT_USER_NAME}
      - GIT_USER_EMAIL=${GIT_USER_EMAIL:?Set GIT_USER_EMAIL}
      # Optional: override the server port (default 3001)
      - STORKIT_PORT=3001
      # Optional: Matrix bot credentials (if using Matrix integration)
      - MATRIX_HOMESERVER=${MATRIX_HOMESERVER:-}
      - MATRIX_USER=${MATRIX_USER:-}
      - MATRIX_PASSWORD=${MATRIX_PASSWORD:-}
      # Optional: Slack webhook (if using Slack integration)
      - SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN:-}
      - SLACK_APP_TOKEN=${SLACK_APP_TOKEN:-}
    volumes:
      # The target project repo bind-mounted from host.
      # Changes made by agents inside the container are immediately
      # visible on the host (and vice versa).
      - ${PROJECT_PATH:?Set PROJECT_PATH}:/workspace
      # Cargo registry cache persists downloaded crates across
      # container restarts so `cargo build` doesn't re-download.
      - cargo-registry:/usr/local/cargo/registry
      # Cargo git checkouts persists git-based dependencies.
      - cargo-git:/usr/local/cargo/git
      # Claude Code state persists session history, projects config,
      # and conversation transcripts so --resume works across restarts.
      # Mounted inside the /home/storkit tmpfs declared below; named
      # volumes layer over tmpfs, so this survives restarts anyway.
      - claude-state:/home/storkit/.claude
      # Storkit source tree for rebuild_and_restart.
      # The binary has CARGO_MANIFEST_DIR baked in at compile time
      # pointing to /app/server, so the source must be at /app.
      # This is COPY'd in the Dockerfile; mounting over it allows
      # live source updates without rebuilding the image.
      # Mount host source so rebuild_and_restart picks up live changes
      # (path is relative to this compose file, i.e. the repo root):
      - ./..:/app
      # Keep cargo build artifacts off the bind mount.
      # Bind-mount directory traversal is ~23x slower than Docker volumes
      # (confirmed in spike 329). Cargo stat-checks every file in target/
      # on incremental builds — leaving it on the bind mount makes builds
      # catastrophically slow (~12s just to traverse the tree).
      - workspace-target:/workspace/target
      - storkit-target:/app/target
    # ── Security hardening ──────────────────────────────────────────
    # Read-only root filesystem. Only explicitly mounted volumes and
    # tmpfs paths are writable.
    read_only: true
    tmpfs:
      # NOTE(review): uid=999/gid=999 assumes the `storkit` system user
      # created in the Dockerfile was assigned UID/GID 999 — confirm
      # against the built image (`docker run ... id storkit`).
      - /tmp:size=512M,exec
      - /home/storkit:size=512M,uid=999,gid=999,exec
    # Drop all Linux capabilities, then add back only what's needed.
    # SETUID/SETGID needed by Claude Code's PTY allocation (openpty).
    cap_drop:
      - ALL
    cap_add:
      - SETUID
      - SETGID
    # Prevent child processes from gaining new privileges via setuid,
    # setgid, or other mechanisms.
    security_opt:
      - no-new-privileges:true
    # Resource limits cap the whole system.
    # Adjust based on your machine. These are conservative defaults.
    # NOTE(review): deploy.resources is honored by recent `docker
    # compose` (Compose Spec); older compose versions ignored it
    # outside Swarm — verify against the targeted compose version.
    deploy:
      resources:
        limits:
          cpus: "8"
          memory: 24G
        reservations:
          cpus: "2"
          memory: 4G
    # Health check: verifies the MCP endpoint responds (curl is
    # installed in the runtime image).
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://localhost:3001/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    # Restart policy: restart on crash but not on manual stop.
    restart: unless-stopped
volumes:
  cargo-registry:
  cargo-git:
  claude-state:
  workspace-target:
  storkit-target:

34
docker/entrypoint.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/sh
set -e

# ── Git identity ─────────────────────────────────────────────────────
# Agents commit code inside the container. Without a git identity,
# commits fail or use garbage defaults. Fail loudly at startup so the
# operator knows immediately.
require_env() {
    # $1 = variable name, $2 = its current value.
    # Aborts the container with a clear message when the value is empty.
    if [ -z "$2" ]; then
        echo "FATAL: $1 is not set. Export it in your environment or docker-compose.yml." >&2
        exit 1
    fi
}
require_env GIT_USER_NAME "$GIT_USER_NAME"
require_env GIT_USER_EMAIL "$GIT_USER_EMAIL"

# Use GIT_AUTHOR/COMMITTER env vars instead of git config --global,
# so the root filesystem can stay read-only (no ~/.gitconfig write).
export GIT_AUTHOR_NAME="$GIT_USER_NAME"
export GIT_AUTHOR_EMAIL="$GIT_USER_EMAIL"
export GIT_COMMITTER_NAME="$GIT_USER_NAME"
export GIT_COMMITTER_EMAIL="$GIT_USER_EMAIL"

# ── Frontend native deps ────────────────────────────────────────────
# The project repo is bind-mounted from the host, so node_modules/
# may contain native binaries for the wrong platform (e.g. darwin
# binaries on a Linux container). Reinstall to get the right ones.
# Best-effort: any npm failure is swallowed so startup is not blocked.
if [ -d /workspace/frontend ] && [ -f /workspace/frontend/package.json ]; then
    echo "Installing frontend dependencies for container platform..."
    (cd /workspace/frontend && npm install --prefer-offline 2>/dev/null) || true
    cd /workspace
fi

# Replace this shell with the container command (default: storkit
# /workspace) so signals are delivered straight to the server process.
exec "$@"

View File

@@ -1,12 +1,12 @@
{ {
"name": "living-spec-standalone", "name": "living-spec-standalone",
"version": "0.4.1", "version": "0.5.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "living-spec-standalone", "name": "living-spec-standalone",
"version": "0.4.1", "version": "0.5.0",
"dependencies": { "dependencies": {
"@types/react-syntax-highlighter": "^15.5.13", "@types/react-syntax-highlighter": "^15.5.13",
"react": "^19.1.0", "react": "^19.1.0",

View File

@@ -1,7 +1,7 @@
{ {
"name": "living-spec-standalone", "name": "living-spec-standalone",
"private": true, "private": true,
"version": "0.4.1", "version": "0.5.0",
"type": "module", "type": "module",
"scripts": { "scripts": {
"dev": "vite", "dev": "vite",

View File

@@ -115,6 +115,11 @@ export interface Message {
tool_call_id?: string; tool_call_id?: string;
} }
export interface AnthropicModelInfo {
id: string;
context_window: number;
}
export interface WorkItemContent { export interface WorkItemContent {
content: string; content: string;
stage: string; stage: string;
@@ -266,7 +271,7 @@ export const api = {
return requestJson<boolean>("/anthropic/key/exists", {}, baseUrl); return requestJson<boolean>("/anthropic/key/exists", {}, baseUrl);
}, },
getAnthropicModels(baseUrl?: string) { getAnthropicModels(baseUrl?: string) {
return requestJson<string[]>("/anthropic/models", {}, baseUrl); return requestJson<AnthropicModelInfo[]>("/anthropic/models", {}, baseUrl);
}, },
setAnthropicApiKey(api_key: string, baseUrl?: string) { setAnthropicApiKey(api_key: string, baseUrl?: string) {
return requestJson<boolean>( return requestJson<boolean>(

View File

@@ -4,7 +4,7 @@ import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism"; import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism";
import type { AgentConfigInfo } from "../api/agents"; import type { AgentConfigInfo } from "../api/agents";
import { agentsApi } from "../api/agents"; import { agentsApi } from "../api/agents";
import type { PipelineState } from "../api/client"; import type { AnthropicModelInfo, PipelineState } from "../api/client";
import { api, ChatWebSocket } from "../api/client"; import { api, ChatWebSocket } from "../api/client";
import { useChatHistory } from "../hooks/useChatHistory"; import { useChatHistory } from "../hooks/useChatHistory";
import type { Message, ProviderConfig } from "../types"; import type { Message, ProviderConfig } from "../types";
@@ -143,8 +143,13 @@ function formatToolActivity(toolName: string): string {
const estimateTokens = (text: string): number => Math.ceil(text.length / 4); const estimateTokens = (text: string): number => Math.ceil(text.length / 4);
const getContextWindowSize = (modelName: string): number => { const getContextWindowSize = (
if (modelName.startsWith("claude-")) return 200000; modelName: string,
claudeContextWindows?: Map<string, number>,
): number => {
if (modelName.startsWith("claude-")) {
return claudeContextWindows?.get(modelName) ?? 200000;
}
if (modelName.includes("llama3")) return 8192; if (modelName.includes("llama3")) return 8192;
if (modelName.includes("qwen2.5")) return 32768; if (modelName.includes("qwen2.5")) return 32768;
if (modelName.includes("deepseek")) return 16384; if (modelName.includes("deepseek")) return 16384;
@@ -163,6 +168,9 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
const [enableTools, setEnableTools] = useState(true); const [enableTools, setEnableTools] = useState(true);
const [availableModels, setAvailableModels] = useState<string[]>([]); const [availableModels, setAvailableModels] = useState<string[]>([]);
const [claudeModels, setClaudeModels] = useState<string[]>([]); const [claudeModels, setClaudeModels] = useState<string[]>([]);
const [claudeContextWindowMap, setClaudeContextWindowMap] = useState<
Map<string, number>
>(new Map());
const [streamingContent, setStreamingContent] = useState(""); const [streamingContent, setStreamingContent] = useState("");
const [streamingThinking, setStreamingThinking] = useState(""); const [streamingThinking, setStreamingThinking] = useState("");
const [showApiKeyDialog, setShowApiKeyDialog] = useState(false); const [showApiKeyDialog, setShowApiKeyDialog] = useState(false);
@@ -285,7 +293,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
totalTokens += estimateTokens(streamingContent); totalTokens += estimateTokens(streamingContent);
} }
const contextWindow = getContextWindowSize(model); const contextWindow = getContextWindowSize(model, claudeContextWindowMap);
const percentage = Math.round((totalTokens / contextWindow) * 100); const percentage = Math.round((totalTokens / contextWindow) * 100);
return { return {
@@ -293,7 +301,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
total: contextWindow, total: contextWindow,
percentage, percentage,
}; };
}, [messages, streamingContent, model]); }, [messages, streamingContent, model, claudeContextWindowMap]);
useEffect(() => { useEffect(() => {
try { try {
@@ -337,14 +345,18 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
.then((exists) => { .then((exists) => {
setHasAnthropicKey(exists); setHasAnthropicKey(exists);
if (!exists) return; if (!exists) return;
return api.getAnthropicModels().then((models) => { return api.getAnthropicModels().then((models: AnthropicModelInfo[]) => {
if (models.length > 0) { if (models.length > 0) {
const sortedModels = models.sort((a, b) => const sortedModels = models.sort((a, b) =>
a.toLowerCase().localeCompare(b.toLowerCase()), a.id.toLowerCase().localeCompare(b.id.toLowerCase()),
);
setClaudeModels(sortedModels.map((m) => m.id));
setClaudeContextWindowMap(
new Map(sortedModels.map((m) => [m.id, m.context_window])),
); );
setClaudeModels(sortedModels);
} else { } else {
setClaudeModels([]); setClaudeModels([]);
setClaudeContextWindowMap(new Map());
} }
}); });
}) })

2
package-lock.json generated
View File

@@ -1,5 +1,5 @@
{ {
"name": "storkit", "name": "workspace",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": {} "packages": {}

View File

@@ -49,7 +49,16 @@ PACKAGE_JSON="${SCRIPT_DIR}/frontend/package.json"
sed -i '' "s/\"version\": \".*\"/\"version\": \"${VERSION}\"/" "$PACKAGE_JSON" sed -i '' "s/\"version\": \".*\"/\"version\": \"${VERSION}\"/" "$PACKAGE_JSON"
echo "==> Bumped ${PACKAGE_JSON} to ${VERSION}" echo "==> Bumped ${PACKAGE_JSON} to ${VERSION}"
git add "$CARGO_TOML" "$PACKAGE_JSON" # Regenerate lock files so they stay in sync with the version bump.
CARGO_LOCK="${SCRIPT_DIR}/Cargo.lock"
(cd "${SCRIPT_DIR}/server" && cargo generate-lockfile)
echo "==> Regenerated Cargo.lock"
PACKAGE_LOCK="${SCRIPT_DIR}/frontend/package-lock.json"
(cd "${SCRIPT_DIR}/frontend" && npm install --package-lock-only --ignore-scripts --silent 2>/dev/null)
echo "==> Regenerated package-lock.json"
git add "$CARGO_TOML" "$CARGO_LOCK" "$PACKAGE_JSON" "$PACKAGE_LOCK"
git commit -m "Bump version to ${VERSION}" git commit -m "Bump version to ${VERSION}"
if ! command -v cross >/dev/null 2>&1; then if ! command -v cross >/dev/null 2>&1; then
@@ -138,9 +147,65 @@ else
| sed 's/^/- /') | sed 's/^/- /')
fi fi
# ── Generate summary overview ─────────────────────────────────
# Group completed items by keyword clusters to identify the
# release's focus areas.
# Build a short "focus" summary for the release notes by bucketing the
# completed items into keyword-derived themes and naming the top three.
#
# $1     - all completed item text (may contain literal \n escapes; they
#          are expanded by `echo -e` so grep sees one item per line).
# stdout - comma-separated theme labels, first one capitalised; empty
#          string when no theme keyword matched.
generate_summary() {
    local all_items="$1"
    # NOTE(review): `themes` is never used below — candidate for removal.
    local themes=""
    # Count items matching each theme keyword (one item per line via echo -e)
    local expanded
    expanded=$(echo -e "$all_items")
    # `grep -c` still prints "0" when nothing matches (exit status 1), so
    # `|| true` only neutralises the non-zero exit; the captured count is
    # unaffected.
    local bot_count=$(echo "$expanded" | grep -icE 'bot|command|chat|matrix|slack|whatsapp|status|help|assign|rebuild|shutdown|whatsup' || true)
    local mcp_count=$(echo "$expanded" | grep -icE 'mcp|tool' || true)
    local docker_count=$(echo "$expanded" | grep -icE 'docker|container|gvisor|orbstack|harden|security' || true)
    local agent_count=$(echo "$expanded" | grep -icE 'agent|runtime|chatgpt|gemini|openai|model|coder' || true)
    local ui_count=$(echo "$expanded" | grep -icE 'frontend|ui|web|oauth|scaffold' || true)
    local infra_count=$(echo "$expanded" | grep -icE 'release|makefile|refactor|upgrade|worktree|pipeline' || true)
    # Build theme list, highest count first
    # Each entry is "<count>:<label>" so a numeric sort on the first field
    # orders themes by how many items they matched.
    local -a theme_pairs=()
    [ "$agent_count" -gt 0 ] && theme_pairs+=("${agent_count}:multi-model agents")
    [ "$bot_count" -gt 0 ] && theme_pairs+=("${bot_count}:bot commands")
    [ "$mcp_count" -gt 0 ] && theme_pairs+=("${mcp_count}:MCP tools")
    [ "$docker_count" -gt 0 ] && theme_pairs+=("${docker_count}:Docker hardening")
    [ "$ui_count" -gt 0 ] && theme_pairs+=("${ui_count}:developer experience")
    [ "$infra_count" -gt 0 ] && theme_pairs+=("${infra_count}:infrastructure")
    # Sort by count descending, take top 3
    # NOTE(review): if theme_pairs is empty, "${theme_pairs[@]}" errors under
    # `set -u` on bash < 4.4 — confirm the script's shell options/targets.
    local sorted=$(printf '%s\n' "${theme_pairs[@]}" | sort -t: -k1 -nr | head -3)
    local labels=""
    # Join the surviving labels with ", ", skipping blank lines produced by
    # an empty pair list.
    while IFS=: read -r count label; do
        [ -z "$label" ] && continue
        if [ -z "$labels" ]; then
            # Capitalise first theme
            labels="$(echo "${label:0:1}" | tr '[:lower:]' '[:upper:]')${label:1}"
        else
            labels="${labels}, ${label}"
        fi
    done <<< "$sorted"
    echo "$labels"
}
ALL_ITEMS="${FEATURES}${FIXES}${REFACTORS}"
SUMMARY=$(generate_summary "$ALL_ITEMS")
if [ -n "$SUMMARY" ]; then
SUMMARY_LINE="**Focus:** ${SUMMARY}"
else
SUMMARY_LINE=""
fi
# Assemble the release body. # Assemble the release body.
RELEASE_BODY="## What's Changed" RELEASE_BODY="## What's Changed"
if [ -n "$SUMMARY_LINE" ]; then
RELEASE_BODY="${RELEASE_BODY}
${SUMMARY_LINE}"
fi
if [ -n "$FEATURES" ]; then if [ -n "$FEATURES" ]; then
RELEASE_BODY="${RELEASE_BODY} RELEASE_BODY="${RELEASE_BODY}
@@ -188,20 +253,29 @@ git push origin "$TAG"
# ── Create Gitea Release ────────────────────────────────────── # ── Create Gitea Release ──────────────────────────────────────
echo "==> Creating release on Gitea..." echo "==> Creating release on Gitea..."
RELEASE_JSON=$(python3 -c " RELEASE_JSON_FILE=$(mktemp)
trap "rm -f '$RELEASE_JSON_FILE'" EXIT
python3 -c "
import json, sys import json, sys
print(json.dumps({ with open(sys.argv[3], 'w') as f:
json.dump({
'tag_name': sys.argv[1], 'tag_name': sys.argv[1],
'name': sys.argv[1], 'name': sys.argv[1],
'body': sys.argv[2] 'body': sys.argv[2]
})) }, f)
" "$TAG" "$RELEASE_BODY") " "$TAG" "$RELEASE_BODY" "$RELEASE_JSON_FILE"
RELEASE_RESPONSE=$(curl -sf -X POST \ RELEASE_RESPONSE=$(curl -s --fail-with-body -X POST \
-H "Authorization: token ${GITEA_TOKEN}" \ -H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
"${GITEA_URL}/api/v1/repos/${REPO}/releases" \ "${GITEA_URL}/api/v1/repos/${REPO}/releases" \
-d "$RELEASE_JSON") -d "@${RELEASE_JSON_FILE}")
if [ $? -ne 0 ]; then
echo "Error: Failed to create Gitea release."
echo "Response: ${RELEASE_RESPONSE}"
exit 1
fi
RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")

1
serve

Submodule serve deleted from 1ec5c08ae7

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "storkit" name = "storkit"
version = "0.4.1" version = "0.5.0"
edition = "2024" edition = "2024"
build = "build.rs" build = "build.rs"

View File

@@ -254,9 +254,8 @@ mod tests {
fn run_project_tests_uses_script_test_when_present_and_passes() { fn run_project_tests_uses_script_test_when_present_and_passes() {
use std::fs; use std::fs;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use tempfile::tempdir;
let tmp = tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let path = tmp.path(); let path = tmp.path();
let script_dir = path.join("script"); let script_dir = path.join("script");
fs::create_dir_all(&script_dir).unwrap(); fs::create_dir_all(&script_dir).unwrap();
@@ -276,9 +275,8 @@ mod tests {
fn run_project_tests_reports_failure_when_script_test_exits_nonzero() { fn run_project_tests_reports_failure_when_script_test_exits_nonzero() {
use std::fs; use std::fs;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use tempfile::tempdir;
let tmp = tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let path = tmp.path(); let path = tmp.path();
let script_dir = path.join("script"); let script_dir = path.join("script");
fs::create_dir_all(&script_dir).unwrap(); fs::create_dir_all(&script_dir).unwrap();
@@ -313,9 +311,8 @@ mod tests {
fn coverage_gate_passes_when_script_exits_zero() { fn coverage_gate_passes_when_script_exits_zero() {
use std::fs; use std::fs;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use tempfile::tempdir;
let tmp = tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let path = tmp.path(); let path = tmp.path();
let script_dir = path.join("script"); let script_dir = path.join("script");
fs::create_dir_all(&script_dir).unwrap(); fs::create_dir_all(&script_dir).unwrap();
@@ -342,9 +339,8 @@ mod tests {
fn coverage_gate_fails_when_script_exits_nonzero() { fn coverage_gate_fails_when_script_exits_nonzero() {
use std::fs; use std::fs;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use tempfile::tempdir;
let tmp = tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let path = tmp.path(); let path = tmp.path();
let script_dir = path.join("script"); let script_dir = path.join("script");
fs::create_dir_all(&script_dir).unwrap(); fs::create_dir_all(&script_dir).unwrap();

View File

@@ -2,7 +2,8 @@ pub mod gates;
pub mod lifecycle; pub mod lifecycle;
pub mod merge; pub mod merge;
mod pool; mod pool;
mod pty; pub(crate) mod pty;
pub mod runtime;
pub mod token_usage; pub mod token_usage;
use crate::config::AgentConfig; use crate::config::AgentConfig;

View File

@@ -17,6 +17,7 @@ use super::{
AgentEvent, AgentInfo, AgentStatus, CompletionReport, PipelineStage, agent_config_stage, AgentEvent, AgentInfo, AgentStatus, CompletionReport, PipelineStage, agent_config_stage,
pipeline_stage, pipeline_stage,
}; };
use super::runtime::{AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext};
/// Build the composite key used to track agents in the pool. /// Build the composite key used to track agents in the pool.
fn composite_key(story_id: &str, agent_name: &str) -> String { fn composite_key(story_id: &str, agent_name: &str) -> String {
@@ -143,6 +144,10 @@ impl AgentPool {
} }
} }
pub fn port(&self) -> u16 {
self.port
}
/// Create a pool with a dummy watcher channel for unit tests. /// Create a pool with a dummy watcher channel for unit tests.
#[cfg(test)] #[cfg(test)]
pub fn new_test(port: u16) -> Self { pub fn new_test(port: u16) -> Self {
@@ -513,25 +518,71 @@ impl AgentPool {
}); });
Self::notify_agent_state_changed(&watcher_tx_clone); Self::notify_agent_state_changed(&watcher_tx_clone);
// Step 4: launch the agent process. // Step 4: launch the agent process via the configured runtime.
match super::pty::run_agent_pty_streaming( let runtime_name = config_clone
&sid, .find_agent(&aname)
&aname, .and_then(|a| a.runtime.as_deref())
&command, .unwrap_or("claude-code");
&args,
&prompt, let run_result = match runtime_name {
&wt_path_str, "claude-code" => {
&tx_clone, let runtime = ClaudeCodeRuntime::new(child_killers_clone.clone(), watcher_tx_clone.clone());
&log_clone, let ctx = RuntimeContext {
log_writer_clone, story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt,
cwd: wt_path_str,
inactivity_timeout_secs, inactivity_timeout_secs,
child_killers_clone, mcp_port: port_for_task,
) };
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await .await
{ }
Ok(pty_result) => { "gemini" => {
let runtime = GeminiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"openai" => {
let runtime = OpenAiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
other => Err(format!(
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
Supported: 'claude-code', 'gemini', 'openai'"
)),
};
match run_result {
Ok(result) => {
// Persist token usage if the agent reported it. // Persist token usage if the agent reported it.
if let Some(ref usage) = pty_result.token_usage if let Some(ref usage) = result.token_usage
&& let Ok(agents) = agents_ref.lock() && let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone) && let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root && let Some(ref pr) = agent.project_root
@@ -557,7 +608,7 @@ impl AgentPool {
port_for_task, port_for_task,
&sid, &sid,
&aname, &aname,
pty_result.session_id, result.session_id,
watcher_tx_clone.clone(), watcher_tx_clone.clone(),
) )
.await; .await;
@@ -1054,6 +1105,7 @@ mod tests {
use crate::agents::{AgentEvent, AgentStatus, PipelineStage}; use crate::agents::{AgentEvent, AgentStatus, PipelineStage};
use crate::config::ProjectConfig; use crate::config::ProjectConfig;
use portable_pty::{CommandBuilder, PtySize, native_pty_system}; use portable_pty::{CommandBuilder, PtySize, native_pty_system};
use std::process::Command;
fn make_config(toml_str: &str) -> ProjectConfig { fn make_config(toml_str: &str) -> ProjectConfig {
ProjectConfig::parse(toml_str).unwrap() ProjectConfig::parse(toml_str).unwrap()
@@ -1140,13 +1192,10 @@ mod tests {
/// Returns true if a process with the given PID is currently running. /// Returns true if a process with the given PID is currently running.
fn process_is_running(pid: u32) -> bool { fn process_is_running(pid: u32) -> bool {
std::process::Command::new("ps") Command::new("ps")
.arg("-p") .args(["-p", &pid.to_string()])
.arg(pid.to_string()) .output()
.stdout(std::process::Stdio::null()) .map(|o| o.status.success())
.stderr(std::process::Stdio::null())
.status()
.map(|s| s.success())
.unwrap_or(false) .unwrap_or(false)
} }

View File

@@ -7,11 +7,12 @@ use tokio::sync::broadcast;
use super::{AgentEvent, TokenUsage}; use super::{AgentEvent, TokenUsage};
use crate::agent_log::AgentLogWriter; use crate::agent_log::AgentLogWriter;
use crate::io::watcher::WatcherEvent;
use crate::slog; use crate::slog;
use crate::slog_warn; use crate::slog_warn;
/// Result from a PTY agent session, containing the session ID and token usage. /// Result from a PTY agent session, containing the session ID and token usage.
pub(super) struct PtyResult { pub(in crate::agents) struct PtyResult {
pub session_id: Option<String>, pub session_id: Option<String>,
pub token_usage: Option<TokenUsage>, pub token_usage: Option<TokenUsage>,
} }
@@ -35,7 +36,7 @@ impl Drop for ChildKillerGuard {
/// Spawn claude agent in a PTY and stream events through the broadcast channel. /// Spawn claude agent in a PTY and stream events through the broadcast channel.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub(super) async fn run_agent_pty_streaming( pub(in crate::agents) async fn run_agent_pty_streaming(
story_id: &str, story_id: &str,
agent_name: &str, agent_name: &str,
command: &str, command: &str,
@@ -47,6 +48,7 @@ pub(super) async fn run_agent_pty_streaming(
log_writer: Option<Arc<Mutex<AgentLogWriter>>>, log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
inactivity_timeout_secs: u64, inactivity_timeout_secs: u64,
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>, child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: broadcast::Sender<WatcherEvent>,
) -> Result<PtyResult, String> { ) -> Result<PtyResult, String> {
let sid = story_id.to_string(); let sid = story_id.to_string();
let aname = agent_name.to_string(); let aname = agent_name.to_string();
@@ -70,6 +72,7 @@ pub(super) async fn run_agent_pty_streaming(
log_writer.as_deref(), log_writer.as_deref(),
inactivity_timeout_secs, inactivity_timeout_secs,
&child_killers, &child_killers,
&watcher_tx,
) )
}) })
.await .await
@@ -162,6 +165,7 @@ fn run_agent_pty_blocking(
log_writer: Option<&Mutex<AgentLogWriter>>, log_writer: Option<&Mutex<AgentLogWriter>>,
inactivity_timeout_secs: u64, inactivity_timeout_secs: u64,
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>, child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: &broadcast::Sender<WatcherEvent>,
) -> Result<PtyResult, String> { ) -> Result<PtyResult, String> {
let pty_system = native_pty_system(); let pty_system = native_pty_system();
@@ -342,6 +346,15 @@ fn run_agent_pty_blocking(
// because thinking and text already arrived via stream_event. // because thinking and text already arrived via stream_event.
// The raw JSON is still forwarded as AgentJson below. // The raw JSON is still forwarded as AgentJson below.
"assistant" | "user" => {} "assistant" | "user" => {}
"rate_limit_event" => {
slog!(
"[agent:{story_id}:{agent_name}] API rate limit warning received"
);
let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
});
}
"result" => { "result" => {
// Extract token usage from the result event. // Extract token usage from the result event.
if let Some(usage) = TokenUsage::from_result_event(&json) { if let Some(usage) = TokenUsage::from_result_event(&json) {
@@ -390,6 +403,70 @@ fn run_agent_pty_blocking(
mod tests { mod tests {
use super::*; use super::*;
use crate::agents::AgentEvent; use crate::agents::AgentEvent;
use crate::io::watcher::WatcherEvent;
use std::collections::HashMap;
use std::sync::Arc;
// ── AC1: pty detects rate_limit_event and emits RateLimitWarning ─────────
/// Verify that when a `rate_limit_event` JSON line appears in PTY output,
/// `run_agent_pty_streaming` sends a `WatcherEvent::RateLimitWarning` with
/// the correct story_id and agent_name.
///
/// The agent process is `sh <script>`, where the script just prints a
/// single `rate_limit_event` JSON line. NOTE(review): no `-p` flag is
/// passed here, and the `"--"` below is the `prompt` parameter, not a
/// shell option terminator — presumably the PTY layer delivers the prompt
/// separately from `args`; confirm against run_agent_pty_streaming.
#[tokio::test]
async fn rate_limit_event_json_sends_watcher_warning() {
    use std::os::unix::fs::PermissionsExt;
    let tmp = tempfile::tempdir().unwrap();
    let script = tmp.path().join("emit_rate_limit.sh");
    std::fs::write(
        &script,
        "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"allowed_warning\"}}'\n",
    )
    .unwrap();
    std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
    let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
    let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
    let event_log = Arc::new(Mutex::new(Vec::new()));
    let child_killers = Arc::new(Mutex::new(HashMap::new()));
    // Arguments in order: command = "sh", args = [script path],
    // prompt = "--", cwd = "/tmp", inactivity timeout = 0
    // (presumably disables the timeout — confirm in pty.rs).
    let result = run_agent_pty_streaming(
        "365_story_test",
        "coder-1",
        "sh",
        &[script.to_string_lossy().to_string()],
        "--",
        "/tmp",
        &tx,
        &event_log,
        None,
        0,
        child_killers,
        watcher_tx,
    )
    .await;
    assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());
    let evt = watcher_rx
        .try_recv()
        .expect("Expected a RateLimitWarning to be sent on watcher_tx");
    match evt {
        WatcherEvent::RateLimitWarning {
            story_id,
            agent_name,
        } => {
            assert_eq!(story_id, "365_story_test");
            assert_eq!(agent_name, "coder-1");
        }
        other => panic!("Expected RateLimitWarning, got: {other:?}"),
    }
}
#[test] #[test]
fn test_emit_event_writes_to_log_writer() { fn test_emit_event_writes_to_log_writer() {

View File

@@ -0,0 +1,73 @@
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use portable_pty::ChildKiller;
use tokio::sync::broadcast;
use crate::agent_log::AgentLogWriter;
use crate::io::watcher::WatcherEvent;
use super::{AgentEvent, AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
/// Default agent runtime (`runtime = "claude-code"` in project.toml).
///
/// Spawns the `claude` CLI inside a PTY and streams its JSON output,
/// delegating entirely to the shared PTY execution path so streaming,
/// token tracking, and inactivity-timeout behaviour are unchanged.
pub struct ClaudeCodeRuntime {
    // Shared registry of process killers, keyed per agent, so the pool
    // can terminate the spawned child externally.
    child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
    // Channel used by the PTY layer to surface watcher events
    // (e.g. rate-limit warnings).
    watcher_tx: broadcast::Sender<WatcherEvent>,
}

impl ClaudeCodeRuntime {
    /// Build a runtime that registers child killers in `child_killers`
    /// and reports watcher events on `watcher_tx`.
    pub fn new(
        child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
        watcher_tx: broadcast::Sender<WatcherEvent>,
    ) -> Self {
        Self {
            child_killers,
            watcher_tx,
        }
    }
}

impl AgentRuntime for ClaudeCodeRuntime {
    /// Run the agent to completion via the PTY path, repackaging the
    /// PTY result (session id + token usage) as a `RuntimeResult`.
    async fn start(
        &self,
        ctx: RuntimeContext,
        tx: broadcast::Sender<AgentEvent>,
        event_log: Arc<Mutex<Vec<AgentEvent>>>,
        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
    ) -> Result<RuntimeResult, String> {
        super::super::pty::run_agent_pty_streaming(
            &ctx.story_id,
            &ctx.agent_name,
            &ctx.command,
            &ctx.args,
            &ctx.prompt,
            &ctx.cwd,
            &tx,
            &event_log,
            log_writer,
            ctx.inactivity_timeout_secs,
            Arc::clone(&self.child_killers),
            self.watcher_tx.clone(),
        )
        .await
        .map(|pty| RuntimeResult {
            session_id: pty.session_id,
            token_usage: pty.token_usage,
        })
    }

    /// No-op: termination is driven by the pool through
    /// `kill_child_for_key()`; the PTY's ChildKillerGuard deregisters
    /// itself when the process exits.
    fn stop(&self) {}

    /// Always `Idle` — lifecycle state is tracked by the pool, and this
    /// runtime keeps no per-run state of its own.
    fn get_status(&self) -> RuntimeStatus {
        RuntimeStatus::Idle
    }
}

View File

@@ -0,0 +1,809 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use tokio::sync::broadcast;
use crate::agent_log::AgentLogWriter;
use crate::slog;
use super::super::{AgentEvent, TokenUsage};
use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
// ── Public runtime struct ────────────────────────────────────────────
/// Agent runtime that drives a Gemini model through the Google AI
/// `generateContent` REST API.
///
/// The runtime:
/// 1. Fetches MCP tool definitions from storkit's MCP server.
/// 2. Converts them to Gemini function-calling format.
/// 3. Sends the agent prompt + tools to the Gemini API.
/// 4. Executes any requested function calls via MCP `tools/call`.
/// 5. Loops until the model produces a text-only response or an error.
/// 6. Tracks token usage from the API response metadata.
pub struct GeminiRuntime {
    /// Whether a stop has been requested.
    cancelled: Arc<AtomicBool>,
}

impl GeminiRuntime {
    /// Create a runtime with no stop requested yet.
    pub fn new() -> Self {
        Self {
            cancelled: Arc::new(AtomicBool::new(false)),
        }
    }
}

/// `Default` simply delegates to `new()`. A no-argument `new` without a
/// `Default` impl trips clippy's `new_without_default` lint and blocks
/// `..Default::default()`-style construction.
impl Default for GeminiRuntime {
    fn default() -> Self {
        Self::new()
    }
}
impl AgentRuntime for GeminiRuntime {
    /// Drive one full agent session against the Gemini `generateContent` API.
    ///
    /// Flow: fetch MCP tools, then loop — send prompt + history, emit any
    /// text output, execute requested function calls via MCP, feed the
    /// results back. The session ends when the model produces a text-only
    /// response, the turn cap is hit, `stop()` was called, or the API
    /// reports an error.
    ///
    /// # Errors
    /// Returns `Err` when `GOOGLE_AI_API_KEY` is unset, the tool listing
    /// or an API request fails, or the response body is malformed.
    async fn start(
        &self,
        ctx: RuntimeContext,
        tx: broadcast::Sender<AgentEvent>,
        event_log: Arc<Mutex<Vec<AgentEvent>>>,
        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
    ) -> Result<RuntimeResult, String> {
        let api_key = std::env::var("GOOGLE_AI_API_KEY").map_err(|_| {
            "GOOGLE_AI_API_KEY environment variable is not set. \
            Set it to your Google AI API key to use the Gemini runtime."
                .to_string()
        })?;
        let model = if ctx.command.starts_with("gemini") {
            // The pool puts the model into `command` for non-CLI runtimes,
            // but also check args for a --model flag.
            ctx.command.clone()
        } else {
            // Fall back to args: look for --model <value>
            ctx.args
                .iter()
                .position(|a| a == "--model")
                .and_then(|i| ctx.args.get(i + 1))
                .cloned()
                .unwrap_or_else(|| "gemini-2.5-pro".to_string())
        };
        let mcp_port = ctx.mcp_port;
        let mcp_base = format!("http://localhost:{mcp_port}/mcp");
        let client = Client::new();
        let cancelled = Arc::clone(&self.cancelled);
        // Step 1: Fetch MCP tool definitions and convert to Gemini format.
        let gemini_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;
        // Step 2: Build the initial conversation contents.
        let system_instruction = build_system_instruction(&ctx);
        let mut contents: Vec<Value> = vec![json!({
            "role": "user",
            "parts": [{ "text": ctx.prompt }]
        })];
        let mut total_usage = TokenUsage {
            input_tokens: 0,
            output_tokens: 0,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
            total_cost_usd: 0.0,
        };
        // Fan each event out to the broadcast channel, the in-memory event
        // log, and the optional persistent log writer in one call.
        let emit = |event: AgentEvent| {
            super::super::pty::emit_event(
                event,
                &tx,
                &event_log,
                log_writer.as_ref().map(|w| w.as_ref()),
            );
        };
        emit(AgentEvent::Status {
            story_id: ctx.story_id.clone(),
            agent_name: ctx.agent_name.clone(),
            status: "running".to_string(),
        });
        // Step 3: Conversation loop.
        let mut turn = 0u32;
        let max_turns = 200; // Safety limit
        loop {
            // A stop request ends the session gracefully, still reporting
            // the usage accumulated so far.
            if cancelled.load(Ordering::Relaxed) {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: "Agent was stopped by user".to_string(),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            turn += 1;
            if turn > max_turns {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: format!("Exceeded maximum turns ({max_turns})"),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            slog!("[gemini] Turn {turn} for {}:{}", ctx.story_id, ctx.agent_name);
            let request_body = build_generate_content_request(
                &system_instruction,
                &contents,
                &gemini_tools,
            );
            // NOTE(review): the API key travels in the URL query string,
            // where it can leak into proxy/server logs — consider the
            // `x-goog-api-key` request header instead.
            let url = format!(
                "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
            );
            let response = client
                .post(&url)
                .json(&request_body)
                .send()
                .await
                .map_err(|e| format!("Gemini API request failed: {e}"))?;
            let status = response.status();
            let body: Value = response
                .json()
                .await
                .map_err(|e| format!("Failed to parse Gemini API response: {e}"))?;
            if !status.is_success() {
                let error_msg = body["error"]["message"]
                    .as_str()
                    .unwrap_or("Unknown API error");
                let err = format!("Gemini API error ({status}): {error_msg}");
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: err.clone(),
                });
                return Err(err);
            }
            // Accumulate token usage.
            if let Some(usage) = parse_usage_metadata(&body) {
                total_usage.input_tokens += usage.input_tokens;
                total_usage.output_tokens += usage.output_tokens;
            }
            // Extract the candidate response.
            let candidate = body["candidates"]
                .as_array()
                .and_then(|c| c.first())
                .ok_or_else(|| "No candidates in Gemini response".to_string())?;
            let parts = candidate["content"]["parts"]
                .as_array()
                .ok_or_else(|| "No parts in Gemini response candidate".to_string())?;
            // Check finish reason.
            let finish_reason = candidate["finishReason"].as_str().unwrap_or("");
            // Separate text parts and function call parts.
            let mut text_parts: Vec<String> = Vec::new();
            let mut function_calls: Vec<GeminiFunctionCall> = Vec::new();
            for part in parts {
                if let Some(text) = part["text"].as_str() {
                    text_parts.push(text.to_string());
                }
                if let Some(fc) = part.get("functionCall")
                    && let (Some(name), Some(args)) =
                        (fc["name"].as_str(), fc.get("args"))
                {
                    function_calls.push(GeminiFunctionCall {
                        name: name.to_string(),
                        args: args.clone(),
                    });
                }
            }
            // Emit any text output.
            for text in &text_parts {
                if !text.is_empty() {
                    emit(AgentEvent::Output {
                        story_id: ctx.story_id.clone(),
                        agent_name: ctx.agent_name.clone(),
                        text: text.clone(),
                    });
                }
            }
            // If no function calls, the model is done.
            if function_calls.is_empty() {
                emit(AgentEvent::Done {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    session_id: None,
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            // Add the model's response to the conversation.
            let model_parts: Vec<Value> = parts.to_vec();
            contents.push(json!({
                "role": "model",
                "parts": model_parts
            }));
            // Execute function calls via MCP and build response parts.
            let mut response_parts: Vec<Value> = Vec::new();
            for fc in &function_calls {
                // A stop request abandons the remaining calls; the partial
                // responses are still appended below, and the next loop
                // iteration returns via the cancellation branch above.
                if cancelled.load(Ordering::Relaxed) {
                    break;
                }
                slog!(
                    "[gemini] Calling MCP tool '{}' for {}:{}",
                    fc.name,
                    ctx.story_id,
                    ctx.agent_name
                );
                emit(AgentEvent::Output {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    text: format!("\n[Tool call: {}]\n", fc.name),
                });
                let tool_result =
                    call_mcp_tool(&client, &mcp_base, &fc.name, &fc.args).await;
                // Tool failures are reported back to the model rather than
                // aborting the session, so it can react to the error.
                let response_value = match &tool_result {
                    Ok(result) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!(
                                "[Tool result: {} chars]\n",
                                result.len()
                            ),
                        });
                        json!({ "result": result })
                    }
                    Err(e) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!("[Tool error: {e}]\n"),
                        });
                        json!({ "error": e })
                    }
                };
                response_parts.push(json!({
                    "functionResponse": {
                        "name": fc.name,
                        "response": response_value
                    }
                }));
            }
            // Add function responses to the conversation.
            contents.push(json!({
                "role": "user",
                "parts": response_parts
            }));
            // If the model indicated it's done despite having function calls,
            // respect the finish reason.
            // NOTE(review): this branch is unreachable — `function_calls` is
            // guaranteed non-empty here (the empty case returned above), so
            // the loop can only exit via the earlier `return`s; the
            // Done/Ok code after the loop is likewise dead. Candidate for
            // cleanup.
            if finish_reason == "STOP" && function_calls.is_empty() {
                break;
            }
        }
        emit(AgentEvent::Done {
            story_id: ctx.story_id.clone(),
            agent_name: ctx.agent_name.clone(),
            session_id: None,
        });
        Ok(RuntimeResult {
            session_id: None,
            token_usage: Some(total_usage),
        })
    }

    /// Request cancellation; the conversation loop observes the flag at
    /// the start of each turn and between tool calls.
    fn stop(&self) {
        self.cancelled.store(true, Ordering::Relaxed);
    }

    /// Report `Failed` once a stop has been requested, `Idle` otherwise.
    /// Detailed lifecycle state lives in the pool, not the runtime.
    fn get_status(&self) -> RuntimeStatus {
        if self.cancelled.load(Ordering::Relaxed) {
            RuntimeStatus::Failed
        } else {
            RuntimeStatus::Idle
        }
    }
}
// ── Internal types ───────────────────────────────────────────────────
/// A single function call requested by the model in a response part.
struct GeminiFunctionCall {
    // Tool name, matching an MCP tool / Gemini function declaration.
    name: String,
    // JSON arguments object supplied by the model.
    args: Value,
}
// ── Gemini API types (for serde) ─────────────────────────────────────
/// A Gemini function-calling declaration, derived from an MCP tool.
#[derive(Debug, Serialize, Deserialize)]
struct GeminiFunctionDeclaration {
    // Function name exposed to the model.
    name: String,
    // Human-readable description the model uses to pick a tool.
    description: String,
    // OpenAPI-subset parameter schema; `None` for parameterless tools,
    // in which case the field is omitted from the serialized request.
    #[serde(skip_serializing_if = "Option::is_none")]
    parameters: Option<Value>,
}
// ── Helper functions ─────────────────────────────────────────────────
/// Assemble the Gemini `system_instruction` payload for this run.
///
/// Honours an explicit `--append-system-prompt <text>` pair found in
/// `ctx.args`; when absent (or when the flag has no following value),
/// falls back to a default instruction naming the story and working
/// directory.
fn build_system_instruction(ctx: &RuntimeContext) -> Value {
    let default_instruction = || {
        format!(
            "You are an AI coding agent working on story {}. \
            You have access to tools via function calling. \
            Use them to complete the task. \
            Work in the directory: {}",
            ctx.story_id, ctx.cwd
        )
    };
    // Scan adjacent arg pairs for the flag and take the value that
    // immediately follows it.
    let system_text = ctx
        .args
        .windows(2)
        .find(|pair| pair[0] == "--append-system-prompt")
        .map(|pair| pair[1].clone())
        .unwrap_or_else(default_instruction);
    json!({
        "parts": [{ "text": system_text }]
    })
}
/// Construct the JSON body for a `generateContent` call.
///
/// Always includes the system instruction, the conversation history, and
/// a fixed generation config; a `tools` section is attached only when at
/// least one function declaration is available.
fn build_generate_content_request(
    system_instruction: &Value,
    contents: &[Value],
    gemini_tools: &[GeminiFunctionDeclaration],
) -> Value {
    let tools_section = if gemini_tools.is_empty() {
        None
    } else {
        Some(json!([{
            "functionDeclarations": gemini_tools
        }]))
    };
    let mut body = json!({
        "system_instruction": system_instruction,
        "contents": contents,
        "generationConfig": {
            "temperature": 0.2,
            "maxOutputTokens": 65536,
        }
    });
    if let Some(tools) = tools_section {
        body["tools"] = tools;
    }
    body
}
/// Fetch the MCP server's tool list and translate each entry into a
/// Gemini function declaration.
///
/// Entries with an empty name are dropped; each remaining tool's
/// JSON-Schema `inputSchema` is converted to Gemini's OpenAPI-subset
/// parameter format (the two are structurally compatible for simple
/// object schemas).
///
/// # Errors
/// Returns `Err` when the request fails, the response is not JSON, or
/// no `tools` array is present in the result.
async fn fetch_and_convert_mcp_tools(
    client: &Client,
    mcp_base: &str,
) -> Result<Vec<GeminiFunctionDeclaration>, String> {
    let request = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/list",
        "params": {}
    });
    let body: Value = client
        .post(mcp_base)
        .json(&request)
        .send()
        .await
        .map_err(|e| format!("Failed to fetch MCP tools: {e}"))?
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;
    let tools = body["result"]["tools"]
        .as_array()
        .ok_or_else(|| "No tools array in MCP response".to_string())?;
    let declarations: Vec<GeminiFunctionDeclaration> = tools
        .iter()
        .filter_map(|tool| {
            let name = tool["name"].as_str().unwrap_or("");
            // Skip malformed entries that carry no usable name.
            if name.is_empty() {
                return None;
            }
            Some(GeminiFunctionDeclaration {
                name: name.to_string(),
                description: tool["description"].as_str().unwrap_or("").to_string(),
                parameters: convert_mcp_schema_to_gemini(tool.get("inputSchema")),
            })
        })
        .collect();
    slog!("[gemini] Loaded {} MCP tools as function declarations", declarations.len());
    Ok(declarations)
}
/// Translate an MCP tool's `inputSchema` (JSON Schema) into the
/// OpenAPI-subset parameter schema Gemini function calling expects.
///
/// Returns `None` when no schema was supplied, when it has no
/// `properties` key, or when `properties` is an empty object (a
/// parameterless tool). The `required` list is carried over verbatim
/// when present; unsupported keywords are stripped from the properties.
fn convert_mcp_schema_to_gemini(schema: Option<&Value>) -> Option<Value> {
    let schema = schema?;
    let properties = schema.get("properties")?;
    // An empty properties object means the tool takes no parameters.
    let has_no_fields = properties
        .as_object()
        .is_some_and(|props| props.is_empty());
    if has_no_fields {
        return None;
    }
    let mut converted = json!({
        "type": "object",
        "properties": clean_schema_properties(properties),
    });
    if let Some(required) = schema.get("required") {
        converted["required"] = required.clone();
    }
    Some(converted)
}
/// Strip JSON-Schema keywords that Gemini's schema subset rejects
/// (`$schema`, `additionalProperties`), recursing into nested object
/// `properties`.
///
/// NOTE(review): array `items` schemas are cleaned one level deep only —
/// keywords nested further inside an item schema survive. Confirm this
/// suffices for the tool schemas in use.
fn clean_schema_properties(properties: &Value) -> Value {
    let Some(obj) = properties.as_object() else {
        // Non-object input (e.g. null) is passed through untouched.
        return properties.clone();
    };
    let cleaned: serde_json::Map<String, Value> = obj
        .iter()
        .map(|(key, value)| {
            let mut prop = value.clone();
            if let Some(p) = prop.as_object_mut() {
                p.remove("$schema");
                p.remove("additionalProperties");
                // Recurse into nested object properties.
                if let Some(nested) = p.get("properties").cloned() {
                    p.insert(
                        "properties".to_string(),
                        clean_schema_properties(&nested),
                    );
                }
                // Shallow clean of the array item schema, when present.
                if let Some(items_obj) =
                    p.get("items").and_then(|items| items.as_object()).cloned()
                {
                    let mut cleaned_items = items_obj;
                    cleaned_items.remove("$schema");
                    cleaned_items.remove("additionalProperties");
                    p.insert("items".to_string(), Value::Object(cleaned_items));
                }
            }
            (key.clone(), prop)
        })
        .collect();
    Value::Object(cleaned)
}
/// Invoke a single MCP tool through storkit's MCP server and return its
/// textual output.
///
/// Successful replies look like
/// `{ result: { content: [{ type: "text", text: "..." }] } }`; all text
/// fragments are joined with newlines. When no text fragments exist, the
/// raw `result` JSON is returned instead.
///
/// # Errors
/// Returns `Err` on transport failures, unparseable responses, or when
/// the reply carries a JSON-RPC `error` object.
async fn call_mcp_tool(
    client: &Client,
    mcp_base: &str,
    tool_name: &str,
    args: &Value,
) -> Result<String, String> {
    let request = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": tool_name,
            "arguments": args
        }
    });
    let body: Value = client
        .post(mcp_base)
        .json(&request)
        .send()
        .await
        .map_err(|e| format!("MCP tool call failed: {e}"))?
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;
    if let Some(error) = body.get("error") {
        let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
        return Err(format!("MCP tool '{tool_name}' error: {msg}"));
    }
    // Collect every text fragment from the content array (empty when the
    // content key is missing or not an array).
    let texts: Vec<&str> = body["result"]["content"]
        .as_array()
        .map(|arr| arr.iter().filter_map(|c| c["text"].as_str()).collect())
        .unwrap_or_default();
    if texts.is_empty() {
        // Fall back to serializing the entire result.
        Ok(body["result"].to_string())
    } else {
        Ok(texts.join("\n"))
    }
}
/// Parse token usage metadata from a Gemini API response.
fn parse_usage_metadata(response: &Value) -> Option<TokenUsage> {
let metadata = response.get("usageMetadata")?;
Some(TokenUsage {
input_tokens: metadata
.get("promptTokenCount")
.and_then(|v| v.as_u64())
.unwrap_or(0),
output_tokens: metadata
.get("candidatesTokenCount")
.and_then(|v| v.as_u64())
.unwrap_or(0),
// Gemini doesn't have cache token fields, but we keep the struct uniform.
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
// Google AI API doesn't report cost; leave at 0.
total_cost_usd: 0.0,
})
}
// ── Tests ────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    // ── convert_mcp_schema_to_gemini ─────────────────────────────────

    #[test]
    fn convert_mcp_schema_simple_object() {
        let schema = json!({
            "type": "object",
            "properties": {
                "story_id": {
                    "type": "string",
                    "description": "Story identifier"
                }
            },
            "required": ["story_id"]
        });
        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
        assert_eq!(result["type"], "object");
        assert!(result["properties"]["story_id"].is_object());
        assert_eq!(result["required"][0], "story_id");
    }

    // An empty properties map yields no Gemini schema at all.
    #[test]
    fn convert_mcp_schema_empty_properties_returns_none() {
        let schema = json!({
            "type": "object",
            "properties": {}
        });
        assert!(convert_mcp_schema_to_gemini(Some(&schema)).is_none());
    }

    #[test]
    fn convert_mcp_schema_none_returns_none() {
        assert!(convert_mcp_schema_to_gemini(None).is_none());
    }

    // Keywords Gemini rejects must be stripped from each property.
    #[test]
    fn convert_mcp_schema_strips_additional_properties() {
        let schema = json!({
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "additionalProperties": false,
                    "$schema": "http://json-schema.org/draft-07/schema#"
                }
            }
        });
        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
        let name_prop = &result["properties"]["name"];
        assert!(name_prop.get("additionalProperties").is_none());
        assert!(name_prop.get("$schema").is_none());
        assert_eq!(name_prop["type"], "string");
    }

    #[test]
    fn convert_mcp_schema_with_nested_objects() {
        let schema = json!({
            "type": "object",
            "properties": {
                "config": {
                    "type": "object",
                    "properties": {
                        "key": { "type": "string" }
                    }
                }
            }
        });
        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
        assert!(result["properties"]["config"]["properties"]["key"].is_object());
    }

    // Array `items` schemas are cleaned as well.
    #[test]
    fn convert_mcp_schema_with_array_items() {
        let schema = json!({
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "name": { "type": "string" }
                        },
                        "additionalProperties": false
                    }
                }
            }
        });
        let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
        let items_schema = &result["properties"]["items"]["items"];
        assert!(items_schema.get("additionalProperties").is_none());
    }

    // ── build_system_instruction ─────────────────────────────────────

    // `--append-system-prompt <text>` in args overrides the default.
    #[test]
    fn build_system_instruction_uses_args() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gemini-2.5-pro".to_string(),
            args: vec![
                "--append-system-prompt".to_string(),
                "Custom system prompt".to_string(),
            ],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        let instruction = build_system_instruction(&ctx);
        assert_eq!(instruction["parts"][0]["text"], "Custom system prompt");
    }

    // Without args, the generated text mentions story id and cwd.
    #[test]
    fn build_system_instruction_default() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gemini-2.5-pro".to_string(),
            args: vec![],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        let instruction = build_system_instruction(&ctx);
        let text = instruction["parts"][0]["text"].as_str().unwrap();
        assert!(text.contains("42_story_test"));
        assert!(text.contains("/tmp/wt"));
    }

    // ── build_generate_content_request ───────────────────────────────

    #[test]
    fn build_generate_content_request_includes_tools() {
        let system = json!({"parts": [{"text": "system"}]});
        let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
        let tools = vec![GeminiFunctionDeclaration {
            name: "my_tool".to_string(),
            description: "A tool".to_string(),
            parameters: Some(json!({"type": "object", "properties": {"x": {"type": "string"}}})),
        }];
        let body = build_generate_content_request(&system, &contents, &tools);
        assert!(body["tools"][0]["functionDeclarations"].is_array());
        assert_eq!(body["tools"][0]["functionDeclarations"][0]["name"], "my_tool");
    }

    // With no tools, the request omits the `tools` key entirely.
    #[test]
    fn build_generate_content_request_no_tools() {
        let system = json!({"parts": [{"text": "system"}]});
        let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
        let tools: Vec<GeminiFunctionDeclaration> = vec![];
        let body = build_generate_content_request(&system, &contents, &tools);
        assert!(body.get("tools").is_none());
    }

    // ── parse_usage_metadata ─────────────────────────────────────────

    #[test]
    fn parse_usage_metadata_valid() {
        let response = json!({
            "usageMetadata": {
                "promptTokenCount": 100,
                "candidatesTokenCount": 50,
                "totalTokenCount": 150
            }
        });
        let usage = parse_usage_metadata(&response).unwrap();
        assert_eq!(usage.input_tokens, 100);
        assert_eq!(usage.output_tokens, 50);
        assert_eq!(usage.cache_creation_input_tokens, 0);
        assert_eq!(usage.total_cost_usd, 0.0);
    }

    #[test]
    fn parse_usage_metadata_missing() {
        let response = json!({"candidates": []});
        assert!(parse_usage_metadata(&response).is_none());
    }

    // ── runtime lifecycle ────────────────────────────────────────────

    // stop() flips the cancelled flag, reported as Failed.
    #[test]
    fn gemini_runtime_stop_sets_cancelled() {
        let runtime = GeminiRuntime::new();
        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
        runtime.stop();
        assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
    }

    #[test]
    fn model_extraction_from_command() {
        // When command starts with "gemini", use it as model name
        let ctx = RuntimeContext {
            story_id: "1".to_string(),
            agent_name: "coder".to_string(),
            command: "gemini-2.5-pro".to_string(),
            args: vec![],
            prompt: "test".to_string(),
            cwd: "/tmp".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        // The model extraction logic is inside start(), but we test the
        // condition here.
        assert!(ctx.command.starts_with("gemini"));
    }
}

View File

@@ -0,0 +1,163 @@
mod claude_code;
mod gemini;
mod openai;
pub use claude_code::ClaudeCodeRuntime;
pub use gemini::GeminiRuntime;
pub use openai::OpenAiRuntime;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use crate::agent_log::AgentLogWriter;
use super::{AgentEvent, TokenUsage};
/// Context passed to a runtime when launching an agent session.
pub struct RuntimeContext {
    /// Story identifier the agent works on (e.g. `42_story_foo`).
    pub story_id: String,
    /// Logical agent name (e.g. `coder-1`), used to attribute events.
    pub agent_name: String,
    /// Command to spawn for CLI runtimes (e.g. `claude`); API-based
    /// runtimes put the model name here (e.g. `gemini-2.5-pro`, `gpt-4o`).
    pub command: String,
    /// Extra arguments, e.g. `--model <name>` or
    /// `--append-system-prompt <text>`.
    pub args: Vec<String>,
    /// The task prompt given to the agent.
    pub prompt: String,
    /// Working directory for the agent session.
    pub cwd: String,
    /// Inactivity timeout in seconds for the session.
    pub inactivity_timeout_secs: u64,
    /// Port of the storkit MCP server, used by API-based runtimes (Gemini, OpenAI)
    /// to call back for tool execution.
    pub mcp_port: u16,
}
/// Result returned by a runtime after the agent session completes.
pub struct RuntimeResult {
    /// Backend session identifier, when the runtime provides one.
    pub session_id: Option<String>,
    /// Aggregated token usage for the session, if tracked.
    pub token_usage: Option<TokenUsage>,
}
/// Runtime status reported by the backend.
#[derive(Debug, Clone, PartialEq)]
#[allow(dead_code)]
pub enum RuntimeStatus {
    /// No session is active.
    Idle,
    /// An agent session is in progress.
    Running,
    /// The last session finished successfully.
    Completed,
    /// The last session failed or was stopped.
    Failed,
}
/// Abstraction over different agent execution backends.
///
/// Implementations:
/// - [`ClaudeCodeRuntime`]: spawns the `claude` CLI via a PTY (default, `runtime = "claude-code"`)
/// - [`GeminiRuntime`]: drives a Gemini model through the Google AI API
/// - [`OpenAiRuntime`]: drives an OpenAI model through the Chat Completions API
#[allow(dead_code)]
pub trait AgentRuntime: Send + Sync {
    /// Start the agent and drive it to completion, streaming events through
    /// the provided broadcast sender and event log.
    ///
    /// Returns when the agent session finishes (success or error).
    async fn start(
        &self,
        ctx: RuntimeContext,
        tx: broadcast::Sender<AgentEvent>,
        event_log: Arc<Mutex<Vec<AgentEvent>>>,
        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
    ) -> Result<RuntimeResult, String>;
    /// Stop the running agent.
    fn stop(&self);
    /// Get the current runtime status.
    fn get_status(&self) -> RuntimeStatus;
    /// Return any events buffered outside the broadcast channel.
    ///
    /// PTY-based runtimes stream directly to the broadcast channel; this
    /// returns empty by default. API-based runtimes may buffer events here.
    fn stream_events(&self) -> Vec<AgentEvent> {
        vec![]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // RuntimeContext is a plain data carrier; verify field passthrough.
    #[test]
    fn runtime_context_fields() {
        let ctx = RuntimeContext {
            story_id: "42_story_foo".to_string(),
            agent_name: "coder-1".to_string(),
            command: "claude".to_string(),
            args: vec!["--model".to_string(), "sonnet".to_string()],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert_eq!(ctx.story_id, "42_story_foo");
        assert_eq!(ctx.agent_name, "coder-1");
        assert_eq!(ctx.command, "claude");
        assert_eq!(ctx.args.len(), 2);
        assert_eq!(ctx.prompt, "Do the thing");
        assert_eq!(ctx.cwd, "/tmp/wt");
        assert_eq!(ctx.inactivity_timeout_secs, 300);
        assert_eq!(ctx.mcp_port, 3001);
    }

    #[test]
    fn runtime_result_fields() {
        let result = RuntimeResult {
            session_id: Some("sess-123".to_string()),
            token_usage: Some(TokenUsage {
                input_tokens: 100,
                output_tokens: 50,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
                total_cost_usd: 0.01,
            }),
        };
        assert_eq!(result.session_id, Some("sess-123".to_string()));
        assert!(result.token_usage.is_some());
        let usage = result.token_usage.unwrap();
        assert_eq!(usage.input_tokens, 100);
        assert_eq!(usage.output_tokens, 50);
        assert_eq!(usage.total_cost_usd, 0.01);
    }

    // Both optional fields may legitimately be absent.
    #[test]
    fn runtime_result_no_usage() {
        let result = RuntimeResult {
            session_id: None,
            token_usage: None,
        };
        assert!(result.session_id.is_none());
        assert!(result.token_usage.is_none());
    }

    // Sanity-check the derived PartialEq on RuntimeStatus.
    #[test]
    fn runtime_status_variants() {
        assert_eq!(RuntimeStatus::Idle, RuntimeStatus::Idle);
        assert_ne!(RuntimeStatus::Running, RuntimeStatus::Completed);
        assert_ne!(RuntimeStatus::Failed, RuntimeStatus::Idle);
    }

    #[test]
    fn claude_code_runtime_get_status_returns_idle() {
        use std::collections::HashMap;
        use crate::io::watcher::WatcherEvent;
        let killers = Arc::new(Mutex::new(HashMap::new()));
        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
        let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
    }

    // PTY-based runtime uses the broadcast channel, so the buffered
    // event list stays empty.
    #[test]
    fn claude_code_runtime_stream_events_empty() {
        use std::collections::HashMap;
        use crate::io::watcher::WatcherEvent;
        let killers = Arc::new(Mutex::new(HashMap::new()));
        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
        let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
        assert!(runtime.stream_events().is_empty());
    }
}

View File

@@ -0,0 +1,704 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use reqwest::Client;
use serde_json::{json, Value};
use tokio::sync::broadcast;
use crate::agent_log::AgentLogWriter;
use crate::slog;
use super::super::{AgentEvent, TokenUsage};
use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
// ── Public runtime struct ────────────────────────────────────────────
/// Agent runtime that drives an OpenAI model (GPT-4o, o3, etc.) through
/// the OpenAI Chat Completions API.
///
/// The runtime:
/// 1. Fetches MCP tool definitions from storkit's MCP server.
/// 2. Converts them to OpenAI function-calling format.
/// 3. Sends the agent prompt + tools to the Chat Completions API.
/// 4. Executes any requested tool calls via MCP `tools/call`.
/// 5. Loops until the model produces a response with no tool calls.
/// 6. Tracks token usage from the API response.
pub struct OpenAiRuntime {
    /// Whether a stop has been requested. Set by `stop()` and checked
    /// between turns and between tool calls in `start()`.
    cancelled: Arc<AtomicBool>,
}
impl OpenAiRuntime {
    /// Create a new, idle runtime with no stop request pending.
    pub fn new() -> Self {
        let cancelled = Arc::new(AtomicBool::new(false));
        Self { cancelled }
    }
}
impl AgentRuntime for OpenAiRuntime {
    // Drives the full conversation: prompt → model → tool calls → tool
    // results → model … until the model stops requesting tools, the
    // turn cap is hit, or a stop is requested. Errors from the API
    // surface as Err; cancellation and turn-cap exhaustion return Ok
    // with the usage accumulated so far.
    async fn start(
        &self,
        ctx: RuntimeContext,
        tx: broadcast::Sender<AgentEvent>,
        event_log: Arc<Mutex<Vec<AgentEvent>>>,
        log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
    ) -> Result<RuntimeResult, String> {
        let api_key = std::env::var("OPENAI_API_KEY").map_err(|_| {
            "OPENAI_API_KEY environment variable is not set. \
            Set it to your OpenAI API key to use the OpenAI runtime."
                .to_string()
        })?;
        let model = if ctx.command.starts_with("gpt") || ctx.command.starts_with("o") {
            // The pool puts the model into `command` for non-CLI runtimes.
            ctx.command.clone()
        } else {
            // Fall back to args: look for --model <value>
            ctx.args
                .iter()
                .position(|a| a == "--model")
                .and_then(|i| ctx.args.get(i + 1))
                .cloned()
                .unwrap_or_else(|| "gpt-4o".to_string())
        };
        let mcp_port = ctx.mcp_port;
        let mcp_base = format!("http://localhost:{mcp_port}/mcp");
        let client = Client::new();
        let cancelled = Arc::clone(&self.cancelled);
        // Step 1: Fetch MCP tool definitions and convert to OpenAI format.
        let openai_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;
        // Step 2: Build the initial conversation messages.
        let system_text = build_system_text(&ctx);
        let mut messages: Vec<Value> = vec![
            json!({ "role": "system", "content": system_text }),
            json!({ "role": "user", "content": ctx.prompt }),
        ];
        // Accumulated across all turns; cache/cost fields stay zero
        // because the API response doesn't report them here.
        let mut total_usage = TokenUsage {
            input_tokens: 0,
            output_tokens: 0,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
            total_cost_usd: 0.0,
        };
        // Helper that fans an event out to the broadcast channel, the
        // in-memory event log, and (if present) the on-disk log writer.
        let emit = |event: AgentEvent| {
            super::super::pty::emit_event(
                event,
                &tx,
                &event_log,
                log_writer.as_ref().map(|w| w.as_ref()),
            );
        };
        emit(AgentEvent::Status {
            story_id: ctx.story_id.clone(),
            agent_name: ctx.agent_name.clone(),
            status: "running".to_string(),
        });
        // Step 3: Conversation loop.
        let mut turn = 0u32;
        let max_turns = 200; // Safety limit
        loop {
            // Cancellation is checked at the top of every turn.
            if cancelled.load(Ordering::Relaxed) {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: "Agent was stopped by user".to_string(),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            turn += 1;
            if turn > max_turns {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: format!("Exceeded maximum turns ({max_turns})"),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            slog!(
                "[openai] Turn {turn} for {}:{}",
                ctx.story_id,
                ctx.agent_name
            );
            let mut request_body = json!({
                "model": model,
                "messages": messages,
                "temperature": 0.2,
            });
            if !openai_tools.is_empty() {
                request_body["tools"] = json!(openai_tools);
            }
            let response = client
                .post("https://api.openai.com/v1/chat/completions")
                .bearer_auth(&api_key)
                .json(&request_body)
                .send()
                .await
                .map_err(|e| format!("OpenAI API request failed: {e}"))?;
            // Capture the HTTP status before consuming the response body.
            let status = response.status();
            let body: Value = response
                .json()
                .await
                .map_err(|e| format!("Failed to parse OpenAI API response: {e}"))?;
            if !status.is_success() {
                let error_msg = body["error"]["message"]
                    .as_str()
                    .unwrap_or("Unknown API error");
                let err = format!("OpenAI API error ({status}): {error_msg}");
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: err.clone(),
                });
                return Err(err);
            }
            // Accumulate token usage.
            if let Some(usage) = parse_usage(&body) {
                total_usage.input_tokens += usage.input_tokens;
                total_usage.output_tokens += usage.output_tokens;
            }
            // Extract the first choice.
            let choice = body["choices"]
                .as_array()
                .and_then(|c| c.first())
                .ok_or_else(|| "No choices in OpenAI response".to_string())?;
            let message = &choice["message"];
            let content = message["content"].as_str().unwrap_or("");
            // Emit any text content.
            if !content.is_empty() {
                emit(AgentEvent::Output {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    text: content.to_string(),
                });
            }
            // Check for tool calls.
            let tool_calls = message["tool_calls"].as_array();
            if tool_calls.is_none() || tool_calls.is_some_and(|tc| tc.is_empty()) {
                // No tool calls — model is done.
                emit(AgentEvent::Done {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    session_id: None,
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }
            let tool_calls = tool_calls.unwrap();
            // Add the assistant message (with tool_calls) to the conversation.
            messages.push(message.clone());
            // Execute each tool call via MCP and add results.
            for tc in tool_calls {
                // A stop request aborts remaining tool calls; the top of
                // the next loop iteration then reports the cancellation.
                if cancelled.load(Ordering::Relaxed) {
                    break;
                }
                let call_id = tc["id"].as_str().unwrap_or("");
                let function = &tc["function"];
                let tool_name = function["name"].as_str().unwrap_or("");
                // Arguments arrive as a JSON-encoded string; malformed
                // arguments degrade to an empty object.
                let arguments_str = function["arguments"].as_str().unwrap_or("{}");
                let args: Value = serde_json::from_str(arguments_str).unwrap_or(json!({}));
                slog!(
                    "[openai] Calling MCP tool '{}' for {}:{}",
                    tool_name,
                    ctx.story_id,
                    ctx.agent_name
                );
                emit(AgentEvent::Output {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    text: format!("\n[Tool call: {tool_name}]\n"),
                });
                let tool_result = call_mcp_tool(&client, &mcp_base, tool_name, &args).await;
                // Tool failures are reported back to the model as text
                // rather than aborting the session.
                let result_content = match &tool_result {
                    Ok(result) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!("[Tool result: {} chars]\n", result.len()),
                        });
                        result.clone()
                    }
                    Err(e) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!("[Tool error: {e}]\n"),
                        });
                        format!("Error: {e}")
                    }
                };
                // OpenAI expects tool results as role=tool messages with
                // the matching tool_call_id.
                messages.push(json!({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": result_content,
                }));
            }
        }
    }
    fn stop(&self) {
        self.cancelled.store(true, Ordering::Relaxed);
    }
    // NOTE(review): this backend only ever reports Idle or Failed;
    // Running/Completed are never surfaced here.
    fn get_status(&self) -> RuntimeStatus {
        if self.cancelled.load(Ordering::Relaxed) {
            RuntimeStatus::Failed
        } else {
            RuntimeStatus::Idle
        }
    }
}
// ── Helper functions ─────────────────────────────────────────────────
/// Build the system message text from the RuntimeContext.
fn build_system_text(ctx: &RuntimeContext) -> String {
ctx.args
.iter()
.position(|a| a == "--append-system-prompt")
.and_then(|i| ctx.args.get(i + 1))
.cloned()
.unwrap_or_else(|| {
format!(
"You are an AI coding agent working on story {}. \
You have access to tools via function calling. \
Use them to complete the task. \
Work in the directory: {}",
ctx.story_id, ctx.cwd
)
})
}
/// Fetch MCP tool definitions from storkit's MCP server and convert
/// them to OpenAI function-calling format.
///
/// Issues a JSON-RPC `tools/list` request; tools with an empty or
/// missing name are skipped.
async fn fetch_and_convert_mcp_tools(
    client: &Client,
    mcp_base: &str,
) -> Result<Vec<Value>, String> {
    let payload = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/list",
        "params": {}
    });
    let body: Value = client
        .post(mcp_base)
        .json(&payload)
        .send()
        .await
        .map_err(|e| format!("Failed to fetch MCP tools: {e}"))?
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;
    let tools = body["result"]["tools"]
        .as_array()
        .ok_or_else(|| "No tools array in MCP response".to_string())?;
    let openai_tools: Vec<Value> = tools
        .iter()
        .filter_map(|tool| {
            let name = tool["name"].as_str().unwrap_or("");
            if name.is_empty() {
                return None;
            }
            let description = tool["description"].as_str().unwrap_or("");
            // OpenAI function calling uses JSON Schema natively for
            // parameters, so the MCP inputSchema can be used with
            // minimal cleanup.
            let parameters = convert_mcp_schema_to_openai(tool.get("inputSchema"))
                .unwrap_or_else(|| json!({"type": "object", "properties": {}}));
            Some(json!({
                "type": "function",
                "function": {
                    "name": name,
                    "description": description,
                    "parameters": parameters,
                }
            }))
        })
        .collect();
    slog!(
        "[openai] Loaded {} MCP tools as function definitions",
        openai_tools.len()
    );
    Ok(openai_tools)
}
/// Convert an MCP inputSchema (JSON Schema) to OpenAI-compatible
/// function parameters.
///
/// OpenAI uses JSON Schema natively, so less transformation is needed
/// compared to Gemini. We still strip `$schema` to keep payloads clean.
fn convert_mcp_schema_to_openai(schema: Option<&Value>) -> Option<Value> {
let schema = schema?;
let mut result = json!({
"type": "object",
});
if let Some(properties) = schema.get("properties") {
result["properties"] = clean_schema_properties(properties);
} else {
result["properties"] = json!({});
}
if let Some(required) = schema.get("required") {
result["required"] = required.clone();
}
// OpenAI recommends additionalProperties: false for strict mode.
result["additionalProperties"] = json!(false);
Some(result)
}
/// Recursively clean schema properties, removing unsupported keywords.
///
/// Strips `$schema` from every node and recurses through nested
/// `properties` maps and array `items` schemas at any depth — the
/// previous version cleaned `items` only one level deep, so a `$schema`
/// nested inside an item's own `properties` (or inside nested `items`)
/// leaked through.
fn clean_schema_properties(properties: &Value) -> Value {
    let Some(obj) = properties.as_object() else {
        // Non-object input (null, array, scalar) passes through untouched.
        return properties.clone();
    };
    let mut cleaned = serde_json::Map::new();
    for (key, value) in obj {
        cleaned.insert(key.clone(), clean_schema_node(value));
    }
    Value::Object(cleaned)
}

/// Clean a single schema node: drop `$schema` and recurse into nested
/// `properties` and array `items` schemas.
fn clean_schema_node(schema: &Value) -> Value {
    let mut node = schema.clone();
    if let Some(p) = node.as_object_mut() {
        p.remove("$schema");
        // Recursively clean nested object properties.
        if let Some(nested_props) = p.get("properties").cloned() {
            p.insert(
                "properties".to_string(),
                clean_schema_properties(&nested_props),
            );
        }
        // Recursively clean the items schema for arrays.
        if let Some(items) = p.get("items").cloned() {
            p.insert("items".to_string(), clean_schema_node(&items));
        }
    }
    node
}
/// Call an MCP tool via storkit's MCP server.
///
/// Issues a JSON-RPC `tools/call` request; on success returns the
/// newline-joined text content blocks of the result, falling back to
/// the serialized result when no text blocks are present.
async fn call_mcp_tool(
    client: &Client,
    mcp_base: &str,
    tool_name: &str,
    args: &Value,
) -> Result<String, String> {
    let rpc = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": tool_name,
            "arguments": args
        }
    });
    let body: Value = client
        .post(mcp_base)
        .json(&rpc)
        .send()
        .await
        .map_err(|e| format!("MCP tool call failed: {e}"))?
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;
    if let Some(error) = body.get("error") {
        let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
        return Err(format!("MCP tool '{tool_name}' error: {msg}"));
    }
    // MCP tools/call returns { result: { content: [{ type: "text", text: "..." }] } }
    let texts: Vec<&str> = body["result"]["content"]
        .as_array()
        .map(|arr| arr.iter().filter_map(|c| c["text"].as_str()).collect())
        .unwrap_or_default();
    if texts.is_empty() {
        // Fall back to serializing the entire result.
        Ok(body["result"].to_string())
    } else {
        Ok(texts.join("\n"))
    }
}
/// Parse token usage from an OpenAI API response.
fn parse_usage(response: &Value) -> Option<TokenUsage> {
let usage = response.get("usage")?;
Some(TokenUsage {
input_tokens: usage
.get("prompt_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
output_tokens: usage
.get("completion_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
// OpenAI API doesn't report cost directly; leave at 0.
total_cost_usd: 0.0,
})
}
// ── Tests ────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    // ── convert_mcp_schema_to_openai ─────────────────────────────────

    #[test]
    fn convert_mcp_schema_simple_object() {
        let schema = json!({
            "type": "object",
            "properties": {
                "story_id": {
                    "type": "string",
                    "description": "Story identifier"
                }
            },
            "required": ["story_id"]
        });
        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert_eq!(result["type"], "object");
        assert!(result["properties"]["story_id"].is_object());
        assert_eq!(result["required"][0], "story_id");
        assert_eq!(result["additionalProperties"], false);
    }

    // Unlike Gemini, OpenAI accepts an empty properties object, so a
    // schema is still produced.
    #[test]
    fn convert_mcp_schema_empty_properties() {
        let schema = json!({
            "type": "object",
            "properties": {}
        });
        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert_eq!(result["type"], "object");
        assert!(result["properties"].as_object().unwrap().is_empty());
    }

    #[test]
    fn convert_mcp_schema_none_returns_none() {
        assert!(convert_mcp_schema_to_openai(None).is_none());
    }

    // `$schema` is noise for the API and must be stripped.
    #[test]
    fn convert_mcp_schema_strips_dollar_schema() {
        let schema = json!({
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "$schema": "http://json-schema.org/draft-07/schema#"
                }
            }
        });
        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        let name_prop = &result["properties"]["name"];
        assert!(name_prop.get("$schema").is_none());
        assert_eq!(name_prop["type"], "string");
    }

    #[test]
    fn convert_mcp_schema_with_nested_objects() {
        let schema = json!({
            "type": "object",
            "properties": {
                "config": {
                    "type": "object",
                    "properties": {
                        "key": { "type": "string" }
                    }
                }
            }
        });
        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert!(result["properties"]["config"]["properties"]["key"].is_object());
    }

    // Array `items` schemas are cleaned as well.
    #[test]
    fn convert_mcp_schema_with_array_items() {
        let schema = json!({
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "name": { "type": "string" }
                        },
                        "$schema": "http://json-schema.org/draft-07/schema#"
                    }
                }
            }
        });
        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        let items_schema = &result["properties"]["items"]["items"];
        assert!(items_schema.get("$schema").is_none());
    }

    // ── build_system_text ────────────────────────────────────────────

    // `--append-system-prompt <text>` in args overrides the default.
    #[test]
    fn build_system_text_uses_args() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![
                "--append-system-prompt".to_string(),
                "Custom system prompt".to_string(),
            ],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert_eq!(build_system_text(&ctx), "Custom system prompt");
    }

    // Without args, the generated text mentions story id and cwd.
    #[test]
    fn build_system_text_default() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        let text = build_system_text(&ctx);
        assert!(text.contains("42_story_test"));
        assert!(text.contains("/tmp/wt"));
    }

    // ── parse_usage ──────────────────────────────────────────────────

    #[test]
    fn parse_usage_valid() {
        let response = json!({
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            }
        });
        let usage = parse_usage(&response).unwrap();
        assert_eq!(usage.input_tokens, 100);
        assert_eq!(usage.output_tokens, 50);
        assert_eq!(usage.cache_creation_input_tokens, 0);
        assert_eq!(usage.total_cost_usd, 0.0);
    }

    #[test]
    fn parse_usage_missing() {
        let response = json!({"choices": []});
        assert!(parse_usage(&response).is_none());
    }

    // ── runtime lifecycle ────────────────────────────────────────────

    // stop() flips the cancelled flag, reported as Failed.
    #[test]
    fn openai_runtime_stop_sets_cancelled() {
        let runtime = OpenAiRuntime::new();
        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
        runtime.stop();
        assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
    }

    // The model extraction logic lives inside start(); these exercise
    // its conditions.
    #[test]
    fn model_extraction_from_command_gpt() {
        let ctx = RuntimeContext {
            story_id: "1".to_string(),
            agent_name: "coder".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![],
            prompt: "test".to_string(),
            cwd: "/tmp".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert!(ctx.command.starts_with("gpt"));
    }

    #[test]
    fn model_extraction_from_command_o3() {
        let ctx = RuntimeContext {
            story_id: "1".to_string(),
            agent_name: "coder".to_string(),
            command: "o3".to_string(),
            args: vec![],
            prompt: "test".to_string(),
            cwd: "/tmp".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert!(ctx.command.starts_with("o"));
    }
}

View File

@@ -117,6 +117,11 @@ pub struct AgentConfig {
/// and marked as Failed. Default: 300 (5 minutes). Set to 0 to disable. /// and marked as Failed. Default: 300 (5 minutes). Set to 0 to disable.
#[serde(default = "default_inactivity_timeout_secs")] #[serde(default = "default_inactivity_timeout_secs")]
pub inactivity_timeout_secs: u64, pub inactivity_timeout_secs: u64,
/// Agent runtime backend. Controls how the agent process is spawned and
/// how events are streamed. Default: `"claude-code"` (spawns the `claude`
/// CLI in a PTY). Future values: `"openai"`, `"gemini"`.
#[serde(default)]
pub runtime: Option<String>,
} }
fn default_path() -> String { fn default_path() -> String {
@@ -178,6 +183,7 @@ impl Default for ProjectConfig {
system_prompt: None, system_prompt: None,
stage: None, stage: None,
inactivity_timeout_secs: default_inactivity_timeout_secs(), inactivity_timeout_secs: default_inactivity_timeout_secs(),
runtime: None,
}], }],
watcher: WatcherConfig::default(), watcher: WatcherConfig::default(),
default_qa: default_qa(), default_qa: default_qa(),
@@ -370,6 +376,17 @@ fn validate_agents(agents: &[AgentConfig]) -> Result<(), String> {
agent.name agent.name
)); ));
} }
if let Some(ref runtime) = agent.runtime {
match runtime.as_str() {
"claude-code" | "gemini" => {}
other => {
return Err(format!(
"Agent '{}': unknown runtime '{other}'. Supported: 'claude-code', 'gemini'",
agent.name
));
}
}
}
} }
Ok(()) Ok(())
} }
@@ -792,6 +809,55 @@ name = "coder-1"
assert_eq!(config.max_coders, Some(3)); assert_eq!(config.max_coders, Some(3));
} }
// ── runtime config ────────────────────────────────────────────────
#[test]
fn runtime_defaults_to_none() {
let toml_str = r#"
[[agent]]
name = "coder"
"#;
let config = ProjectConfig::parse(toml_str).unwrap();
assert_eq!(config.agent[0].runtime, None);
}
#[test]
fn runtime_claude_code_accepted() {
let toml_str = r#"
[[agent]]
name = "coder"
runtime = "claude-code"
"#;
let config = ProjectConfig::parse(toml_str).unwrap();
assert_eq!(
config.agent[0].runtime,
Some("claude-code".to_string())
);
}
#[test]
fn runtime_gemini_accepted() {
let toml_str = r#"
[[agent]]
name = "coder"
runtime = "gemini"
model = "gemini-2.5-pro"
"#;
let config = ProjectConfig::parse(toml_str).unwrap();
assert_eq!(config.agent[0].runtime, Some("gemini".to_string()));
}
#[test]
fn runtime_unknown_rejected() {
let toml_str = r#"
[[agent]]
name = "coder"
runtime = "openai"
"#;
let err = ProjectConfig::parse(toml_str).unwrap_err();
assert!(err.contains("unknown runtime 'openai'"));
}
#[test] #[test]
fn project_toml_has_three_sonnet_coders() { fn project_toml_has_three_sonnet_coders() {
let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));

View File

@@ -3,7 +3,7 @@ use crate::llm::chat;
use crate::store::StoreOps; use crate::store::StoreOps;
use poem_openapi::{Object, OpenApi, Tags, payload::Json}; use poem_openapi::{Object, OpenApi, Tags, payload::Json};
use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::header::{HeaderMap, HeaderValue};
use serde::Deserialize; use serde::{Deserialize, Serialize};
use std::sync::Arc; use std::sync::Arc;
const ANTHROPIC_MODELS_URL: &str = "https://api.anthropic.com/v1/models"; const ANTHROPIC_MODELS_URL: &str = "https://api.anthropic.com/v1/models";
@@ -18,6 +18,13 @@ struct AnthropicModelsResponse {
#[derive(Deserialize)] #[derive(Deserialize)]
struct AnthropicModelInfo { struct AnthropicModelInfo {
id: String, id: String,
context_window: u64,
}
#[derive(Serialize, Object)]
struct AnthropicModelSummary {
id: String,
context_window: u64,
} }
fn get_anthropic_api_key(ctx: &AppContext) -> Result<String, String> { fn get_anthropic_api_key(ctx: &AppContext) -> Result<String, String> {
@@ -84,7 +91,7 @@ impl AnthropicApi {
/// List available Anthropic models. /// List available Anthropic models.
#[oai(path = "/anthropic/models", method = "get")] #[oai(path = "/anthropic/models", method = "get")]
async fn list_anthropic_models(&self) -> OpenApiResult<Json<Vec<String>>> { async fn list_anthropic_models(&self) -> OpenApiResult<Json<Vec<AnthropicModelSummary>>> {
self.list_anthropic_models_from(ANTHROPIC_MODELS_URL).await self.list_anthropic_models_from(ANTHROPIC_MODELS_URL).await
} }
} }
@@ -93,7 +100,7 @@ impl AnthropicApi {
async fn list_anthropic_models_from( async fn list_anthropic_models_from(
&self, &self,
url: &str, url: &str,
) -> OpenApiResult<Json<Vec<String>>> { ) -> OpenApiResult<Json<Vec<AnthropicModelSummary>>> {
let api_key = get_anthropic_api_key(self.ctx.as_ref()).map_err(bad_request)?; let api_key = get_anthropic_api_key(self.ctx.as_ref()).map_err(bad_request)?;
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let mut headers = HeaderMap::new(); let mut headers = HeaderMap::new();
@@ -128,7 +135,14 @@ impl AnthropicApi {
.json::<AnthropicModelsResponse>() .json::<AnthropicModelsResponse>()
.await .await
.map_err(|e| bad_request(e.to_string()))?; .map_err(|e| bad_request(e.to_string()))?;
let models = body.data.into_iter().map(|m| m.id).collect(); let models = body
.data
.into_iter()
.map(|m| AnthropicModelSummary {
id: m.id,
context_window: m.context_window,
})
.collect();
Ok(Json(models)) Ok(Json(models))
} }
@@ -276,4 +290,29 @@ mod tests {
let dir = TempDir::new().unwrap(); let dir = TempDir::new().unwrap();
let _api = make_api(&dir); let _api = make_api(&dir);
} }
#[test]
fn anthropic_model_info_deserializes_context_window() {
let json = json!({
"id": "claude-opus-4-5",
"context_window": 200000
});
let info: AnthropicModelInfo = serde_json::from_value(json).unwrap();
assert_eq!(info.id, "claude-opus-4-5");
assert_eq!(info.context_window, 200000);
}
#[test]
fn anthropic_models_response_deserializes_multiple_models() {
let json = json!({
"data": [
{ "id": "claude-opus-4-5", "context_window": 200000 },
{ "id": "claude-haiku-4-5-20251001", "context_window": 100000 }
]
});
let response: AnthropicModelsResponse = serde_json::from_value(json).unwrap();
assert_eq!(response.data.len(), 2);
assert_eq!(response.data[0].context_window, 200000);
assert_eq!(response.data[1].context_window, 100000);
}
} }

View File

@@ -1,5 +1,6 @@
use crate::agents::{AgentPool, ReconciliationEvent}; use crate::agents::{AgentPool, ReconciliationEvent};
use crate::io::watcher::WatcherEvent; use crate::io::watcher::WatcherEvent;
use crate::rebuild::{BotShutdownNotifier, ShutdownReason};
use crate::state::SessionState; use crate::state::SessionState;
use crate::store::JsonFileStore; use crate::store::JsonFileStore;
use crate::workflow::WorkflowState; use crate::workflow::WorkflowState;
@@ -52,6 +53,20 @@ pub struct AppContext {
/// Child process of the QA app launched for manual testing. /// Child process of the QA app launched for manual testing.
/// Only one instance runs at a time. /// Only one instance runs at a time.
pub qa_app_process: Arc<std::sync::Mutex<Option<std::process::Child>>>, pub qa_app_process: Arc<std::sync::Mutex<Option<std::process::Child>>>,
/// Best-effort shutdown notifier for active bot channels (Slack / WhatsApp).
///
/// When set, the MCP `rebuild_and_restart` tool uses this to announce the
/// shutdown to configured channels before re-execing the server binary.
/// `None` when no webhook-based bot transport is configured.
pub bot_shutdown: Option<Arc<BotShutdownNotifier>>,
/// Watch sender used to signal the Matrix bot task that the server is
/// shutting down (rebuild path). The bot task listens for this signal and
/// sends a shutdown announcement to all configured rooms.
///
/// Wrapped in `Arc` so `AppContext` can implement `Clone`.
/// `None` when no Matrix bot is configured.
pub matrix_shutdown_tx:
Option<Arc<tokio::sync::watch::Sender<Option<ShutdownReason>>>>,
} }
#[cfg(test)] #[cfg(test)]
@@ -73,6 +88,8 @@ impl AppContext {
perm_tx, perm_tx,
perm_rx: Arc::new(tokio::sync::Mutex::new(perm_rx)), perm_rx: Arc::new(tokio::sync::Mutex::new(perm_rx)),
qa_app_process: Arc::new(std::sync::Mutex::new(None)), qa_app_process: Arc::new(std::sync::Mutex::new(None)),
bot_shutdown: None,
matrix_shutdown_tx: None,
} }
} }
} }

View File

@@ -1,4 +1,4 @@
use crate::agents::{AgentStatus, move_story_to_stage}; use crate::agents::move_story_to_stage;
use crate::http::context::AppContext; use crate::http::context::AppContext;
use crate::log_buffer; use crate::log_buffer;
use crate::slog; use crate::slog;
@@ -26,98 +26,21 @@ pub(super) fn tool_get_server_logs(args: &Value) -> Result<String, String> {
Ok(all_lines[start..].join("\n")) Ok(all_lines[start..].join("\n"))
} }
/// Rebuild the server binary and re-exec. /// Rebuild the server binary and re-exec (delegates to `crate::rebuild`).
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
/// the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
/// `std::os::unix::process::CommandExt::exec()`.
pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> { pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> {
slog!("[rebuild] Rebuild and restart requested via MCP tool"); slog!("[rebuild] Rebuild and restart requested via MCP tool");
// 1. Gracefully stop all running agents. // Signal the Matrix bot (if active) so it can send its own shutdown
let running_agents = ctx.agents.list_agents().unwrap_or_default(); // announcement before the process is replaced. Best-effort: we wait up
let running_count = running_agents // to 1.5 s for the message to be delivered.
.iter() if let Some(ref tx) = ctx.matrix_shutdown_tx {
.filter(|a| a.status == AgentStatus::Running) let _ = tx.send(Some(crate::rebuild::ShutdownReason::Rebuild));
.count(); tokio::time::sleep(std::time::Duration::from_millis(1500)).await;
if running_count > 0 {
slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
}
ctx.agents.kill_all_children();
// 2. Find the workspace root (parent of the server binary's source).
// CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
// the workspace root is its parent.
let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
let workspace_root = manifest_dir
.parent()
.ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
slog!(
"[rebuild] Building server from workspace root: {}",
workspace_root.display()
);
// 3. Build the server binary, matching the current build profile so the
// re-exec via current_exe() picks up the new binary.
let build_args: Vec<&str> = if cfg!(debug_assertions) {
vec!["build", "-p", "storkit"]
} else {
vec!["build", "--release", "-p", "storkit"]
};
slog!("[rebuild] cargo {}", build_args.join(" "));
let output = tokio::task::spawn_blocking({
let workspace_root = workspace_root.to_path_buf();
move || {
std::process::Command::new("cargo")
.args(&build_args)
.current_dir(&workspace_root)
.output()
}
})
.await
.map_err(|e| format!("Build task panicked: {e}"))?
.map_err(|e| format!("Failed to run cargo build: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
slog!("[rebuild] Build failed:\n{stderr}");
return Err(format!("Build failed:\n{stderr}"));
} }
slog!("[rebuild] Build succeeded, re-execing with new binary"); let project_root = ctx.state.get_project_root().unwrap_or_default();
let notifier = ctx.bot_shutdown.as_deref();
// 4. Re-exec with the new binary. crate::rebuild::rebuild_and_restart(&ctx.agents, &project_root, notifier).await
// Collect current argv so we preserve any CLI arguments (e.g. project path).
let current_exe =
std::env::current_exe().map_err(|e| format!("Cannot determine current executable: {e}"))?;
let args: Vec<String> = std::env::args().collect();
// Remove the port file before re-exec so the new process can write its own.
if let Ok(root) = ctx.state.get_project_root() {
let port_file = root.join(".storkit_port");
if port_file.exists() {
let _ = std::fs::remove_file(&port_file);
}
}
// Also check cwd for port file.
let cwd_port_file = std::path::Path::new(".storkit_port");
if cwd_port_file.exists() {
let _ = std::fs::remove_file(cwd_port_file);
}
// Use exec() to replace the current process.
// This never returns on success.
use std::os::unix::process::CommandExt;
let err = std::process::Command::new(&current_exe)
.args(&args[1..])
.exec();
// If we get here, exec() failed.
Err(format!("Failed to exec new binary: {err}"))
} }
/// Generate a Claude Code permission rule string for the given tool name and input. /// Generate a Claude Code permission rule string for the given tool name and input.

View File

@@ -0,0 +1,766 @@
use crate::http::context::AppContext;
use serde_json::{json, Value};
use std::path::PathBuf;
/// Validates that `worktree_path` exists and is inside the project's
/// `.storkit/worktrees/` directory. Returns the canonicalized path.
fn validate_worktree_path(worktree_path: &str, ctx: &AppContext) -> Result<PathBuf, String> {
    let candidate = PathBuf::from(worktree_path);

    // Relative paths would canonicalize against the server's cwd, not the
    // project, so reject them up front.
    if !candidate.is_absolute() {
        return Err("worktree_path must be an absolute path".to_string());
    }
    if !candidate.exists() {
        return Err(format!("worktree_path does not exist: {worktree_path}"));
    }

    let project_root = ctx.agents.get_project_root(&ctx.state)?;
    let worktrees_root = project_root.join(".storkit").join("worktrees");

    // Canonicalize the candidate first (matches original error precedence),
    // so symlinks cannot escape the containment check below.
    let resolved = candidate
        .canonicalize()
        .map_err(|e| format!("Cannot canonicalize worktree_path: {e}"))?;
    if !worktrees_root.exists() {
        return Err("No worktrees directory found in project".to_string());
    }
    let resolved_root = worktrees_root
        .canonicalize()
        .map_err(|e| format!("Cannot canonicalize worktrees root: {e}"))?;

    if !resolved.starts_with(&resolved_root) {
        return Err(format!(
            "worktree_path must be inside .storkit/worktrees/. Got: {worktree_path}"
        ));
    }
    Ok(resolved)
}
/// Run a git command in the given directory and return its output.
///
/// The child process is spawned on the blocking thread pool so the async
/// runtime is not stalled while git runs.
async fn run_git(args: Vec<&'static str>, dir: PathBuf) -> Result<std::process::Output, String> {
    let handle = tokio::task::spawn_blocking(move || {
        let mut cmd = std::process::Command::new("git");
        cmd.args(&args).current_dir(&dir);
        cmd.output()
    });
    handle
        .await
        .map_err(|e| format!("Task join error: {e}"))?
        .map_err(|e| format!("Failed to run git: {e}"))
}
/// Run a git command with owned args in the given directory.
///
/// Same contract as [`run_git`], but accepts `String` args built at runtime.
async fn run_git_owned(args: Vec<String>, dir: PathBuf) -> Result<std::process::Output, String> {
    let joined = tokio::task::spawn_blocking(move || {
        let mut cmd = std::process::Command::new("git");
        cmd.args(&args);
        cmd.current_dir(&dir);
        cmd.output()
    })
    .await;
    joined
        .map_err(|e| format!("Task join error: {e}"))?
        .map_err(|e| format!("Failed to run git: {e}"))
}
/// git_status — returns working tree status (staged, unstaged, untracked files).
pub(super) async fn tool_git_status(args: &Value, ctx: &AppContext) -> Result<String, String> {
let worktree_path = args
.get("worktree_path")
.and_then(|v| v.as_str())
.ok_or("Missing required argument: worktree_path")?;
let dir = validate_worktree_path(worktree_path, ctx)?;
let output = run_git(vec!["status", "--porcelain=v1", "-u"], dir).await?;
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
if !output.status.success() {
return Err(format!(
"git status failed (exit {}): {stderr}",
output.status.code().unwrap_or(-1)
));
}
let mut staged: Vec<String> = Vec::new();
let mut unstaged: Vec<String> = Vec::new();
let mut untracked: Vec<String> = Vec::new();
for line in stdout.lines() {
if line.len() < 3 {
continue;
}
let x = line.chars().next().unwrap_or(' ');
let y = line.chars().nth(1).unwrap_or(' ');
let path = line[3..].to_string();
match (x, y) {
('?', '?') => untracked.push(path),
(' ', _) => unstaged.push(path),
(_, ' ') => staged.push(path),
_ => {
// Both staged and unstaged modifications
staged.push(path.clone());
unstaged.push(path);
}
}
}
serde_json::to_string_pretty(&json!({
"staged": staged,
"unstaged": unstaged,
"untracked": untracked,
"clean": staged.is_empty() && unstaged.is_empty() && untracked.is_empty(),
}))
.map_err(|e| format!("Serialization error: {e}"))
}
/// git_diff — returns diff output. Supports staged/unstaged/commit range.
///
/// Arguments (JSON):
/// * `worktree_path` (string, required) — absolute path inside `.storkit/worktrees/`.
/// * `staged` (bool, optional) — when true, diff the index (`--staged`). Default false.
/// * `commit_range` (string, optional) — revision range, e.g. `"HEAD~3..HEAD"`.
///
/// Returns pretty-printed JSON `{ "diff": <string>, "exit_code": <int> }`,
/// or an error string when validation or git fails.
pub(super) async fn tool_git_diff(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let worktree_path = args
        .get("worktree_path")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: worktree_path")?;
    let dir = validate_worktree_path(worktree_path, ctx)?;

    let staged = args
        .get("staged")
        .and_then(|v| v.as_bool())
        .unwrap_or(false);
    let commit_range = args
        .get("commit_range")
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    let mut git_args: Vec<String> = vec!["diff".to_string()];
    if staged {
        git_args.push("--staged".to_string());
    }
    if let Some(range) = commit_range {
        // Security: the range is caller-supplied and appended to git's argv.
        // Reject anything that looks like an option (e.g. "--output=/path")
        // so it cannot be interpreted as a flag by git.
        if range.starts_with('-') {
            return Err(format!(
                "commit_range must be a revision range, not an option: {range}"
            ));
        }
        git_args.push(range);
    }

    let output = run_git_owned(git_args, dir).await?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    if !output.status.success() {
        return Err(format!(
            "git diff failed (exit {}): {stderr}",
            output.status.code().unwrap_or(-1)
        ));
    }
    serde_json::to_string_pretty(&json!({
        "diff": stdout.as_ref(),
        "exit_code": output.status.code().unwrap_or(-1),
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
/// git_add — stages files by path.
///
/// Requires a non-empty `paths` array; non-string entries are silently
/// ignored (same as the JSON filter the caller would apply).
pub(super) async fn tool_git_add(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let worktree_path = args
        .get("worktree_path")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: worktree_path")?;
    let paths: Vec<String> = args
        .get("paths")
        .and_then(|v| v.as_array())
        .ok_or("Missing required argument: paths (must be an array of strings)")?
        .iter()
        .filter_map(|v| v.as_str().map(str::to_string))
        .collect();
    if paths.is_empty() {
        return Err("paths must be a non-empty array of strings".to_string());
    }
    let dir = validate_worktree_path(worktree_path, ctx)?;

    // "--" terminates option parsing so a path can never be read as a flag.
    let git_args: Vec<String> = ["add", "--"]
        .iter()
        .map(|s| s.to_string())
        .chain(paths.iter().cloned())
        .collect();
    let output = run_git_owned(git_args, dir).await?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "git add failed (exit {}): {stderr}",
            output.status.code().unwrap_or(-1)
        ));
    }
    serde_json::to_string_pretty(&json!({
        "staged": paths,
        "exit_code": output.status.code().unwrap_or(0),
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
/// git_commit — commits staged changes with a message.
///
/// Rejects empty/whitespace-only messages before invoking git.
pub(super) async fn tool_git_commit(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let worktree_path = args
        .get("worktree_path")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: worktree_path")?;
    let message = args
        .get("message")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: message")?
        .to_string();
    if message.trim().is_empty() {
        return Err("message must not be empty".to_string());
    }
    let dir = validate_worktree_path(worktree_path, ctx)?;

    // The message is its own argv element after --message, so leading dashes
    // in the text cannot be parsed as options.
    let git_args = vec!["commit".to_string(), "--message".to_string(), message];
    let output = run_git_owned(git_args, dir).await?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "git commit failed (exit {}): {stderr}",
            output.status.code().unwrap_or(-1)
        ));
    }
    serde_json::to_string_pretty(&json!({
        "output": stdout.as_ref(),
        "exit_code": output.status.code().unwrap_or(0),
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
/// git_log — returns commit history with configurable count and format.
///
/// `count` defaults to 10 and is capped at 500 to bound the output size;
/// `format` is a git pretty-format string (default: hash/subject/author/date,
/// tab-separated).
pub(super) async fn tool_git_log(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let worktree_path = args
        .get("worktree_path")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: worktree_path")?;
    let dir = validate_worktree_path(worktree_path, ctx)?;

    let count = args
        .get("count")
        .and_then(|v| v.as_u64())
        .unwrap_or(10)
        .min(500);
    let format = args
        .get("format")
        .and_then(|v| v.as_str())
        .unwrap_or("%H%x09%s%x09%an%x09%ai");

    // Both values are embedded after "=" / "format:", so they remain single
    // argv elements regardless of content.
    let git_args = vec![
        "log".to_string(),
        format!("--max-count={count}"),
        format!("--pretty=format:{format}"),
    ];
    let output = run_git_owned(git_args, dir).await?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "git log failed (exit {}): {stderr}",
            output.status.code().unwrap_or(-1)
        ));
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    serde_json::to_string_pretty(&json!({
        "log": stdout.as_ref(),
        "exit_code": output.status.code().unwrap_or(0),
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
#[cfg(test)]
mod tests {
    // NOTE: these tests shell out to the real `git` binary and therefore
    // require git on PATH. Each test works in its own tempdir, so they are
    // safe to run in parallel.
    use super::*;
    use crate::http::context::AppContext;
    use serde_json::json;

    /// Build a minimal test `AppContext` rooted at `dir`.
    fn test_ctx(dir: &std::path::Path) -> AppContext {
        AppContext::new_test(dir.to_path_buf())
    }

    /// Create a temp directory with a git worktree structure and init a repo.
    fn setup_worktree() -> (tempfile::TempDir, PathBuf, AppContext) {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".storkit")
            .join("worktrees")
            .join("42_test_story");
        std::fs::create_dir_all(&story_wt).unwrap();
        // Init git repo in the worktree
        std::process::Command::new("git")
            .args(["init"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        // Local identity so `git commit` works on CI machines with no
        // global git config.
        std::process::Command::new("git")
            .args(["config", "user.email", "test@test.com"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["config", "user.name", "Test"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let ctx = test_ctx(tmp.path());
        (tmp, story_wt, ctx)
    }

    // ── validate_worktree_path ─────────────────────────────────────────

    #[test]
    fn validate_rejects_relative_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_worktree_path("relative/path", &ctx);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("absolute"));
    }

    #[test]
    fn validate_rejects_nonexistent_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_worktree_path("/nonexistent_path_xyz_git", &ctx);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("does not exist"));
    }

    #[test]
    fn validate_rejects_path_outside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let wt_dir = tmp.path().join(".storkit").join("worktrees");
        std::fs::create_dir_all(&wt_dir).unwrap();
        let ctx = test_ctx(tmp.path());
        // The project root itself exists but is *outside* worktrees/.
        let result = validate_worktree_path(tmp.path().to_str().unwrap(), &ctx);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("inside .storkit/worktrees"));
    }

    #[test]
    fn validate_accepts_path_inside_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let story_wt = tmp
            .path()
            .join(".storkit")
            .join("worktrees")
            .join("42_test_story");
        std::fs::create_dir_all(&story_wt).unwrap();
        let ctx = test_ctx(tmp.path());
        let result = validate_worktree_path(story_wt.to_str().unwrap(), &ctx);
        assert!(result.is_ok(), "expected Ok, got: {:?}", result);
    }

    // ── git_status ────────────────────────────────────────────────────

    #[tokio::test]
    async fn git_status_missing_worktree_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_git_status(&json!({}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktree_path"));
    }

    #[tokio::test]
    async fn git_status_clean_repo() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Make an initial commit so HEAD exists
        std::fs::write(story_wt.join("readme.txt"), "hello").unwrap();
        std::process::Command::new("git")
            .args(["add", "."])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let result = tool_git_status(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["clean"], true);
        assert!(parsed["staged"].as_array().unwrap().is_empty());
        assert!(parsed["unstaged"].as_array().unwrap().is_empty());
        assert!(parsed["untracked"].as_array().unwrap().is_empty());
    }

    #[tokio::test]
    async fn git_status_shows_untracked_file() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Make initial commit
        std::fs::write(story_wt.join("readme.txt"), "hello").unwrap();
        std::process::Command::new("git")
            .args(["add", "."])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        // Add untracked file
        std::fs::write(story_wt.join("new_file.txt"), "content").unwrap();
        let result = tool_git_status(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["clean"], false);
        let untracked = parsed["untracked"].as_array().unwrap();
        assert!(
            untracked.iter().any(|v| v.as_str().unwrap().contains("new_file.txt")),
            "expected new_file.txt in untracked: {parsed}"
        );
    }

    // ── git_diff ──────────────────────────────────────────────────────

    #[tokio::test]
    async fn git_diff_missing_worktree_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_git_diff(&json!({}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktree_path"));
    }

    #[tokio::test]
    async fn git_diff_returns_diff() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Create initial commit
        std::fs::write(story_wt.join("file.txt"), "line1\n").unwrap();
        std::process::Command::new("git")
            .args(["add", "."])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        // Modify file (unstaged)
        std::fs::write(story_wt.join("file.txt"), "line1\nline2\n").unwrap();
        let result = tool_git_diff(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert!(
            parsed["diff"].as_str().unwrap().contains("line2"),
            "expected diff output: {parsed}"
        );
    }

    #[tokio::test]
    async fn git_diff_staged_flag() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Create initial commit
        std::fs::write(story_wt.join("file.txt"), "line1\n").unwrap();
        std::process::Command::new("git")
            .args(["add", "."])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["commit", "-m", "init"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        // Stage a modification
        std::fs::write(story_wt.join("file.txt"), "line1\nstaged_change\n").unwrap();
        std::process::Command::new("git")
            .args(["add", "file.txt"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let result = tool_git_diff(
            &json!({"worktree_path": story_wt.to_str().unwrap(), "staged": true}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert!(
            parsed["diff"].as_str().unwrap().contains("staged_change"),
            "expected staged diff: {parsed}"
        );
    }

    // ── git_add ───────────────────────────────────────────────────────

    #[tokio::test]
    async fn git_add_missing_worktree_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_git_add(&json!({"paths": ["file.txt"]}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktree_path"));
    }

    #[tokio::test]
    async fn git_add_missing_paths() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        let result = tool_git_add(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("paths"));
    }

    #[tokio::test]
    async fn git_add_empty_paths() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        let result = tool_git_add(
            &json!({"worktree_path": story_wt.to_str().unwrap(), "paths": []}),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("non-empty"));
    }

    #[tokio::test]
    async fn git_add_stages_file() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
        let result = tool_git_add(
            &json!({
                "worktree_path": story_wt.to_str().unwrap(),
                "paths": ["file.txt"]
            }),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["exit_code"], 0);
        let staged = parsed["staged"].as_array().unwrap();
        assert!(staged.iter().any(|v| v.as_str().unwrap() == "file.txt"));
        // Verify file is actually staged
        let status = std::process::Command::new("git")
            .args(["status", "--porcelain"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let output = String::from_utf8_lossy(&status.stdout);
        assert!(output.contains("A file.txt"), "file should be staged: {output}");
    }

    // ── git_commit ────────────────────────────────────────────────────

    #[tokio::test]
    async fn git_commit_missing_worktree_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_git_commit(&json!({"message": "test"}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktree_path"));
    }

    #[tokio::test]
    async fn git_commit_missing_message() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        let result = tool_git_commit(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("message"));
    }

    #[tokio::test]
    async fn git_commit_empty_message() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        let result = tool_git_commit(
            &json!({"worktree_path": story_wt.to_str().unwrap(), "message": " "}),
            &ctx,
        )
        .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("empty"));
    }

    #[tokio::test]
    async fn git_commit_creates_commit() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Stage a file
        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
        std::process::Command::new("git")
            .args(["add", "file.txt"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let result = tool_git_commit(
            &json!({
                "worktree_path": story_wt.to_str().unwrap(),
                "message": "test commit message"
            }),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["exit_code"], 0);
        // Verify commit exists
        let log = std::process::Command::new("git")
            .args(["log", "--oneline"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let log_output = String::from_utf8_lossy(&log.stdout);
        assert!(
            log_output.contains("test commit message"),
            "expected commit in log: {log_output}"
        );
    }

    // ── git_log ───────────────────────────────────────────────────────

    #[tokio::test]
    async fn git_log_missing_worktree_path() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = test_ctx(tmp.path());
        let result = tool_git_log(&json!({}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("worktree_path"));
    }

    #[tokio::test]
    async fn git_log_returns_history() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Make a commit
        std::fs::write(story_wt.join("file.txt"), "content").unwrap();
        std::process::Command::new("git")
            .args(["add", "."])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        std::process::Command::new("git")
            .args(["commit", "-m", "first commit"])
            .current_dir(&story_wt)
            .output()
            .unwrap();
        let result = tool_git_log(
            &json!({"worktree_path": story_wt.to_str().unwrap()}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["exit_code"], 0);
        assert!(
            parsed["log"].as_str().unwrap().contains("first commit"),
            "expected commit in log: {parsed}"
        );
    }

    #[tokio::test]
    async fn git_log_respects_count() {
        let (_tmp, story_wt, ctx) = setup_worktree();
        // Make multiple commits
        for i in 0..5 {
            std::fs::write(story_wt.join("file.txt"), format!("content {i}")).unwrap();
            std::process::Command::new("git")
                .args(["add", "."])
                .current_dir(&story_wt)
                .output()
                .unwrap();
            std::process::Command::new("git")
                .args(["commit", "-m", &format!("commit {i}")])
                .current_dir(&story_wt)
                .output()
                .unwrap();
        }
        let result = tool_git_log(
            &json!({"worktree_path": story_wt.to_str().unwrap(), "count": 2}),
            &ctx,
        )
        .await
        .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        // With count=2, only 2 commit entries should appear
        let log = parsed["log"].as_str().unwrap();
        // Each log line is tab-separated; count newlines
        let lines: Vec<&str> = log.lines().collect();
        assert_eq!(lines.len(), 2, "expected 2 log entries, got: {log}");
    }
}

View File

@@ -10,10 +10,12 @@ use std::sync::Arc;
pub mod agent_tools; pub mod agent_tools;
pub mod diagnostics; pub mod diagnostics;
pub mod git_tools;
pub mod merge_tools; pub mod merge_tools;
pub mod qa_tools; pub mod qa_tools;
pub mod shell_tools; pub mod shell_tools;
pub mod story_tools; pub mod story_tools;
pub mod whatsup_tools;
/// Returns true when the Accept header includes text/event-stream. /// Returns true when the Accept header includes text/event-stream.
fn wants_sse(req: &Request) -> bool { fn wants_sse(req: &Request) -> bool {
@@ -1025,6 +1027,115 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
}, },
"required": ["command", "working_dir"] "required": ["command", "working_dir"]
} }
},
{
"name": "git_status",
"description": "Return the working tree status of an agent's worktree (staged, unstaged, and untracked files). The worktree_path must be inside .storkit/worktrees/. Push and remote operations are not available.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Absolute path to the worktree directory. Must be inside .storkit/worktrees/."
}
},
"required": ["worktree_path"]
}
},
{
"name": "git_diff",
"description": "Return diff output for an agent's worktree. Supports unstaged (default), staged, or a commit range. The worktree_path must be inside .storkit/worktrees/.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Absolute path to the worktree directory. Must be inside .storkit/worktrees/."
},
"staged": {
"type": "boolean",
"description": "If true, show staged diff (--staged). Default: false."
},
"commit_range": {
"type": "string",
"description": "Optional commit range (e.g. 'HEAD~3..HEAD', 'abc123..def456')."
}
},
"required": ["worktree_path"]
}
},
{
"name": "git_add",
"description": "Stage files by path in an agent's worktree. The worktree_path must be inside .storkit/worktrees/.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Absolute path to the worktree directory. Must be inside .storkit/worktrees/."
},
"paths": {
"type": "array",
"items": { "type": "string" },
"description": "List of file paths to stage (relative to worktree_path)."
}
},
"required": ["worktree_path", "paths"]
}
},
{
"name": "git_commit",
"description": "Commit staged changes in an agent's worktree with the given message. The worktree_path must be inside .storkit/worktrees/. Push and remote operations are not available.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Absolute path to the worktree directory. Must be inside .storkit/worktrees/."
},
"message": {
"type": "string",
"description": "Commit message."
}
},
"required": ["worktree_path", "message"]
}
},
{
"name": "git_log",
"description": "Return commit history for an agent's worktree with configurable count and format. The worktree_path must be inside .storkit/worktrees/.",
"inputSchema": {
"type": "object",
"properties": {
"worktree_path": {
"type": "string",
"description": "Absolute path to the worktree directory. Must be inside .storkit/worktrees/."
},
"count": {
"type": "integer",
"description": "Number of commits to return (default: 10, max: 500)."
},
"format": {
"type": "string",
"description": "git pretty-format string (default: '%H%x09%s%x09%an%x09%ai')."
}
},
"required": ["worktree_path"]
}
},
{
"name": "whatsup",
"description": "Get a full triage dump for an in-progress story: front matter, AC checklist, active worktree/branch, git diff --stat since master, last 5 commits, and last 20 lines of the most recent agent log. Returns a clear error if the story is not in work/2_current/.",
"inputSchema": {
"type": "object",
"properties": {
"story_id": {
"type": "string",
"description": "Story identifier (filename stem, e.g. '42_story_my_feature')"
}
},
"required": ["story_id"]
}
} }
] ]
}), }),
@@ -1107,6 +1218,14 @@ async fn handle_tools_call(
"move_story" => diagnostics::tool_move_story(&args, ctx), "move_story" => diagnostics::tool_move_story(&args, ctx),
// Shell command execution // Shell command execution
"run_command" => shell_tools::tool_run_command(&args, ctx).await, "run_command" => shell_tools::tool_run_command(&args, ctx).await,
// Git operations
"git_status" => git_tools::tool_git_status(&args, ctx).await,
"git_diff" => git_tools::tool_git_diff(&args, ctx).await,
"git_add" => git_tools::tool_git_add(&args, ctx).await,
"git_commit" => git_tools::tool_git_commit(&args, ctx).await,
"git_log" => git_tools::tool_git_log(&args, ctx).await,
// Story triage
"whatsup" => whatsup_tools::tool_whatsup(&args, ctx).await,
_ => Err(format!("Unknown tool: {tool_name}")), _ => Err(format!("Unknown tool: {tool_name}")),
}; };
@@ -1217,7 +1336,13 @@ mod tests {
assert!(names.contains(&"move_story")); assert!(names.contains(&"move_story"));
assert!(names.contains(&"delete_story")); assert!(names.contains(&"delete_story"));
assert!(names.contains(&"run_command")); assert!(names.contains(&"run_command"));
assert_eq!(tools.len(), 43); assert!(names.contains(&"git_status"));
assert!(names.contains(&"git_diff"));
assert!(names.contains(&"git_add"));
assert!(names.contains(&"git_commit"));
assert!(names.contains(&"git_log"));
assert!(names.contains(&"whatsup"));
assert_eq!(tools.len(), 49);
} }
#[test] #[test]

View File

@@ -0,0 +1,364 @@
use crate::http::context::AppContext;
use serde_json::{Value, json};
use std::fs;
use std::path::{Path, PathBuf};
/// Parse all AC items from a story file, returning (text, is_checked) pairs.
///
/// Only the `## Acceptance Criteria` section is scanned; scanning stops at
/// the next `## `-level heading (note: `###` subheadings do not match the
/// `"## "` prefix, so they do not terminate the section). `- [x]` and
/// `- [X]` count as checked, `- [ ]` as unchecked; all other lines inside
/// the section are ignored.
fn parse_ac_items(contents: &str) -> Vec<(String, bool)> {
    let mut in_ac_section = false;
    let mut items = Vec::new();
    for line in contents.lines() {
        let trimmed = line.trim();
        if trimmed == "## Acceptance Criteria" {
            in_ac_section = true;
            continue;
        }
        // Stop at the next heading
        if in_ac_section && trimmed.starts_with("## ") {
            break;
        }
        if !in_ac_section {
            continue;
        }
        // `or_else` is lazy: the uppercase variant is only tried when the
        // lowercase one did not match (the eager `.or(...)` form always
        // evaluated both strips).
        if let Some(rest) = trimmed
            .strip_prefix("- [x] ")
            .or_else(|| trimmed.strip_prefix("- [X] "))
        {
            items.push((rest.to_string(), true));
        } else if let Some(rest) = trimmed.strip_prefix("- [ ] ") {
            items.push((rest.to_string(), false));
        }
    }
    items
}
/// Find the most recent log file for any agent under `.storkit/logs/{story_id}/`.
///
/// Only entries whose file name ends in `.log` are considered; the one with
/// the latest modification time wins. Returns `None` when the directory is
/// missing or unreadable, or when it contains no readable log files.
fn find_most_recent_log(project_root: &Path, story_id: &str) -> Option<PathBuf> {
    let log_dir = project_root
        .join(".storkit")
        .join("logs")
        .join(story_id);
    if !log_dir.is_dir() {
        return None;
    }
    let mut newest: Option<(PathBuf, std::time::SystemTime)> = None;
    for entry in fs::read_dir(&log_dir).ok()?.flatten() {
        let path = entry.path();
        let is_log = path
            .file_name()
            .and_then(|n| n.to_str())
            .is_some_and(|n| n.ends_with(".log"));
        if !is_log {
            continue;
        }
        // Entries whose metadata/mtime cannot be read are skipped silently.
        let Ok(mtime) = entry.metadata().and_then(|m| m.modified()) else {
            continue;
        };
        // Strict `>` comparison: on an mtime tie the first-seen file is kept,
        // matching the original scan-order semantics.
        match &newest {
            Some((_, best)) if mtime <= *best => {}
            _ => newest = Some((path, mtime)),
        }
    }
    newest.map(|(path, _)| path)
}
/// Return the last N raw lines from a file.
///
/// Reads the whole file into memory (agent logs are expected to be small)
/// and returns up to `n` trailing lines in their original order. If the
/// file has fewer than `n` lines, all of them are returned.
///
/// # Errors
/// Returns a human-readable error string when the file cannot be read.
fn last_n_lines(path: &Path, n: usize) -> Result<Vec<String>, String> {
    let content =
        fs::read_to_string(path).map_err(|e| format!("Failed to read log file: {e}"))?;
    // Single forward pass: skip everything before the last `n` lines instead
    // of the original rev/take/collect/rev double-collect round trip.
    let total = content.lines().count();
    let lines = content
        .lines()
        .skip(total.saturating_sub(n))
        .map(str::to_string)
        .collect();
    Ok(lines)
}
/// Run `git diff --stat {base}...HEAD` in the worktree.
///
/// The three-dot range diffs HEAD against its merge base with `base`. The
/// subprocess is spawned on a blocking thread so the async runtime is not
/// stalled. Returns `None` if git cannot be spawned, exits unsuccessfully,
/// or the blocking task fails.
async fn git_diff_stat(worktree: &Path, base: &str) -> Option<String> {
    let workdir = worktree.to_path_buf();
    let range = format!("{base}...HEAD");
    let joined = tokio::task::spawn_blocking(move || {
        let output = std::process::Command::new("git")
            .args(["diff", "--stat", &range])
            .current_dir(&workdir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
    })
    .await;
    joined.ok().flatten()
}
/// Return the last N commit messages on the current branch relative to base.
///
/// Runs `git log {base}..HEAD --oneline -{count}` on a blocking thread and
/// returns one string per non-empty output line. Returns `None` if git
/// cannot be spawned, exits unsuccessfully, or emits non-UTF-8 output.
async fn git_log_commits(worktree: &Path, base: &str, count: usize) -> Option<Vec<String>> {
    let workdir = worktree.to_path_buf();
    let range = format!("{base}..HEAD");
    let limit = format!("-{count}");
    let joined = tokio::task::spawn_blocking(move || {
        let output = std::process::Command::new("git")
            .args(["log", &range, "--oneline", &limit])
            .current_dir(&workdir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let commits: Vec<String> = stdout
            .lines()
            .filter(|l| !l.is_empty())
            .map(str::to_string)
            .collect();
        Some(commits)
    })
    .await;
    joined.ok().flatten()
}
/// Return the active branch name for the given directory.
///
/// Runs `git rev-parse --abbrev-ref HEAD` on a blocking thread. Returns
/// `None` if git cannot be spawned, exits unsuccessfully, or the blocking
/// task fails to join.
async fn git_branch(dir: &Path) -> Option<String> {
    let workdir = dir.to_path_buf();
    let joined = tokio::task::spawn_blocking(move || {
        let output = std::process::Command::new("git")
            .args(["rev-parse", "--abbrev-ref", "HEAD"])
            .current_dir(&workdir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
    })
    .await;
    joined.ok().flatten()
}
/// MCP tool: full triage dump for an in-progress story.
///
/// Gathers into one JSON payload: parsed front matter, the Acceptance
/// Criteria checklist, the active worktree path/branch, `git diff --stat`
/// against master, the last 5 commits, and the last 20 lines of the most
/// recent agent log. All git/log lookups are best-effort and yield `null`
/// fields rather than errors.
///
/// # Errors
/// Returns an error when `story_id` is missing, when the story file is not
/// in `work/2_current/`, or when the story file cannot be read.
pub(super) async fn tool_whatsup(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let story_id = args
        .get("story_id")
        .and_then(|v| v.as_str())
        .ok_or("Missing required argument: story_id")?;
    let root = ctx.state.get_project_root()?;
    let current_dir = root.join(".storkit").join("work").join("2_current");
    let filepath = current_dir.join(format!("{story_id}.md"));
    if !filepath.exists() {
        return Err(format!(
            "Story '{story_id}' not found in work/2_current/. Check the story_id and ensure it is in the current stage."
        ));
    }
    let contents =
        fs::read_to_string(&filepath).map_err(|e| format!("Failed to read story file: {e}"))?;
    // --- Front matter: include only fields that are present and meaningful ---
    let mut front_matter = serde_json::Map::new();
    if let Ok(meta) = crate::io::story_metadata::parse_front_matter(&contents) {
        if let Some(name) = &meta.name {
            front_matter.insert("name".to_string(), json!(name));
        }
        if let Some(agent) = &meta.agent {
            front_matter.insert("agent".to_string(), json!(agent));
        }
        if let Some(true) = meta.blocked {
            front_matter.insert("blocked".to_string(), json!(true));
        }
        if let Some(qa) = &meta.qa {
            front_matter.insert("qa".to_string(), json!(qa.as_str()));
        }
        // A retry_count of 0 is the default and carries no signal — omit it.
        if let Some(rc) = meta.retry_count
            && rc > 0
        {
            front_matter.insert("retry_count".to_string(), json!(rc));
        }
        if let Some(mf) = &meta.merge_failure {
            front_matter.insert("merge_failure".to_string(), json!(mf));
        }
        if let Some(rh) = meta.review_hold
            && rh
        {
            front_matter.insert("review_hold".to_string(), json!(rh));
        }
    }
    // --- AC checklist ---
    let ac_items: Vec<Value> = parse_ac_items(&contents)
        .into_iter()
        .map(|(text, checked)| json!({ "text": text, "checked": checked }))
        .collect();
    // --- Worktree and git state (all None when no worktree exists) ---
    let worktree_path = root.join(".storkit").join("worktrees").join(story_id);
    // Stat the directory once instead of re-checking before every git call;
    // the original also bound an unused branch copy in a tuple — dropped.
    let has_worktree = worktree_path.is_dir();
    let worktree_info = if has_worktree {
        let branch = git_branch(&worktree_path).await;
        Some(json!({
            "path": worktree_path.to_string_lossy(),
            "branch": branch,
        }))
    } else {
        None
    };
    let diff_stat = if has_worktree {
        git_diff_stat(&worktree_path, "master").await
    } else {
        None
    };
    let commits = if has_worktree {
        git_log_commits(&worktree_path, "master", 5).await
    } else {
        None
    };
    // --- Most recent agent log (last 20 lines; unreadable logs yield null) ---
    let agent_log = find_most_recent_log(&root, story_id).and_then(|log_path| {
        let filename = log_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("")
            .to_string();
        last_n_lines(&log_path, 20).ok().map(|lines| {
            json!({
                "file": filename,
                "lines": lines,
            })
        })
    });
    let result = json!({
        "story_id": story_id,
        "front_matter": front_matter,
        "acceptance_criteria": ac_items,
        "worktree": worktree_info,
        "git_diff_stat": diff_stat,
        "commits": commits,
        "agent_log": agent_log,
    });
    serde_json::to_string_pretty(&result).map_err(|e| format!("Serialization error: {e}"))
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // --- parse_ac_items ---

    #[test]
    fn parse_ac_items_returns_checked_and_unchecked() {
        // Both `[x]` and `[X]` count as checked; the `- [ ]` item after the
        // next `## ` heading (Out of Scope) must not be collected.
        let content = "---\nname: test\n---\n\n## Acceptance Criteria\n\n- [ ] item one\n- [x] item two\n- [X] item three\n\n## Out of Scope\n\n- [ ] not an ac\n";
        let items = parse_ac_items(content);
        assert_eq!(items.len(), 3);
        assert_eq!(items[0], ("item one".to_string(), false));
        assert_eq!(items[1], ("item two".to_string(), true));
        assert_eq!(items[2], ("item three".to_string(), true));
    }

    #[test]
    fn parse_ac_items_empty_when_no_section() {
        // A story without an `## Acceptance Criteria` heading yields no items.
        let content = "---\nname: test\n---\n\nNo AC section here.\n";
        let items = parse_ac_items(content);
        assert!(items.is_empty());
    }

    // --- find_most_recent_log ---

    #[test]
    fn find_most_recent_log_returns_none_for_missing_dir() {
        // No `.storkit/logs/{story_id}` directory at all → None, not an error.
        let tmp = tempdir().unwrap();
        let result = find_most_recent_log(tmp.path(), "nonexistent_story");
        assert!(result.is_none());
    }

    #[test]
    fn find_most_recent_log_returns_newest_file() {
        let tmp = tempdir().unwrap();
        let log_dir = tmp
            .path()
            .join(".storkit")
            .join("logs")
            .join("42_story_foo");
        fs::create_dir_all(&log_dir).unwrap();
        let old_path = log_dir.join("coder-1-sess-old.log");
        fs::write(&old_path, "old content").unwrap();
        // Ensure different mtime
        // (some filesystems have coarse mtime resolution, so a small sleep
        // guarantees the second write gets a strictly later timestamp).
        std::thread::sleep(std::time::Duration::from_millis(50));
        let new_path = log_dir.join("coder-1-sess-new.log");
        fs::write(&new_path, "new content").unwrap();
        let result = find_most_recent_log(tmp.path(), "42_story_foo").unwrap();
        assert!(
            result.to_string_lossy().contains("sess-new"),
            "Expected newest file, got: {}",
            result.display()
        );
    }

    // --- tool_whatsup ---

    #[tokio::test]
    async fn tool_whatsup_returns_error_for_missing_story() {
        // No story file exists under work/2_current/, so the tool must fail
        // with a message that points at the expected pipeline stage.
        let tmp = tempdir().unwrap();
        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
        let result = tool_whatsup(&json!({"story_id": "999_story_nonexistent"}), &ctx).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("not found in work/2_current/"));
    }

    #[tokio::test]
    async fn tool_whatsup_returns_story_data() {
        // Happy path: front matter fields and the AC checklist from the story
        // file are surfaced in the pretty-printed JSON payload.
        let tmp = tempdir().unwrap();
        let current_dir = tmp
            .path()
            .join(".storkit")
            .join("work")
            .join("2_current");
        fs::create_dir_all(&current_dir).unwrap();
        let story_content = "---\nname: My Test Story\nagent: coder-1\n---\n\n## Acceptance Criteria\n\n- [ ] First criterion\n- [x] Second criterion\n\n## Out of Scope\n\n- nothing\n";
        fs::write(current_dir.join("42_story_test.md"), story_content).unwrap();
        let ctx = crate::http::context::AppContext::new_test(tmp.path().to_path_buf());
        let result = tool_whatsup(&json!({"story_id": "42_story_test"}), &ctx)
            .await
            .unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["story_id"], "42_story_test");
        assert_eq!(parsed["front_matter"]["name"], "My Test Story");
        assert_eq!(parsed["front_matter"]["agent"], "coder-1");
        let ac = parsed["acceptance_criteria"].as_array().unwrap();
        assert_eq!(ac.len(), 2);
        assert_eq!(ac[0]["text"], "First criterion");
        assert_eq!(ac[0]["checked"], false);
        assert_eq!(ac[1]["text"], "Second criterion");
        assert_eq!(ac[1]["checked"], true);
    }
}

View File

@@ -39,6 +39,7 @@ impl ProjectApi {
payload.0.path, payload.0.path,
&self.ctx.state, &self.ctx.state,
self.ctx.store.as_ref(), self.ctx.store.as_ref(),
self.ctx.agents.port(),
) )
.await .await
.map_err(bad_request)?; .map_err(bad_request)?;

View File

@@ -158,9 +158,10 @@ impl From<WatcherEvent> for Option<WsResponse> {
}), }),
WatcherEvent::ConfigChanged => Some(WsResponse::AgentConfigChanged), WatcherEvent::ConfigChanged => Some(WsResponse::AgentConfigChanged),
WatcherEvent::AgentStateChanged => Some(WsResponse::AgentStateChanged), WatcherEvent::AgentStateChanged => Some(WsResponse::AgentStateChanged),
// MergeFailure is handled by the Matrix notification listener only; // MergeFailure and RateLimitWarning are handled by the chat notification
// no WebSocket message is needed for the frontend. // listener only; no WebSocket message is needed for the frontend.
WatcherEvent::MergeFailure { .. } => None, WatcherEvent::MergeFailure { .. } => None,
WatcherEvent::RateLimitWarning { .. } => None,
} }
} }
} }

View File

@@ -369,7 +369,7 @@ fn write_story_kit_gitignore(root: &Path) -> Result<(), String> {
/// the project root and git does not support `../` patterns in `.gitignore` /// the project root and git does not support `../` patterns in `.gitignore`
/// files, so they cannot be expressed in `.storkit/.gitignore`. /// files, so they cannot be expressed in `.storkit/.gitignore`.
fn append_root_gitignore_entries(root: &Path) -> Result<(), String> { fn append_root_gitignore_entries(root: &Path) -> Result<(), String> {
let entries = [".storkit_port", "store.json"]; let entries = [".storkit_port", "store.json", ".mcp.json"];
let gitignore_path = root.join(".gitignore"); let gitignore_path = root.join(".gitignore");
let existing = if gitignore_path.exists() { let existing = if gitignore_path.exists() {
@@ -404,7 +404,7 @@ fn append_root_gitignore_entries(root: &Path) -> Result<(), String> {
Ok(()) Ok(())
} }
fn scaffold_story_kit(root: &Path) -> Result<(), String> { fn scaffold_story_kit(root: &Path, port: u16) -> Result<(), String> {
let story_kit_root = root.join(".storkit"); let story_kit_root = root.join(".storkit");
let specs_root = story_kit_root.join("specs"); let specs_root = story_kit_root.join("specs");
let tech_root = specs_root.join("tech"); let tech_root = specs_root.join("tech");
@@ -440,6 +440,14 @@ fn scaffold_story_kit(root: &Path) -> Result<(), String> {
write_script_if_missing(&script_root.join("test"), STORY_KIT_SCRIPT_TEST)?; write_script_if_missing(&script_root.join("test"), STORY_KIT_SCRIPT_TEST)?;
write_file_if_missing(&root.join("CLAUDE.md"), STORY_KIT_CLAUDE_MD)?; write_file_if_missing(&root.join("CLAUDE.md"), STORY_KIT_CLAUDE_MD)?;
// Write .mcp.json at the project root so agents can find the MCP server.
// Only written when missing — never overwrites an existing file, because
// the port is environment-specific and must not clobber a running instance.
let mcp_content = format!(
"{{\n \"mcpServers\": {{\n \"storkit\": {{\n \"type\": \"http\",\n \"url\": \"http://localhost:{port}/mcp\"\n }}\n }}\n}}\n"
);
write_file_if_missing(&root.join(".mcp.json"), &mcp_content)?;
// Create .claude/settings.json with sensible permission defaults so that // Create .claude/settings.json with sensible permission defaults so that
// Claude Code (both agents and web UI chat) can operate without constant // Claude Code (both agents and web UI chat) can operate without constant
// permission prompts. // permission prompts.
@@ -505,14 +513,14 @@ fn scaffold_story_kit(root: &Path) -> Result<(), String> {
Ok(()) Ok(())
} }
async fn ensure_project_root_with_story_kit(path: PathBuf) -> Result<(), String> { async fn ensure_project_root_with_story_kit(path: PathBuf, port: u16) -> Result<(), String> {
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
if !path.exists() { if !path.exists() {
fs::create_dir_all(&path) fs::create_dir_all(&path)
.map_err(|e| format!("Failed to create project directory: {}", e))?; .map_err(|e| format!("Failed to create project directory: {}", e))?;
} }
if !path.join(".storkit").is_dir() { if !path.join(".storkit").is_dir() {
scaffold_story_kit(&path)?; scaffold_story_kit(&path, port)?;
} }
Ok(()) Ok(())
}) })
@@ -524,10 +532,11 @@ pub async fn open_project(
path: String, path: String,
state: &SessionState, state: &SessionState,
store: &dyn StoreOps, store: &dyn StoreOps,
port: u16,
) -> Result<String, String> { ) -> Result<String, String> {
let p = PathBuf::from(&path); let p = PathBuf::from(&path);
ensure_project_root_with_story_kit(p.clone()).await?; ensure_project_root_with_story_kit(p.clone(), port).await?;
validate_project_path(p.clone()).await?; validate_project_path(p.clone()).await?;
{ {
@@ -816,7 +825,7 @@ mod tests {
let store = make_store(&dir); let store = make_store(&dir);
let state = SessionState::default(); let state = SessionState::default();
let result = open_project(project_dir.to_string_lossy().to_string(), &state, &store).await; let result = open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001).await;
assert!(result.is_ok()); assert!(result.is_ok());
let root = state.get_project_root().unwrap(); let root = state.get_project_root().unwrap();
@@ -824,26 +833,47 @@ mod tests {
} }
#[tokio::test] #[tokio::test]
async fn open_project_does_not_write_mcp_json() { async fn open_project_does_not_overwrite_existing_mcp_json() {
// open_project must NOT overwrite .mcp.json — test servers started by QA // scaffold must NOT overwrite .mcp.json when it already exists — QA
// agents share the real project root, so writing here would clobber the // test servers share the real project root, and re-writing would
// root .mcp.json with the wrong port. .mcp.json is written once during // clobber the file with the wrong port.
// worktree creation (worktree.rs) and should not be touched again. let dir = tempdir().unwrap();
let project_dir = dir.path().join("myproject");
fs::create_dir_all(&project_dir).unwrap();
// Pre-write .mcp.json with a different port to simulate an already-configured project.
let mcp_path = project_dir.join(".mcp.json");
fs::write(&mcp_path, "{\"existing\": true}").unwrap();
let store = make_store(&dir);
let state = SessionState::default();
open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001)
.await
.unwrap();
assert_eq!(
fs::read_to_string(&mcp_path).unwrap(),
"{\"existing\": true}",
"open_project must not overwrite an existing .mcp.json"
);
}
#[tokio::test]
async fn open_project_writes_mcp_json_when_missing() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let project_dir = dir.path().join("myproject"); let project_dir = dir.path().join("myproject");
fs::create_dir_all(&project_dir).unwrap(); fs::create_dir_all(&project_dir).unwrap();
let store = make_store(&dir); let store = make_store(&dir);
let state = SessionState::default(); let state = SessionState::default();
open_project(project_dir.to_string_lossy().to_string(), &state, &store) open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001)
.await .await
.unwrap(); .unwrap();
let mcp_path = project_dir.join(".mcp.json"); let mcp_path = project_dir.join(".mcp.json");
assert!( assert!(mcp_path.exists(), "open_project should write .mcp.json for new projects");
!mcp_path.exists(), let content = fs::read_to_string(&mcp_path).unwrap();
"open_project must not write .mcp.json — that would overwrite the root with the wrong port" assert!(content.contains("3001"), "mcp.json should reference the server port");
); assert!(content.contains("localhost"), "mcp.json should reference localhost");
} }
#[tokio::test] #[tokio::test]
@@ -898,7 +928,7 @@ mod tests {
let store = make_store(&dir); let store = make_store(&dir);
let state = SessionState::default(); let state = SessionState::default();
open_project(project_dir.to_string_lossy().to_string(), &state, &store) open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001)
.await .await
.unwrap(); .unwrap();
@@ -1071,7 +1101,7 @@ mod tests {
#[test] #[test]
fn scaffold_story_kit_creates_structure() { fn scaffold_story_kit_creates_structure() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
assert!(dir.path().join(".storkit/README.md").exists()); assert!(dir.path().join(".storkit/README.md").exists());
assert!(dir.path().join(".storkit/project.toml").exists()); assert!(dir.path().join(".storkit/project.toml").exists());
@@ -1085,7 +1115,7 @@ mod tests {
#[test] #[test]
fn scaffold_story_kit_creates_work_pipeline_dirs() { fn scaffold_story_kit_creates_work_pipeline_dirs() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let stages = [ let stages = [
"1_backlog", "1_backlog",
@@ -1109,7 +1139,7 @@ mod tests {
#[test] #[test]
fn scaffold_story_kit_project_toml_has_coder_qa_mergemaster() { fn scaffold_story_kit_project_toml_has_coder_qa_mergemaster() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap();
assert!(content.contains("[[agent]]")); assert!(content.contains("[[agent]]"));
@@ -1122,7 +1152,7 @@ mod tests {
#[test] #[test]
fn scaffold_context_is_blank_template_not_story_kit_content() { fn scaffold_context_is_blank_template_not_story_kit_content() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/specs/00_CONTEXT.md")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/specs/00_CONTEXT.md")).unwrap();
assert!(content.contains("<!-- storkit:scaffold-template -->")); assert!(content.contains("<!-- storkit:scaffold-template -->"));
@@ -1138,7 +1168,7 @@ mod tests {
#[test] #[test]
fn scaffold_stack_is_blank_template_not_story_kit_content() { fn scaffold_stack_is_blank_template_not_story_kit_content() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/specs/tech/STACK.md")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/specs/tech/STACK.md")).unwrap();
assert!(content.contains("<!-- storkit:scaffold-template -->")); assert!(content.contains("<!-- storkit:scaffold-template -->"));
@@ -1157,7 +1187,7 @@ mod tests {
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let script_test = dir.path().join("script/test"); let script_test = dir.path().join("script/test");
assert!(script_test.exists(), "script/test should be created"); assert!(script_test.exists(), "script/test should be created");
@@ -1175,7 +1205,7 @@ mod tests {
fs::create_dir_all(readme.parent().unwrap()).unwrap(); fs::create_dir_all(readme.parent().unwrap()).unwrap();
fs::write(&readme, "custom content").unwrap(); fs::write(&readme, "custom content").unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
assert_eq!(fs::read_to_string(&readme).unwrap(), "custom content"); assert_eq!(fs::read_to_string(&readme).unwrap(), "custom content");
} }
@@ -1183,13 +1213,13 @@ mod tests {
#[test] #[test]
fn scaffold_story_kit_is_idempotent() { fn scaffold_story_kit_is_idempotent() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let readme_content = fs::read_to_string(dir.path().join(".storkit/README.md")).unwrap(); let readme_content = fs::read_to_string(dir.path().join(".storkit/README.md")).unwrap();
let toml_content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap(); let toml_content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap();
// Run again — must not change content or add duplicate .gitignore entries // Run again — must not change content or add duplicate .gitignore entries
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
assert_eq!( assert_eq!(
fs::read_to_string(dir.path().join(".storkit/README.md")).unwrap(), fs::read_to_string(dir.path().join(".storkit/README.md")).unwrap(),
@@ -1237,7 +1267,7 @@ mod tests {
.status() .status()
.unwrap(); .unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
// Only 1 commit should exist — scaffold must not commit into an existing repo // Only 1 commit should exist — scaffold must not commit into an existing repo
let log_output = std::process::Command::new("git") let log_output = std::process::Command::new("git")
@@ -1256,7 +1286,7 @@ mod tests {
#[test] #[test]
fn scaffold_creates_story_kit_gitignore_with_relative_entries() { fn scaffold_creates_story_kit_gitignore_with_relative_entries() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
// .storkit/.gitignore must contain relative patterns for files under .storkit/ // .storkit/.gitignore must contain relative patterns for files under .storkit/
let sk_content = fs::read_to_string(dir.path().join(".storkit/.gitignore")).unwrap(); let sk_content = fs::read_to_string(dir.path().join(".storkit/.gitignore")).unwrap();
@@ -1287,7 +1317,7 @@ mod tests {
) )
.unwrap(); .unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/.gitignore")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/.gitignore")).unwrap();
let worktrees_count = content.lines().filter(|l| l.trim() == "worktrees/").count(); let worktrees_count = content.lines().filter(|l| l.trim() == "worktrees/").count();
@@ -1303,7 +1333,7 @@ mod tests {
#[test] #[test]
fn scaffold_creates_claude_md_at_project_root() { fn scaffold_creates_claude_md_at_project_root() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let claude_md = dir.path().join("CLAUDE.md"); let claude_md = dir.path().join("CLAUDE.md");
assert!( assert!(
@@ -1332,7 +1362,7 @@ mod tests {
let claude_md = dir.path().join("CLAUDE.md"); let claude_md = dir.path().join("CLAUDE.md");
fs::write(&claude_md, "custom CLAUDE.md content").unwrap(); fs::write(&claude_md, "custom CLAUDE.md content").unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
assert_eq!( assert_eq!(
fs::read_to_string(&claude_md).unwrap(), fs::read_to_string(&claude_md).unwrap(),
@@ -1341,6 +1371,46 @@ mod tests {
); );
} }
#[test]
fn scaffold_story_kit_writes_mcp_json_with_port() {
let dir = tempdir().unwrap();
scaffold_story_kit(dir.path(), 4242).unwrap();
let mcp_path = dir.path().join(".mcp.json");
assert!(mcp_path.exists(), ".mcp.json should be created by scaffold");
let content = fs::read_to_string(&mcp_path).unwrap();
assert!(content.contains("4242"), ".mcp.json should reference the given port");
assert!(content.contains("localhost"), ".mcp.json should reference localhost");
assert!(content.contains("storkit"), ".mcp.json should name the storkit server");
}
#[test]
fn scaffold_story_kit_does_not_overwrite_existing_mcp_json() {
let dir = tempdir().unwrap();
let mcp_path = dir.path().join(".mcp.json");
fs::write(&mcp_path, "{\"custom\": true}").unwrap();
scaffold_story_kit(dir.path(), 3001).unwrap();
assert_eq!(
fs::read_to_string(&mcp_path).unwrap(),
"{\"custom\": true}",
"scaffold should not overwrite an existing .mcp.json"
);
}
#[test]
fn scaffold_gitignore_includes_mcp_json() {
let dir = tempdir().unwrap();
scaffold_story_kit(dir.path(), 3001).unwrap();
let root_gitignore = fs::read_to_string(dir.path().join(".gitignore")).unwrap();
assert!(
root_gitignore.contains(".mcp.json"),
"root .gitignore should include .mcp.json (port is environment-specific)"
);
}
// --- open_project scaffolding --- // --- open_project scaffolding ---
#[tokio::test] #[tokio::test]
@@ -1351,7 +1421,7 @@ mod tests {
let store = make_store(&dir); let store = make_store(&dir);
let state = SessionState::default(); let state = SessionState::default();
open_project(project_dir.to_string_lossy().to_string(), &state, &store) open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001)
.await .await
.unwrap(); .unwrap();
@@ -1370,7 +1440,7 @@ mod tests {
let store = make_store(&dir); let store = make_store(&dir);
let state = SessionState::default(); let state = SessionState::default();
open_project(project_dir.to_string_lossy().to_string(), &state, &store) open_project(project_dir.to_string_lossy().to_string(), &state, &store, 3001)
.await .await
.unwrap(); .unwrap();
@@ -1572,7 +1642,7 @@ mod tests {
) )
.unwrap(); .unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap();
assert!( assert!(
@@ -1592,7 +1662,7 @@ mod tests {
#[test] #[test]
fn scaffold_project_toml_fallback_when_no_stack_detected() { fn scaffold_project_toml_fallback_when_no_stack_detected() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap(); let content = fs::read_to_string(dir.path().join(".storkit/project.toml")).unwrap();
assert!( assert!(
@@ -1614,7 +1684,7 @@ mod tests {
let existing = "[[component]]\nname = \"custom\"\npath = \".\"\nsetup = [\"make build\"]\n"; let existing = "[[component]]\nname = \"custom\"\npath = \".\"\nsetup = [\"make build\"]\n";
fs::write(sk_dir.join("project.toml"), existing).unwrap(); fs::write(sk_dir.join("project.toml"), existing).unwrap();
scaffold_story_kit(dir.path()).unwrap(); scaffold_story_kit(dir.path(), 3001).unwrap();
let content = fs::read_to_string(sk_dir.join("project.toml")).unwrap(); let content = fs::read_to_string(sk_dir.join("project.toml")).unwrap();
assert_eq!( assert_eq!(

View File

@@ -77,10 +77,8 @@ struct FrontMatter {
merge_failure: Option<String>, merge_failure: Option<String>,
agent: Option<String>, agent: Option<String>,
review_hold: Option<bool>, review_hold: Option<bool>,
/// New configurable QA mode field: "human", "server", or "agent". /// Configurable QA mode field: "human", "server", or "agent".
qa: Option<String>, qa: Option<String>,
/// Legacy boolean field — mapped to `qa: human` (true) or ignored (false/absent).
manual_qa: Option<bool>,
/// Number of times this story has been retried at its current pipeline stage. /// Number of times this story has been retried at its current pipeline stage.
retry_count: Option<u32>, retry_count: Option<u32>,
/// When `true`, auto-assign will skip this story (retry limit exceeded). /// When `true`, auto-assign will skip this story (retry limit exceeded).
@@ -113,12 +111,7 @@ pub fn parse_front_matter(contents: &str) -> Result<StoryMetadata, StoryMetaErro
} }
fn build_metadata(front: FrontMatter) -> StoryMetadata { fn build_metadata(front: FrontMatter) -> StoryMetadata {
// Resolve qa mode: prefer the new `qa` field, fall back to legacy `manual_qa`. let qa = front.qa.as_deref().and_then(QaMode::from_str);
let qa = if let Some(ref qa_str) = front.qa {
QaMode::from_str(qa_str)
} else {
front.manual_qa.and_then(|v| if v { Some(QaMode::Human) } else { None })
};
StoryMetadata { StoryMetadata {
name: front.name, name: front.name,
@@ -513,27 +506,6 @@ workflow: tdd
assert_eq!(meta.qa, None); assert_eq!(meta.qa, None);
} }
#[test]
fn legacy_manual_qa_true_maps_to_human() {
let input = "---\nname: Story\nmanual_qa: true\n---\n# Story\n";
let meta = parse_front_matter(input).expect("front matter");
assert_eq!(meta.qa, Some(QaMode::Human));
}
#[test]
fn legacy_manual_qa_false_maps_to_none() {
let input = "---\nname: Story\nmanual_qa: false\n---\n# Story\n";
let meta = parse_front_matter(input).expect("front matter");
assert_eq!(meta.qa, None);
}
#[test]
fn qa_field_takes_precedence_over_manual_qa() {
let input = "---\nname: Story\nqa: server\nmanual_qa: true\n---\n# Story\n";
let meta = parse_front_matter(input).expect("front matter");
assert_eq!(meta.qa, Some(QaMode::Server));
}
#[test] #[test]
fn resolve_qa_mode_uses_file_value() { fn resolve_qa_mode_uses_file_value() {
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();

View File

@@ -59,6 +59,14 @@ pub enum WatcherEvent {
/// Human-readable description of the failure. /// Human-readable description of the failure.
reason: String, reason: String,
}, },
/// An agent hit an API rate limit.
/// Triggers a warning notification to configured chat rooms.
RateLimitWarning {
/// Work item ID the agent is working on.
story_id: String,
/// Name of the agent that hit the rate limit.
agent_name: String,
},
} }
/// Return `true` if `path` is the root-level `.storkit/project.toml`, i.e. /// Return `true` if `path` is the root-level `.storkit/project.toml`, i.e.

View File

@@ -10,6 +10,7 @@ mod io;
mod llm; mod llm;
pub mod log_buffer; pub mod log_buffer;
mod matrix; mod matrix;
pub mod rebuild;
pub mod slack; pub mod slack;
mod state; mod state;
mod store; mod store;
@@ -23,6 +24,7 @@ use crate::http::build_routes;
use crate::http::context::AppContext; use crate::http::context::AppContext;
use crate::http::{remove_port_file, resolve_port, write_port_file}; use crate::http::{remove_port_file, resolve_port, write_port_file};
use crate::io::fs::find_story_kit_root; use crate::io::fs::find_story_kit_root;
use crate::rebuild::{BotShutdownNotifier, ShutdownReason};
use crate::state::SessionState; use crate::state::SessionState;
use crate::store::JsonFileStore; use crate::store::JsonFileStore;
use crate::workflow::WorkflowState; use crate::workflow::WorkflowState;
@@ -32,6 +34,32 @@ use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::broadcast; use tokio::sync::broadcast;
/// Meaning of the first CLI argument.
#[derive(Debug, PartialEq)]
enum CliDirective {
    /// `--help` / `-h`
    Help,
    /// `--version` / `-V`
    Version,
    /// An unrecognised flag (starts with `-`).
    UnknownFlag(String),
    /// A positional path argument.
    Path,
    /// No arguments at all.
    None,
}

/// Inspect the raw CLI arguments (binary name already stripped) and return
/// the directive implied by the first one. Only the first argument is
/// examined; anything that starts with `-` but is not a known flag is
/// reported back as `UnknownFlag` so the caller can print a usage error.
fn classify_cli_args(args: &[String]) -> CliDirective {
    let Some(first) = args.first() else {
        return CliDirective::None;
    };
    match first.as_str() {
        "--help" | "-h" => CliDirective::Help,
        "--version" | "-V" => CliDirective::Version,
        flag if flag.starts_with('-') => CliDirective::UnknownFlag(flag.to_string()),
        _ => CliDirective::Path,
    }
}
/// Resolve the optional positional path argument (everything after the binary /// Resolve the optional positional path argument (everything after the binary
/// name) into an absolute `PathBuf`. Returns `None` when no argument was /// name) into an absolute `PathBuf`. Returns `None` when no argument was
/// supplied so that the caller can fall back to the auto-detect behaviour. /// supplied so that the caller can fall back to the auto-detect behaviour.
@@ -51,8 +79,61 @@ async fn main() -> Result<(), std::io::Error> {
// Collect CLI args, skipping the binary name (argv[0]). // Collect CLI args, skipping the binary name (argv[0]).
let cli_args: Vec<String> = std::env::args().skip(1).collect(); let cli_args: Vec<String> = std::env::args().skip(1).collect();
// Handle CLI flags before treating anything as a project path.
match classify_cli_args(&cli_args) {
CliDirective::Help => {
println!("storkit [PATH]");
println!();
println!("Serve a storkit project.");
println!();
println!("USAGE:");
println!(" storkit [PATH]");
println!();
println!("ARGS:");
println!(
" PATH Path to an existing project directory. \
If omitted, storkit searches parent directories for a .storkit/ root."
);
println!();
println!("OPTIONS:");
println!(" -h, --help Print this help and exit");
println!(" -V, --version Print the version and exit");
std::process::exit(0);
}
CliDirective::Version => {
println!("storkit {}", env!("CARGO_PKG_VERSION"));
std::process::exit(0);
}
CliDirective::UnknownFlag(flag) => {
eprintln!("error: unknown option: {flag}");
eprintln!("Run 'storkit --help' for usage.");
std::process::exit(1);
}
CliDirective::Path | CliDirective::None => {}
}
let explicit_path = parse_project_path_arg(&cli_args, &cwd); let explicit_path = parse_project_path_arg(&cli_args, &cwd);
// When a path is given explicitly on the CLI, it must already exist as a
// directory. We do not create directories from the command line.
if let Some(ref path) = explicit_path {
if !path.exists() {
eprintln!(
"error: path does not exist: {}",
path.display()
);
std::process::exit(1);
}
if !path.is_dir() {
eprintln!(
"error: path is not a directory: {}",
path.display()
);
std::process::exit(1);
}
}
if let Some(explicit_root) = explicit_path { if let Some(explicit_root) = explicit_path {
// An explicit path was given on the command line. // An explicit path was given on the command line.
// Open it directly — scaffold .storkit/ if it is missing — and // Open it directly — scaffold .storkit/ if it is missing — and
@@ -61,6 +142,7 @@ async fn main() -> Result<(), std::io::Error> {
explicit_root.to_string_lossy().to_string(), explicit_root.to_string_lossy().to_string(),
&app_state, &app_state,
store.as_ref(), store.as_ref(),
port,
) )
.await .await
{ {
@@ -83,6 +165,7 @@ async fn main() -> Result<(), std::io::Error> {
project_root.to_string_lossy().to_string(), project_root.to_string_lossy().to_string(),
&app_state, &app_state,
store.as_ref(), store.as_ref(),
port,
) )
.await .await
.unwrap_or_else(|e| { .unwrap_or_else(|e| {
@@ -176,17 +259,6 @@ async fn main() -> Result<(), std::io::Error> {
let startup_reconciliation_tx = reconciliation_tx.clone(); let startup_reconciliation_tx = reconciliation_tx.clone();
// Clone for shutdown cleanup — kill orphaned PTY children before exiting. // Clone for shutdown cleanup — kill orphaned PTY children before exiting.
let agents_for_shutdown = Arc::clone(&agents); let agents_for_shutdown = Arc::clone(&agents);
let ctx = AppContext {
state: app_state,
store,
workflow,
agents,
watcher_tx,
reconciliation_tx,
perm_tx,
perm_rx,
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
};
// Build WhatsApp webhook context if bot.toml configures transport = "whatsapp". // Build WhatsApp webhook context if bot.toml configures transport = "whatsapp".
let whatsapp_ctx: Option<Arc<whatsapp::WhatsAppWebhookContext>> = startup_root let whatsapp_ctx: Option<Arc<whatsapp::WhatsAppWebhookContext>> = startup_root
@@ -254,7 +326,50 @@ async fn main() -> Result<(), std::io::Error> {
}) })
}); });
let app = build_routes(ctx, whatsapp_ctx, slack_ctx); // Build a best-effort shutdown notifier for webhook-based transports.
//
// • Slack: channels are fixed at startup (channel_ids from bot.toml).
// • WhatsApp: active senders are tracked at runtime in ambient_rooms.
// We keep the WhatsApp context Arc so we can read the rooms at shutdown.
// • Matrix: the bot task manages its own announcement via matrix_shutdown_tx.
let bot_shutdown_notifier: Option<Arc<BotShutdownNotifier>> =
if let Some(ref ctx) = slack_ctx {
let channels: Vec<String> = ctx.channel_ids.iter().cloned().collect();
Some(Arc::new(BotShutdownNotifier::new(
Arc::clone(&ctx.transport) as Arc<dyn crate::transport::ChatTransport>,
channels,
ctx.bot_name.clone(),
)))
} else {
None
};
// Retain a reference to the WhatsApp context for shutdown notifications.
// At shutdown time we read ambient_rooms to get the current set of active senders.
let whatsapp_ctx_for_shutdown: Option<Arc<whatsapp::WhatsAppWebhookContext>> =
whatsapp_ctx.clone();
// Watch channel: signals the Matrix bot task to send a shutdown announcement.
// `None` initial value means "server is running".
let (matrix_shutdown_tx, matrix_shutdown_rx) =
tokio::sync::watch::channel::<Option<ShutdownReason>>(None);
let matrix_shutdown_tx = Arc::new(matrix_shutdown_tx);
let matrix_shutdown_tx_for_rebuild = Arc::clone(&matrix_shutdown_tx);
let ctx = AppContext {
state: app_state,
store,
workflow,
agents,
watcher_tx,
reconciliation_tx,
perm_tx,
perm_rx,
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
bot_shutdown: bot_shutdown_notifier.clone(),
matrix_shutdown_tx: Some(Arc::clone(&matrix_shutdown_tx)),
};
let app = build_routes(ctx, whatsapp_ctx.clone(), slack_ctx.clone());
// Optional Matrix bot: connect to the homeserver and start listening for // Optional Matrix bot: connect to the homeserver and start listening for
// messages if `.storkit/bot.toml` is present and enabled. // messages if `.storkit/bot.toml` is present and enabled.
@@ -264,7 +379,11 @@ async fn main() -> Result<(), std::io::Error> {
watcher_tx_for_bot, watcher_tx_for_bot,
perm_rx_for_bot, perm_rx_for_bot,
Arc::clone(&startup_agents), Arc::clone(&startup_agents),
matrix_shutdown_rx,
); );
} else {
// Keep the receiver alive (drop it) so the sender never errors.
drop(matrix_shutdown_rx);
} }
// On startup: // On startup:
@@ -294,6 +413,36 @@ async fn main() -> Result<(), std::io::Error> {
let result = Server::new(TcpListener::bind(&addr)).run(app).await; let result = Server::new(TcpListener::bind(&addr)).run(app).await;
// ── Shutdown notifications (best-effort) ─────────────────────────────
//
// The server is stopping (SIGINT / SIGTERM). Notify active bot channels
// so participants know the bot is going offline. We do this before killing
// PTY children so network I/O can still complete.
// Slack: notifier holds the fixed channel list.
if let Some(ref notifier) = bot_shutdown_notifier {
notifier.notify(ShutdownReason::Manual).await;
}
// WhatsApp: read the current set of ambient rooms and notify each sender.
if let Some(ref ctx) = whatsapp_ctx_for_shutdown {
let rooms: Vec<String> = ctx.ambient_rooms.lock().unwrap().iter().cloned().collect();
if !rooms.is_empty() {
let wa_notifier = BotShutdownNotifier::new(
Arc::clone(&ctx.transport) as Arc<dyn crate::transport::ChatTransport>,
rooms,
ctx.bot_name.clone(),
);
wa_notifier.notify(ShutdownReason::Manual).await;
}
}
// Matrix: signal the bot task and give it a short window to send its message.
let _ = matrix_shutdown_tx_for_rebuild.send(Some(ShutdownReason::Manual));
tokio::time::sleep(std::time::Duration::from_millis(1500)).await;
// ── Cleanup ──────────────────────────────────────────────────────────
// Kill all active PTY child processes before exiting to prevent orphaned // Kill all active PTY child processes before exiting to prevent orphaned
// Claude Code processes from running after the server restarts. // Claude Code processes from running after the server restarts.
agents_for_shutdown.kill_all_children(); agents_for_shutdown.kill_all_children();
@@ -331,6 +480,61 @@ name = "coder"
.unwrap_or_else(|e| panic!("Invalid project.toml: {e}")); .unwrap_or_else(|e| panic!("Invalid project.toml: {e}"));
} }
// ── classify_cli_args ─────────────────────────────────────────────────
// One test per directive variant: no args, both help spellings, both
// version spellings, an unknown flag, and a positional path.
#[test]
fn classify_none_when_no_args() {
    assert_eq!(classify_cli_args(&[]), CliDirective::None);
}
#[test]
fn classify_help_long() {
    assert_eq!(
        classify_cli_args(&["--help".to_string()]),
        CliDirective::Help
    );
}
#[test]
fn classify_help_short() {
    assert_eq!(
        classify_cli_args(&["-h".to_string()]),
        CliDirective::Help
    );
}
#[test]
fn classify_version_long() {
    assert_eq!(
        classify_cli_args(&["--version".to_string()]),
        CliDirective::Version
    );
}
#[test]
fn classify_version_short() {
    assert_eq!(
        classify_cli_args(&["-V".to_string()]),
        CliDirective::Version
    );
}
#[test]
fn classify_unknown_flag() {
    // The unrecognised flag text is carried back so main() can echo it.
    assert_eq!(
        classify_cli_args(&["--serve".to_string()]),
        CliDirective::UnknownFlag("--serve".to_string())
    );
}
#[test]
fn classify_path() {
    assert_eq!(
        classify_cli_args(&["/some/path".to_string()]),
        CliDirective::Path
    );
}
// ── parse_project_path_arg ──────────────────────────────────────────── // ── parse_project_path_arg ────────────────────────────────────────────
#[test] #[test]

View File

@@ -213,6 +213,7 @@ pub async fn run_bot(
watcher_rx: tokio::sync::broadcast::Receiver<crate::io::watcher::WatcherEvent>, watcher_rx: tokio::sync::broadcast::Receiver<crate::io::watcher::WatcherEvent>,
perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>, perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>,
agents: Arc<AgentPool>, agents: Arc<AgentPool>,
shutdown_rx: tokio::sync::watch::Receiver<Option<crate::rebuild::ShutdownReason>>,
) -> Result<(), String> { ) -> Result<(), String> {
let store_path = project_root.join(".storkit").join("matrix_store"); let store_path = project_root.join(".storkit").join("matrix_store");
let client = Client::builder() let client = Client::builder()
@@ -426,6 +427,30 @@ pub async fn run_bot(
notif_project_root, notif_project_root,
); );
// Spawn a shutdown watcher that sends a best-effort goodbye message to all
// configured rooms when the server is about to stop (SIGINT/SIGTERM or rebuild).
{
let shutdown_transport = Arc::clone(&transport);
let shutdown_rooms: Vec<String> =
announce_room_ids.iter().map(|r| r.to_string()).collect();
let shutdown_bot_name = announce_bot_name.clone();
let mut rx = shutdown_rx;
tokio::spawn(async move {
// Wait until the channel holds Some(reason).
if rx.wait_for(|v| v.is_some()).await.is_ok() {
let reason = rx.borrow().clone();
let notifier = crate::rebuild::BotShutdownNotifier::new(
shutdown_transport,
shutdown_rooms,
shutdown_bot_name,
);
if let Some(r) = reason {
notifier.notify(r).await;
}
}
});
}
// Send a startup announcement to each configured room so users know the // Send a startup announcement to each configured room so users know the
// bot is online. This runs once per process start — the sync loop handles // bot is online. This runs once per process start — the sync loop handles
// reconnects internally so this code is never reached again on a network // reconnects internally so this code is never reached again on a network
@@ -960,6 +985,39 @@ async fn on_room_message(
return; return;
} }
// Check for the rebuild command, which requires async agent and process ops
// and cannot be handled by the sync command registry.
if super::rebuild::extract_rebuild_command(
&user_message,
&ctx.bot_name,
ctx.bot_user_id.as_str(),
)
.is_some()
{
slog!("[matrix-bot] Handling rebuild command from {sender}");
// Acknowledge immediately — the rebuild may take a while or re-exec.
let ack = "Rebuilding server… this may take a moment.";
let ack_html = markdown_to_html(ack);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, ack, &ack_html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
let response = super::rebuild::handle_rebuild(
&ctx.bot_name,
&ctx.project_root,
&ctx.agents,
)
.await;
let html = markdown_to_html(&response);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
return;
}
// Spawn a separate task so the Matrix sync loop is not blocked while we // Spawn a separate task so the Matrix sync loop is not blocked while we
// wait for the LLM response (which can take several seconds). // wait for the LLM response (which can take several seconds).
tokio::spawn(async move { tokio::spawn(async move {

View File

@@ -0,0 +1,385 @@
//! Handler for the `assign` command.
//!
//! `assign <number> <model>` pre-assigns a coder model (e.g. `opus`, `sonnet`)
//! to a story before it starts. The assignment persists in the story file's
//! front matter as `agent: coder-<model>` so that when the pipeline picks up
//! the story — either via auto-assign or the `start` command — it uses the
//! assigned model instead of the default.
use super::CommandContext;
use crate::io::story_metadata::{parse_front_matter, set_front_matter_field};
/// All pipeline stage directories to search when finding a work item by number.
/// Listed in pipeline order; `handle_assign` stops at the first stage that
/// contains a file with a matching numeric prefix.
const STAGES: &[&str] = &[
    "1_backlog",
    "2_current",
    "3_qa",
    "4_merge",
    "5_done",
    "6_archived",
];
/// Expand a bare model hint (e.g. `"opus"`) into the full agent name
/// (`"coder-opus"`). Hints that already carry the `coder-` prefix are
/// passed through untouched so the prefix is never applied twice.
fn resolve_agent_name(model: &str) -> String {
    match model.strip_prefix("coder-") {
        // Already fully qualified — return it verbatim.
        Some(_) => model.to_string(),
        None => format!("coder-{model}"),
    }
}
/// Handle `{bot} assign <number> <model>`.
///
/// Validates the arguments, locates the work item by its numeric filename
/// prefix across all pipeline stages, and persists `agent: coder-<model>`
/// into the story's front matter. Always returns a user-facing Markdown
/// reply (usage, error, or confirmation).
pub(super) fn handle_assign(ctx: &CommandContext) -> Option<String> {
    let args = ctx.args.trim();
    // Parse `<number> <model>` from args.
    let (number_str, model_str) = match args.split_once(char::is_whitespace) {
        Some((n, m)) => (n.trim(), m.trim()),
        None => {
            return Some(format!(
                "Usage: `{} assign <number> <model>` (e.g. `assign 42 opus`)",
                ctx.bot_name
            ));
        }
    };
    if number_str.is_empty() || !number_str.chars().all(|c| c.is_ascii_digit()) {
        return Some(format!(
            "Invalid story number `{number_str}`. Usage: `{} assign <number> <model>`",
            ctx.bot_name
        ));
    }
    if model_str.is_empty() {
        return Some(format!(
            "Usage: `{} assign <number> <model>` (e.g. `assign 42 opus`)",
            ctx.bot_name
        ));
    }
    // Find the story file across all pipeline stages.
    let (path, story_id) = match find_story_across_stages(ctx.project_root, number_str) {
        Some(f) => f,
        None => {
            return Some(format!(
                "No story, bug, or spike with number **{number_str}** found."
            ));
        }
    };
    let agent_name = resolve_agent_name(model_str);
    // Read the file ONCE: the same contents serve both the display name and
    // the front-matter rewrite. (The previous version issued two separate
    // reads, doing redundant I/O and opening a window where the file could
    // change between the name read and the rewrite read.)
    let contents = match std::fs::read_to_string(&path) {
        Ok(c) => c,
        Err(e) => {
            // On a failed read the old code fell back to the file stem for
            // the name; the reply text is preserved byte-for-byte.
            return Some(format!(
                "Failed to assign model to **{story_id}**: Failed to read story file: {e}"
            ));
        }
    };
    // Human-readable name for the reply; fall back to the file stem.
    let story_name = parse_front_matter(&contents)
        .ok()
        .and_then(|m| m.name)
        .unwrap_or_else(|| story_id.clone());
    // Write `agent: <agent_name>` into the story's front matter.
    let updated = set_front_matter_field(&contents, "agent", &agent_name);
    match std::fs::write(&path, &updated) {
        Ok(()) => Some(format!(
            "Assigned **{agent_name}** to **{story_name}** (story {number_str}). \
             The model will be used when the story starts."
        )),
        Err(e) => Some(format!(
            "Failed to assign model to **{story_name}**: Failed to write story file: {e}"
        )),
    }
}

/// Search every pipeline stage directory (in `STAGES` order) for a `.md`
/// file whose leading `<digits>_` filename prefix equals `number_str`.
///
/// Returns `(path, file_stem)` for the first match, or `None` when no stage
/// contains a matching file. Missing stage directories are skipped silently.
fn find_story_across_stages(
    project_root: &std::path::Path,
    number_str: &str,
) -> Option<(std::path::PathBuf, String)> {
    for stage in STAGES {
        let dir = project_root.join(".storkit").join("work").join(stage);
        // A missing or unreadable stage directory is simply skipped.
        let Ok(entries) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in entries.flatten() {
            let path = entry.path();
            if path.extension().and_then(|e| e.to_str()) != Some("md") {
                continue;
            }
            let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
                continue;
            };
            // Non-numeric prefixes collapse to "" and can never match a
            // validated (all-digits, non-empty) story number.
            let file_num = stem
                .split('_')
                .next()
                .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
                .unwrap_or("");
            if file_num == number_str {
                // Take the owned stem first so `path` can be moved out.
                let stem = stem.to_string();
                return Some((path, stem));
            }
        }
    }
    None
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Exercises the `assign` command end-to-end through the command dispatcher:
// registration, argument validation, story lookup across stages, and the
// front-matter writes it performs on disk.
#[cfg(test)]
mod tests {
    use crate::agents::AgentPool;
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};
    use super::super::{CommandDispatch, try_handle_command};
    // Dispatch `@timmy assign <args>` against a project rooted at `root`,
    // returning whatever reply the command handler produced.
    fn assign_cmd_with_root(root: &std::path::Path, args: &str) -> Option<String> {
        let agents = Arc::new(AgentPool::new_test(3000));
        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
        let room_id = "!test:example.com".to_string();
        let dispatch = CommandDispatch {
            bot_name: "Timmy",
            bot_user_id: "@timmy:homeserver.local",
            project_root: root,
            agents: &agents,
            ambient_rooms: &ambient_rooms,
            room_id: &room_id,
        };
        try_handle_command(&dispatch, &format!("@timmy assign {args}"))
    }
    // Create `.storkit/work/<stage>/<filename>` under `root` with `content`.
    fn write_story_file(root: &std::path::Path, stage: &str, filename: &str, content: &str) {
        let dir = root.join(".storkit/work").join(stage);
        std::fs::create_dir_all(&dir).unwrap();
        std::fs::write(dir.join(filename), content).unwrap();
    }
    // -- registration / help ------------------------------------------------
    #[test]
    fn assign_command_is_registered() {
        use super::super::commands;
        let found = commands().iter().any(|c| c.name == "assign");
        assert!(found, "assign command must be in the registry");
    }
    #[test]
    fn assign_command_appears_in_help() {
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
        );
        let output = result.unwrap();
        assert!(
            output.contains("assign"),
            "help should list assign command: {output}"
        );
    }
    // -- argument validation ------------------------------------------------
    #[test]
    fn assign_no_args_returns_usage() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = assign_cmd_with_root(tmp.path(), "").unwrap();
        assert!(
            output.contains("Usage"),
            "no args should show usage: {output}"
        );
    }
    #[test]
    fn assign_missing_model_returns_usage() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = assign_cmd_with_root(tmp.path(), "42").unwrap();
        assert!(
            output.contains("Usage"),
            "missing model should show usage: {output}"
        );
    }
    #[test]
    fn assign_non_numeric_number_returns_error() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = assign_cmd_with_root(tmp.path(), "abc opus").unwrap();
        assert!(
            output.contains("Invalid story number"),
            "non-numeric number should return error: {output}"
        );
    }
    // -- story not found ----------------------------------------------------
    #[test]
    fn assign_unknown_story_returns_friendly_message() {
        let tmp = tempfile::TempDir::new().unwrap();
        // Create stage dirs but no matching story.
        for stage in &["1_backlog", "2_current"] {
            std::fs::create_dir_all(tmp.path().join(".storkit/work").join(stage)).unwrap();
        }
        let output = assign_cmd_with_root(tmp.path(), "999 opus").unwrap();
        assert!(
            output.contains("999") && output.contains("found"),
            "not-found message should include number and 'found': {output}"
        );
    }
    // -- successful assignment ----------------------------------------------
    #[test]
    fn assign_writes_agent_field_to_front_matter() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "42_story_test_feature.md",
            "---\nname: Test Feature\n---\n\n# Story 42\n",
        );
        let output = assign_cmd_with_root(tmp.path(), "42 opus").unwrap();
        assert!(
            output.contains("coder-opus"),
            "confirmation should include resolved agent name: {output}"
        );
        assert!(
            output.contains("Test Feature"),
            "confirmation should include story name: {output}"
        );
        // Verify the file was updated.
        let contents = std::fs::read_to_string(
            tmp.path()
                .join(".storkit/work/1_backlog/42_story_test_feature.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "front matter should contain agent field: {contents}"
        );
    }
    #[test]
    fn assign_with_sonnet_writes_coder_sonnet() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "10_story_current.md",
            "---\nname: Current Story\n---\n",
        );
        assign_cmd_with_root(tmp.path(), "10 sonnet").unwrap();
        let contents = std::fs::read_to_string(
            tmp.path()
                .join(".storkit/work/2_current/10_story_current.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-sonnet"),
            "front matter should contain agent: coder-sonnet: {contents}"
        );
    }
    #[test]
    fn assign_with_already_prefixed_name_does_not_double_prefix() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "7_story_small.md",
            "---\nname: Small Story\n---\n",
        );
        let output = assign_cmd_with_root(tmp.path(), "7 coder-opus").unwrap();
        assert!(
            output.contains("coder-opus"),
            "should not double-prefix: {output}"
        );
        assert!(
            !output.contains("coder-coder-opus"),
            "must not double-prefix: {output}"
        );
        let contents = std::fs::read_to_string(
            tmp.path().join(".storkit/work/1_backlog/7_story_small.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "must write coder-opus, not coder-coder-opus: {contents}"
        );
    }
    #[test]
    fn assign_overwrites_existing_agent_field() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "5_story_existing.md",
            "---\nname: Existing\nagent: coder-sonnet\n---\n",
        );
        assign_cmd_with_root(tmp.path(), "5 opus").unwrap();
        let contents = std::fs::read_to_string(
            tmp.path()
                .join(".storkit/work/1_backlog/5_story_existing.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "should overwrite old agent with new: {contents}"
        );
        assert!(
            !contents.contains("coder-sonnet"),
            "old agent should no longer appear: {contents}"
        );
    }
    #[test]
    fn assign_finds_story_in_any_stage() {
        let tmp = tempfile::TempDir::new().unwrap();
        // Story is in 3_qa/, not backlog.
        write_story_file(
            tmp.path(),
            "3_qa",
            "99_story_in_qa.md",
            "---\nname: In QA\n---\n",
        );
        let output = assign_cmd_with_root(tmp.path(), "99 opus").unwrap();
        assert!(
            output.contains("coder-opus"),
            "should find story in qa stage: {output}"
        );
    }
    // -- resolve_agent_name unit tests --------------------------------------
    #[test]
    fn resolve_agent_name_prefixes_bare_model() {
        assert_eq!(super::resolve_agent_name("opus"), "coder-opus");
        assert_eq!(super::resolve_agent_name("sonnet"), "coder-sonnet");
        assert_eq!(super::resolve_agent_name("haiku"), "coder-haiku");
    }
    #[test]
    fn resolve_agent_name_does_not_double_prefix() {
        assert_eq!(super::resolve_agent_name("coder-opus"), "coder-opus");
        assert_eq!(super::resolve_agent_name("coder-sonnet"), "coder-sonnet");
    }
}

View File

@@ -4,7 +4,9 @@ use super::{commands, CommandContext};
pub(super) fn handle_help(ctx: &CommandContext) -> Option<String> { pub(super) fn handle_help(ctx: &CommandContext) -> Option<String> {
let mut output = format!("**{} Commands**\n\n", ctx.bot_name); let mut output = format!("**{} Commands**\n\n", ctx.bot_name);
for cmd in commands() { let mut sorted: Vec<_> = commands().iter().collect();
sorted.sort_by_key(|c| c.name);
for cmd in sorted {
output.push_str(&format!("- **{}** — {}\n", cmd.name, cmd.description)); output.push_str(&format!("- **{}** — {}\n", cmd.name, cmd.description));
} }
Some(output) Some(output)
@@ -75,6 +77,26 @@ mod tests {
assert!(output.contains("status"), "help should list status command: {output}"); assert!(output.contains("status"), "help should list status command: {output}");
} }
#[test]
fn help_output_is_alphabetical() {
    let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
    let output = result.unwrap();
    // Search for **name** (bold markdown) to avoid substring matches in descriptions.
    let mut positions: Vec<(usize, &str)> = commands()
        .iter()
        .map(|c| {
            let marker = format!("**{}**", c.name);
            let pos = output.find(&marker).expect("command must appear in help as **name**");
            (pos, c.name)
        })
        .collect();
    // Order command names by where each first appears in the rendered help…
    positions.sort_by_key(|(pos, _)| *pos);
    let names_in_order: Vec<&str> = positions.iter().map(|(_, n)| *n).collect();
    // …then compare that order against the same names sorted alphabetically.
    let mut sorted = names_in_order.clone();
    sorted.sort();
    assert_eq!(names_in_order, sorted, "commands must appear in alphabetical order");
}
#[test] #[test]
fn help_output_includes_ambient() { fn help_output_includes_ambient() {
let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help"); let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");

View File

@@ -6,6 +6,7 @@
//! as they are added. //! as they are added.
mod ambient; mod ambient;
mod assign;
mod cost; mod cost;
mod git; mod git;
mod help; mod help;
@@ -13,6 +14,7 @@ mod move_story;
mod overview; mod overview;
mod show; mod show;
mod status; mod status;
mod whatsup;
use crate::agents::AgentPool; use crate::agents::AgentPool;
use std::collections::HashSet; use std::collections::HashSet;
@@ -75,6 +77,11 @@ pub struct CommandContext<'a> {
/// Add new commands here — they will automatically appear in `help` output. /// Add new commands here — they will automatically appear in `help` output.
pub fn commands() -> &'static [BotCommand] { pub fn commands() -> &'static [BotCommand] {
&[ &[
BotCommand {
name: "assign",
description: "Pre-assign a model to a story: `assign <number> <model>` (e.g. `assign 42 opus`)",
handler: assign::handle_assign,
},
BotCommand { BotCommand {
name: "help", name: "help",
description: "Show this list of available commands", description: "Show this list of available commands",
@@ -82,7 +89,7 @@ pub fn commands() -> &'static [BotCommand] {
}, },
BotCommand { BotCommand {
name: "status", name: "status",
description: "Show pipeline status and agent availability", description: "Show pipeline status and agent availability; or `status <number>` for a story triage dump",
handler: status::handle_status, handler: status::handle_status,
}, },
BotCommand { BotCommand {
@@ -135,6 +142,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "Clear the current Claude Code session and start fresh", description: "Clear the current Claude Code session and start fresh",
handler: handle_reset_fallback, handler: handle_reset_fallback,
}, },
BotCommand {
name: "rebuild",
description: "Rebuild the server binary and restart",
handler: handle_rebuild_fallback,
},
] ]
} }
@@ -260,6 +272,16 @@ fn handle_reset_fallback(_ctx: &CommandContext) -> Option<String> {
None None
} }
/// Fallback handler for the `rebuild` command when it is not intercepted by
/// the async handler in `on_room_message`. In practice this is never called —
/// rebuild is detected and handled before `try_handle_command` is invoked.
/// The entry exists in the registry only so `help` lists it.
///
/// Returns `None` to prevent the LLM from receiving "rebuild" as a prompt.
fn handle_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
    // Deliberate no-op; see the doc comment above for why this returns None.
    None
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Tests // Tests
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@@ -7,7 +7,11 @@ use std::collections::{HashMap, HashSet};
use super::CommandContext; use super::CommandContext;
pub(super) fn handle_status(ctx: &CommandContext) -> Option<String> { pub(super) fn handle_status(ctx: &CommandContext) -> Option<String> {
if ctx.args.trim().is_empty() {
Some(build_pipeline_status(ctx.project_root, ctx.agents)) Some(build_pipeline_status(ctx.project_root, ctx.agents))
} else {
super::whatsup::handle_whatsup(ctx)
}
} }
/// Format a short display label for a work item. /// Format a short display label for a work item.

View File

@@ -0,0 +1,548 @@
//! Handler for the `whatsup` command.
//!
//! Produces a triage dump for a story that is currently in-progress
//! (`work/2_current/`): metadata, acceptance criteria, worktree/branch state,
//! git diff, recent commits, and the tail of the agent log.
//!
//! The command is handled entirely at the bot level — no LLM invocation.
use super::CommandContext;
use std::path::{Path, PathBuf};
use std::process::Command;
/// Handle `{bot_name} whatsup {number}`.
///
/// Validates the numeric argument, then looks for the matching story file in
/// `work/2_current/` and renders the full triage dump for it. Every code
/// path returns a user-facing reply, so nothing falls through to the LLM.
pub(super) fn handle_whatsup(ctx: &CommandContext) -> Option<String> {
    let number = ctx.args.trim();
    // No argument at all: show usage.
    if number.is_empty() {
        return Some(format!(
            "Usage: `{} status <number>`\n\nShows a triage dump for a story currently in progress.",
            ctx.bot_name
        ));
    }
    // Reject anything that is not purely ASCII digits.
    if number.bytes().any(|b| !b.is_ascii_digit()) {
        return Some(format!(
            "Invalid story number: `{number}`. Usage: `{} status <number>`",
            ctx.bot_name
        ));
    }
    let in_progress_dir = ctx
        .project_root
        .join(".storkit")
        .join("work")
        .join("2_current");
    let reply = match find_story_in_dir(&in_progress_dir, number) {
        Some((path, stem)) => build_triage_dump(ctx, &path, &stem, number),
        None => format!(
            "Story **{number}** is not currently in progress (not found in `work/2_current/`)."
        ),
    };
    Some(reply)
}
/// Find a `.md` file in `dir` whose numeric filename prefix (the digits
/// before the first `_`) equals `num_str`.
///
/// Returns `(path, file_stem)` for the first matching entry, or `None` when
/// the directory cannot be read or nothing matches. Entries are visited in
/// whatever order the OS returns them — no sorting is applied.
fn find_story_in_dir(dir: &Path, num_str: &str) -> Option<(PathBuf, String)> {
    let entries = std::fs::read_dir(dir).ok()?;
    for entry in entries.flatten() {
        let path = entry.path();
        // Only Markdown work items are considered.
        if path.extension().and_then(|e| e.to_str()) != Some("md") {
            continue;
        }
        let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
            continue;
        };
        // Non-numeric prefixes collapse to "" so they can never match a
        // validated (non-empty, all-digits) story number.
        let file_num = stem
            .split('_')
            .next()
            .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
            .unwrap_or("");
        if file_num == num_str {
            // Take the owned stem FIRST, ending the borrow of `path`, so
            // `path` can be moved out directly — the previous version had to
            // `path.clone()` here, a redundant allocation per match.
            let stem = stem.to_string();
            return Some((path, stem));
        }
    }
    None
}
/// Build the full triage dump for a story.
///
/// Assembles a markdown report with: a header (story number and name),
/// notable front-matter fields, the acceptance-criteria checklist with a
/// done/total count, worktree/branch/git information, and the tail of the
/// most recently modified agent log.
fn build_triage_dump(
    ctx: &CommandContext,
    story_path: &Path,
    story_id: &str,
    num_str: &str,
) -> String {
    let contents = match std::fs::read_to_string(story_path) {
        Ok(c) => c,
        Err(e) => return format!("Failed to read story {num_str}: {e}"),
    };
    // Front matter is optional; a parse failure just means fewer fields shown.
    let meta = crate::io::story_metadata::parse_front_matter(&contents).ok();
    let name = meta.as_ref().and_then(|m| m.name.as_deref()).unwrap_or("(unnamed)");
    let mut out = String::new();
    // ---- Header ----
    // BUG FIX: number and name were previously concatenated with no
    // separator, producing e.g. "Story 99My Feature".
    out.push_str(&format!("## Story {num_str}: {name}\n"));
    out.push_str("**Stage:** In Progress (`2_current`)\n\n");
    // ---- Front matter fields ----
    // Only fields that carry signal are listed (booleans only when true,
    // retry_count only when non-zero).
    if let Some(ref m) = meta {
        let mut fields: Vec<String> = Vec::new();
        if let Some(true) = m.blocked {
            fields.push("**blocked:** true".to_string());
        }
        if let Some(ref agent) = m.agent {
            fields.push(format!("**agent:** {agent}"));
        }
        if let Some(ref qa) = m.qa {
            fields.push(format!("**qa:** {qa}"));
        }
        if let Some(true) = m.review_hold {
            fields.push("**review_hold:** true".to_string());
        }
        if let Some(rc) = m.retry_count
            && rc > 0
        {
            fields.push(format!("**retry_count:** {rc}"));
        }
        if let Some(ref cb) = m.coverage_baseline {
            fields.push(format!("**coverage_baseline:** {cb}"));
        }
        if let Some(ref mf) = m.merge_failure {
            fields.push(format!("**merge_failure:** {mf}"));
        }
        if !fields.is_empty() {
            out.push_str("**Front matter:**\n");
            for f in &fields {
                out.push_str(&format!("{f}\n"));
            }
            out.push('\n');
        }
    }
    // ---- Acceptance criteria ----
    let criteria = parse_acceptance_criteria(&contents);
    if !criteria.is_empty() {
        out.push_str("**Acceptance Criteria:**\n");
        for (checked, text) in &criteria {
            let mark = if *checked { "" } else { "" };
            out.push_str(&format!(" {mark} {text}\n"));
        }
        let total = criteria.len();
        let done = criteria.iter().filter(|(c, _)| *c).count();
        out.push_str(&format!(" *{done}/{total} complete*\n"));
        out.push('\n');
    }
    // ---- Worktree and branch ----
    let wt_path = crate::worktree::worktree_path(ctx.project_root, story_id);
    let branch = format!("feature/story-{story_id}");
    if wt_path.is_dir() {
        out.push_str(&format!("**Worktree:** `{}`\n", wt_path.display()));
        out.push_str(&format!("**Branch:** `{branch}`\n\n"));
        // ---- git diff --stat ----
        let diff_stat = run_git(
            &wt_path,
            &["diff", "--stat", "master...HEAD"],
        );
        if !diff_stat.is_empty() {
            out.push_str("**Diff stat (vs master):**\n```\n");
            out.push_str(&diff_stat);
            out.push_str("```\n\n");
        } else {
            out.push_str("**Diff stat (vs master):** *(no changes)*\n\n");
        }
        // ---- Last 5 commits on feature branch ----
        let log = run_git(
            &wt_path,
            &[
                "log",
                "master..HEAD",
                "--pretty=format:%h %s",
                "-5",
            ],
        );
        if !log.is_empty() {
            out.push_str("**Recent commits (branch only):**\n```\n");
            out.push_str(&log);
            out.push_str("\n```\n\n");
        } else {
            out.push_str("**Recent commits (branch only):** *(none yet)*\n\n");
        }
    } else {
        // No worktree yet — still show the branch name the agent will use.
        out.push_str(&format!("**Branch:** `{branch}`\n"));
        out.push_str("**Worktree:** *(not yet created)*\n\n");
    }
    // ---- Agent log tail ----
    let log_dir = ctx
        .project_root
        .join(".storkit")
        .join("logs")
        .join(story_id);
    match latest_log_file(&log_dir) {
        Some(log_path) => {
            let tail = read_log_tail(&log_path, 20);
            let filename = log_path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("agent.log");
            // BUG FIX: previously printed a literal "(unknown)" instead of
            // the computed `filename` (which was left unused), and the
            // markdown had unbalanced `**` markers.
            if tail.is_empty() {
                out.push_str(&format!("**Agent log** (`{filename}`): *(empty)*\n"));
            } else {
                out.push_str(&format!("**Agent log tail** (`{filename}`):\n```\n"));
                out.push_str(&tail);
                out.push_str("\n```\n");
            }
        }
        None => {
            out.push_str("**Agent log:** *(no log found)*\n");
        }
    }
    out
}
/// Parse acceptance criteria from story markdown.
///
/// Scans every line for GitHub-style task-list items and returns
/// `(checked, text)` pairs: `- [x]` / `- [X]` count as checked,
/// `- [ ]` as unchecked. All other lines are ignored.
fn parse_acceptance_criteria(contents: &str) -> Vec<(bool, String)> {
    let mut criteria = Vec::new();
    for raw in contents.lines() {
        let line = raw.trim();
        if let Some(rest) = line.strip_prefix("- [x] ") {
            criteria.push((true, rest.to_string()));
        } else if let Some(rest) = line.strip_prefix("- [X] ") {
            criteria.push((true, rest.to_string()));
        } else if let Some(rest) = line.strip_prefix("- [ ] ") {
            criteria.push((false, rest.to_string()));
        }
    }
    criteria
}
/// Run a git command in `dir` and return its trimmed stdout.
///
/// Any failure — git not installed, spawn error (e.g. `dir` missing),
/// or a non-zero exit status — yields an empty string; callers treat
/// "no output" and "error" identically.
fn run_git(dir: &Path, args: &[&str]) -> String {
    match Command::new("git").args(args).current_dir(dir).output() {
        Ok(out) if out.status.success() => {
            String::from_utf8_lossy(&out.stdout).trim().to_string()
        }
        _ => String::new(),
    }
}
/// Find the most recently modified `.log` file in `log_dir`, regardless of
/// which agent produced it.
///
/// Returns `None` when the directory does not exist, cannot be read, or
/// contains no readable `.log` files. On an mtime tie the first entry seen
/// wins.
fn latest_log_file(log_dir: &Path) -> Option<PathBuf> {
    if !log_dir.is_dir() {
        return None;
    }
    let mut newest: Option<(std::time::SystemTime, PathBuf)> = None;
    for entry in std::fs::read_dir(log_dir).ok()?.flatten() {
        let path = entry.path();
        if path.extension().and_then(|e| e.to_str()) != Some("log") {
            continue;
        }
        // Entries whose metadata cannot be read are skipped, not fatal.
        let Ok(modified) = entry.metadata().and_then(|m| m.modified()) else {
            continue;
        };
        match &newest {
            Some((best_time, _)) if modified <= *best_time => {}
            _ => newest = Some((modified, path)),
        }
    }
    newest.map(|(_, p)| p)
}
/// Read the last `n` non-empty lines of the file at `path`, joined with
/// newlines.
///
/// Blank and whitespace-only lines are dropped before the tail is taken.
/// Returns an empty string when the file cannot be read.
fn read_log_tail(path: &Path, n: usize) -> String {
    let Ok(contents) = std::fs::read_to_string(path) else {
        return String::new();
    };
    let kept: Vec<&str> = contents
        .lines()
        .filter(|line| !line.trim().is_empty())
        .collect();
    let tail_start = kept.len().saturating_sub(n);
    kept[tail_start..].join("\n")
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Note: most test names still say "whatsup" — the command was renamed to
// `status` (see `whatsup_command_is_not_registered`) but the test names and
// the handler name kept the old spelling.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::AgentPool;
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};
    use super::super::{CommandDispatch, try_handle_command};

    /// Dispatch `@timmy status <args>` against `root` through the full
    /// command-routing path (not just `handle_whatsup`) and return the reply.
    fn whatsup_cmd(root: &Path, args: &str) -> Option<String> {
        let agents = Arc::new(AgentPool::new_test(3000));
        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
        let room_id = "!test:example.com".to_string();
        let dispatch = CommandDispatch {
            bot_name: "Timmy",
            bot_user_id: "@timmy:homeserver.local",
            project_root: root,
            agents: &agents,
            ambient_rooms: &ambient_rooms,
            room_id: &room_id,
        };
        try_handle_command(&dispatch, &format!("@timmy status {args}"))
    }

    /// Create `<root>/.storkit/work/<stage>/<filename>` with `content`,
    /// creating intermediate directories as needed.
    fn write_story_file(root: &Path, stage: &str, filename: &str, content: &str) {
        let dir = root.join(".storkit/work").join(stage);
        std::fs::create_dir_all(&dir).unwrap();
        std::fs::write(dir.join(filename), content).unwrap();
    }

    // -- registration -------------------------------------------------------

    /// The old command name must be gone from the registry after the rename.
    #[test]
    fn whatsup_command_is_not_registered() {
        let found = super::super::commands().iter().any(|c| c.name == "whatsup");
        assert!(!found, "whatsup command must not be in the registry (renamed to status)");
    }

    /// The new `status` command must be listed in the help output.
    #[test]
    fn status_command_appears_in_help() {
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
        );
        let output = result.unwrap();
        assert!(
            output.contains("status"),
            "help should list status command: {output}"
        );
    }

    // -- input validation ---------------------------------------------------

    /// With no story number, the dispatcher routes `status` to the pipeline
    /// overview rather than to `handle_whatsup`'s usage text.
    #[test]
    fn whatsup_no_args_returns_usage() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = whatsup_cmd(tmp.path(), "").unwrap();
        assert!(
            output.contains("Pipeline Status"),
            "no args should show pipeline status: {output}"
        );
    }

    /// A non-numeric argument is rejected with an error message.
    #[test]
    fn whatsup_non_numeric_returns_error() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = whatsup_cmd(tmp.path(), "abc").unwrap();
        assert!(
            output.contains("Invalid"),
            "non-numeric arg should return error: {output}"
        );
    }

    // -- not found ----------------------------------------------------------

    /// A story that exists but is not in `2_current` gets a friendly
    /// "not in progress" message, not an error.
    #[test]
    fn whatsup_story_not_in_current_returns_friendly_message() {
        let tmp = tempfile::TempDir::new().unwrap();
        // Create the directory but put the story in backlog, not current
        write_story_file(
            tmp.path(),
            "1_backlog",
            "42_story_not_in_current.md",
            "---\nname: Not in current\n---\n",
        );
        let output = whatsup_cmd(tmp.path(), "42").unwrap();
        assert!(
            output.contains("42"),
            "message should include story number: {output}"
        );
        assert!(
            output.contains("not") || output.contains("Not"),
            "message should say not found/in progress: {output}"
        );
    }

    // -- found in 2_current -------------------------------------------------

    /// The dump header shows the story number, name, and pipeline stage.
    #[test]
    fn whatsup_shows_story_name_and_stage() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "99_story_my_feature.md",
            "---\nname: My Feature\n---\n\n## Acceptance Criteria\n\n- [ ] First thing\n- [x] Done thing\n",
        );
        let output = whatsup_cmd(tmp.path(), "99").unwrap();
        assert!(output.contains("99"), "should show story number: {output}");
        assert!(
            output.contains("My Feature"),
            "should show story name: {output}"
        );
        assert!(
            output.contains("In Progress") || output.contains("2_current"),
            "should show pipeline stage: {output}"
        );
    }

    /// Checked and unchecked criteria are both rendered, with a done/total
    /// summary line.
    #[test]
    fn whatsup_shows_acceptance_criteria() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "99_story_criteria_test.md",
            "---\nname: Criteria Test\n---\n\n- [ ] First thing\n- [x] Done thing\n- [ ] Second thing\n",
        );
        let output = whatsup_cmd(tmp.path(), "99").unwrap();
        assert!(
            output.contains("First thing"),
            "should show unchecked criterion: {output}"
        );
        assert!(
            output.contains("Done thing"),
            "should show checked criterion: {output}"
        );
        // 1 of 3 done
        assert!(
            output.contains("1/3"),
            "should show checked/total count: {output}"
        );
    }

    /// `blocked: true` in the front matter appears in the dump.
    #[test]
    fn whatsup_shows_blocked_field() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "55_story_blocked_story.md",
            "---\nname: Blocked Story\nblocked: true\n---\n",
        );
        let output = whatsup_cmd(tmp.path(), "55").unwrap();
        assert!(
            output.contains("blocked"),
            "should show blocked field: {output}"
        );
    }

    /// The assigned agent from the front matter appears in the dump.
    #[test]
    fn whatsup_shows_agent_field() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "55_story_agent_story.md",
            "---\nname: Agent Story\nagent: coder-1\n---\n",
        );
        let output = whatsup_cmd(tmp.path(), "55").unwrap();
        assert!(
            output.contains("coder-1"),
            "should show agent field: {output}"
        );
    }

    /// Even without a worktree on disk, the expected branch name is shown.
    #[test]
    fn whatsup_no_worktree_shows_not_created() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "77_story_no_worktree.md",
            "---\nname: No Worktree\n---\n",
        );
        let output = whatsup_cmd(tmp.path(), "77").unwrap();
        // Branch name should still appear
        assert!(
            output.contains("feature/story-77"),
            "should show branch name: {output}"
        );
    }

    /// With no log directory for the story, the dump says so explicitly.
    #[test]
    fn whatsup_no_log_shows_no_log_message() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "77_story_no_log.md",
            "---\nname: No Log\n---\n",
        );
        let output = whatsup_cmd(tmp.path(), "77").unwrap();
        assert!(
            output.contains("no log") || output.contains("No log") || output.contains("*(no log found)*"),
            "should indicate no log exists: {output}"
        );
    }

    // -- parse_acceptance_criteria ------------------------------------------

    /// Lowercase and uppercase checkmarks both count as checked; order is
    /// preserved.
    #[test]
    fn parse_criteria_mixed() {
        let input = "## AC\n- [ ] First\n- [x] Done\n- [X] Also done\n- [ ] Last\n";
        let result = parse_acceptance_criteria(input);
        assert_eq!(result.len(), 4);
        assert_eq!(result[0], (false, "First".to_string()));
        assert_eq!(result[1], (true, "Done".to_string()));
        assert_eq!(result[2], (true, "Also done".to_string()));
        assert_eq!(result[3], (false, "Last".to_string()));
    }

    /// Markdown without task-list items yields no criteria.
    #[test]
    fn parse_criteria_empty() {
        let input = "# Story\nNo checkboxes here.\n";
        let result = parse_acceptance_criteria(input);
        assert!(result.is_empty());
    }

    // -- read_log_tail -------------------------------------------------------

    /// With more lines than requested, only the last `n` are returned.
    #[test]
    fn read_log_tail_returns_last_n_lines() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("test.log");
        let content = (1..=30).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
        std::fs::write(&path, &content).unwrap();
        let tail = read_log_tail(&path, 5);
        let lines: Vec<&str> = tail.lines().collect();
        assert_eq!(lines.len(), 5);
        assert_eq!(lines[0], "line 26");
        assert_eq!(lines[4], "line 30");
    }

    /// With fewer lines than requested, everything is returned.
    #[test]
    fn read_log_tail_fewer_lines_than_n() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("short.log");
        std::fs::write(&path, "line A\nline B\n").unwrap();
        let tail = read_log_tail(&path, 20);
        assert!(tail.contains("line A"));
        assert!(tail.contains("line B"));
    }

    // -- latest_log_file ----------------------------------------------------

    /// A missing directory is not an error — it means "no log".
    #[test]
    fn latest_log_file_returns_none_for_missing_dir() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = latest_log_file(&tmp.path().join("nonexistent"));
        assert!(result.is_none());
    }

    /// A single `.log` file is found regardless of its agent-name prefix.
    #[test]
    fn latest_log_file_finds_log() {
        let tmp = tempfile::TempDir::new().unwrap();
        let log_path = tmp.path().join("coder-1-sess-abc.log");
        std::fs::write(&log_path, "some log content\n").unwrap();
        let result = latest_log_file(tmp.path());
        assert!(result.is_some());
        assert_eq!(result.unwrap(), log_path);
    }
}

View File

@@ -20,6 +20,7 @@ pub mod commands;
mod config; mod config;
pub mod delete; pub mod delete;
pub mod htop; pub mod htop;
pub mod rebuild;
pub mod reset; pub mod reset;
pub mod start; pub mod start;
pub mod notifications; pub mod notifications;
@@ -31,9 +32,10 @@ pub use config::BotConfig;
use crate::agents::AgentPool; use crate::agents::AgentPool;
use crate::http::context::PermissionForward; use crate::http::context::PermissionForward;
use crate::io::watcher::WatcherEvent; use crate::io::watcher::WatcherEvent;
use crate::rebuild::ShutdownReason;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::{Mutex as TokioMutex, broadcast, mpsc}; use tokio::sync::{Mutex as TokioMutex, broadcast, mpsc, watch};
/// Attempt to start the Matrix bot. /// Attempt to start the Matrix bot.
/// ///
@@ -49,12 +51,17 @@ use tokio::sync::{Mutex as TokioMutex, broadcast, mpsc};
/// `prompt_permission` tool. The bot locks it during active chat sessions /// `prompt_permission` tool. The bot locks it during active chat sessions
/// to surface permission prompts to the Matrix room and relay user decisions. /// to surface permission prompts to the Matrix room and relay user decisions.
/// ///
/// `shutdown_rx` is a watch channel that delivers a `ShutdownReason` when the
/// server is about to stop (SIGINT/SIGTERM or rebuild). The bot uses this to
/// announce the shutdown to all configured rooms before the process exits.
///
/// Must be called from within a Tokio runtime context (e.g., from `main`). /// Must be called from within a Tokio runtime context (e.g., from `main`).
pub fn spawn_bot( pub fn spawn_bot(
project_root: &Path, project_root: &Path,
watcher_tx: broadcast::Sender<WatcherEvent>, watcher_tx: broadcast::Sender<WatcherEvent>,
perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>, perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>,
agents: Arc<AgentPool>, agents: Arc<AgentPool>,
shutdown_rx: watch::Receiver<Option<ShutdownReason>>,
) { ) {
let config = match BotConfig::load(project_root) { let config = match BotConfig::load(project_root) {
Some(c) => c, Some(c) => c,
@@ -82,7 +89,8 @@ pub fn spawn_bot(
let root = project_root.to_path_buf(); let root = project_root.to_path_buf();
let watcher_rx = watcher_tx.subscribe(); let watcher_rx = watcher_tx.subscribe();
tokio::spawn(async move { tokio::spawn(async move {
if let Err(e) = bot::run_bot(config, root, watcher_rx, perm_rx, agents).await { if let Err(e) = bot::run_bot(config, root, watcher_rx, perm_rx, agents, shutdown_rx).await
{
crate::slog!("[matrix-bot] Fatal error: {e}"); crate::slog!("[matrix-bot] Fatal error: {e}");
} }
}); });

View File

@@ -7,8 +7,10 @@ use crate::io::story_metadata::parse_front_matter;
use crate::io::watcher::WatcherEvent; use crate::io::watcher::WatcherEvent;
use crate::slog; use crate::slog;
use crate::transport::ChatTransport; use crate::transport::ChatTransport;
use std::collections::HashMap;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::broadcast; use tokio::sync::broadcast;
/// Human-readable display name for a pipeline stage directory. /// Human-readable display name for a pipeline stage directory.
@@ -99,6 +101,44 @@ pub fn format_error_notification(
(plain, html) (plain, html)
} }
/// Search all pipeline stages for a story name.
///
/// Tries each known pipeline stage directory in order and returns the first
/// name found. Used for events (like rate-limit warnings) that arrive without
/// a known stage.
fn find_story_name_any_stage(project_root: &Path, item_id: &str) -> Option<String> {
    // Active stages first — that is where a rate-limited story most likely is.
    ["2_current", "3_qa", "4_merge", "1_backlog", "5_done"]
        .iter()
        .find_map(|stage| read_story_name(project_root, stage, item_id))
}
/// Minimum time between rate-limit notifications for the same agent.
/// (The listener debounces per `"story_id:agent_name"` key using this window.)
const RATE_LIMIT_DEBOUNCE: Duration = Duration::from_secs(60);
/// Format a rate limit warning notification message.
///
/// Returns `(plain_text, html)` suitable for `ChatTransport::send_message`.
/// Both fall back to the raw `item_id` when the story number or name is
/// unavailable.
pub fn format_rate_limit_notification(
    item_id: &str,
    story_name: Option<&str>,
    agent_name: &str,
) -> (String, String) {
    let number = extract_story_number(item_id).unwrap_or(item_id);
    let name = story_name.unwrap_or(item_id);
    (
        format!("\u{26a0}\u{fe0f} #{number} {name} \u{2014} {agent_name} hit an API rate limit"),
        format!(
            "\u{26a0}\u{fe0f} <strong>#{number}</strong> <em>{name}</em> \u{2014} \
             {agent_name} hit an API rate limit"
        ),
    )
}
/// Spawn a background task that listens for watcher events and posts /// Spawn a background task that listens for watcher events and posts
/// stage-transition notifications to all configured rooms via the /// stage-transition notifications to all configured rooms via the
/// [`ChatTransport`] abstraction. /// [`ChatTransport`] abstraction.
@@ -110,6 +150,10 @@ pub fn spawn_notification_listener(
) { ) {
tokio::spawn(async move { tokio::spawn(async move {
let mut rx = watcher_rx; let mut rx = watcher_rx;
// Tracks when a rate-limit notification was last sent for each
// "story_id:agent_name" key, to debounce repeated warnings.
let mut rate_limit_last_notified: HashMap<String, Instant> = HashMap::new();
loop { loop {
match rx.recv().await { match rx.recv().await {
Ok(WatcherEvent::WorkItem { Ok(WatcherEvent::WorkItem {
@@ -163,6 +207,43 @@ pub fn spawn_notification_listener(
} }
} }
} }
Ok(WatcherEvent::RateLimitWarning {
ref story_id,
ref agent_name,
}) => {
// Debounce: skip if we sent a notification for this agent
// within the last RATE_LIMIT_DEBOUNCE seconds.
let debounce_key = format!("{story_id}:{agent_name}");
let now = Instant::now();
if let Some(&last) = rate_limit_last_notified.get(&debounce_key)
&& now.duration_since(last) < RATE_LIMIT_DEBOUNCE
{
slog!(
"[matrix-bot] Rate-limit notification debounced for \
{story_id}:{agent_name}"
);
continue;
}
rate_limit_last_notified.insert(debounce_key, now);
let story_name = find_story_name_any_stage(&project_root, story_id);
let (plain, html) = format_rate_limit_notification(
story_id,
story_name.as_deref(),
agent_name,
);
slog!("[matrix-bot] Sending rate-limit notification: {plain}");
for room_id in &room_ids {
if let Err(e) = transport.send_message(room_id, &plain, &html).await {
slog!(
"[matrix-bot] Failed to send rate-limit notification \
to {room_id}: {e}"
);
}
}
}
Ok(_) => {} // Ignore non-work-item events Ok(_) => {} // Ignore non-work-item events
Err(broadcast::error::RecvError::Lagged(n)) => { Err(broadcast::error::RecvError::Lagged(n)) => {
slog!( slog!(
@@ -183,6 +264,144 @@ pub fn spawn_notification_listener(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use async_trait::async_trait;
use crate::transport::MessageId;
// ── MockTransport ───────────────────────────────────────────────────────
type CallLog = Arc<std::sync::Mutex<Vec<(String, String, String)>>>;
/// Records every `send_message` call for inspection in tests.
struct MockTransport {
calls: CallLog,
}
impl MockTransport {
fn new() -> (Arc<Self>, CallLog) {
let calls: CallLog = Arc::new(std::sync::Mutex::new(Vec::new()));
(Arc::new(Self { calls: Arc::clone(&calls) }), calls)
}
}
#[async_trait]
impl crate::transport::ChatTransport for MockTransport {
async fn send_message(&self, room_id: &str, plain: &str, html: &str) -> Result<MessageId, String> {
self.calls.lock().unwrap().push((room_id.to_string(), plain.to_string(), html.to_string()));
Ok("mock-msg-id".to_string())
}
async fn edit_message(&self, _room_id: &str, _id: &str, _plain: &str, _html: &str) -> Result<(), String> {
Ok(())
}
async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
Ok(())
}
}
// ── spawn_notification_listener: RateLimitWarning ───────────────────────
/// AC2 + AC3: when a RateLimitWarning event arrives, send_message is called
/// with a notification that names the agent and story.
#[tokio::test]
async fn rate_limit_warning_sends_notification_with_agent_and_story() {
let tmp = tempfile::tempdir().unwrap();
let stage_dir = tmp.path().join(".storkit").join("work").join("2_current");
std::fs::create_dir_all(&stage_dir).unwrap();
std::fs::write(
stage_dir.join("365_story_rate_limit.md"),
"---\nname: Rate Limit Test Story\n---\n",
)
.unwrap();
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
let (transport, calls) = MockTransport::new();
spawn_notification_listener(
transport,
vec!["!room123:example.org".to_string()],
watcher_rx,
tmp.path().to_path_buf(),
);
watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: "365_story_rate_limit".to_string(),
agent_name: "coder-1".to_string(),
}).unwrap();
// Give the spawned task time to process the event.
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
let calls = calls.lock().unwrap();
assert_eq!(calls.len(), 1, "Expected exactly one notification");
let (room_id, plain, _html) = &calls[0];
assert_eq!(room_id, "!room123:example.org");
assert!(plain.contains("365"), "plain should contain story number");
assert!(plain.contains("Rate Limit Test Story"), "plain should contain story name");
assert!(plain.contains("coder-1"), "plain should contain agent name");
assert!(plain.contains("rate limit"), "plain should mention rate limit");
}
/// AC4: a second RateLimitWarning for the same agent within the debounce
/// window must NOT trigger a second notification.
#[tokio::test]
async fn rate_limit_warning_is_debounced() {
let tmp = tempfile::tempdir().unwrap();
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
let (transport, calls) = MockTransport::new();
spawn_notification_listener(
transport,
vec!["!room1:example.org".to_string()],
watcher_rx,
tmp.path().to_path_buf(),
);
// Send the same warning twice in rapid succession.
for _ in 0..2 {
watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: "42_story_debounce".to_string(),
agent_name: "coder-2".to_string(),
}).unwrap();
}
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
let calls = calls.lock().unwrap();
assert_eq!(calls.len(), 1, "Debounce should suppress the second notification");
}
/// AC4 (corollary): warnings for different agents are NOT debounced against
/// each other — both should produce notifications.
#[tokio::test]
async fn rate_limit_warnings_for_different_agents_both_notify() {
let tmp = tempfile::tempdir().unwrap();
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
let (transport, calls) = MockTransport::new();
spawn_notification_listener(
transport,
vec!["!room1:example.org".to_string()],
watcher_rx,
tmp.path().to_path_buf(),
);
watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: "42_story_foo".to_string(),
agent_name: "coder-1".to_string(),
}).unwrap();
watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: "42_story_foo".to_string(),
agent_name: "coder-2".to_string(),
}).unwrap();
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
let calls = calls.lock().unwrap();
assert_eq!(calls.len(), 2, "Different agents should each trigger a notification");
}
// ── stage_display_name ────────────────────────────────────────────────── // ── stage_display_name ──────────────────────────────────────────────────
@@ -319,6 +538,35 @@ mod tests {
); );
} }
// ── format_rate_limit_notification ─────────────────────────────────────
#[test]
fn format_rate_limit_notification_includes_agent_and_story() {
let (plain, html) = format_rate_limit_notification(
"365_story_my_feature",
Some("My Feature"),
"coder-2",
);
assert_eq!(
plain,
"\u{26a0}\u{fe0f} #365 My Feature \u{2014} coder-2 hit an API rate limit"
);
assert_eq!(
html,
"\u{26a0}\u{fe0f} <strong>#365</strong> <em>My Feature</em> \u{2014} coder-2 hit an API rate limit"
);
}
#[test]
fn format_rate_limit_notification_falls_back_to_item_id() {
let (plain, _html) =
format_rate_limit_notification("42_story_thing", None, "coder-1");
assert_eq!(
plain,
"\u{26a0}\u{fe0f} #42 42_story_thing \u{2014} coder-1 hit an API rate limit"
);
}
// ── format_stage_notification ─────────────────────────────────────────── // ── format_stage_notification ───────────────────────────────────────────
#[test] #[test]

View File

@@ -0,0 +1,145 @@
//! Rebuild command: trigger a server rebuild and restart.
//!
//! `{bot_name} rebuild` stops all running agents, rebuilds the server binary
//! with `cargo build`, and re-execs the process with the new binary. If the
//! build fails the error is reported back to the room and the server keeps
//! running.
use crate::agents::AgentPool;
use std::path::Path;
use std::sync::Arc;
/// A parsed rebuild command.
///
/// A unit marker type: the command carries no arguments, so parsing either
/// succeeds (`Some(RebuildCommand)`) or fails (`None`).
#[derive(Debug, PartialEq)]
pub struct RebuildCommand;
/// Parse a rebuild command from a raw message body.
///
/// Strips the bot mention prefix and checks whether the command word is
/// `rebuild` (case-insensitive; trailing arguments are ignored). Returns
/// `None` when the message is not a rebuild command.
pub fn extract_rebuild_command(
    message: &str,
    bot_name: &str,
    bot_user_id: &str,
) -> Option<RebuildCommand> {
    // Drop the mention, then any leading punctuation (e.g. ": " after a name).
    let body = strip_mention(message, bot_name, bot_user_id)
        .trim()
        .trim_start_matches(|c: char| !c.is_alphanumeric());
    // The command word is everything up to the first whitespace, or the
    // whole remainder when there is none.
    let word = body
        .split_once(char::is_whitespace)
        .map_or(body, |(first, _)| first);
    word.eq_ignore_ascii_case("rebuild").then_some(RebuildCommand)
}
/// Handle a rebuild command: trigger server rebuild and restart.
///
/// Returns a string describing the outcome. On build failure the error
/// message is returned so it can be posted to the room; the server keeps
/// running. On success this function never returns (the process re-execs).
pub async fn handle_rebuild(
    bot_name: &str,
    project_root: &Path,
    agents: &Arc<AgentPool>,
) -> String {
    crate::slog!("[matrix-bot] rebuild command received (bot={bot_name})");
    crate::rebuild::rebuild_and_restart(agents, project_root, None)
        .await
        .unwrap_or_else(|e| format!("Rebuild failed: {e}"))
}
/// Strip the bot mention prefix from a raw Matrix message body.
fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
let trimmed = message.trim();
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
return rest;
}
if let Some(localpart) = bot_user_id.split(':').next()
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
{
return rest;
}
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
return rest;
}
trimmed
}
/// Case-insensitively strip `prefix` from the start of `text`.
///
/// Returns the remainder only when the match ends at a word boundary —
/// end of string, or a following character that is not alphanumeric,
/// `-`, or `_` — so `"Timmy"` does not match inside `"TimmyBot"`.
///
/// BUG FIX: the previous `text[..prefix.len()]` byte slice panicked when
/// `prefix.len()` fell inside a multi-byte UTF-8 character (message bodies
/// are arbitrary user input). `str::get` returns `None` for both
/// out-of-range and non-char-boundary indexes, which both mean "no match".
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let head = text.get(..prefix.len())?;
    if !head.eq_ignore_ascii_case(prefix) {
        return None;
    }
    // Safe: `get` above proved prefix.len() is a char boundary of `text`.
    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        None => Some(rest),
        // An alphanumeric / `-` / `_` right after the prefix means we are in
        // the middle of a longer word — not a mention.
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        _ => Some(rest),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Mention by display name is recognized.
    #[test]
    fn extract_with_display_name() {
        let cmd = extract_rebuild_command("Timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    /// Mention by full Matrix user ID is recognized.
    #[test]
    fn extract_with_full_user_id() {
        let cmd = extract_rebuild_command(
            "@timmy:home.local rebuild",
            "Timmy",
            "@timmy:home.local",
        );
        assert_eq!(cmd, Some(RebuildCommand));
    }

    /// Mention by the user-ID localpart (before the `:`) is recognized.
    #[test]
    fn extract_with_localpart() {
        let cmd = extract_rebuild_command("@timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    /// The command word is matched case-insensitively.
    #[test]
    fn extract_case_insensitive() {
        let cmd = extract_rebuild_command("Timmy REBUILD", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    /// A different command word is not a rebuild command.
    #[test]
    fn extract_non_rebuild_returns_none() {
        let cmd = extract_rebuild_command("Timmy help", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }

    /// Trailing text after the command word is ignored.
    #[test]
    fn extract_ignores_extra_args() {
        // "rebuild" with trailing text is still a rebuild command
        let cmd = extract_rebuild_command("Timmy rebuild now", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    /// Another registered command word does not match.
    #[test]
    fn extract_no_match_returns_none() {
        let cmd = extract_rebuild_command("Timmy status", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }
}

View File

@@ -165,6 +165,12 @@ pub async fn handle_start(
info.agent_name info.agent_name
) )
} }
Err(e) if e.contains("All coder agents are busy") => {
format!(
"**{story_name}** has been queued in `work/2_current/` and will start \
automatically when a coder becomes available."
)
}
Err(e) => { Err(e) => {
format!("Failed to start **{story_name}**: {e}") format!("Failed to start **{story_name}**: {e}")
} }
@@ -312,6 +318,42 @@ mod tests {
); );
} }
#[tokio::test]
async fn handle_start_says_queued_not_error_when_all_coders_busy() {
use crate::agents::{AgentPool, AgentStatus};
use std::sync::Arc;
let tmp = tempfile::tempdir().unwrap();
let project_root = tmp.path();
let sk = project_root.join(".storkit");
let backlog = sk.join("work/1_backlog");
std::fs::create_dir_all(&backlog).unwrap();
std::fs::write(
sk.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
)
.unwrap();
std::fs::write(
backlog.join("356_story_test.md"),
"---\nname: Test Story\n---\n",
)
.unwrap();
let agents = Arc::new(AgentPool::new_test(3000));
agents.inject_test_agent("other-story", "coder-1", AgentStatus::Running);
let response = handle_start("Timmy", "356", None, project_root, &agents).await;
assert!(
!response.contains("Failed"),
"response must not say 'Failed' when coders are busy: {response}"
);
assert!(
response.to_lowercase().contains("queue") || response.to_lowercase().contains("available"),
"response must mention queued/available state: {response}"
);
}
#[test] #[test]
fn start_command_is_registered() { fn start_command_is_registered() {
use crate::matrix::commands::commands; use crate::matrix::commands::commands;

348
server/src/rebuild.rs Normal file
View File

@@ -0,0 +1,348 @@
//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command.
use crate::agents::AgentPool;
use crate::slog;
use crate::transport::ChatTransport;
use std::path::Path;
use std::sync::Arc;
// ── Shutdown notification ────────────────────────────────────────────────
/// The reason the server is shutting down.
///
/// Used to select the appropriate shutdown message sent to active bot
/// channels (see `BotShutdownNotifier::notify`).
#[derive(Clone, Debug, PartialEq)]
pub enum ShutdownReason {
    /// The operator stopped the server manually (SIGINT / SIGTERM / ctrl-c).
    Manual,
    /// A rebuild-and-restart was requested (via MCP tool or bot command).
    Rebuild,
}
/// Sends a shutdown announcement to all configured bot channels.
///
/// Wraps a [`ChatTransport`] together with the list of channel/room IDs the
/// bot is active in. Calling [`notify`] is best-effort — failures are logged
/// but never propagate, so shutdown is never blocked by a failed send.
pub struct BotShutdownNotifier {
    // Trait object so the notifier works with any chat backend.
    transport: Arc<dyn ChatTransport>,
    // Channel/room IDs the announcement is sent to.
    channels: Vec<String>,
    // Display name used in the shutdown message text.
    bot_name: String,
}
impl BotShutdownNotifier {
    /// Create a notifier over `transport` for the given `channels`,
    /// announcing as `bot_name`.
    pub fn new(
        transport: Arc<dyn ChatTransport>,
        channels: Vec<String>,
        bot_name: String,
    ) -> Self {
        Self {
            transport,
            channels,
            bot_name,
        }
    }
    /// Send a shutdown message to all configured channels.
    ///
    /// Errors from individual sends are logged and ignored so that a single
    /// failing channel does not prevent messages from reaching the rest.
    pub async fn notify(&self, reason: ShutdownReason) {
        // Pick the message text based on why we are going down.
        let msg = match reason {
            ShutdownReason::Manual => {
                format!("{} is going offline (server stopped).", self.bot_name)
            }
            ShutdownReason::Rebuild => {
                format!(
                    "{} is going offline to pick up a new build.",
                    self.bot_name
                )
            }
        };
        // Best-effort fan-out: plain text doubles as the HTML body.
        for channel in &self.channels {
            if let Err(e) = self.transport.send_message(channel, &msg, &msg).await {
                slog!("[shutdown] Failed to send shutdown message to {channel}: {e}");
            }
        }
    }
}
// ── Rebuild ──────────────────────────────────────────────────────────────
/// Rebuild the server binary and re-exec.
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
///    the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, sends a best-effort shutdown notification (if a
///    [`BotShutdownNotifier`] is provided), then re-execs the process with
///    the new binary via `std::os::unix::process::CommandExt::exec()`.
///
/// # Errors
///
/// Returns `Err` if the workspace root cannot be determined, cargo cannot be
/// spawned, the build fails, or the final `exec()` fails. On success this
/// function never returns — the process image is replaced by the new binary.
pub async fn rebuild_and_restart(
    agents: &AgentPool,
    project_root: &Path,
    notifier: Option<&BotShutdownNotifier>,
) -> Result<String, String> {
    slog!("[rebuild] Rebuild and restart requested");
    // 1. Gracefully stop all running agents.
    let running_count = agents
        .list_agents()
        .unwrap_or_default()
        .iter()
        .filter(|a| a.status == crate::agents::AgentStatus::Running)
        .count();
    if running_count > 0 {
        slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
    }
    agents.kill_all_children();
    // 2. Find the workspace root (parent of the server binary's source).
    //    CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
    //    the workspace root is its parent.
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir
        .parent()
        .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
    slog!(
        "[rebuild] Building server from workspace root: {}",
        workspace_root.display()
    );
    // 3. Build the server binary, matching the current build profile so the
    //    re-exec below picks up the binary we just built.
    let build_args: Vec<&str> = if cfg!(debug_assertions) {
        vec!["build", "-p", "storkit"]
    } else {
        vec!["build", "--release", "-p", "storkit"]
    };
    slog!("[rebuild] cargo {}", build_args.join(" "));
    // cargo may run for minutes; run it on the blocking pool so the async
    // runtime is not stalled.
    let output = tokio::task::spawn_blocking({
        let workspace_root = workspace_root.to_path_buf();
        move || {
            std::process::Command::new("cargo")
                .args(&build_args)
                .current_dir(&workspace_root)
                .output()
        }
    })
    .await
    .map_err(|e| format!("Build task panicked: {e}"))?
    .map_err(|e| format!("Failed to run cargo build: {e}"))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        slog!("[rebuild] Build failed:\n{stderr}");
        return Err(format!("Build failed:\n{stderr}"));
    }
    slog!("[rebuild] Build succeeded, re-execing with new binary");
    // 4. Send shutdown notification before replacing the process so that chat
    //    participants know the bot is going offline. Best-effort only — we
    //    do not abort the rebuild if the send fails.
    if let Some(n) = notifier {
        n.notify(ShutdownReason::Rebuild).await;
    }
    // 5. Re-exec with the new binary.
    //    Use the cargo output path rather than current_exe() so that rebuilds
    //    inside Docker work correctly — the running binary may be installed at
    //    /usr/local/bin/storkit (read-only) while cargo writes the new binary
    //    to /app/target/release/storkit (a writable volume).
    let new_exe = if cfg!(debug_assertions) {
        workspace_root.join("target/debug/storkit")
    } else {
        workspace_root.join("target/release/storkit")
    };
    // Remove the port file before re-exec so the new process can write its own.
    let port_file = project_root.join(".storkit_port");
    if port_file.exists() {
        let _ = std::fs::remove_file(&port_file);
    }
    // Also check cwd for the port file (the server may have been launched from
    // a directory other than project_root).
    let cwd_port_file = std::path::Path::new(".storkit_port");
    if cwd_port_file.exists() {
        let _ = std::fs::remove_file(cwd_port_file);
    }
    // Use exec() to replace the current process, forwarding our original CLI
    // arguments minus argv[0]. `skip(1)` — instead of collecting to a Vec and
    // slicing `&args[1..]` — cannot panic when argv is empty and avoids an
    // allocation. exec() never returns on success.
    use std::os::unix::process::CommandExt;
    let err = std::process::Command::new(&new_exe)
        .args(std::env::args().skip(1))
        .exec();
    // If we get here, exec() failed.
    Err(format!("Failed to exec new binary: {err}"))
}
// ── Tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
use crate::transport::MessageId;
use std::sync::Mutex;
/// In-memory transport that records sent messages.
struct CapturingTransport {
    /// `(room_id, plain_text)` pairs, in send order.
    sent: Mutex<Vec<(String, String)>>,
    /// When true, every `send_message` call returns an error.
    fail: bool,
}

impl CapturingTransport {
    /// A transport whose sends always succeed.
    fn new() -> Self {
        Self::with_fail(false)
    }

    /// A transport whose sends always fail.
    fn failing() -> Self {
        Self::with_fail(true)
    }

    /// Shared constructor backing `new` / `failing`.
    fn with_fail(fail: bool) -> Self {
        Self {
            sent: Mutex::new(Vec::new()),
            fail,
        }
    }

    /// Snapshot of everything sent so far.
    fn messages(&self) -> Vec<(String, String)> {
        self.sent.lock().unwrap().clone()
    }
}
#[async_trait]
impl ChatTransport for CapturingTransport {
    /// Record `(room_id, plain)` unless configured to fail.
    async fn send_message(
        &self,
        room_id: &str,
        plain: &str,
        _html: &str,
    ) -> Result<MessageId, String> {
        if self.fail {
            Err("send failed".to_string())
        } else {
            let mut log = self.sent.lock().unwrap();
            log.push((room_id.to_string(), plain.to_string()));
            Ok("msg-id".to_string())
        }
    }

    /// Edits are accepted and discarded.
    async fn edit_message(
        &self,
        _room_id: &str,
        _original_message_id: &str,
        _plain: &str,
        _html: &str,
    ) -> Result<(), String> {
        Ok(())
    }

    /// Typing notifications are accepted and discarded.
    async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
        Ok(())
    }
}
#[tokio::test]
async fn notify_manual_sends_to_all_channels() {
    let capture = Arc::new(CapturingTransport::new());
    let channels = vec!["#channel1".to_string(), "#channel2".to_string()];
    let notifier = BotShutdownNotifier::new(
        Arc::clone(&capture) as Arc<dyn ChatTransport>,
        channels,
        "Timmy".to_string(),
    );

    notifier.notify(ShutdownReason::Manual).await;

    let sent = capture.messages();
    assert_eq!(sent.len(), 2);
    assert_eq!(sent[0].0, "#channel1");
    assert_eq!(sent[1].0, "#channel2");
    // Message must indicate manual stop.
    let body = &sent[0].1;
    assert!(body.contains("offline"), "expected 'offline' in manual message: {body}");
    assert!(
        body.contains("stopped") || body.contains("manual"),
        "expected reason in manual message: {body}"
    );
}
#[tokio::test]
async fn notify_rebuild_sends_rebuild_reason() {
    let capture = Arc::new(CapturingTransport::new());
    let notifier = BotShutdownNotifier::new(
        Arc::clone(&capture) as Arc<dyn ChatTransport>,
        vec!["#general".to_string()],
        "Timmy".to_string(),
    );

    notifier.notify(ShutdownReason::Rebuild).await;

    let sent = capture.messages();
    assert_eq!(sent.len(), 1);
    // Message must indicate rebuild, not manual stop.
    let body = &sent[0].1;
    assert!(
        body.contains("build") || body.contains("rebuild"),
        "expected rebuild reason in message: {body}"
    );
}
#[tokio::test]
async fn notify_manual_and_rebuild_messages_are_distinct() {
let transport_a = Arc::new(CapturingTransport::new());
let notifier_a = BotShutdownNotifier::new(
Arc::clone(&transport_a) as Arc<dyn ChatTransport>,
vec!["C1".to_string()],
"Bot".to_string(),
);
notifier_a.notify(ShutdownReason::Manual).await;
let transport_b = Arc::new(CapturingTransport::new());
let notifier_b = BotShutdownNotifier::new(
Arc::clone(&transport_b) as Arc<dyn ChatTransport>,
vec!["C1".to_string()],
"Bot".to_string(),
);
notifier_b.notify(ShutdownReason::Rebuild).await;
let manual_msg = &transport_a.messages()[0].1;
let rebuild_msg = &transport_b.messages()[0].1;
assert_ne!(manual_msg, rebuild_msg, "manual and rebuild messages must differ");
}
#[tokio::test]
async fn notify_is_best_effort_failing_send_does_not_panic() {
    // A transport that always fails should not cause notify() to panic or
    // return an error — the failure is swallowed silently.
    let notifier = BotShutdownNotifier::new(
        Arc::new(CapturingTransport::failing()) as Arc<dyn ChatTransport>,
        vec!["#channel".to_string()],
        "Timmy".to_string(),
    );
    // Should complete without panicking.
    notifier.notify(ShutdownReason::Manual).await;
}
#[tokio::test]
async fn notify_with_no_channels_is_noop() {
    let capture = Arc::new(CapturingTransport::new());
    let notifier = BotShutdownNotifier::new(
        Arc::clone(&capture) as Arc<dyn ChatTransport>,
        Vec::new(),
        "Timmy".to_string(),
    );
    notifier.notify(ShutdownReason::Manual).await;
    // No channels configured, so nothing must have been sent.
    assert!(capture.messages().is_empty());
}
}

View File

@@ -253,8 +253,24 @@ fn remove_worktree_sync(project_root: &Path, wt_path: &Path, branch: &str) -> Re
if !output.status.success() {
    let stderr = String::from_utf8_lossy(&output.stderr);
if stderr.contains("not a working tree") {
// Orphaned directory: git doesn't recognise it as a worktree.
// Remove the directory directly and prune stale git metadata.
slog!(
"[worktree] orphaned worktree detected, removing directory: {}",
wt_path.display()
);
if let Err(e) = std::fs::remove_dir_all(wt_path) {
slog!("[worktree] failed to remove orphaned directory: {e}");
}
let _ = Command::new("git")
.args(["worktree", "prune"])
.current_dir(project_root)
.output();
} else {
slog!("[worktree] remove warning: {stderr}");
}
}
// Delete branch (best effort)
let _ = Command::new("git")
@@ -630,6 +646,28 @@ mod tests {
);
}
#[test]
fn remove_worktree_sync_removes_orphaned_directory() {
let tmp = TempDir::new().unwrap();
let project_root = tmp.path().join("my-project");
fs::create_dir_all(&project_root).unwrap();
init_git_repo(&project_root);
// Create a directory that looks like a worktree but isn't registered with git
let wt_path = project_root
.join(".storkit")
.join("worktrees")
.join("orphan");
fs::create_dir_all(&wt_path).unwrap();
fs::write(wt_path.join("some_file.txt"), "stale").unwrap();
assert!(wt_path.exists());
// git worktree remove will fail with "not a working tree",
// but the fallback should rm -rf the directory
remove_worktree_sync(&project_root, &wt_path, "feature/orphan").unwrap();
assert!(!wt_path.exists());
}
#[test]
fn remove_worktree_sync_cleans_up_directory() {
    let tmp = TempDir::new().unwrap();