Compare commits
189 Commits
Commit SHAs:

618a2779ff, 721d12bcfe, df6d2db327, 49285c1865, 0c15be43b8, 9408bd2cdf, a24e4c5c85,
c0133fe733, 752c3904bf, bac53ac09a, b2ef2eca5f, fb05f71e76, 438be196c9, f1b4894d6e,
bd281fd749, 79edc28334, 92c53704f0, 7223fa2f10, dedf951b17, aad583defd, 88b02cf746,
1a9833d820, a904cda629, c755c03f0e, a8630f3e1b, 9fb1bd5711, 0b3ce0f33e, f4b7573f0a,
bb801ba826, 53634d638d, b50e7cff00, 68973b0bb8, 34bbf5a122, ed3c5f9c95, 59d1a2c069,
52e73bfbea, 4e590401a5, 6b6815325d, f874783b09, 292f9cdfe2, 1cce46d3fa, e85c06df19,
8b85ca743e, 1a7b6c7342, 4a94158ef2, f10ea1ecf2, 1a3b69301a, 6d3eab92fd, f6920a87ad,
5f9d903987, ea916d27f4, 970b9bcd9d, a5ee6890f5, 41dc3292bb, 3766f8b464, 0c85ecc85c,
2c29a4d2b8, 454d694d24, 96bedd70dc, fffdd5c5ea, 4805598932, 3d55e2fcc6, 96b31d1a48,
11168fa426, c2c2d65889, 5c8c4b7ff3, fbab93f493, 78ff6d104e, fcc2b9c3eb, 0c4239501a,
13b6ecd958, 1816a94617, 56d3373e69, efdb0c5814, b8365275d8, 6ddfd29927, 01b157a2e4,
99a59d7ad1, eb8adb6225, 2262f2ca6b, 2bb36d0e68, 86102f8ad6, edf47601c4, b606e1de92,
0d5f0de876, bb41f3951c, e3d7931f17, 87b5648123, 506bdd4df8, a9bec3c29e, 69936f457f,
24dd3d9fa9, bc45a91b3e, db7c11508e, 47173e0d3a, f610ef6046, 89f776b978, e4227cf673,
f346712dd1, f9419e5ea7, c32bab03a4, ea23042698, 3825b03fda, d6cfd18e6a, 01ac8a8345,
153f8812d7, 01c7c39872, eec8f3ac15, 28626ab80a, 4262af7faa, 628b60ad15, c504738949,
0d5b9724c1, b189ca845c, 8094d32cbb, 1c2824fa31, af72f593e8, ac8112bf0b, 9bf4b65707,
240ebf055a, 293a2fcfb6, 4ccc3d9149, eef0f3ee7d, 9dc7c21b05, 76369de391, b747cc0fab,
f74a0425a9, b0b21765d9, 9075bc1a84, 9f873dc839, 3774c3dca7, cd095f9a99, fe0f560b58,
0416bf343c, c3e4f85903, 52d9d0f9ce, 996ba82682, 1f4152c894, 02b481ee4c, 9c339c118f,
4790aac286, b2d92d6059, 71887af2d3, 5db9965962, e109e1ba5c, 3554594d8d, a6c8cf0daf,
30a56d03e5, 4734bd943f, a1dd88579b, 759a289894, be3b5b0b60, fbf391684a, 65546a42b7,
4e014d45c3, 4f39de437f, 79ee6eb0dc, c930c537bc, f129a38704, 4344081b54, 52c5344ce5,
35bd196790, 65c8dc19d6, 645a141d2d, 11d1980920, 83879cfa9e, 972d8f3c12, 4b1167025c,
23eb752e3b, 7aa1d0e322, a6dcd48da9, 87958b0a2a, ea061d868d, 6a03ca725e, 0cd7c15227,
0cb43a4de4, cb663b620b, 0653af701c, b1a96990c4, e46f855ab3, d838dd7127, 02ee48911e,
6429b20974, dcf0be2998, efea81b487, 491ca19a0b, 243b75e966, 7693cc820c, ba4af4179e
.dockerignore (new file)
@@ -0,0 +1,11 @@
# Docker build context exclusions
**/target/
**/node_modules/
frontend/dist/
.storkit/worktrees/
.storkit/logs/
.storkit/work/6_archived/
.git/
*.swp
*.swo
.DS_Store
.gitignore (vendored)
@@ -8,6 +8,7 @@
# App specific (root-level; storkit subdirectory patterns live in .storkit/.gitignore)
store.json
.storkit_port
+.storkit/bot.toml.bak

# Rust stuff
target
.ignore
@@ -3,6 +3,6 @@ frontend/
node_modules/
.claude/
.git/
-.story_kit/
+.storkit/
store.json
-.story_kit_port
+.storkit_port
.storkit/.gitignore (vendored)
@@ -20,3 +20,6 @@ coverage/

# Token usage log (generated at runtime, contains cost data)
token_usage.jsonl
+
+# Chat service logs
+whatsapp_history.json
@@ -228,7 +228,29 @@ If a user hands you this document and says "Apply this process to my project":

---

-## 6. Code Quality
+## 6. Chat Bot Configuration
+
+Story Kit includes a chat bot that can be connected to one messaging platform at a time. The bot handles commands, LLM conversations, and pipeline notifications.
+
+**Only one transport can be active at a time.** To configure the bot, copy the appropriate example file to `.storkit/bot.toml`:
+
+| Transport | Example file | Webhook endpoint |
+|-----------|-------------|-----------------|
+| Matrix | `bot.toml.matrix.example` | *(uses Matrix sync, no webhook)* |
+| WhatsApp (Meta Cloud API) | `bot.toml.whatsapp-meta.example` | `/webhook/whatsapp` |
+| WhatsApp (Twilio) | `bot.toml.whatsapp-twilio.example` | `/webhook/whatsapp` |
+| Slack | `bot.toml.slack.example` | `/webhook/slack` |
+
+```bash
+cp .storkit/bot.toml.matrix.example .storkit/bot.toml
+# Edit bot.toml with your credentials
+```
+
+The `bot.toml` file is gitignored (it contains secrets). The example files are checked in for reference.
+
+---
+
+## 7. Code Quality

**MANDATORY:** Before completing Step 3 (Verification) of any story, you MUST run all applicable linters, formatters, and test suites and fix ALL errors and warnings. Zero tolerance for warnings or errors.
@@ -1,61 +0,0 @@ (file deleted)
homeserver = "https://matrix.example.com"
username = "@botname:example.com"
password = "your-bot-password"

# List one or more rooms to listen in. Use a single-element list for one room.
room_ids = ["!roomid:example.com"]

# Optional: the deprecated single-room key is still accepted for backwards compat.
# room_id = "!roomid:example.com"

allowed_users = ["@youruser:example.com"]
enabled = false

# Maximum conversation turns to remember per room (default: 20).
# history_size = 20

# Rooms where the bot responds to all messages (not just addressed ones).
# This list is updated automatically when users toggle ambient mode at runtime.
# ambient_rooms = ["!roomid:example.com"]

# ── WhatsApp Business API ──────────────────────────────────────────────
# Set transport = "whatsapp" to use WhatsApp instead of Matrix.
# The webhook endpoint will be available at /webhook/whatsapp.
# You must configure this URL in the Meta Developer Dashboard.
#
# transport = "whatsapp"
# whatsapp_phone_number_id = "123456789012345"
# whatsapp_access_token = "EAAx..."
# whatsapp_verify_token = "my-secret-verify-token"
#
# ── 24-hour messaging window & notification templates ─────────────────
# WhatsApp only allows free-form text messages within 24 hours of the last
# inbound message from a user. For proactive pipeline notifications sent
# after the window expires, an approved Meta message template is used.
#
# Register the template in the Meta Business Manager:
# 1. Go to Business Settings → WhatsApp → Message Templates → Create.
# 2. Category: UTILITY
# 3. Template name: pipeline_notification (or your chosen name below)
# 4. Language: English (en_US)
# 5. Body text (example):
#    Story *{{1}}* has moved to *{{2}}*.
#    Where {{1}} = story name, {{2}} = pipeline stage.
# 6. Submit for review. Meta typically approves utility templates within
#    minutes; transactional categories may take longer.
#
# Once approved, set the name below (default: "pipeline_notification"):
# whatsapp_notification_template = "pipeline_notification"

# ── Slack Bot API ─────────────────────────────────────────────────────
# Set transport = "slack" to use Slack instead of Matrix.
# The webhook endpoint will be available at /webhook/slack.
# Configure this URL in the Slack App → Event Subscriptions → Request URL.
#
# Required Slack App scopes: chat:write, chat:update
# Subscribe to bot events: message.channels, message.groups, message.im
#
# transport = "slack"
# slack_bot_token = "xoxb-..."
# slack_signing_secret = "your-signing-secret"
# slack_channel_ids = ["C01ABCDEF"]
.storkit/bot.toml.matrix.example (new file)
@@ -0,0 +1,26 @@
# Matrix Transport
# Copy this file to bot.toml and fill in your values.
# Only one transport can be active at a time.

enabled = true
transport = "matrix"

homeserver = "https://matrix.example.com"
username = "@botname:example.com"
password = "your-bot-password"

# List one or more rooms to listen in.
room_ids = ["!roomid:example.com"]

# Users allowed to interact with the bot (fail-closed: empty = nobody).
allowed_users = ["@youruser:example.com"]

# Bot display name in chat.
# display_name = "Assistant"

# Maximum conversation turns to remember per room (default: 20).
# history_size = 20

# Rooms where the bot responds to all messages (not just addressed ones).
# This list is updated automatically when users toggle ambient mode at runtime.
# ambient_rooms = ["!roomid:example.com"]
.storkit/bot.toml.slack.example (new file)
@@ -0,0 +1,23 @@
# Slack Transport
# Copy this file to bot.toml and fill in your values.
# Only one transport can be active at a time.
#
# Setup:
# 1. Create a Slack App at api.slack.com/apps
# 2. Add OAuth scopes: chat:write, chat:update
# 3. Subscribe to bot events: message.channels, message.groups, message.im
# 4. Install the app to your workspace
# 5. Set your webhook URL in Event Subscriptions: https://your-server/webhook/slack

enabled = true
transport = "slack"

slack_bot_token = "xoxb-..."
slack_signing_secret = "your-signing-secret"
slack_channel_ids = ["C01ABCDEF"]

# Bot display name (used in formatted messages).
# display_name = "Assistant"

# Maximum conversation turns to remember per channel (default: 20).
# history_size = 20
.storkit/bot.toml.whatsapp-meta.example (new file)
@@ -0,0 +1,28 @@
# WhatsApp Transport (Meta Cloud API)
# Copy this file to bot.toml and fill in your values.
# Only one transport can be active at a time.
#
# Setup:
# 1. Create a Meta Business App at developers.facebook.com
# 2. Add the WhatsApp product
# 3. Copy your Phone Number ID and generate a permanent access token
# 4. Register your webhook URL: https://your-server/webhook/whatsapp
# 5. Set the verify token below to match what you configure in Meta's dashboard

enabled = true
transport = "whatsapp"
whatsapp_provider = "meta"

whatsapp_phone_number_id = "123456789012345"
whatsapp_access_token = "EAAx..."
whatsapp_verify_token = "my-secret-verify-token"

# Optional: name of the approved Meta message template used for notifications
# sent outside the 24-hour messaging window (default: "pipeline_notification").
# whatsapp_notification_template = "pipeline_notification"

# Bot display name (used in formatted messages).
# display_name = "Assistant"

# Maximum conversation turns to remember per user (default: 20).
# history_size = 20
.storkit/bot.toml.whatsapp-twilio.example (new file)
@@ -0,0 +1,24 @@
# WhatsApp Transport (Twilio)
# Copy this file to bot.toml and fill in your values.
# Only one transport can be active at a time.
#
# Setup:
# 1. Sign up at twilio.com
# 2. Activate the WhatsApp sandbox (Messaging > Try it out > Send a WhatsApp message)
# 3. Send the sandbox join code from your WhatsApp to the sandbox number
# 4. Copy your Account SID, Auth Token, and sandbox number below
# 5. Set your webhook URL in the Twilio console: https://your-server/webhook/whatsapp

enabled = true
transport = "whatsapp"
whatsapp_provider = "twilio"

twilio_account_sid = "ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
twilio_auth_token = "your_auth_token"
twilio_whatsapp_number = "+14155238886"

# Bot display name (used in formatted messages).
# display_name = "Assistant"

# Maximum conversation turns to remember per user (default: 20).
# history_size = 20
@@ -33,7 +33,7 @@ model = "sonnet"
max_turns = 50
max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
||||||
[[agent]]
name = "coder-2"
@@ -43,7 +43,7 @@ model = "sonnet"
max_turns = 50
max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
||||||
[[agent]]
name = "coder-3"
@@ -53,7 +53,7 @@ model = "sonnet"
max_turns = 50
max_budget_usd = 5.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
||||||
[[agent]]
name = "qa-2"
@@ -130,7 +130,7 @@ model = "opus"
max_turns = 80
max_budget_usd = 20.00
prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .story_kit/README.md to understand the dev process. The story details are in your prompt above. Follow the SDTW process through implementation and verification (Steps 1-3). The worktree and feature branch already exist - do not create them. Check .mcp.json for MCP tools. Do NOT accept the story or merge - commit your work and stop. If the user asks to review your changes, tell them to run: cd \"{{worktree_path}}\" && git difftool {{base_branch}}...HEAD\n\nIMPORTANT: Commit all your work before your process exits. The server will automatically run acceptance gates (cargo clippy + tests) when your process exits and advance the pipeline based on the results.\n\n## Bug Workflow: Root Cause First\nWhen working on bugs:\n1. Investigate the root cause before writing any fix. Use `git bisect` to find the breaking commit or `git log` to trace history. Read the relevant code before touching anything.\n2. Fix the root cause with a surgical, minimal change. Do NOT add new abstractions, wrappers, or workarounds when a targeted fix to the original code is possible.\n3. Write commit messages that explain what broke and why, not just what was changed.\n4. If you cannot determine the root cause after thorough investigation, document what you tried and why it was inconclusive — do not guess and ship a speculative fix."
system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. You handle complex tasks requiring deep architectural understanding. Follow the Story-Driven Test Workflow strictly. Run cargo clippy and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. You handle complex tasks requiring deep architectural understanding. Follow the Story-Driven Test Workflow strictly. Run cargo clippy --all-targets --all-features and biome checks before considering work complete. Commit all your work before finishing - use a descriptive commit message. Do not accept stories, move them to archived, or merge to master - a human will do that. Do not coordinate with other agents - focus on your assigned story. The server automatically runs acceptance gates when your process exits. For bugs, always find and fix the root cause. Use git bisect to find breaking commits. Do not layer new code on top of existing code when a surgical fix is possible. If root cause is unclear after investigation, document what you tried rather than guessing."
|
||||||
[[agent]]
name = "qa"
@@ -1,20 +0,0 @@ (file deleted)
---
name: "Gate pipeline transitions on ensure_acceptance"
---

# Story 169: Gate pipeline transitions on ensure_acceptance

## User Story

As a project owner, I want story progression to be blocked unless ensure_acceptance passes, so that agents can't skip the testing workflow.

## Acceptance Criteria

- [ ] move_story_to_merge rejects stories that haven't passed ensure_acceptance
- [ ] accept_story rejects stories that haven't passed ensure_acceptance
- [ ] Rejection returns a clear error message telling the agent what's missing
- [ ] Existing passing stories (all criteria checked, tests recorded) still flow through normally

## Out of Scope

- TBD
@@ -1,69 +0,0 @@ (file deleted)
---
name: "Evaluate Docker/OrbStack for agent isolation and resource limiting"
agent: coder-opus
---

# Spike 329: Evaluate Docker/OrbStack for agent isolation and resource limiting

## Question

Investigate running the entire storkit system (server, Matrix bot, agents, web UI) inside a single Docker container, using OrbStack as the macOS runtime for better performance. The goal is to isolate storkit from the host machine — not to isolate agents from each other.

Currently storkit runs as bare processes on the host with full filesystem and network access. A single container would provide:

1. **Host isolation** — storkit can't touch anything outside the container
2. **Clean install/uninstall** — `docker run` to start, `docker rm` to remove
3. **Reproducible environment** — same container works on any machine
4. **Distributable product** — `docker pull storkit` for new users
5. **Resource limits** — cap total CPU/memory for the whole system

## Architecture

```
Docker Container (single)
├── storkit server
│   ├── Matrix bot
│   ├── WhatsApp webhook
│   ├── Slack webhook
│   ├── Web UI
│   └── MCP server
├── Agent processes (coder-1, coder-2, coder-opus, qa, mergemaster)
├── Rust toolchain + Node.js + Claude Code CLI
└── /workspace (bind-mounted project repo from host)
```

## Key questions to answer:

- **Performance**: How much slower are cargo builds inside the container on macOS? Compare Docker Desktop vs OrbStack for bind-mounted volumes.
- **Dockerfile**: What's the minimal image for the full stack? Rust toolchain + Node.js + Claude Code CLI + cargo-nextest + git.
- **Bind mounts**: The project repo is bind-mounted from the host. Any filesystem performance concerns with OrbStack?
- **Networking**: Container exposes web UI port (3000). Matrix/WhatsApp/Slack connect outbound. Any issues?
- **API key**: Pass ANTHROPIC_API_KEY as env var to the container.
- **Git**: Git operations happen inside the container on the bind-mounted repo. Commits are visible on the host immediately.
- **Cargo cache**: Use a named Docker volume for ~/.cargo/registry so dependencies persist across container restarts.
- **Claude Code state**: Where does Claude Code store its session data? Needs to persist or be in a volume.
- **OrbStack vs Docker Desktop**: Is OrbStack required for acceptable performance, or does Docker Desktop work too?
- **Server restart**: Does `rebuild_and_restart` work inside a container (re-exec with new binary)?

## Deliverable:
A proof-of-concept Dockerfile, docker-compose.yml, and a short write-up with findings and performance benchmarks.

## Hypothesis

- TBD

## Timebox

- TBD

## Investigation Plan

- TBD

## Findings

- TBD

## Recommendation

- TBD
@@ -1,31 +0,0 @@ (file deleted)
---
name: Agent Security and Sandboxing
---
# Story 34: Agent Security and Sandboxing

## User Story
**As a** supervisor orchestrating multiple autonomous agents,
**I want to** constrain what each agent can access and do,
**So that** agents can't escape their worktree, damage shared state, or perform unintended actions.

## Acceptance Criteria
- [ ] Agent creation accepts an `allowed_tools` list to restrict Claude Code tool access per agent.
- [ ] Agent creation accepts a `disallowed_tools` list as an alternative to allowlisting.
- [ ] Agents without Bash access can still perform useful coding work (Read, Edit, Write, Glob, Grep).
- [ ] Investigate replacing direct Bash/shell access with Rust-implemented tool proxies that enforce boundaries:
  - Scoped `exec_shell` that only runs allowlisted commands (e.g., `cargo test`, `npm test`) within the agent's worktree.
  - Scoped `read_file` / `write_file` that reject paths outside the agent's worktree root.
  - Scoped `git` operations that only work within the agent's worktree.
- [ ] Evaluate `--max-turns` and `--max-budget-usd` as safety limits for runaway agents.
- [ ] Document the trust model: what the supervisor controls vs what agents can do autonomously.

## Questions to Explore
- Can we use MCP (Model Context Protocol) to expose our Rust-implemented tools to Claude Code, replacing its built-in Bash/filesystem tools with scoped versions?
- What's the right granularity for shell allowlists — command-level (`cargo test`) or pattern-level (`cargo *`)?
- Should agents have read access outside their worktree (e.g., to reference shared specs) but write access only within it?
- Is OS-level sandboxing (Docker, macOS sandbox profiles) worth the complexity for a personal tool?

## Out of Scope
- Multi-user authentication or authorization (single-user personal tool).
- Network-level isolation between agents.
- Encrypting agent communication channels (all local).
@@ -1,18 +0,0 @@ (file deleted)
---
name: Live Test Gate Updates
---

# Story 57: Live Test Gate Updates

## User Story

As a user, I want the Gate and Todo panels to update automatically when tests are recorded or acceptance is checked, so I can see progress without manually refreshing.

## Acceptance Criteria

- [ ] Server broadcasts a `{"type": "notification", "topic": "tests"}` event over `/ws` when tests are recorded, acceptance is checked, or coverage is collected
- [ ] GatePanel auto-refreshes its data when it receives a `tests` notification
- [ ] TodoPanel auto-refreshes its data when it receives a `tests` notification
- [ ] Manual refresh buttons continue to work
- [ ] Panels do not flicker or lose scroll position on auto-refresh
- [ ] End-to-end test: record test results via MCP, verify Gate panel updates without manual refresh
@@ -0,0 +1,22 @@ (new file)
---
name: "WhatsApp transport supports Twilio API as alternative to Meta Cloud API"
---

# Story 382: WhatsApp transport supports Twilio API as alternative to Meta Cloud API

## User Story

As a user, I want to use Twilio's WhatsApp API instead of Meta's Cloud API directly, so that I can avoid Meta's painful developer onboarding and use Twilio's simpler signup process.

## Acceptance Criteria

- [ ] bot.toml supports a `whatsapp_provider` field with values `meta` (default, current behavior) or `twilio`
- [ ] When provider is `twilio`, messages are sent via Twilio's REST API (`api.twilio.com`) using Account SID + Auth Token
- [ ] When provider is `twilio`, inbound webhooks parse Twilio's form-encoded format instead of Meta's JSON
- [ ] Twilio config requires `twilio_account_sid`, `twilio_auth_token`, and `twilio_whatsapp_number` in bot.toml
- [ ] All existing bot commands and LLM passthrough work identically regardless of provider
- [ ] 24-hour messaging window logic still applies (Twilio enforces this server-side too)

## Out of Scope

- TBD
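Story 382's criterion about parsing Twilio's form-encoded webhook (versus Meta's JSON) could look roughly like the sketch below. It is illustrative only and not code from this PR: the struct, handler name, and use of axum's `Form` extractor are assumptions; the `MessageSid`/`From`/`Body` field names come from Twilio's documented webhook payload.

```rust
// Hypothetical sketch: an axum handler for POST /webhook/whatsapp when
// whatsapp_provider = "twilio". Twilio posts application/x-www-form-urlencoded,
// so a Form extractor replaces the JSON parsing used for Meta's Cloud API.
use axum::{extract::Form, http::StatusCode};
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct TwilioInbound {
    message_sid: String, // "MessageSid"
    from: String,        // "From", e.g. "whatsapp:+15551234567"
    body: String,        // "Body", the message text
}

async fn twilio_webhook(Form(msg): Form<TwilioInbound>) -> StatusCode {
    // Hand the message to the same transport-agnostic chat pipeline the Meta
    // provider feeds; the call below is commented out because its name is
    // not part of this diff.
    tracing::info!(from = %msg.from, "inbound Twilio WhatsApp message");
    // handle_inbound_chat_message(&msg.from, &msg.body).await;
    let _ = (msg.message_sid, msg.body);
    StatusCode::OK
}
```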
@@ -0,0 +1,41 @@ (new file)
---
name: "Reorganize chat system into chat module with transport submodules"
---

# Refactor 383: Reorganize chat system into chat module with transport submodules

## Current State

- TBD

## Desired State

Currently chat-related code is scattered at the top level of `src/`: `transport.rs`, `whatsapp.rs`, `slack.rs`, plus `matrix/` as a directory module. This should be reorganized into a clean module hierarchy:

```
src/
  chat/
    mod.rs          # Generic chat traits, types, ChatTransport etc.
    transport/
      mod.rs
      matrix/       # Existing matrix module moved here
      whatsapp.rs   # Existing whatsapp.rs moved here
      slack.rs      # Existing slack.rs moved here
      twilio.rs     # Future Twilio transport
```

The `ChatTransport` trait and shared chat types should live in `chat/mod.rs`. Each transport implementation becomes a submodule of `chat::transport`.

## Acceptance Criteria

- [ ] ChatTransport trait and shared chat types live in `chat/mod.rs`
- [ ] Matrix transport lives in `chat/transport/matrix/`
- [ ] WhatsApp transport lives in `chat/transport/whatsapp.rs`
- [ ] Slack transport lives in `chat/transport/slack.rs`
- [ ] Top-level `transport.rs`, `whatsapp.rs`, `slack.rs`, and `matrix/` are removed
- [ ] All existing tests pass without modification (or with only import path changes)
- [ ] No functional changes — pure file reorganization and re-exports

## Out of Scope

- TBD
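As a rough illustration of the module declarations this reorganization implies (module names taken from the tree above; the trait method is a placeholder, since the real `ChatTransport` signature is not shown in this diff):

```rust
// src/chat/mod.rs — shared chat types and the transport trait live here.
pub mod transport;

// Placeholder signature only; the actual ChatTransport methods are defined
// elsewhere in the codebase and are not part of this diff.
pub trait ChatTransport {
    fn name(&self) -> &'static str;
}
```

```rust
// src/chat/transport/mod.rs — one submodule per messaging platform, so each
// existing file moves here unchanged and call sites only adjust import paths.
pub mod matrix;
pub mod slack;
pub mod whatsapp;
```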
@@ -0,0 +1,212 @@
|
|||||||
|
---
|
||||||
|
name: "Evaluate Docker/OrbStack for agent isolation and resource limiting"
|
||||||
|
agent: "coder-opus"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Spike 329: Evaluate Docker/OrbStack for agent isolation and resource limiting
|
||||||
|
|
||||||
|
## Question
|
||||||
|
|
||||||
|
Investigate running the entire storkit system (server, Matrix bot, agents, web UI) inside a single Docker container, using OrbStack as the macOS runtime for better performance. The goal is to isolate storkit from the host machine — not to isolate agents from each other.
|
||||||
|
|
||||||
|
**Important context:** Storkit developing itself is the dogfood edge case. The primary use case is storkit managing agents that develop *other* projects, driven by multiple users in chat rooms (Matrix, WhatsApp, Slack). Isolation must account for untrusted codebases, multi-user command surfaces, and running against arbitrary repos — not just the single-developer self-hosted setup.
|
||||||
|
|
||||||
|
Currently storkit runs as bare processes on the host with full filesystem and network access. A single container would provide:
|
||||||
|
|
||||||
|
1. **Host isolation** — storkit can't touch anything outside the container
|
||||||
|
2. **Clean install/uninstall** — `docker run` to start, `docker rm` to remove
|
||||||
|
3. **Reproducible environment** — same container works on any machine
|
||||||
|
4. **Distributable product** — `docker pull storkit` for new users
|
||||||
|
5. **Resource limits** — cap total CPU/memory for the whole system
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Docker Container (single)
|
||||||
|
├── storkit server
|
||||||
|
│ ├── Matrix bot
|
||||||
|
│ ├── WhatsApp webhook
|
||||||
|
│ ├── Slack webhook
|
||||||
|
│ ├── Web UI
|
||||||
|
│ └── MCP server
|
||||||
|
├── Agent processes (coder-1, coder-2, coder-opus, qa, mergemaster)
|
||||||
|
├── Rust toolchain + Node.js + Claude Code CLI
|
||||||
|
└── /workspace (bind-mounted project repo from host)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key questions to answer:
|
||||||
|
|
||||||
|
- **Performance**: How much slower are cargo builds inside the container on macOS? Compare Docker Desktop vs OrbStack for bind-mounted volumes.
|
||||||
|
- **Dockerfile**: What's the minimal image for the full stack? Rust toolchain + Node.js + Claude Code CLI + cargo-nextest + git.
|
||||||
|
- **Bind mounts**: The project repo is bind-mounted from the host. Any filesystem performance concerns with OrbStack?
|
||||||
|
- **Networking**: Container exposes web UI port (3000). Matrix/WhatsApp/Slack connect outbound. Any issues?
|
||||||
|
- **API key**: Pass ANTHROPIC_API_KEY as env var to the container.
|
||||||
|
- **Git**: Git operations happen inside the container on the bind-mounted repo. Commits are visible on the host immediately.
|
||||||
|
- **Cargo cache**: Use a named Docker volume for ~/.cargo/registry so dependencies persist across container restarts.
|
||||||
|
- **Claude Code state**: Where does Claude Code store its session data? Needs to persist or be in a volume.
|
||||||
|
- **OrbStack vs Docker Desktop**: Is OrbStack required for acceptable performance, or does Docker Desktop work too?
|
||||||
|
- **Server restart**: Does `rebuild_and_restart` work inside a container (re-exec with new binary)?
|
||||||
|
|
||||||
|
## Deliverable:
|
||||||
|
A proof-of-concept Dockerfile, docker-compose.yml, and a short write-up with findings and performance benchmarks.
|
||||||
|
|
||||||
|
## Hypothesis
|
||||||
|
|
||||||
|
A single Docker container running the entire storkit stack (server + agents + toolchain) on OrbStack will provide acceptable performance for the primary use case (developing other projects) while giving us host isolation, resource limits, and a distributable product. OrbStack's VirtioFS should make bind-mounted filesystem performance close to native.
|
||||||
|
|
||||||
|
## Timebox
|
||||||
|
|
||||||
|
4 hours
|
||||||
|
|
||||||
|
## Investigation Plan
|
||||||
|
|
||||||
|
1. Audit storkit's runtime dependencies (Rust toolchain, Node.js, Claude Code CLI, cargo-nextest, git)
|
||||||
|
2. Determine where Claude Code stores session state (~/.claude)
|
||||||
|
3. Analyze how rebuild_and_restart works (exec() replacement) and whether it's container-compatible
|
||||||
|
4. Draft a multi-stage Dockerfile and docker-compose.yml
|
||||||
|
5. Document findings for each key question
|
||||||
|
6. Provide recommendation and follow-up stories
|
||||||
|
|
||||||
|
## Findings
|
||||||
|
|
||||||
|
### 1. Dockerfile: Minimal image for the full stack
|
||||||
|
|
||||||
|
**Result:** Multi-stage Dockerfile created at `docker/Dockerfile`.
|
||||||
|
|
||||||
|
The image requires these runtime components:
|
||||||
|
- **Rust 1.90+ toolchain** (~1.5 GB) — needed at runtime for `rebuild_and_restart` and agent-driven `cargo clippy`, `cargo test`, etc.
|
||||||
|
- **Node.js 22.x** (~100 MB) — needed at runtime for Claude Code CLI (npm global package)
|
||||||
|
- **Claude Code CLI** (`@anthropic-ai/claude-code`) — npm global, spawned by storkit via PTY
|
||||||
|
- **cargo-nextest** — pre-built binary, used by acceptance gates
|
||||||
|
- **git** — used extensively by agents and worktree management
|
||||||
|
- **System libs:** libssl3, ca-certificates
|
||||||
|
|
||||||
|
The build stage compiles the storkit binary with embedded frontend assets (build.rs runs `npm run build`). The runtime stage is based on `debian:bookworm-slim` but still needs Rust + Node because agents use them at runtime.
|
||||||
|
|
||||||
|
**Total estimated image size:** ~3-4 GB (dominated by the Rust toolchain). This is large but acceptable for a development tool that runs locally.
|
||||||
|
|
||||||
|
### 2. Bind mounts and filesystem performance
|
||||||
|
|
||||||
|
**OrbStack** uses Apple's VirtioFS for bind mounts, which is near-native speed. This is a significant advantage over Docker Desktop's older options:
|
||||||
|
|
||||||
|
| Runtime | Bind mount driver | Performance | Notes |
|
||||||
|
|---------|------------------|-------------|-------|
|
||||||
|
| OrbStack | VirtioFS (native) | ~95% native | Default, no config needed |
|
||||||
|
| Docker Desktop | VirtioFS | ~85-90% native | Must enable in settings (Docker Desktop 4.15+) |
|
||||||
|
| Docker Desktop | gRPC-FUSE (legacy) | ~40-60% native | Default on older versions, very slow for cargo builds |
|
||||||
|
| Docker Desktop | osxfs (deprecated) | ~30-50% native | Ancient default, unusable for Rust projects |
|
||||||
|
|
||||||
|
**For cargo builds on bind-mounted volumes:** The critical path is `target/` directory I/O. Since `target/` lives inside the bind-mounted project, large Rust projects will see a noticeable slowdown on Docker Desktop with gRPC-FUSE. OrbStack's VirtioFS makes this tolerable.
|
||||||
|
|
||||||
|
**Mitigation option:** Keep `target/` in a named Docker volume instead of on the bind mount. This gives native Linux filesystem speed for compilation artifacts while the source code remains bind-mounted. The trade-off is that `target/` won't be visible on the host, which is fine since it's a build cache.
|
||||||
|
|
||||||
|
### 3. Claude Code state persistence
|
||||||
|
|
||||||
|
Claude Code stores all state in `~/.claude/`:
|
||||||
|
- `sessions/` — conversation transcripts (used by `--resume`)
|
||||||
|
- `projects/` — per-project settings and memory
|
||||||
|
- `history.jsonl` — command history
|
||||||
|
- `session-env/` — environment snapshots
|
||||||
|
- `settings.json` — global preferences
|
||||||
|
|
||||||
|
**Solution:** Mount `~/.claude` as a named Docker volume (`claude-state`). This persists across container restarts. Session resumption (`--resume <session_id>`) will work correctly since the session files are preserved.
|
||||||
|
|
||||||
|
### 4. Networking
|
||||||
|
|
||||||
|
**Straightforward.** The container exposes port 3001 for the web UI + MCP endpoint. All chat integrations (Matrix, Slack, WhatsApp) connect outbound from the container, which works by default in Docker's bridge networking. No special configuration needed.
|
||||||
|
|
||||||
|
Port mapping: `3001:3001` in docker-compose.yml. Users access the web UI at `http://localhost:3001`.
|
||||||
|
|
||||||
|
### 5. API key handling
|
||||||
|
|
||||||
|
**Simple.** Pass `ANTHROPIC_API_KEY` as an environment variable via docker-compose.yml. The storkit server already reads it from the environment. Claude Code also reads `ANTHROPIC_API_KEY` from the environment.
|
||||||
|
|
||||||
|
### 6. Git operations on bind-mounted repos
|
||||||
|
|
||||||
|
**Works correctly.** Git operations inside the container on a bind-mounted volume are immediately visible on the host (and vice versa). The key considerations:
|
||||||
|
|
||||||
|
- **Git config:** The container runs as root, so `git config --global user.name/email` needs to be set inside the container (or mounted from host). Without this, commits have no author identity.
|
||||||
|
- **File ownership:** OrbStack maps the container's root user to the host user automatically (uid remapping). Docker Desktop does not — files created by the container may be owned by root on the host. OrbStack handles this transparently.
|
||||||
|
- **Worktrees:** `git worktree add` inside the container creates worktrees within the bind-mounted repo, which are visible on the host. This is correct behavior.
|
||||||
|
|
||||||
|
### 7. Cargo cache
|
||||||
|
|
||||||
|
**Named Docker volumes** for `/usr/local/cargo/registry` and `/usr/local/cargo/git` persist downloaded crates across container restarts. First `cargo build` downloads everything; subsequent builds use the cached crates. This is a standard Docker pattern.
|
||||||
|
|
||||||
|
### 8. OrbStack vs Docker Desktop
|
||||||
|
|
||||||
|
| Capability | OrbStack | Docker Desktop |
|
||||||
|
|-----------|----------|----------------|
|
||||||
|
| **VirtioFS (fast mounts)** | Default, always on | Must enable manually |
|
||||||
|
| **UID remapping** | Automatic (root → host user) | Manual or not available |
|
||||||
|
| **Memory usage** | ~50% less than Docker Desktop | Higher baseline overhead |
|
||||||
|
| **Startup time** | 1-2 seconds | 10-30 seconds |
|
||||||
|
| **License** | Free for personal use, paid for teams | Free for personal/small business, paid for enterprise |
|
||||||
|
| **Linux compatibility** | Full (Rosetta for x86 on ARM) | Full (QEMU for x86 on ARM) |
|
||||||
|
|
||||||
|
**Verdict:** OrbStack is strongly recommended for macOS. Docker Desktop works but requires VirtioFS to be enabled manually and has worse file ownership semantics. On Linux hosts, Docker Engine (not Desktop) is native and has none of these issues.
|
||||||
|
|
||||||
|
### 9. rebuild_and_restart inside a container
|
||||||
|
|
||||||
|
**Works with caveats.** The current implementation:
|
||||||
|
1. Runs `cargo build` from `CARGO_MANIFEST_DIR` (baked at compile time to `/app/server`)
|
||||||
|
2. Calls `exec()` to replace the process with the new binary
|
||||||
|
|
||||||
|
Inside a container, `exec()` works fine — it replaces the PID 1 process. However:
|
||||||
|
- The source tree must exist at `/app` inside the container (the path baked into the binary)
|
||||||
|
- The Rust toolchain must be available at runtime
|
||||||
|
- If the container is configured with `restart: unless-stopped`, a crash during rebuild could cause a restart loop
|
||||||
|
|
||||||
|
**The Dockerfile handles this** by copying the full source tree into `/app` in the runtime stage and including the Rust toolchain.
|
||||||
|
|
||||||
|
**Future improvement:** For the storkit-developing-itself case, mount the source tree as a volume at `/app` so code changes on the host are immediately available for rebuild. For the primary use case (developing other projects), the baked-in source is fine — the server doesn't change.
|
||||||
|
|
||||||
|
### 10. Multi-user / untrusted codebase considerations
|
||||||
|
|
||||||
|
The single-container model provides **host isolation** but no **agent-to-agent isolation**:
|
||||||
|
- All agents share the same filesystem, network, and process namespace
|
||||||
|
- A malicious codebase could interfere with other agents or the storkit server itself
|
||||||
|
- This is acceptable as a first step since the primary threat model is "storkit shouldn't wreck the host"
|
||||||
|
|
||||||
|
For true multi-tenant isolation (multiple untrusted projects), a future architecture could take several steps (a compose sketch follows the list):
|
||||||
|
- Run one container per project (each with its own bind mount)
|
||||||
|
- Use Docker's `--read-only` with specific writable mounts
|
||||||
|
- Apply seccomp/AppArmor profiles to limit syscalls
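Putting the last two ideas together, a hardened per-project service might look like the sketch below; the service name, mount paths, and the exact set of paths that must stay writable are assumptions:

```yaml
services:
  storkit-project-a:
    image: storkit:latest
    read_only: true
    cap_drop:
      - ALL
    security_opt:
      - no-new-privileges:true
    tmpfs:
      - /tmp
    volumes:
      - /srv/project-a:/workspace          # this project's repo only
      - cargo-registry:/usr/local/cargo/registry

volumes:
  cargo-registry:
```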
|
||||||
|
|
||||||
|
### 11. Image distribution
|
||||||
|
|
||||||
|
The single-container approach enables simple distribution:
|
||||||
|
```
docker pull ghcr.io/crashlabs/storkit:latest
docker run -e ANTHROPIC_API_KEY=sk-ant-... -v /my/project:/workspace -p 3001:3001 ghcr.io/crashlabs/storkit:latest
```
|
||||||
|
|
||||||
|
This is a massive UX improvement over "install Rust, install Node, install Claude Code, clone the repo, cargo build, etc."
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
|
||||||
|
**Proceed with implementation.** The single-container Docker approach is viable and solves the stated goals:
|
||||||
|
|
||||||
|
1. **Host isolation** — achieved via standard Docker containerization
|
||||||
|
2. **Clean install/uninstall** — `docker compose up` / `docker compose down -v`
|
||||||
|
3. **Reproducible environment** — Dockerfile pins all versions
|
||||||
|
4. **Distributable product** — `docker pull` for new users
|
||||||
|
5. **Resource limits** — `deploy.resources.limits` in compose (see the fragment below)
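A fragment showing the resource-limit piece; the values are illustrative, not recommendations:

```yaml
services:
  storkit:
    deploy:
      resources:
        limits:
          cpus: "4"
          memory: 8g
```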
|
||||||
|
|
||||||
|
### Follow-up stories to create:
|
||||||
|
|
||||||
|
1. **Story: Implement Docker container build and CI** — Set up automated image builds, push to registry, test that the image works end-to-end with a sample project.
|
||||||
|
|
||||||
|
2. **Story: Target directory optimization** — Move `target/` to a named volume to avoid bind mount I/O overhead for cargo builds. Benchmark the improvement.
|
||||||
|
|
||||||
|
3. **Story: Git identity in container** — Configure git user.name/email inside the container (from env vars or mounted .gitconfig).
|
||||||
|
|
||||||
|
4. **Story: Per-project container isolation** — For multi-tenant deployments, run one storkit container per project with tighter security (read-only root, seccomp, no-new-privileges).
|
||||||
|
|
||||||
|
5. **Story: Health endpoint** — Add a `/health` HTTP endpoint to the storkit server for the Docker healthcheck.
|
||||||
|
|
||||||
|
### Risks and open questions:
|
||||||
|
|
||||||
|
- **Image size (~3-4 GB):** Acceptable for a dev tool but worth optimizing later. The Rust toolchain dominates.
|
||||||
|
- **Rust toolchain at runtime:** Required for rebuild_and_restart and agent cargo commands. Cannot be eliminated without changing the architecture.
|
||||||
|
- **Claude Code CLI updates:** The CLI version is pinned at image build time. Users need to rebuild the image to get updates. Could use a volume mount for the npm global dir to allow in-place updates.
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
---
|
---
|
||||||
name: "Abstract agent runtime to support non-Claude-Code backends"
|
name: "Abstract agent runtime to support non-Claude-Code backends"
|
||||||
|
agent: coder-opus
|
||||||
---
|
---
|
||||||
|
|
||||||
# Refactor 343: Abstract agent runtime to support non-Claude-Code backends
|
# Refactor 343: Abstract agent runtime to support non-Claude-Code backends
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
---
|
---
|
||||||
name: "ChatGPT agent backend via OpenAI API"
|
name: "ChatGPT agent backend via OpenAI API"
|
||||||
|
agent: coder-opus
|
||||||
---
|
---
|
||||||
|
|
||||||
# Story 344: ChatGPT agent backend via OpenAI API
|
# Story 344: ChatGPT agent backend via OpenAI API
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
name: "Start command should say queued not error when all coders are busy"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 356: Start command should say queued not error when all coders are busy
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a ..., I want ..., so that ...
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] When all coders are busy, 'start' command responds with a short queued message instead of an error
|
||||||
|
- [ ] Message tone is neutral/positive, not a failure message
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Bot assign command to pre-assign a model to a story"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 357: Bot assign command to pre-assign a model to a story
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user, I want to assign a specific model (e.g. opus) to a story before it starts, so that when a coder picks it up it uses the model I chose.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Bot recognizes `assign <number> <model>` command
|
||||||
|
- [ ] Assignment persists in the story file so it's used when the story starts
|
||||||
|
- [ ] Command appears in help output
|
||||||
|
- [ ] Works with available model names (e.g. opus, sonnet)
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Remove Makefile and make script/release the single entry point for releases"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 358: Remove Makefile and make script/release the single entry point for releases
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a ..., I want ..., so that ...
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Makefile is deleted
|
||||||
|
- [ ] script/release requires a version argument and prints usage if missing
|
||||||
|
- [ ] script/release still builds macOS and Linux binaries, bumps versions, generates changelog, tags, and publishes to Gitea
|
||||||
|
- [ ] No dependency on make
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
---
|
||||||
|
name: "Harden Docker setup for security"
|
||||||
|
retry_count: 3
|
||||||
|
blocked: true
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 359: Harden Docker setup for security
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a storkit operator, I want the Docker container to run with hardened security settings, so that a compromised agent or malicious codebase cannot escape the container or affect the host.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Container runs as a non-root user
|
||||||
|
- [ ] Root filesystem is read-only with only necessary paths writable (e.g. /tmp, cargo cache, claude state volumes)
|
||||||
|
- [ ] Linux capabilities dropped to minimum required (cap_drop: ALL, add back only what's needed)
|
||||||
|
- [ ] no-new-privileges flag is set
|
||||||
|
- [ ] Resource limits (CPU and memory) are configured in docker-compose.yml
|
||||||
|
- [ ] Outbound network access is restricted where possible
|
||||||
|
- [ ] ANTHROPIC_API_KEY is passed via Docker secrets or .env file, not hardcoded in compose
|
||||||
|
- [ ] Image passes a CVE scan with no critical vulnerabilities
|
||||||
|
- [ ] Port binding uses 127.0.0.1 instead of 0.0.0.0 (e.g. "127.0.0.1:3001:3001") so the web UI is not exposed on all interfaces
|
||||||
|
- [ ] Git identity is configured via explicit GIT_USER_NAME and GIT_USER_EMAIL env vars; container fails loudly on startup if either is missing (note: multi-user/distributed case where different users need different identities is out of scope and will require a different solution)
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
---
|
||||||
|
name: "Run storkit container under gVisor (runsc) runtime"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 360: Run storkit container under gVisor (runsc) runtime
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a storkit operator, I want the container to run under gVisor so that even if a malicious codebase escapes the container's process namespace, it cannot make raw syscalls to the host kernel.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] docker-compose.yml specifies runtime: runsc
|
||||||
|
- [ ] PTY-based agent spawning (Claude Code via PTY) works correctly under runsc
|
||||||
|
- [ ] rebuild_and_restart (exec() replacement) works correctly under runsc
|
||||||
|
- [ ] Rust compilation inside the container completes successfully under runsc
|
||||||
|
- [ ] Document host setup requirement: runsc must be installed and registered in /etc/docker/daemon.json
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Remove deprecated manual_qa front matter field"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 361: Remove deprecated manual_qa front matter field
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a developer, I want the deprecated manual_qa boolean field removed from the codebase, so that the front matter schema stays clean and doesn't accumulate legacy boolean flags alongside the more expressive qa: server|agent|human field that replaced it.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] manual_qa field is removed from the FrontMatter and StoryMetadata structs in story_metadata.rs
|
||||||
|
- [ ] Legacy mapping from manual_qa: true → qa: human is removed
|
||||||
|
- [ ] Any existing story files using manual_qa are migrated to qa: human
|
||||||
|
- [ ] Codebase compiles cleanly with no references to manual_qa remaining
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
---
|
||||||
|
name: "Bot whatsup command shows in-progress work summary"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 362: Bot whatsup command shows in-progress work summary
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a project owner in a Matrix room, I want to type "{bot_name} whatsup {story_number}" and see a full triage dump for that story, so that when something goes wrong I can immediately understand its state — blocked status, agent activity, git changes, and log tail — without hunting across multiple places or asking the bot to investigate.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] '{bot_name} whatsup {number}' finds the story in work/2_current/ by story number
|
||||||
|
- [ ] Shows the story number, name, and current pipeline stage
|
||||||
|
- [ ] Shows relevant front matter fields: blocked, agent, and any other non-empty fields
|
||||||
|
- [ ] Shows which Acceptance Criteria are checked vs unchecked
|
||||||
|
- [ ] Shows active branch and worktree path if one exists
|
||||||
|
- [ ] Shows git diff --stat of changes on the branch since branching from master
|
||||||
|
- [ ] Shows last 5 commit messages on the feature branch (not master)
|
||||||
|
- [ ] Shows the last 20 lines of the agent log for this story (if a log exists)
|
||||||
|
- [ ] Returns a friendly message if the story is not found or not currently in progress
|
||||||
|
- [ ] Registered in the command registry so it appears in help output
|
||||||
|
- [ ] Handled at bot level without LLM invocation — uses git, filesystem, and log files only
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- Interpreting or summarising log output with an LLM
|
||||||
|
- Showing logs from previous agent runs (only the current/most recent)
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
name: "MCP tool for whatsup story triage"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 363: MCP tool for whatsup story triage
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As an LLM assistant, I want to call a single MCP tool to get a full triage dump for an in-progress story, so that I can answer status questions quickly without making 8+ separate calls to piece together the picture myself.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] 'whatsup' MCP tool accepts a story_id parameter
|
||||||
|
- [ ] Returns story front matter fields (name, blocked, agent, and any other non-empty fields)
|
||||||
|
- [ ] Returns AC checklist with checked/unchecked status
|
||||||
|
- [ ] Returns active branch and worktree path if one exists
|
||||||
|
- [ ] Returns git diff --stat of changes on the feature branch since branching from master
|
||||||
|
- [ ] Returns last 5 commit messages on the feature branch
|
||||||
|
- [ ] Returns last 20 lines of the most recent agent log for the story
|
||||||
|
- [ ] Returns a clear error if the story is not found or not in work/2_current/
|
||||||
|
- [ ] Registered and discoverable via the MCP tools/list endpoint
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
---
|
||||||
|
name: "Surface API rate limit warnings in chat"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 365: Surface API rate limit warnings in chat
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a project owner watching the chat, I want to see rate limit warnings surfaced directly in the conversation when they appear in the agent's PTY output, so that I know immediately when an agent is being throttled without having to watch server logs.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [x] Server detects rate limit warnings in pty-debug output lines
|
||||||
|
- [x] When a rate limit warning is detected, a notification is sent to the active chat (Matrix/Slack/WhatsApp)
|
||||||
|
- [x] The notification includes which agent/story triggered the rate limit
|
||||||
|
- [x] Rate limit notifications are debounced to avoid spamming the chat with repeated warnings
|
||||||
|
|
||||||
|
## Technical Context
|
||||||
|
|
||||||
|
Claude Code emits `rate_limit_event` JSON in its streaming output:
|
||||||
|
|
||||||
|
```json
{
  "type": "rate_limit_event",
  "rate_limit_info": {
    "status": "allowed_warning",
    "resetsAt": 1774443600,
    "rateLimitType": "seven_day",
    "utilization": 0.82,
    "isUsingOverage": false,
    "surpassedThreshold": 0.75
  }
}
```
|
||||||
|
|
||||||
|
Key fields:
|
||||||
|
- `status`: `"allowed_warning"` when approaching limit, likely `"blocked"` or similar when hard-limited
|
||||||
|
- `rateLimitType`: e.g. `"seven_day"` rolling window
|
||||||
|
- `utilization`: 0.0–1.0 fraction of limit consumed
|
||||||
|
- `resetsAt`: Unix timestamp when the window resets
|
||||||
|
- `surpassedThreshold`: the threshold that triggered the warning (e.g. 0.75 = 75%)
|
||||||
|
|
||||||
|
These events are already logged as `[pty-debug] raw line:` in the server logs. The PTY reader in `server/src/llm/providers/claude_code.rs` (line ~234) sees them but doesn't currently parse or act on them.
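For illustration, a hedged sketch of how such lines could be detected and turned into a human-readable warning. The struct and helper names are hypothetical, only the fields shown in the JSON above are modeled, and the caller is assumed to pass the JSON portion of the PTY line:

```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct RateLimitInfo {
    status: String,
    #[serde(rename = "rateLimitType")]
    rate_limit_type: String,
    utilization: f64,
    #[serde(rename = "resetsAt")]
    resets_at: i64,
}

#[derive(Deserialize)]
struct RateLimitEvent {
    #[serde(rename = "type")]
    kind: String,
    rate_limit_info: RateLimitInfo,
}

// Returns a warning message if the line is a rate_limit_event, otherwise None.
// Non-JSON lines and other event types are silently ignored.
fn check_line_for_rate_limit(line: &str) -> Option<String> {
    let event: RateLimitEvent = serde_json::from_str(line).ok()?;
    if event.kind != "rate_limit_event" {
        return None;
    }
    let info = event.rate_limit_info;
    Some(format!(
        "Rate limit warning ({}): {:.0}% of the {} window used, resets at {}",
        info.status,
        info.utilization * 100.0,
        info.rate_limit_type,
        info.resets_at
    ))
}
```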
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
|
|
||||||
|
## Test Results
|
||||||
|
|
||||||
|
<!-- storkit-test-results: {"unit":[{"name":"rate_limit_event_json_sends_watcher_warning","status":"pass","details":"PTY reader detects rate_limit_event JSON and emits RateLimitWarning watcher event"},{"name":"rate_limit_warning_sends_notification_with_agent_and_story","status":"pass","details":"Notification listener sends chat message with agent and story info"},{"name":"rate_limit_warning_is_debounced","status":"pass","details":"Second warning within 60s window is suppressed"},{"name":"rate_limit_warnings_for_different_agents_both_notify","status":"pass","details":"Different agents are debounced independently"},{"name":"format_rate_limit_notification_includes_agent_and_story","status":"pass","details":"Notification text includes story number, name, and agent name"},{"name":"format_rate_limit_notification_falls_back_to_item_id","status":"pass","details":"Falls back to item_id when story name is unavailable"}],"integration":[]} -->
|
||||||
|
|
||||||
|
### Unit Tests (6 passed, 0 failed)
|
||||||
|
|
||||||
|
- ✅ rate_limit_event_json_sends_watcher_warning — PTY reader detects rate_limit_event JSON and emits RateLimitWarning watcher event
|
||||||
|
- ✅ rate_limit_warning_sends_notification_with_agent_and_story — Notification listener sends chat message with agent and story info
|
||||||
|
- ✅ rate_limit_warning_is_debounced — Second warning within 60s window is suppressed
|
||||||
|
- ✅ rate_limit_warnings_for_different_agents_both_notify — Different agents are debounced independently
|
||||||
|
- ✅ format_rate_limit_notification_includes_agent_and_story — Notification text includes story number, name, and agent name
|
||||||
|
- ✅ format_rate_limit_notification_falls_back_to_item_id — Falls back to item_id when story name is unavailable
|
||||||
|
|
||||||
|
### Integration Tests (0 passed, 0 failed)
|
||||||
|
|
||||||
|
*No integration tests recorded.*
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Bot sends shutdown message on server stop or rebuild"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 366: Bot sends shutdown message on server stop or rebuild
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a project owner in a chat room, I want the bot to send a message when the server is shutting down (via ctrl-c or rebuild_and_restart), so that I know the bot is going offline and won't wonder why it stopped responding.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Bot sends a shutdown message to active chat channels when the server receives SIGINT/SIGTERM (ctrl-c)
|
||||||
|
- [ ] Bot sends a shutdown message before rebuild_and_restart kills the current process
|
||||||
|
- [ ] Message indicates the reason (manual stop vs rebuild)
|
||||||
|
- [ ] Message is sent best-effort — shutdown is not blocked if the message fails to send
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Rename bot whatsup command to status"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 367: Rename bot whatsup command to status
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a project owner using the bot from a phone, I want to type "status {number}" instead of "whatsup {number}" to get a story triage dump, because "whatsup" gets autocorrected to "WhatsApp" on mobile keyboards.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] '{bot_name} status {number}' returns the same triage dump that 'whatsup' currently returns
|
||||||
|
- [ ] The 'whatsup' command is removed or aliased to 'status'
|
||||||
|
- [ ] Help output shows 'status' as the command name
|
||||||
|
- [ ] The MCP tool name (whatsup) is unaffected — this only changes the bot command
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
name: "Web UI OAuth flow for Claude authentication"
|
||||||
|
agent: "coder-opus"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 368: Web UI OAuth flow for Claude authentication
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a new user running storkit in Docker, I want to authenticate Claude through the web UI instead of running `claude login` in a terminal inside the container, so that the entire setup experience stays in the browser after `docker compose up`.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Backend exposes /auth/start endpoint that generates the Claude OAuth URL with redirect_uri pointing to localhost:3001
|
||||||
|
- [ ] Backend exposes /auth/callback endpoint that receives the OAuth token and stores it where Claude Code expects it
|
||||||
|
- [ ] Backend exposes /auth/status endpoint that reports whether valid Claude credentials exist
|
||||||
|
- [ ] Frontend shows a setup screen when no Claude auth is detected on first visit
|
||||||
|
- [ ] Setup screen has a 'Connect Claude Account' button that initiates the OAuth flow
|
||||||
|
- [ ] OAuth redirect returns to the web UI which confirms success and dismisses the setup screen
|
||||||
|
- [ ] Credentials are persisted in the claude-state Docker volume so they survive container restarts
|
||||||
|
- [ ] The entire flow works without any terminal interaction after docker compose up
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
---
|
||||||
|
name: "CLI treats --help and --version as project paths"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 369: CLI treats --help and --version as project paths
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
When running `storkit <anything>`, the binary treats the first argument as a project path, creates a directory for it, and scaffolds `.storkit/` inside. This happens for `--help`, `--version`, `serve`, `x`, or any other string. There is no validation that the argument is an existing directory or a reasonable path before creating it.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Run `storkit --help` or `storkit serve` or `storkit x` in any directory
|
||||||
|
2. Observe that a directory with that name is created with a full `.storkit/` scaffold inside it
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
Any argument is treated as a project path and a directory is created and scaffolded. No flags are recognised.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
- `storkit --help` prints usage info and exits
|
||||||
|
- `storkit --version` prints the version and exits
|
||||||
|
- `storkit <path>` only works if the path already exists as a directory
|
||||||
|
- If the path does not exist, storkit prints a clear error and exits non-zero
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] storkit --help prints usage information and exits with code 0
|
||||||
|
- [ ] storkit --version prints the version and exits with code 0
|
||||||
|
- [ ] storkit -h and storkit -V work as short aliases
|
||||||
|
- [ ] storkit does not create directories for any argument — the path must already exist
|
||||||
|
- [ ] If the path does not exist, storkit prints a clear error and exits non-zero
|
||||||
|
- [ ] Arguments starting with - that are not recognised produce a clear error message
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
---
|
||||||
|
name: "Scaffold does not create .mcp.json in project root"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 370: Scaffold does not create .mcp.json in project root
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Two related problems with project setup:
|
||||||
|
|
||||||
|
1. When the user clicks the "project setup" button in the web UI to open a new project, the scaffold does not reliably run — the `.storkit/` directory and associated files may not be created.
|
||||||
|
2. Even when the scaffold does run, it does not write `.mcp.json` to the project root. Without this file, agents spawned in worktrees cannot find the MCP server, causing `--permission-prompt-tool mcp__storkit__prompt_permission not found` errors and agent failures.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Open the storkit web UI and use the project setup button to open a new project directory
|
||||||
|
2. Check whether the full scaffold was created (`.storkit/`, `CLAUDE.md`, `script/test`, etc.)
|
||||||
|
3. Check the project root for `.mcp.json`
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
The scaffold may not run when using the UI project setup flow. When it does run, `.mcp.json` is not created in the project root. Agents fail because MCP tools are unavailable.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
Clicking the project setup button reliably runs the full scaffold, including `.mcp.json` pointing to the server's port.
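For illustration only, a hypothetical `.mcp.json` of the kind the scaffold might write. The exact schema, transport type, and endpoint path depend on how the storkit MCP server is exposed, so every field here is an assumption:

```json
{
  "mcpServers": {
    "storkit": {
      "type": "http",
      "url": "http://localhost:3001/mcp"
    }
  }
}
```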
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] The web UI project setup button triggers the full scaffold for new projects
|
||||||
|
- [ ] scaffold_story_kit writes .mcp.json to the project root with the server's port
|
||||||
|
- [ ] Existing .mcp.json is not overwritten if already present
|
||||||
|
- [ ] .mcp.json is included in .gitignore since the port is environment-specific
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
---
|
||||||
|
name: "No-arg storkit in empty directory skips scaffold"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 371: No-arg storkit in empty directory skips scaffold
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
When running `storkit` with no path argument from an empty directory (no `.storkit/`), the server starts but never calls `open_project` or the scaffold. The `find_story_kit_root` check fails to find `.storkit/`, so the fallback at main.rs:179-186 just sets `project_root = cwd` without scaffolding. This means no `.storkit/`, no `project.toml`, no `.mcp.json`, no `CLAUDE.md` — the project is non-functional.
|
||||||
|
|
||||||
|
The explicit path branch (`storkit .`) works correctly because it calls `open_project` → `ensure_project_root_with_story_kit` → `scaffold_story_kit`. The no-arg branch should do the same.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Create a new empty directory
|
||||||
|
2. cd into it
|
||||||
|
3. Run `storkit` (no path argument)
|
||||||
|
4. Observe that no scaffold is created — `.storkit/`, `CLAUDE.md`, `.mcp.json`, etc. are all missing
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
Server starts with project_root set to cwd but no scaffold runs. The project is non-functional — no agent config, no MCP endpoint, no work pipeline directories.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
Running `storkit` with no arguments from a directory without `.storkit/` should scaffold the project the same as `storkit .` does — calling `open_project` and triggering `ensure_project_root_with_story_kit`.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Running `storkit` with no args from a dir without `.storkit/` calls `open_project` and triggers the full scaffold
|
||||||
|
- [ ] The no-arg fallback path in main.rs calls `open_project(cwd)` instead of just setting project_root directly
|
||||||
|
- [ ] After `storkit` completes startup, `.storkit/project.toml`, `.mcp.json`, `CLAUDE.md`, and `script/test` all exist
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
---
|
||||||
|
name: "Scaffold auto-detects tech stack and configures script/test"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 372: Scaffold auto-detects tech stack and configures script/test
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user setting up a new project with storkit, I want the scaffold to detect my project's tech stack and generate a working `script/test` automatically, so that agents can run tests immediately without manual configuration.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Scaffold detects Go projects (go.mod) and adds `go test ./...` to script/test
|
||||||
|
- [ ] Scaffold detects Node.js projects (package.json) and adds `npm test` to script/test
|
||||||
|
- [ ] Scaffold detects Rust projects (Cargo.toml) and adds `cargo test` to script/test
|
||||||
|
- [ ] Scaffold detects Python projects (pyproject.toml or requirements.txt) and adds `pytest` to script/test
|
||||||
|
- [ ] Scaffold handles multi-stack projects (e.g. Go + Next.js) by combining the relevant test commands
|
||||||
|
- [ ] project.toml component entries are generated to match detected tech stack
|
||||||
|
- [ ] Falls back to the generic 'No tests configured' stub if no known stack is detected
|
||||||
|
- [ ] Coder agent prompt includes instruction to configure `script/test` for the project's test framework if it still contains the generic stub
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
---
|
||||||
|
name: "Scaffold gitignore missing transient pipeline stage directories"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 373: Scaffold gitignore missing transient pipeline stage directories
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
The `write_story_kit_gitignore` function in `server/src/io/fs.rs` does not include the transient pipeline stages (`work/2_current/`, `work/3_qa/`, `work/4_merge/`) in the `.storkit/.gitignore` entries list. These stages are not committed to git (only `1_backlog`, `5_done`, and `6_archived` are commit-worthy per spike 92), so they should be ignored for new projects.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Scaffold a new project with storkit
|
||||||
|
2. Check `.storkit/.gitignore`
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
`.storkit/.gitignore` only contains `bot.toml`, `matrix_store/`, `matrix_device_id`, `worktrees/`, `merge_workspace/`, `coverage/`. The transient pipeline directories are missing.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
`.storkit/.gitignore` also includes `work/2_current/`, `work/3_qa/`, `work/4_merge/`.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Scaffold writes work/2_current/, work/3_qa/, work/4_merge/ to .storkit/.gitignore
|
||||||
|
- [ ] Idempotent — running scaffold again does not duplicate entries
|
||||||
|
- [ ] Existing .storkit/.gitignore files get the new entries appended on next scaffold run
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
---
|
||||||
|
name: "Web UI implements all bot commands as slash commands"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 374: Web UI implements all bot commands as slash commands
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user working in the storkit web UI, I want to type slash commands (e.g. `/status`, `/start 42`, `/cost`) in the chat input to trigger the same deterministic bot commands available in Matrix, so that I can manage my project entirely from the browser without needing a chat bot.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] /status — shows pipeline status and agent availability; /status <number> shows story triage dump
|
||||||
|
- [ ] /assign <number> <model> — pre-assign a model to a story
|
||||||
|
- [ ] /start <number> — start a coder on a story; /start <number> opus for specific model
|
||||||
|
- [ ] /show <number> — display full text of a work item
|
||||||
|
- [ ] /move <number> <stage> — move a work item to a pipeline stage
|
||||||
|
- [ ] /delete <number> — remove a work item from the pipeline
|
||||||
|
- [ ] /cost — show token spend (24h total, top stories, by agent type, all-time)
|
||||||
|
- [ ] /git — show git status (branch, uncommitted changes, ahead/behind)
|
||||||
|
- [ ] /overview <number> — show implementation summary for a merged story
|
||||||
|
- [ ] /rebuild — rebuild the server binary and restart
|
||||||
|
- [ ] /reset — clear the current Claude Code session
|
||||||
|
- [ ] /help — list all available slash commands
|
||||||
|
- [ ] Slash commands are handled at the frontend/backend level without LLM invocation
|
||||||
|
- [ ] Unrecognised slash commands show a helpful error message
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
---
|
||||||
|
name: "Default project.toml contains Rust-specific setup commands for non-Rust projects"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 375: Default project.toml contains Rust-specific setup commands for non-Rust projects
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
When scaffolding a new project where no tech stack is detected, the generated `project.toml` contains Rust-specific setup commands (`cargo check`) as example fallback components. This causes coder agents to try to satisfy Rust gates on non-Rust projects.
|
||||||
|
|
||||||
|
## Fix
|
||||||
|
|
||||||
|
1. In `detect_components_toml()` fallback (when no stack markers found): replace the Rust/pnpm example components with a single generic `app` component with empty `setup = []`
|
||||||
|
2. In the onboarding prompt Step 4: simplify to configure `[[component]]` entries based on what the user told the LLM in Step 2 (tech stack), rather than re-scanning the filesystem independently
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Create a new Go + Next.js project directory with `go.mod` and `package.json`
|
||||||
|
2. Run `storkit .` to scaffold
|
||||||
|
3. Check `.storkit/project.toml` — the component setup commands reference cargo/Rust
|
||||||
|
4. Start a coder agent — it creates a `Cargo.toml` trying to satisfy the Rust setup commands
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
The scaffolded `project.toml` has Rust-specific setup commands (`cargo check`) even for non-Rust projects. Agents try to satisfy these and create spurious files.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
The scaffolded `project.toml` should have generic or stack-appropriate setup commands. If no known stack is detected, setup commands should be empty or minimal (not Rust-specific).
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] Default project.toml does not contain language-specific setup commands when that language is not detected in the project
|
||||||
|
- [ ] If go.mod is present, setup commands use Go tooling
|
||||||
|
- [ ] If package.json is present, setup commands use npm/node tooling
|
||||||
|
- [ ] If no known stack is detected, setup commands are empty or just echo a placeholder
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
---
|
||||||
|
name: "Rename MCP whatsup tool to status for consistency"
|
||||||
|
agent: coder-opus
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 376: Rename MCP whatsup tool to status for consistency
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a developer using storkit's MCP tools, I want the MCP tool to be called `status` instead of `whatsup`, so that the naming is consistent between the bot command (`status`), the web UI slash command (`/status`), and the MCP tool.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] MCP tool is renamed from 'whatsup' to 'status'
|
||||||
|
- [ ] MCP tool is discoverable as 'status' via tools/list
|
||||||
|
- [ ] The tool still accepts a story_id parameter and returns the same triage data
|
||||||
|
- [ ] Old 'whatsup' tool name is removed from the MCP registry
|
||||||
|
- [ ] Any internal references to the whatsup tool name are updated
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
---
|
||||||
|
name: "update_story MCP tool writes front matter values as YAML strings instead of native types"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 377: update_story MCP tool writes front matter values as YAML strings instead of native types
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
The `update_story` MCP tool accepts `front_matter` as a `Map<String, String>`, so all values are written as quoted YAML strings. Fields like `retry_count` (expected `u32`) and `blocked` (expected `bool`) end up as `"0"` and `"false"` in the YAML. This causes `parse_front_matter()` to fail because serde_yaml cannot deserialize a quoted string into `u32` or `bool`. When parsing fails, the story `name` comes back as `None`, so the status command shows no title for the story.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Call `update_story` with `front_matter: {"blocked": "false", "retry_count": "0"}`
|
||||||
|
2. Read the story file — front matter contains `blocked: "false"` and `retry_count: "0"` (quoted strings)
|
||||||
|
3. Call `get_pipeline_status` or the bot `status` command
|
||||||
|
4. The story shows with no title/name
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
Front matter values are written as quoted YAML strings. `parse_front_matter()` fails to deserialize `"false"` as `bool` and `"0"` as `u32`, returning an error. The story name is lost and the status command shows no title.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
The `update_story` tool should write `blocked` and `retry_count` as native YAML types (unquoted `false` and `0`), or `parse_front_matter()` should accept both string and native representations. The story name should always be displayed correctly in the status command.
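A hedged sketch of the first option: coerce known fields to native YAML scalars before serializing the front matter. The helper name is hypothetical, and which keys need coercion beyond these two is an assumption:

```rust
use serde_yaml::Value;

// Map incoming string values from the MCP call to the native YAML types that
// parse_front_matter() expects (bool / unsigned int). Unknown keys stay as
// strings, and unparseable values fall back to the raw string.
fn coerce_front_matter_value(key: &str, raw: &str) -> Value {
    match key {
        "blocked" => raw
            .parse::<bool>()
            .map(Value::Bool)
            .unwrap_or_else(|_| Value::String(raw.to_string())),
        "retry_count" => raw
            .parse::<u64>()
            .map(|n| Value::Number(n.into()))
            .unwrap_or_else(|_| Value::String(raw.to_string())),
        _ => Value::String(raw.to_string()),
    }
}
```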
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] update_story with front_matter {"blocked": "false"} writes `blocked: false` (unquoted) in the YAML
|
||||||
|
- [ ] update_story with front_matter {"retry_count": "0"} writes `retry_count: 0` (unquoted) in the YAML
|
||||||
|
- [ ] Story name is displayed correctly in the status command after update_story modifies front matter fields
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Status command shows work item type (story, bug, spike, refactor) next to each item"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 378: Status command shows work item type (story, bug, spike, refactor) next to each item
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user viewing the pipeline status, I want to see the type of each work item (story, bug, spike, refactor) so that I can quickly understand what kind of work is in progress without having to open individual files.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] The status command displays the work item type (story, bug, spike, refactor) as a label next to each item — e.g. "375 [bug] — Default project.toml contains Rust-specific setup commands"
|
||||||
|
- [ ] The type is extracted from the story_id filename convention ({id}_{type}_{slug}) (a parsing sketch follows this list)
|
||||||
|
- [ ] All known types are supported: story, bug, spike, refactor
|
||||||
|
- [ ] Unknown or missing types are omitted gracefully (no crash, no placeholder)
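For illustration, a minimal parsing sketch for the filename convention; the helper name is hypothetical:

```rust
// Derive the work item type from a filename like "375_bug_default_project_toml.md".
// Unknown or missing types return None so the caller can simply omit the label.
fn item_type_from_filename(filename: &str) -> Option<&'static str> {
    match filename.split('_').nth(1)? {
        "story" => Some("story"),
        "bug" => Some("bug"),
        "spike" => Some("spike"),
        "refactor" => Some("refactor"),
        _ => None,
    }
}
```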
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
---
|
||||||
|
name: "start_agent ignores story front matter agent assignment"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bug 379: start_agent ignores story front matter agent assignment
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
When a model is pre-assigned to a story via the `assign` command (which writes `agent: coder-opus` to the story's YAML front matter), the MCP `start_agent` tool ignores this field. It only looks at the `agent_name` argument passed directly in the tool call. If none is passed, it auto-selects the first idle coder (usually sonnet), bypassing the user's assignment.
|
||||||
|
|
||||||
|
The auto-assign pipeline (`auto_assign.rs`) correctly reads and respects the front matter `agent` field, but the direct `tool_start_agent` path in `agent_tools.rs` does not.
|
||||||
|
|
||||||
|
Additionally, the `show` (whatsup/triage) command should display the assigned agent from the story's front matter so users can verify their assignment took effect.
|
||||||
|
|
||||||
|
## How to Reproduce
|
||||||
|
|
||||||
|
1. Run `assign 368 opus` — this writes `agent: coder-opus` to story 368's front matter
|
||||||
|
2. Run `start 368` (without specifying a model)
|
||||||
|
3. Observe that a sonnet coder is assigned, not coder-opus
|
||||||
|
4. Run `show 368` — the assigned agent is not displayed
|
||||||
|
|
||||||
|
## Actual Result
|
||||||
|
|
||||||
|
The `start_agent` MCP tool ignores the `agent` field in the story's front matter and picks the first idle coder. The `show` command does not display the pre-assigned agent.
|
||||||
|
|
||||||
|
## Expected Result
|
||||||
|
|
||||||
|
When no explicit `agent_name` is passed to `start_agent`, it should read the story's front matter `agent` field and use that agent if it's available. The `show` command should display the assigned agent from front matter.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] start_agent without an explicit agent_name reads the story's front matter `agent` field and uses it if the agent is idle
|
||||||
|
- [ ] If the preferred agent from front matter is busy, start_agent either waits or falls back to auto-selection (matching auto_assign behavior)
|
||||||
|
- [ ] The show/triage command displays the assigned agent from story front matter when present
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Assign command restarts coder when story is already in progress"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 380: Assign command restarts coder when story is already in progress
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user, I want `assign X opus` on a running story to stop the current coder, update the front matter, and start the newly assigned agent, so that I can switch models mid-flight without manually stopping and restarting.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] When assign is called on a story with a running coder, the current coder agent is stopped
|
||||||
|
- [ ] The story's front matter `agent` field is updated to the new agent name
|
||||||
|
- [ ] The newly assigned agent is started on the story automatically
|
||||||
|
- [ ] When assign is called on a story with no running coder, it behaves as before (just updates front matter)
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: "Bot command to delete a worktree"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Story 381: Bot command to delete a worktree
|
||||||
|
|
||||||
|
## User Story
|
||||||
|
|
||||||
|
As a user, I want a bot command to delete a worktree so that I can clean up orphaned or unwanted worktrees without SSHing into the server.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [ ] A new bot command (e.g. `rmtree <story_number>`) deletes the worktree for the given story
|
||||||
|
- [ ] The command stops any running agent on that story before removing the worktree
|
||||||
|
- [ ] The command returns a confirmation message on success
|
||||||
|
- [ ] The command returns a helpful error if no worktree exists for the given story
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
|
||||||
|
- TBD
|
||||||
81
Cargo.lock
generated
@@ -1774,9 +1774,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "iri-string"
|
name = "iri-string"
|
||||||
version = "0.7.10"
|
version = "0.7.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
|
checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -1815,7 +1815,7 @@ dependencies = [
|
|||||||
"cesu8",
|
"cesu8",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"combine",
|
"combine",
|
||||||
"jni-sys",
|
"jni-sys 0.3.1",
|
||||||
"log",
|
"log",
|
||||||
"thiserror 1.0.69",
|
"thiserror 1.0.69",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
@@ -1824,9 +1824,31 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jni-sys"
|
name = "jni-sys"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
|
checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258"
|
||||||
|
dependencies = [
|
||||||
|
"jni-sys 0.4.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jni-sys"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2"
|
||||||
|
dependencies = [
|
||||||
|
"jni-sys-macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jni-sys-macros"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jobserver"
|
name = "jobserver"
|
||||||
@@ -1932,9 +1954,9 @@ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libredox"
|
name = "libredox"
|
||||||
version = "0.1.14"
|
version = "0.1.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
|
checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"libc",
|
"libc",
|
||||||
@@ -2948,9 +2970,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pulldown-cmark"
|
name = "pulldown-cmark"
|
||||||
version = "0.13.1"
|
version = "0.13.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6"
|
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"memchr",
|
"memchr",
|
||||||
@@ -3252,6 +3274,7 @@ dependencies = [
|
|||||||
"rustls-platform-verifier",
|
"rustls-platform-verifier",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"serde_urlencoded",
|
||||||
"sync_wrapper",
|
"sync_wrapper",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
@@ -3625,9 +3648,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustls-webpki"
|
name = "rustls-webpki"
|
||||||
version = "0.103.9"
|
version = "0.103.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
|
checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-lc-rs",
|
"aws-lc-rs",
|
||||||
"ring",
|
"ring",
|
||||||
@@ -3801,9 +3824,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_spanned"
|
name = "serde_spanned"
|
||||||
version = "1.0.4"
|
version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
|
checksum = "876ac351060d4f882bb1032b6369eb0aef79ad9df1ea8bc404874d8cc3d0cd98"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
@@ -3994,7 +4017,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "storkit"
|
name = "storkit"
|
||||||
version = "0.4.1"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@@ -4024,7 +4047,7 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-tungstenite 0.29.0",
|
"tokio-tungstenite 0.29.0",
|
||||||
"toml 1.0.7+spec-1.1.0",
|
"toml 1.1.0+spec-1.1.0",
|
||||||
"uuid",
|
"uuid",
|
||||||
"wait-timeout",
|
"wait-timeout",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
@@ -4371,14 +4394,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml"
|
name = "toml"
|
||||||
version = "1.0.7+spec-1.1.0"
|
version = "1.1.0+spec-1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dd28d57d8a6f6e458bc0b8784f8fdcc4b99a437936056fa122cb234f18656a96"
|
checksum = "f8195ca05e4eb728f4ba94f3e3291661320af739c4e43779cbdfae82ab239fcc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"serde_core",
|
"serde_core",
|
||||||
"serde_spanned",
|
"serde_spanned",
|
||||||
"toml_datetime 1.0.1+spec-1.1.0",
|
"toml_datetime 1.1.0+spec-1.1.0",
|
||||||
"toml_parser",
|
"toml_parser",
|
||||||
"toml_writer",
|
"toml_writer",
|
||||||
"winnow 1.0.0",
|
"winnow 1.0.0",
|
||||||
@@ -4395,39 +4418,39 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_datetime"
|
name = "toml_datetime"
|
||||||
version = "1.0.1+spec-1.1.0"
|
version = "1.1.0+spec-1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9"
|
checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_edit"
|
name = "toml_edit"
|
||||||
version = "0.25.5+spec-1.1.0"
|
version = "0.25.8+spec-1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1"
|
checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"toml_datetime 1.0.1+spec-1.1.0",
|
"toml_datetime 1.1.0+spec-1.1.0",
|
||||||
"toml_parser",
|
"toml_parser",
|
||||||
"winnow 1.0.0",
|
"winnow 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_parser"
|
name = "toml_parser"
|
||||||
version = "1.0.10+spec-1.1.0"
|
version = "1.1.0+spec-1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420"
|
checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"winnow 1.0.0",
|
"winnow 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_writer"
|
name = "toml_writer"
|
||||||
version = "1.0.7+spec-1.1.0"
|
version = "1.1.0+spec-1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f17aaa1c6e3dc22b1da4b6bba97d066e354c7945cac2f7852d4e4e7ca7a6b56d"
|
checksum = "d282ade6016312faf3e41e57ebbba0c073e4056dab1232ab1cb624199648f8ed"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tower"
|
name = "tower"
|
||||||
@@ -4638,9 +4661,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
version = "1.12.0"
|
version = "1.13.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-xid"
|
name = "unicode-xid"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ serde_yaml = "0.9"
|
|||||||
strip-ansi-escapes = "0.2"
|
strip-ansi-escapes = "0.2"
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
|
tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
|
||||||
toml = "1.0.7"
|
toml = "1.1.0"
|
||||||
uuid = { version = "1.22.0", features = ["v4", "serde"] }
|
uuid = { version = "1.22.0", features = ["v4", "serde"] }
|
||||||
tokio-tungstenite = "0.29.0"
|
tokio-tungstenite = "0.29.0"
|
||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
@@ -35,6 +35,6 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [
|
|||||||
"sqlite",
|
"sqlite",
|
||||||
"e2e-encryption",
|
"e2e-encryption",
|
||||||
] }
|
] }
|
||||||
pulldown-cmark = { version = "0.13.1", default-features = false, features = [
|
pulldown-cmark = { version = "0.13.3", default-features = false, features = [
|
||||||
"html",
|
"html",
|
||||||
] }
|
] }
|
||||||
|
|||||||
38
Makefile
@@ -1,38 +0,0 @@
|
|||||||
.PHONY: help build-macos build-linux release
|
|
||||||
|
|
||||||
help:
|
|
||||||
@echo "Story Kit – cross-platform build targets"
|
|
||||||
@echo ""
|
|
||||||
@echo " make build-macos Build native macOS release binary"
|
|
||||||
@echo " make build-linux Build static Linux x86_64 release binary (requires cross + Docker)"
|
|
||||||
@echo " make release V=x.y.z Build both targets and publish a Gitea release"
|
|
||||||
@echo ""
|
|
||||||
@echo "Prerequisites:"
|
|
||||||
@echo " build-macos: Rust stable toolchain, npm"
|
|
||||||
@echo " build-linux: cargo install cross AND Docker Desktop running"
|
|
||||||
@echo ""
|
|
||||||
@echo "Output:"
|
|
||||||
@echo " macOS : target/release/storkit"
|
|
||||||
@echo " Linux : target/x86_64-unknown-linux-musl/release/storkit"
|
|
||||||
|
|
||||||
## Build a native macOS release binary.
|
|
||||||
## The frontend is compiled by build.rs (npm run build) and embedded via rust-embed.
|
|
||||||
## Verify dynamic deps afterwards: otool -L target/release/storkit
|
|
||||||
build-macos:
|
|
||||||
cargo build --release
|
|
||||||
|
|
||||||
## Build a fully static Linux x86_64 binary using the musl libc target.
|
|
||||||
## cross (https://github.com/cross-rs/cross) handles the Docker-based cross-compilation.
|
|
||||||
## Install cross: cargo install cross
|
|
||||||
## The resulting binary has zero dynamic library dependencies (ldd reports "not a dynamic executable").
|
|
||||||
build-linux:
|
|
||||||
cross build --release --target x86_64-unknown-linux-musl
|
|
||||||
|
|
||||||
## Publish a release to Gitea with macOS and Linux binaries.
|
|
||||||
## Requires: GITEA_TOKEN env var, cross, Docker running.
|
|
||||||
## Usage: make release V=0.2.0
|
|
||||||
release:
|
|
||||||
ifndef V
|
|
||||||
$(error Usage: make release V=x.y.z)
|
|
||||||
endif
|
|
||||||
script/release $(V)
|
|
||||||
11
docker/.dockerignore
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Docker build context exclusions
|
||||||
|
**/target/
|
||||||
|
**/node_modules/
|
||||||
|
frontend/dist/
|
||||||
|
.storkit/worktrees/
|
||||||
|
.storkit/logs/
|
||||||
|
.storkit/work/6_archived/
|
||||||
|
.git/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
.DS_Store
|
||||||
132
docker/Dockerfile
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# Story Kit – single-container runtime
|
||||||
|
# All components (server, agents, web UI) run inside this container.
|
||||||
|
# The target project repo is bind-mounted at /workspace.
|
||||||
|
#
|
||||||
|
# Build: docker build -t storkit -f docker/Dockerfile .
|
||||||
|
# Run: docker compose -f docker/docker-compose.yml up
|
||||||
|
#
|
||||||
|
# Tested with: OrbStack (recommended on macOS), Docker Desktop (slower bind mounts)
|
||||||
|
|
||||||
|
FROM rust:1.90-bookworm AS base
|
||||||
|
|
||||||
|
# Clippy is needed at runtime for acceptance gates (cargo clippy)
|
||||||
|
RUN rustup component add clippy
|
||||||
|
|
||||||
|
# ── System deps ──────────────────────────────────────────────────────
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
git \
|
||||||
|
curl \
|
||||||
|
ca-certificates \
|
||||||
|
build-essential \
|
||||||
|
pkg-config \
|
||||||
|
libssl-dev \
|
||||||
|
# cargo-nextest is a pre-built binary
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# ── Node.js 22.x (matches host) ─────────────────────────────────────
|
||||||
|
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
|
||||||
|
&& apt-get install -y --no-install-recommends nodejs \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# ── cargo-nextest (test runner) ──────────────────────────────────────
|
||||||
|
RUN curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C /usr/local/bin
|
||||||
|
|
||||||
|
# ── Claude Code CLI ──────────────────────────────────────────────────
|
||||||
|
# Claude Code is distributed as an npm global package.
|
||||||
|
# The CLI binary is `claude`.
|
||||||
|
RUN npm install -g @anthropic-ai/claude-code
|
||||||
|
|
||||||
|
# ── Working directory ────────────────────────────────────────────────
|
||||||
|
# /app holds the storkit source (copied in at build time for the binary).
|
||||||
|
# /workspace is where the target project repo gets bind-mounted at runtime.
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# ── Build the storkit server binary ─────────────────────────────────
|
||||||
|
# Copy the full project tree so `cargo build` and `npm run build` (via
|
||||||
|
# build.rs) can produce the release binary with embedded frontend assets.
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Build frontend deps first (better layer caching)
|
||||||
|
RUN cd frontend && npm ci
|
||||||
|
|
||||||
|
# Build the release binary (build.rs runs npm run build for the frontend)
|
||||||
|
RUN cargo build --release \
|
||||||
|
&& cp target/release/storkit /usr/local/bin/storkit
|
||||||
|
|
||||||
|
# ── Runtime stage (smaller image) ───────────────────────────────────
|
||||||
|
FROM debian:bookworm-slim AS runtime
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
git \
|
||||||
|
curl \
|
||||||
|
ca-certificates \
|
||||||
|
libssl3 \
|
||||||
|
# build-essential (gcc/cc) needed at runtime for:
|
||||||
|
# - rebuild_and_restart (cargo build --release)
|
||||||
|
# - agent-driven cargo commands (clippy, test, build)
|
||||||
|
build-essential \
|
||||||
|
pkg-config \
|
||||||
|
libssl-dev \
|
||||||
|
# procps provides ps, needed by tests and process management
|
||||||
|
procps \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Node.js in runtime
|
||||||
|
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
|
||||||
|
&& apt-get install -y --no-install-recommends nodejs \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Claude Code CLI in runtime
|
||||||
|
RUN npm install -g @anthropic-ai/claude-code
|
||||||
|
|
||||||
|
# Cargo and Rust toolchain needed at runtime for:
|
||||||
|
# - rebuild_and_restart (cargo build inside the container)
|
||||||
|
# - Agent-driven cargo commands (cargo clippy, cargo test, etc.)
|
||||||
|
COPY --from=base /usr/local/cargo /usr/local/cargo
|
||||||
|
COPY --from=base /usr/local/rustup /usr/local/rustup
|
||||||
|
ENV PATH="/usr/local/cargo/bin:${PATH}"
|
||||||
|
ENV RUSTUP_HOME="/usr/local/rustup"
|
||||||
|
ENV CARGO_HOME="/usr/local/cargo"
|
||||||
|
|
||||||
|
# cargo-nextest
|
||||||
|
COPY --from=base /usr/local/bin/cargo-nextest /usr/local/bin/cargo-nextest
|
||||||
|
|
||||||
|
# The storkit binary
|
||||||
|
COPY --from=base /usr/local/bin/storkit /usr/local/bin/storkit
|
||||||
|
|
||||||
|
# Copy the full source tree so rebuild_and_restart can do `cargo build`
|
||||||
|
# from the workspace root (CARGO_MANIFEST_DIR is baked into the binary).
|
||||||
|
# Alternative: mount the source as a volume.
|
||||||
|
COPY --from=base /app /app
|
||||||
|
|
||||||
|
# ── Non-root user ────────────────────────────────────────────────────
|
||||||
|
# Claude Code refuses --dangerously-skip-permissions (bypassPermissions)
|
||||||
|
# when running as root. Create a dedicated user so agents can launch.
|
||||||
|
RUN groupadd -r storkit \
|
||||||
|
&& useradd -r -g storkit -m -d /home/storkit storkit \
|
||||||
|
&& mkdir -p /home/storkit/.claude \
|
||||||
|
&& chown -R storkit:storkit /home/storkit \
|
||||||
|
&& chown -R storkit:storkit /usr/local/cargo /usr/local/rustup \
|
||||||
|
&& chown -R storkit:storkit /app \
|
||||||
|
&& mkdir -p /workspace/target /app/target \
|
||||||
|
&& chown storkit:storkit /workspace/target /app/target
|
||||||
|
|
||||||
|
# ── Entrypoint ───────────────────────────────────────────────────────
|
||||||
|
# Validates required env vars (GIT_USER_NAME, GIT_USER_EMAIL) and
|
||||||
|
# configures git identity before starting the server.
|
||||||
|
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
|
||||||
|
|
||||||
|
USER storkit
|
||||||
|
WORKDIR /workspace
|
||||||
|
|
||||||
|
# ── Ports ────────────────────────────────────────────────────────────
|
||||||
|
# Web UI + MCP server
|
||||||
|
EXPOSE 3001
|
||||||
|
|
||||||
|
# ── Volumes (defined in docker-compose.yml) ──────────────────────────
|
||||||
|
# /workspace – bind mount: target project repo
|
||||||
|
# /home/storkit/.claude – named volume: Claude Code sessions/state
|
||||||
|
# /usr/local/cargo/registry – named volume: cargo dependency cache
|
||||||
|
|
||||||
|
ENTRYPOINT ["entrypoint.sh"]
|
||||||
|
CMD ["storkit", "/workspace"]
|
||||||
120 docker/docker-compose.yml Normal file
@@ -0,0 +1,120 @@
# Story Kit – single-container deployment
#
# Usage:
#   # Set your API key and project path, then:
#   ANTHROPIC_API_KEY=sk-ant-... PROJECT_PATH=/path/to/your/repo \
#     docker compose -f docker/docker-compose.yml up
#
# OrbStack users: just install OrbStack and use `docker compose` normally.
# OrbStack's VirtioFS bind mount driver is significantly faster than
# Docker Desktop's default (see spike findings).

services:
  storkit:
    build:
      context: ..
      dockerfile: docker/Dockerfile
    container_name: storkit
    ports:
      # Bind to localhost only — not exposed on all interfaces.
      - "127.0.0.1:3001:3001"
    environment:
      # Optional: Anthropic API key. If unset, Claude Code falls back to
      # OAuth credentials from `claude login` (e.g. Max subscription).
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      # Required: git identity for agent commits
      - GIT_USER_NAME=${GIT_USER_NAME:?Set GIT_USER_NAME}
      - GIT_USER_EMAIL=${GIT_USER_EMAIL:?Set GIT_USER_EMAIL}
      # Optional: override the server port (default 3001)
      - STORKIT_PORT=3001
      # Bind to all interfaces so Docker port forwarding works.
      - STORKIT_HOST=0.0.0.0
      # Optional: Matrix bot credentials (if using Matrix integration)
      - MATRIX_HOMESERVER=${MATRIX_HOMESERVER:-}
      - MATRIX_USER=${MATRIX_USER:-}
      - MATRIX_PASSWORD=${MATRIX_PASSWORD:-}
      # Optional: Slack webhook (if using Slack integration)
      - SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN:-}
      - SLACK_APP_TOKEN=${SLACK_APP_TOKEN:-}
    volumes:
      # The target project repo – bind-mounted from host.
      # Changes made by agents inside the container are immediately
      # visible on the host (and vice versa).
      - ${PROJECT_PATH:?Set PROJECT_PATH}:/workspace

      # Cargo registry cache – persists downloaded crates across
      # container restarts so `cargo build` doesn't re-download.
      - cargo-registry:/usr/local/cargo/registry

      # Cargo git checkouts – persists git-based dependencies.
      - cargo-git:/usr/local/cargo/git

      # Claude Code state – persists session history, projects config,
      # and conversation transcripts so --resume works across restarts.
      - claude-state:/home/storkit/.claude

      # Storkit source tree for rebuild_and_restart.
      # The binary has CARGO_MANIFEST_DIR baked in at compile time
      # pointing to /app/server, so the source must be at /app.
      # This is COPY'd in the Dockerfile; mounting over it allows
      # live source updates without rebuilding the image.
      # Mount host source so rebuild_and_restart picks up live changes:
      - ./..:/app

      # Keep cargo build artifacts off the bind mount.
      # Bind-mount directory traversal is ~23x slower than Docker volumes
      # (confirmed in spike 329). Cargo stat-checks every file in target/
      # on incremental builds — leaving it on the bind mount makes builds
      # catastrophically slow (~12s just to traverse the tree).
      - workspace-target:/workspace/target
      - storkit-target:/app/target

    # ── Security hardening ──────────────────────────────────────────
    # Read-only root filesystem. Only explicitly mounted volumes and
    # tmpfs paths are writable.
    read_only: true
    tmpfs:
      - /tmp:size=512M,exec
      - /home/storkit:size=512M,uid=999,gid=999,exec

    # Drop all Linux capabilities, then add back only what's needed.
    # SETUID/SETGID needed by Claude Code's PTY allocation (openpty).
    cap_drop:
      - ALL
    cap_add:
      - SETUID
      - SETGID

    # Prevent child processes from gaining new privileges via setuid,
    # setgid, or other mechanisms.
    security_opt:
      - no-new-privileges:true

    # Resource limits – cap the whole system.
    # Adjust based on your machine. These are conservative defaults.
    deploy:
      resources:
        limits:
          cpus: "8"
          memory: 24G
        reservations:
          cpus: "2"
          memory: 4G

    # Health check – verify the MCP endpoint responds
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://localhost:3001/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

    # Restart policy – restart on crash but not on manual stop
    restart: unless-stopped

volumes:
  cargo-registry:
  cargo-git:
  claude-state:
  workspace-target:
  storkit-target:

34 docker/entrypoint.sh Executable file
@@ -0,0 +1,34 @@
#!/bin/sh
set -e

# ── Git identity ─────────────────────────────────────────────────────
# Agents commit code inside the container. Without a git identity,
# commits fail or use garbage defaults. Fail loudly at startup so the
# operator knows immediately.
if [ -z "$GIT_USER_NAME" ]; then
  echo "FATAL: GIT_USER_NAME is not set. Export it in your environment or docker-compose.yml." >&2
  exit 1
fi
if [ -z "$GIT_USER_EMAIL" ]; then
  echo "FATAL: GIT_USER_EMAIL is not set. Export it in your environment or docker-compose.yml." >&2
  exit 1
fi

# Use GIT_AUTHOR/COMMITTER env vars instead of git config --global,
# so the root filesystem can stay read-only (no ~/.gitconfig write).
export GIT_AUTHOR_NAME="$GIT_USER_NAME"
export GIT_COMMITTER_NAME="$GIT_USER_NAME"
export GIT_AUTHOR_EMAIL="$GIT_USER_EMAIL"
export GIT_COMMITTER_EMAIL="$GIT_USER_EMAIL"

# ── Frontend native deps ────────────────────────────────────────────
# The project repo is bind-mounted from the host, so node_modules/
# may contain native binaries for the wrong platform (e.g. darwin
# binaries on a Linux container). Reinstall to get the right ones.
if [ -d /workspace/frontend ] && [ -f /workspace/frontend/package.json ]; then
  echo "Installing frontend dependencies for container platform..."
  cd /workspace/frontend && npm install --prefer-offline 2>/dev/null || true
  cd /workspace
fi

exec "$@"

4 frontend/package-lock.json generated
@@ -1,12 +1,12 @@
 {
   "name": "living-spec-standalone",
-  "version": "0.4.1",
+  "version": "0.6.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "living-spec-standalone",
-      "version": "0.4.1",
+      "version": "0.6.0",
       "dependencies": {
         "@types/react-syntax-highlighter": "^15.5.13",
         "react": "^19.1.0",

@@ -1,7 +1,7 @@
 {
   "name": "living-spec-standalone",
   "private": true,
-  "version": "0.4.1",
+  "version": "0.6.0",
   "type": "module",
   "scripts": {
     "dev": "vite",

@@ -1,6 +1,6 @@
-import { defineConfig } from "@playwright/test";
 import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
+import { defineConfig } from "@playwright/test";

 const configDir = dirname(fileURLToPath(new URL(import.meta.url)));
 const frontendRoot = resolve(configDir, ".");

@@ -115,6 +115,11 @@ export interface Message {
   tool_call_id?: string;
 }

+export interface AnthropicModelInfo {
+  id: string;
+  context_window: number;
+}
+
 export interface WorkItemContent {
   content: string;
   stage: string;
@@ -266,7 +271,7 @@ export const api = {
     return requestJson<boolean>("/anthropic/key/exists", {}, baseUrl);
   },
   getAnthropicModels(baseUrl?: string) {
-    return requestJson<string[]>("/anthropic/models", {}, baseUrl);
+    return requestJson<AnthropicModelInfo[]>("/anthropic/models", {}, baseUrl);
   },
   setAnthropicApiKey(api_key: string, baseUrl?: string) {
     return requestJson<boolean>(
@@ -377,6 +382,14 @@ export const api = {
   deleteStory(storyId: string) {
     return callMcpTool("delete_story", { story_id: storyId });
   },
+  /** Execute a bot slash command without LLM invocation. Returns markdown response text. */
+  botCommand(command: string, args: string, baseUrl?: string) {
+    return requestJson<{ response: string }>(
+      "/bot/command",
+      { method: "POST", body: JSON.stringify({ command, args }) },
+      baseUrl,
+    );
+  },
 };

 async function callMcpTool(

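The `botCommand` helper above gives the frontend a direct, LLM-free path to the server's `/bot/command` endpoint. A minimal usage sketch — not part of the diff; the module path and error handling here are illustrative assumptions:

import { api } from "../api/client";

// Ask the server for the pipeline status of story 42 and log the
// markdown response text it returns.
async function showStoryStatus(): Promise<void> {
  try {
    const { response } = await api.botCommand("status", "42");
    console.log(response); // markdown, e.g. a triage dump for story 42
  } catch (err) {
    console.error("bot command failed:", err);
  }
}
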
@@ -40,6 +40,7 @@ vi.mock("../api/client", () => {
     setAnthropicApiKey: vi.fn(),
     readFile: vi.fn(),
     listProjectFiles: vi.fn(),
+    botCommand: vi.fn(),
   };
   class ChatWebSocket {
     connect(handlers: WsHandlers) {
@@ -64,6 +65,7 @@ const mockedApi = {
   setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
   readFile: vi.mocked(api.readFile),
   listProjectFiles: vi.mocked(api.listProjectFiles),
+  botCommand: vi.mocked(api.botCommand),
 };

 function setupMocks() {
@@ -76,6 +78,7 @@ function setupMocks() {
   mockedApi.listProjectFiles.mockResolvedValue([]);
   mockedApi.cancelChat.mockResolvedValue(true);
   mockedApi.setAnthropicApiKey.mockResolvedValue(true);
+  mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
 }

 describe("Default provider selection (Story 206)", () => {
@@ -1457,3 +1460,204 @@ describe("File reference expansion (Story 269 AC4)", () => {
     expect(mockedApi.readFile).not.toHaveBeenCalled();
   });
 });
+
+describe("Slash command handling (Story 374)", () => {
+  beforeEach(() => {
+    capturedWsHandlers = null;
+    lastSendChatArgs = null;
+    setupMocks();
+  });
+
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("AC: /status calls botCommand and displays response", async () => {
+    mockedApi.botCommand.mockResolvedValue({ response: "Pipeline: 3 active" });
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/status" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    await waitFor(() => {
+      expect(mockedApi.botCommand).toHaveBeenCalledWith(
+        "status",
+        "",
+        undefined,
+      );
+    });
+    expect(await screen.findByText("Pipeline: 3 active")).toBeInTheDocument();
+    // Should NOT go to LLM
+    expect(lastSendChatArgs).toBeNull();
+  });
+
+  it("AC: /status <number> passes args to botCommand", async () => {
+    mockedApi.botCommand.mockResolvedValue({ response: "Story 42 details" });
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/status 42" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    await waitFor(() => {
+      expect(mockedApi.botCommand).toHaveBeenCalledWith(
+        "status",
+        "42",
+        undefined,
+      );
+    });
+  });
+
+  it("AC: /start <number> calls botCommand", async () => {
+    mockedApi.botCommand.mockResolvedValue({ response: "Started agent" });
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/start 42 opus" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    await waitFor(() => {
+      expect(mockedApi.botCommand).toHaveBeenCalledWith(
+        "start",
+        "42 opus",
+        undefined,
+      );
+    });
+    expect(await screen.findByText("Started agent")).toBeInTheDocument();
+  });
+
+  it("AC: /git calls botCommand", async () => {
+    mockedApi.botCommand.mockResolvedValue({ response: "On branch main" });
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());

+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/git" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    await waitFor(() => {
+      expect(mockedApi.botCommand).toHaveBeenCalledWith("git", "", undefined);
+    });
+  });
+
+  it("AC: /cost calls botCommand", async () => {
+    mockedApi.botCommand.mockResolvedValue({ response: "$1.23 today" });
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/cost" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    await waitFor(() => {
+      expect(mockedApi.botCommand).toHaveBeenCalledWith("cost", "", undefined);
+    });
+  });
+
+  it("AC: /reset clears messages and session without LLM", async () => {
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    // First add a message so there is history to clear
+    act(() => {
+      capturedWsHandlers?.onUpdate([
+        { role: "user", content: "hello" },
+        { role: "assistant", content: "world" },
+      ]);
+    });
+    expect(await screen.findByText("world")).toBeInTheDocument();
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/reset" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    // LLM must NOT be invoked
+    expect(lastSendChatArgs).toBeNull();
+    // botCommand must NOT be invoked (reset is frontend-only)
+    expect(mockedApi.botCommand).not.toHaveBeenCalled();
+    // Confirmation message should appear
+    expect(await screen.findByText(/Session reset/)).toBeInTheDocument();
+  });
+
+  it("AC: unrecognised slash command shows error message", async () => {
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/foobar" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    expect(await screen.findByText(/Unknown command/)).toBeInTheDocument();
+    // Should NOT go to LLM
+    expect(lastSendChatArgs).toBeNull();
+    // Should NOT call botCommand
+    expect(mockedApi.botCommand).not.toHaveBeenCalled();
+  });
+
+  it("AC: /help shows help overlay", async () => {
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/help" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    expect(await screen.findByTestId("help-overlay")).toBeInTheDocument();
+    expect(lastSendChatArgs).toBeNull();
+    expect(mockedApi.botCommand).not.toHaveBeenCalled();
+  });
+
+  it("AC: botCommand API error shows error message in chat", async () => {
+    mockedApi.botCommand.mockRejectedValue(new Error("Server error"));
+    render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+    await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+    const input = screen.getByPlaceholderText("Send a message...");
+    await act(async () => {
+      fireEvent.change(input, { target: { value: "/git" } });
+    });
+    await act(async () => {
+      fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+    });
+
+    expect(
+      await screen.findByText(/Error running command/),
+    ).toBeInTheDocument();
+  });
+});

@@ -4,7 +4,7 @@ import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
 import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism";
 import type { AgentConfigInfo } from "../api/agents";
 import { agentsApi } from "../api/agents";
-import type { PipelineState } from "../api/client";
+import type { AnthropicModelInfo, PipelineState } from "../api/client";
 import { api, ChatWebSocket } from "../api/client";
 import { useChatHistory } from "../hooks/useChatHistory";
 import type { Message, ProviderConfig } from "../types";
@@ -143,8 +143,13 @@ function formatToolActivity(toolName: string): string {

 const estimateTokens = (text: string): number => Math.ceil(text.length / 4);

-const getContextWindowSize = (modelName: string): number => {
-  if (modelName.startsWith("claude-")) return 200000;
+const getContextWindowSize = (
+  modelName: string,
+  claudeContextWindows?: Map<string, number>,
+): number => {
+  if (modelName.startsWith("claude-")) {
+    return claudeContextWindows?.get(modelName) ?? 200000;
+  }
   if (modelName.includes("llama3")) return 8192;
   if (modelName.includes("qwen2.5")) return 32768;
   if (modelName.includes("deepseek")) return 16384;
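The estimator behind this hunk is intentionally crude: roughly four characters per token, with the context window looked up per model and Claude windows now coming from the API-provided map. A self-contained sketch of the same idea — names and the final fallback value are illustrative, not the component's actual exports:

// Crude token estimate: ~4 characters per token.
const estimateTokensSketch = (text: string): number => Math.ceil(text.length / 4);

// Context window lookup: Claude models use the API-provided map with a
// 200k fallback; other families use hard-coded defaults.
const contextWindowFor = (
  model: string,
  claudeWindows?: Map<string, number>,
): number => {
  if (model.startsWith("claude-")) return claudeWindows?.get(model) ?? 200000;
  if (model.includes("llama3")) return 8192;
  if (model.includes("qwen2.5")) return 32768;
  if (model.includes("deepseek")) return 16384;
  return 8192; // assumed default; the real fallback is not visible in this hunk
};

// Example: 120k characters of history against a "claude-" model id
// ≈ 30000 tokens, i.e. 15% of a 200k window.
const used = estimateTokensSketch("x".repeat(120_000));
const pct = Math.round((used / contextWindowFor("claude-example-model")) * 100);
console.log(`${used} tokens, ${pct}% of window`); // "30000 tokens, 15% of window"
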
@@ -163,6 +168,9 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
   const [enableTools, setEnableTools] = useState(true);
   const [availableModels, setAvailableModels] = useState<string[]>([]);
   const [claudeModels, setClaudeModels] = useState<string[]>([]);
+  const [claudeContextWindowMap, setClaudeContextWindowMap] = useState<
+    Map<string, number>
+  >(new Map());
   const [streamingContent, setStreamingContent] = useState("");
   const [streamingThinking, setStreamingThinking] = useState("");
   const [showApiKeyDialog, setShowApiKeyDialog] = useState(false);
@@ -285,7 +293,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
       totalTokens += estimateTokens(streamingContent);
     }

-    const contextWindow = getContextWindowSize(model);
+    const contextWindow = getContextWindowSize(model, claudeContextWindowMap);
     const percentage = Math.round((totalTokens / contextWindow) * 100);

     return {
@@ -293,7 +301,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
       total: contextWindow,
       percentage,
     };
-  }, [messages, streamingContent, model]);
+  }, [messages, streamingContent, model, claudeContextWindowMap]);

   useEffect(() => {
     try {
@@ -337,14 +345,18 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
       .then((exists) => {
         setHasAnthropicKey(exists);
         if (!exists) return;
-        return api.getAnthropicModels().then((models) => {
+        return api.getAnthropicModels().then((models: AnthropicModelInfo[]) => {
           if (models.length > 0) {
             const sortedModels = models.sort((a, b) =>
-              a.toLowerCase().localeCompare(b.toLowerCase()),
+              a.id.toLowerCase().localeCompare(b.id.toLowerCase()),
+            );
+            setClaudeModels(sortedModels.map((m) => m.id));
+            setClaudeContextWindowMap(
+              new Map(sortedModels.map((m) => [m.id, m.context_window])),
             );
-            setClaudeModels(sortedModels);
           } else {
             setClaudeModels([]);
+            setClaudeContextWindowMap(new Map());
           }
         });
       })
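To make the wiring above concrete: the component now indexes the `AnthropicModelInfo[]` response twice, once as a sorted id list for the model picker and once as an id → context_window map for the usage meter. A standalone sketch with the same shape — function name and the example model ids are placeholders:

interface AnthropicModelInfoSketch {
  id: string;
  context_window: number;
}

function indexClaudeModels(models: AnthropicModelInfoSketch[]): {
  ids: string[];
  windows: Map<string, number>;
} {
  // Sort case-insensitively by id, as the component does.
  const sorted = [...models].sort((a, b) =>
    a.id.toLowerCase().localeCompare(b.id.toLowerCase()),
  );
  return {
    ids: sorted.map((m) => m.id),
    windows: new Map(sorted.map((m) => [m.id, m.context_window])),
  };
}

// Example (placeholder ids):
const { ids, windows } = indexClaudeModels([
  { id: "claude-model-b", context_window: 200000 },
  { id: "claude-model-a", context_window: 200000 },
]);
console.log(ids); // ["claude-model-a", "claude-model-b"]
console.log(windows.get("claude-model-b")); // 200000
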
@@ -600,6 +612,80 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
       return;
     }

+    // /reset — clear session and message history without LLM
+    if (/^\/reset\s*$/i.test(messageText)) {
+      setMessages([]);
+      setClaudeSessionId(null);
+      setStreamingContent("");
+      setStreamingThinking("");
+      setActivityStatus(null);
+      setMessages([
+        {
+          role: "assistant",
+          content: "Session reset. Starting a fresh conversation.",
+        },
+      ]);
+      return;
+    }
+
+    // Slash commands forwarded to the backend bot command endpoint
+    const slashMatch = messageText.match(/^\/(\S+)(?:\s+([\s\S]*))?$/);
+    if (slashMatch) {
+      const cmd = slashMatch[1].toLowerCase();
+      const args = (slashMatch[2] ?? "").trim();
+
+      // Ignore commands handled elsewhere
+      if (cmd !== "btw") {
+        const knownCommands = new Set([
+          "status",
+          "assign",
+          "start",
+          "show",
+          "move",
+          "delete",
+          "cost",
+          "git",
+          "overview",
+          "rebuild",
+        ]);
+
+        if (knownCommands.has(cmd)) {
+          // Show the slash command in chat as a user message (display only)
+          setMessages((prev: Message[]) => [
+            ...prev,
+            { role: "user", content: messageText },
+          ]);
+          try {
+            const result = await api.botCommand(cmd, args, undefined);
+            setMessages((prev: Message[]) => [
+              ...prev,
+              { role: "assistant", content: result.response },
+            ]);
+          } catch (e) {
+            setMessages((prev: Message[]) => [
+              ...prev,
+              {
+                role: "assistant",
+                content: `**Error running command:** ${e}`,
+              },
+            ]);
+          }
+          return;
+        }
+
+        // Unknown slash command
+        setMessages((prev: Message[]) => [
+          ...prev,
+          { role: "user", content: messageText },
+          {
+            role: "assistant",
+            content: `Unknown command: \`/${cmd}\`. Type \`/help\` to see available commands.`,
+          },
+        ]);
+        return;
+      }
+    }
+
     // /btw <question> — answered from context without disrupting main chat
     const btwMatch = messageText.match(/^\/btw\s+(.+)/s);
     if (btwMatch) {
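The routing added above hinges on one regex, `^\/(\S+)(?:\s+([\s\S]*))?$`, which splits something like `/start 42 opus` into a command name and a raw argument string before the known-command check. A standalone sketch of just that parsing step — the function name and return shape are illustrative:

// Split "/start 42 opus" into { cmd: "start", args: "42 opus" }.
// Returns null for anything that is not a slash command.
function parseSlashCommand(
  text: string,
): { cmd: string; args: string } | null {
  const match = text.match(/^\/(\S+)(?:\s+([\s\S]*))?$/);
  if (!match) return null;
  return { cmd: match[1].toLowerCase(), args: (match[2] ?? "").trim() };
}

console.log(parseSlashCommand("/start 42 opus")); // { cmd: "start", args: "42 opus" }
console.log(parseSlashCommand("/status"));        // { cmd: "status", args: "" }
console.log(parseSlashCommand("hello world"));    // null
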
@@ -12,6 +12,57 @@ const SLASH_COMMANDS: SlashCommand[] = [
     name: "/help",
     description: "Show this list of available slash commands.",
   },
+  {
+    name: "/status",
+    description:
+      "Show pipeline status and agent availability. `/status <number>` shows a story triage dump.",
+  },
+  {
+    name: "/assign <number> <model>",
+    description: "Pre-assign a model to a story (e.g. `/assign 42 opus`).",
+  },
+  {
+    name: "/start <number>",
+    description:
+      "Start a coder on a story. Optionally specify a model: `/start <number> opus`.",
+  },
+  {
+    name: "/show <number>",
+    description: "Display the full text of a work item.",
+  },
+  {
+    name: "/move <number> <stage>",
+    description:
+      "Move a work item to a pipeline stage (backlog, current, qa, merge, done).",
+  },
+  {
+    name: "/delete <number>",
+    description:
+      "Remove a work item from the pipeline and stop any running agent.",
+  },
+  {
+    name: "/cost",
+    description:
+      "Show token spend: 24h total, top stories, breakdown by agent type, and all-time total.",
+  },
+  {
+    name: "/git",
+    description:
+      "Show git status: branch, uncommitted changes, and ahead/behind remote.",
+  },
+  {
+    name: "/overview <number>",
+    description: "Show the implementation summary for a merged story.",
+  },
+  {
+    name: "/rebuild",
+    description: "Rebuild the server binary and restart.",
+  },
+  {
+    name: "/reset",
+    description:
+      "Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).",
+  },
   {
     name: "/btw <question>",
     description:
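These entries drive the `/help` overlay, and a flat `{ name, description }` list like this also lends itself to simple prefix matching as the user types. A sketch under that assumption — the component's actual overlay/autocomplete logic is not shown in this diff:

interface SlashCommandSketch {
  name: string;
  description: string;
}

// Return the commands whose name starts with what the user has typed so far.
function matchCommands(
  commands: SlashCommandSketch[],
  input: string,
): SlashCommandSketch[] {
  const typed = input.trim().toLowerCase();
  if (!typed.startsWith("/")) return [];
  return commands.filter((c) => c.name.toLowerCase().startsWith(typed));
}

// Example: "/st" matches "/status" and "/start <number>".
const demo: SlashCommandSketch[] = [
  { name: "/status", description: "Show pipeline status." },
  { name: "/start <number>", description: "Start a coder on a story." },
  { name: "/git", description: "Show git status." },
];
console.log(matchCommands(demo, "/st").map((c) => c.name)); // ["/status", "/start <number>"]
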
2 package-lock.json generated
@@ -1,5 +1,5 @@
 {
-  "name": "storkit",
+  "name": "workspace",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {}

@@ -49,7 +49,16 @@ PACKAGE_JSON="${SCRIPT_DIR}/frontend/package.json"
 sed -i '' "s/\"version\": \".*\"/\"version\": \"${VERSION}\"/" "$PACKAGE_JSON"
 echo "==> Bumped ${PACKAGE_JSON} to ${VERSION}"

-git add "$CARGO_TOML" "$PACKAGE_JSON"
+# Regenerate lock files so they stay in sync with the version bump.
+CARGO_LOCK="${SCRIPT_DIR}/Cargo.lock"
+(cd "${SCRIPT_DIR}/server" && cargo generate-lockfile)
+echo "==> Regenerated Cargo.lock"
+
+PACKAGE_LOCK="${SCRIPT_DIR}/frontend/package-lock.json"
+(cd "${SCRIPT_DIR}/frontend" && npm install --package-lock-only --ignore-scripts --silent 2>/dev/null)
+echo "==> Regenerated package-lock.json"
+
+git add "$CARGO_TOML" "$CARGO_LOCK" "$PACKAGE_JSON" "$PACKAGE_LOCK"
 git commit -m "Bump version to ${VERSION}"

 if ! command -v cross >/dev/null 2>&1; then
@@ -138,9 +147,65 @@
     | sed 's/^/- /')
 fi

+# ── Generate summary overview ─────────────────────────────────
+# Group completed items by keyword clusters to identify the
+# release's focus areas.
+generate_summary() {
+  local all_items="$1"
+  local themes=""
+
+  # Count items matching each theme keyword (one item per line via echo -e)
+  local expanded
+  expanded=$(echo -e "$all_items")
+  local bot_count=$(echo "$expanded" | grep -icE 'bot|command|chat|matrix|slack|whatsapp|status|help|assign|rebuild|shutdown|whatsup' || true)
+  local mcp_count=$(echo "$expanded" | grep -icE 'mcp|tool' || true)
+  local docker_count=$(echo "$expanded" | grep -icE 'docker|container|gvisor|orbstack|harden|security' || true)
+  local agent_count=$(echo "$expanded" | grep -icE 'agent|runtime|chatgpt|gemini|openai|model|coder' || true)
+  local ui_count=$(echo "$expanded" | grep -icE 'frontend|ui|web|oauth|scaffold' || true)
+  local infra_count=$(echo "$expanded" | grep -icE 'release|makefile|refactor|upgrade|worktree|pipeline' || true)
+
+  # Build theme list, highest count first
+  local -a theme_pairs=()
+  [ "$agent_count" -gt 0 ] && theme_pairs+=("${agent_count}:multi-model agents")
+  [ "$bot_count" -gt 0 ] && theme_pairs+=("${bot_count}:bot commands")
+  [ "$mcp_count" -gt 0 ] && theme_pairs+=("${mcp_count}:MCP tools")
+  [ "$docker_count" -gt 0 ] && theme_pairs+=("${docker_count}:Docker hardening")
+  [ "$ui_count" -gt 0 ] && theme_pairs+=("${ui_count}:developer experience")
+  [ "$infra_count" -gt 0 ] && theme_pairs+=("${infra_count}:infrastructure")
+
+  # Sort by count descending, take top 3
+  local sorted=$(printf '%s\n' "${theme_pairs[@]}" | sort -t: -k1 -nr | head -3)
+  local labels=""
+  while IFS=: read -r count label; do
+    [ -z "$label" ] && continue
+    if [ -z "$labels" ]; then
+      # Capitalise first theme
+      labels="$(echo "${label:0:1}" | tr '[:lower:]' '[:upper:]')${label:1}"
+    else
+      labels="${labels}, ${label}"
+    fi
+  done <<< "$sorted"
+
+  echo "$labels"
+}
+
+ALL_ITEMS="${FEATURES}${FIXES}${REFACTORS}"
+SUMMARY=$(generate_summary "$ALL_ITEMS")
+if [ -n "$SUMMARY" ]; then
+  SUMMARY_LINE="**Focus:** ${SUMMARY}"
+else
+  SUMMARY_LINE=""
+fi
+
 # Assemble the release body.
 RELEASE_BODY="## What's Changed"
+
+if [ -n "$SUMMARY_LINE" ]; then
+  RELEASE_BODY="${RELEASE_BODY}
+
+${SUMMARY_LINE}"
+fi

 if [ -n "$FEATURES" ]; then
   RELEASE_BODY="${RELEASE_BODY}
@@ -188,20 +253,29 @@ git push origin "$TAG"

 # ── Create Gitea Release ──────────────────────────────────────
 echo "==> Creating release on Gitea..."
-RELEASE_JSON=$(python3 -c "
+RELEASE_JSON_FILE=$(mktemp)
+trap "rm -f '$RELEASE_JSON_FILE'" EXIT
+python3 -c "
 import json, sys
-print(json.dumps({
+with open(sys.argv[3], 'w') as f:
+    json.dump({
     'tag_name': sys.argv[1],
     'name': sys.argv[1],
     'body': sys.argv[2]
-}))
-" "$TAG" "$RELEASE_BODY")
+    }, f)
+" "$TAG" "$RELEASE_BODY" "$RELEASE_JSON_FILE"

-RELEASE_RESPONSE=$(curl -sf -X POST \
+RELEASE_RESPONSE=$(curl -s --fail-with-body -X POST \
   -H "Authorization: token ${GITEA_TOKEN}" \
   -H "Content-Type: application/json" \
   "${GITEA_URL}/api/v1/repos/${REPO}/releases" \
-  -d "$RELEASE_JSON")
+  -d "@${RELEASE_JSON_FILE}")
+
+if [ $? -ne 0 ]; then
+  echo "Error: Failed to create Gitea release."
+  echo "Response: ${RELEASE_RESPONSE}"
+  exit 1
+fi

 RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")

1 serve
Submodule serve deleted from 1ec5c08ae7
@@ -1,6 +1,6 @@
 [package]
 name = "storkit"
-version = "0.4.1"
+version = "0.6.0"
 edition = "2024"
 build = "build.rs"

@@ -18,7 +18,7 @@ notify = { workspace = true }
 poem = { workspace = true, features = ["websocket"] }
 poem-openapi = { workspace = true, features = ["swagger-ui"] }
 portable-pty = { workspace = true }
-reqwest = { workspace = true, features = ["json", "stream"] }
+reqwest = { workspace = true, features = ["json", "stream", "form"] }
 rust-embed = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }

@@ -102,13 +102,29 @@ fn run_command_with_timeout(
     args: &[&str],
     dir: &Path,
 ) -> Result<(bool, String), String> {
-    let mut child = Command::new(program)
-        .args(args)
+    // On Linux, execve can return ETXTBSY (26) briefly after a file is written
+    // before the kernel releases its "write open" state. Retry once after a
+    // short pause to handle this race condition.
+    let mut last_err = None;
+    let mut cmd = Command::new(&program);
+    cmd.args(args)
         .current_dir(dir)
         .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .map_err(|e| format!("Failed to spawn command: {e}"))?;
+        .stderr(std::process::Stdio::piped());
+    let mut child = loop {
+        match cmd.spawn() {
+            Ok(c) => break c,
+            Err(e) if e.raw_os_error() == Some(26) => {
+                // ETXTBSY — wait briefly and retry once
+                if last_err.is_some() {
+                    return Err(format!("Failed to spawn command: {e}"));
+                }
+                last_err = Some(e);
+                std::thread::sleep(std::time::Duration::from_millis(50));
+            }
+            Err(e) => return Err(format!("Failed to spawn command: {e}")),
+        }
+    };

     // Drain stdout/stderr in background threads so the pipe buffers never fill.
     let stdout_handle = child.stdout.take().map(|r| {
@@ -254,9 +270,8 @@ mod tests {
     fn run_project_tests_uses_script_test_when_present_and_passes() {
         use std::fs;
         use std::os::unix::fs::PermissionsExt;
-        use tempfile::tempdir;

-        let tmp = tempdir().unwrap();
+        let tmp = tempfile::tempdir().unwrap();
         let path = tmp.path();
         let script_dir = path.join("script");
         fs::create_dir_all(&script_dir).unwrap();
@@ -276,9 +291,8 @@ mod tests {
     fn run_project_tests_reports_failure_when_script_test_exits_nonzero() {
         use std::fs;
         use std::os::unix::fs::PermissionsExt;
-        use tempfile::tempdir;

-        let tmp = tempdir().unwrap();
+        let tmp = tempfile::tempdir().unwrap();
         let path = tmp.path();
         let script_dir = path.join("script");
         fs::create_dir_all(&script_dir).unwrap();
@@ -313,9 +327,8 @@ mod tests {
     fn coverage_gate_passes_when_script_exits_zero() {
         use std::fs;
         use std::os::unix::fs::PermissionsExt;
-        use tempfile::tempdir;

-        let tmp = tempdir().unwrap();
+        let tmp = tempfile::tempdir().unwrap();
         let path = tmp.path();
         let script_dir = path.join("script");
         fs::create_dir_all(&script_dir).unwrap();
@@ -342,9 +355,8 @@ mod tests {
     fn coverage_gate_fails_when_script_exits_nonzero() {
         use std::fs;
         use std::os::unix::fs::PermissionsExt;
-        use tempfile::tempdir;

-        let tmp = tempdir().unwrap();
+        let tmp = tempfile::tempdir().unwrap();
         let path = tmp.path();
         let script_dir = path.join("script");
         fs::create_dir_all(&script_dir).unwrap();

@@ -2,7 +2,8 @@ pub mod gates;
 pub mod lifecycle;
 pub mod merge;
 mod pool;
-mod pty;
+pub(crate) mod pty;
+pub mod runtime;
 pub mod token_usage;

 use crate::config::AgentConfig;

@@ -17,6 +17,7 @@ use super::{
|
|||||||
AgentEvent, AgentInfo, AgentStatus, CompletionReport, PipelineStage, agent_config_stage,
|
AgentEvent, AgentInfo, AgentStatus, CompletionReport, PipelineStage, agent_config_stage,
|
||||||
pipeline_stage,
|
pipeline_stage,
|
||||||
};
|
};
|
||||||
|
use super::runtime::{AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext};
|
||||||
|
|
||||||
/// Build the composite key used to track agents in the pool.
|
/// Build the composite key used to track agents in the pool.
|
||||||
fn composite_key(story_id: &str, agent_name: &str) -> String {
|
fn composite_key(story_id: &str, agent_name: &str) -> String {
|
||||||
@@ -143,6 +144,10 @@ impl AgentPool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn port(&self) -> u16 {
|
||||||
|
self.port
|
||||||
|
}
|
||||||
|
|
||||||
/// Create a pool with a dummy watcher channel for unit tests.
|
/// Create a pool with a dummy watcher channel for unit tests.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn new_test(port: u16) -> Self {
|
pub fn new_test(port: u16) -> Self {
|
||||||
@@ -248,6 +253,24 @@ impl AgentPool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read the preferred agent from the story's front matter before acquiring
|
||||||
|
// the lock. When no explicit agent_name is given, this lets start_agent
|
||||||
|
// honour `agent: coder-opus` written by the `assign` command — mirroring
|
||||||
|
// the auto_assign path (bug 379).
|
||||||
|
let front_matter_agent: Option<String> = if agent_name.is_none() {
|
||||||
|
find_active_story_stage(project_root, story_id).and_then(|stage_dir| {
|
||||||
|
let path = project_root
|
||||||
|
.join(".storkit")
|
||||||
|
.join("work")
|
||||||
|
.join(stage_dir)
|
||||||
|
.join(format!("{story_id}.md"));
|
||||||
|
let contents = std::fs::read_to_string(path).ok()?;
|
||||||
|
crate::io::story_metadata::parse_front_matter(&contents).ok()?.agent
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// Atomically resolve agent name, check availability, and register as
|
// Atomically resolve agent name, check availability, and register as
|
||||||
// Pending. When `agent_name` is `None` the first idle coder is
|
// Pending. When `agent_name` is `None` the first idle coder is
|
||||||
// selected inside the lock so no TOCTOU race can occur between the
|
// selected inside the lock so no TOCTOU race can occur between the
|
||||||
@@ -263,7 +286,32 @@ impl AgentPool {
|
|||||||
|
|
||||||
resolved_name = match agent_name {
|
resolved_name = match agent_name {
|
||||||
Some(name) => name.to_string(),
|
Some(name) => name.to_string(),
|
||||||
None => auto_assign::find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder)
|
None => {
|
||||||
|
// Honour the `agent:` field in the story's front matter so that
|
||||||
|
// `start 368` after `assign 368 opus` picks the right agent
|
||||||
|
// (bug 379). Mirrors the auto_assign selection logic.
|
||||||
|
if let Some(ref pref) = front_matter_agent {
|
||||||
|
let stage_matches = config
|
||||||
|
.find_agent(pref)
|
||||||
|
.map(|cfg| agent_config_stage(cfg) == PipelineStage::Coder)
|
||||||
|
.unwrap_or(false);
|
||||||
|
if stage_matches {
|
||||||
|
if auto_assign::is_agent_free(&agents, pref) {
|
||||||
|
pref.clone()
|
||||||
|
} else {
|
||||||
|
return Err(format!(
|
||||||
|
"Preferred agent '{pref}' from story front matter is busy; \
|
||||||
|
story '{story_id}' has been queued in work/2_current/ and will \
|
||||||
|
be auto-assigned when it becomes available"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Stage mismatch — fall back to any free coder.
|
||||||
|
auto_assign::find_free_agent_for_stage(
|
||||||
|
&config,
|
||||||
|
&agents,
|
||||||
|
&PipelineStage::Coder,
|
||||||
|
)
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
if config
|
if config
|
||||||
@@ -280,7 +328,33 @@ impl AgentPool {
|
|||||||
"No coder agent configured. Specify an agent_name explicitly."
|
"No coder agent configured. Specify an agent_name explicitly."
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
})?,
|
})?
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto_assign::find_free_agent_for_stage(
|
||||||
|
&config,
|
||||||
|
&agents,
|
||||||
|
&PipelineStage::Coder,
|
||||||
|
)
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.ok_or_else(|| {
|
||||||
|
if config
|
||||||
|
.agent
|
||||||
|
.iter()
|
||||||
|
.any(|a| agent_config_stage(a) == PipelineStage::Coder)
|
||||||
|
{
|
||||||
|
format!(
|
||||||
|
"All coder agents are busy; story '{story_id}' has been \
|
||||||
|
queued in work/2_current/ and will be auto-assigned when \
|
||||||
|
one becomes available"
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
"No coder agent configured. Specify an agent_name explicitly."
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
})?
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
key = composite_key(story_id, &resolved_name);
|
key = composite_key(story_id, &resolved_name);
|
||||||
@@ -513,25 +587,71 @@ impl AgentPool {
         });
         Self::notify_agent_state_changed(&watcher_tx_clone);

-        // Step 4: launch the agent process.
-        match super::pty::run_agent_pty_streaming(
-            &sid,
-            &aname,
-            &command,
-            &args,
-            &prompt,
-            &wt_path_str,
-            &tx_clone,
-            &log_clone,
-            log_writer_clone,
-            inactivity_timeout_secs,
-            child_killers_clone,
-        )
-        .await
-        {
-            Ok(pty_result) => {
+        // Step 4: launch the agent process via the configured runtime.
+        let runtime_name = config_clone
+            .find_agent(&aname)
+            .and_then(|a| a.runtime.as_deref())
+            .unwrap_or("claude-code");
+
+        let run_result = match runtime_name {
+            "claude-code" => {
+                let runtime = ClaudeCodeRuntime::new(child_killers_clone.clone(), watcher_tx_clone.clone());
+                let ctx = RuntimeContext {
+                    story_id: sid.clone(),
+                    agent_name: aname.clone(),
+                    command,
+                    args,
+                    prompt,
+                    cwd: wt_path_str,
+                    inactivity_timeout_secs,
+                    mcp_port: port_for_task,
+                };
+                runtime
+                    .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
+                    .await
+            }
+            "gemini" => {
+                let runtime = GeminiRuntime::new();
+                let ctx = RuntimeContext {
+                    story_id: sid.clone(),
+                    agent_name: aname.clone(),
+                    command,
+                    args,
+                    prompt,
+                    cwd: wt_path_str,
+                    inactivity_timeout_secs,
+                    mcp_port: port_for_task,
+                };
+                runtime
+                    .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
+                    .await
+            }
+            "openai" => {
+                let runtime = OpenAiRuntime::new();
+                let ctx = RuntimeContext {
+                    story_id: sid.clone(),
+                    agent_name: aname.clone(),
+                    command,
+                    args,
+                    prompt,
+                    cwd: wt_path_str,
+                    inactivity_timeout_secs,
+                    mcp_port: port_for_task,
+                };
+                runtime
+                    .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
+                    .await
+            }
+            other => Err(format!(
+                "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
+                 Supported: 'claude-code', 'gemini', 'openai'"
+            )),
+        };
+
+        match run_result {
+            Ok(result) => {
                 // Persist token usage if the agent reported it.
-                if let Some(ref usage) = pty_result.token_usage
+                if let Some(ref usage) = result.token_usage
                     && let Ok(agents) = agents_ref.lock()
                     && let Some(agent) = agents.get(&key_clone)
                     && let Some(ref pr) = agent.project_root
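For reference, a minimal sketch of how an agent entry might opt into one of the new runtimes. The `runtime` key and the `"claude-code"` default come from the dispatch above; the helper function, its assumed `find_agent(&str)` signature, and the concrete agent names are illustrative only.

    use crate::config::ProjectConfig;

    // Hypothetical helper mirroring the dispatch above; not part of the diff.
    fn resolve_runtime_name(config: &ProjectConfig, agent_name: &str) -> String {
        config
            .find_agent(agent_name)
            .and_then(|a| a.runtime.as_deref())
            .unwrap_or("claude-code") // default when no `runtime` key is set
            .to_string()
    }

    fn example() {
        let config = ProjectConfig::parse(
            r#"
    [[agent]]
    name = "coder-gemini"
    stage = "coder"
    runtime = "gemini"

    [[agent]]
    name = "coder-1"
    stage = "coder"
    "#,
        )
        .unwrap();
        assert_eq!(resolve_runtime_name(&config, "coder-gemini"), "gemini");
        assert_eq!(resolve_runtime_name(&config, "coder-1"), "claude-code");
    }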
@@ -557,7 +677,7 @@ impl AgentPool {
                         port_for_task,
                         &sid,
                         &aname,
-                        pty_result.session_id,
+                        result.session_id,
                         watcher_tx_clone.clone(),
                     )
                     .await;
@@ -1054,6 +1174,7 @@ mod tests {
     use crate::agents::{AgentEvent, AgentStatus, PipelineStage};
     use crate::config::ProjectConfig;
     use portable_pty::{CommandBuilder, PtySize, native_pty_system};
+    use std::process::Command;

     fn make_config(toml_str: &str) -> ProjectConfig {
         ProjectConfig::parse(toml_str).unwrap()
@@ -1140,13 +1261,10 @@ mod tests {

     /// Returns true if a process with the given PID is currently running.
     fn process_is_running(pid: u32) -> bool {
-        std::process::Command::new("ps")
-            .arg("-p")
-            .arg(pid.to_string())
-            .stdout(std::process::Stdio::null())
-            .stderr(std::process::Stdio::null())
-            .status()
-            .map(|s| s.success())
+        Command::new("ps")
+            .args(["-p", &pid.to_string()])
+            .output()
+            .map(|o| o.status.success())
             .unwrap_or(false)
     }

@@ -2147,6 +2265,108 @@ stage = "coder"
         assert_eq!(agents.len(), 1, "existing agents should not be affected");
     }

+    // ── front matter agent preference (bug 379) ──────────────────────────────
+
+    #[tokio::test]
+    async fn start_agent_honours_front_matter_agent_when_idle() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        let backlog = sk.join("work/1_backlog");
+        std::fs::create_dir_all(&backlog).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            r#"
+[[agent]]
+name = "coder-sonnet"
+stage = "coder"
+
+[[agent]]
+name = "coder-opus"
+stage = "coder"
+"#,
+        )
+        .unwrap();
+        // Story file with agent preference in front matter.
+        std::fs::write(
+            backlog.join("368_story_test.md"),
+            "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n",
+        )
+        .unwrap();
+
+        let pool = AgentPool::new_test(3010);
+        // coder-sonnet is busy so without front matter the auto-selection
+        // would skip coder-opus and try something else.
+        pool.inject_test_agent("other-story", "coder-sonnet", AgentStatus::Running);
+
+        let result = pool
+            .start_agent(tmp.path(), "368_story_test", None, None)
+            .await;
+        match result {
+            Ok(info) => {
+                assert_eq!(
+                    info.agent_name, "coder-opus",
+                    "should pick the front-matter preferred agent"
+                );
+            }
+            Err(err) => {
+                // Allowed to fail for infrastructure reasons (no git repo),
+                // but NOT due to agent selection ignoring the preference.
+                assert!(
+                    !err.contains("All coder agents are busy"),
+                    "should not report busy when coder-opus is idle: {err}"
+                );
+                assert!(
+                    !err.contains("coder-sonnet"),
+                    "should not have picked coder-sonnet: {err}"
+                );
+            }
+        }
+    }
+
+    #[tokio::test]
+    async fn start_agent_returns_error_when_front_matter_agent_busy() {
+        let tmp = tempfile::tempdir().unwrap();
+        let sk = tmp.path().join(".storkit");
+        let backlog = sk.join("work/1_backlog");
+        std::fs::create_dir_all(&backlog).unwrap();
+        std::fs::write(
+            sk.join("project.toml"),
+            r#"
+[[agent]]
+name = "coder-sonnet"
+stage = "coder"
+
+[[agent]]
+name = "coder-opus"
+stage = "coder"
+"#,
+        )
+        .unwrap();
+        std::fs::write(
+            backlog.join("368_story_test.md"),
+            "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n",
+        )
+        .unwrap();
+
+        let pool = AgentPool::new_test(3011);
+        // Preferred agent is busy — should NOT fall back to coder-sonnet.
+        pool.inject_test_agent("other-story", "coder-opus", AgentStatus::Running);
+
+        let result = pool
+            .start_agent(tmp.path(), "368_story_test", None, None)
+            .await;
+        assert!(result.is_err(), "expected error when preferred agent is busy");
+        let err = result.unwrap_err();
+        assert!(
+            err.contains("coder-opus"),
+            "error should mention the preferred agent: {err}"
+        );
+        assert!(
+            err.contains("busy") || err.contains("queued"),
+            "error should say agent is busy or story is queued: {err}"
+        );
+    }
+
     // ── archive + cleanup integration test ───────────────────────────────────

     #[tokio::test]
@@ -7,11 +7,12 @@ use tokio::sync::broadcast;

 use super::{AgentEvent, TokenUsage};
 use crate::agent_log::AgentLogWriter;
+use crate::io::watcher::WatcherEvent;
 use crate::slog;
 use crate::slog_warn;

 /// Result from a PTY agent session, containing the session ID and token usage.
-pub(super) struct PtyResult {
+pub(in crate::agents) struct PtyResult {
     pub session_id: Option<String>,
     pub token_usage: Option<TokenUsage>,
 }
@@ -35,7 +36,7 @@ impl Drop for ChildKillerGuard {

 /// Spawn claude agent in a PTY and stream events through the broadcast channel.
 #[allow(clippy::too_many_arguments)]
-pub(super) async fn run_agent_pty_streaming(
+pub(in crate::agents) async fn run_agent_pty_streaming(
     story_id: &str,
     agent_name: &str,
     command: &str,
@@ -47,6 +48,7 @@ pub(super) async fn run_agent_pty_streaming(
     log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
     inactivity_timeout_secs: u64,
     child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: broadcast::Sender<WatcherEvent>,
 ) -> Result<PtyResult, String> {
     let sid = story_id.to_string();
     let aname = agent_name.to_string();
@@ -70,6 +72,7 @@ pub(super) async fn run_agent_pty_streaming(
             log_writer.as_deref(),
             inactivity_timeout_secs,
             &child_killers,
+            &watcher_tx,
         )
     })
     .await
@@ -162,6 +165,7 @@ fn run_agent_pty_blocking(
     log_writer: Option<&Mutex<AgentLogWriter>>,
     inactivity_timeout_secs: u64,
     child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: &broadcast::Sender<WatcherEvent>,
 ) -> Result<PtyResult, String> {
     let pty_system = native_pty_system();

@@ -342,6 +346,15 @@ fn run_agent_pty_blocking(
                     // because thinking and text already arrived via stream_event.
                     // The raw JSON is still forwarded as AgentJson below.
                     "assistant" | "user" => {}
+                    "rate_limit_event" => {
+                        slog!(
+                            "[agent:{story_id}:{agent_name}] API rate limit warning received"
+                        );
+                        let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
+                            story_id: story_id.to_string(),
+                            agent_name: agent_name.to_string(),
+                        });
+                    }
                     "result" => {
                         // Extract token usage from the result event.
                         if let Some(usage) = TokenUsage::from_result_event(&json) {
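As a rough sketch of the consumer side, which is not part of this diff: anything holding a broadcast receiver for WatcherEvent can now react to the new variant. The handler body below is purely illustrative; only the RateLimitWarning shape comes from the hunk above.

    use tokio::sync::broadcast;
    use crate::io::watcher::WatcherEvent;

    // Illustrative subscriber; logging is a placeholder for real handling.
    async fn watch_rate_limits(mut rx: broadcast::Receiver<WatcherEvent>) {
        while let Ok(event) = rx.recv().await {
            if let WatcherEvent::RateLimitWarning { story_id, agent_name } = event {
                eprintln!("rate limit warning from {agent_name} while on {story_id}");
            }
        }
    }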
@@ -390,6 +403,70 @@ fn run_agent_pty_blocking(
 mod tests {
     use super::*;
     use crate::agents::AgentEvent;
+    use crate::io::watcher::WatcherEvent;
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    // ── AC1: pty detects rate_limit_event and emits RateLimitWarning ─────────
+
+    /// Verify that when a `rate_limit_event` JSON line appears in PTY output,
+    /// `run_agent_pty_streaming` sends a `WatcherEvent::RateLimitWarning` with
+    /// the correct story_id and agent_name.
+    ///
+    /// The command invoked is: `sh -p -- <script>` where `--` terminates
+    /// option parsing so the script path is treated as the operand.
+    #[tokio::test]
+    async fn rate_limit_event_json_sends_watcher_warning() {
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let script = tmp.path().join("emit_rate_limit.sh");
+        std::fs::write(
+            &script,
+            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"allowed_warning\"}}'\n",
+        )
+        .unwrap();
+        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
+
+        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
+        let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
+        let event_log = Arc::new(Mutex::new(Vec::new()));
+        let child_killers = Arc::new(Mutex::new(HashMap::new()));
+
+        // sh -p "--" <script>: -p = privileged mode, "--" = end options,
+        // then the script path is the file operand.
+        let result = run_agent_pty_streaming(
+            "365_story_test",
+            "coder-1",
+            "sh",
+            &[script.to_string_lossy().to_string()],
+            "--",
+            "/tmp",
+            &tx,
+            &event_log,
+            None,
+            0,
+            child_killers,
+            watcher_tx,
+        )
+        .await;
+
+        assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());
+
+        let evt = watcher_rx
+            .try_recv()
+            .expect("Expected a RateLimitWarning to be sent on watcher_tx");
+        match evt {
+            WatcherEvent::RateLimitWarning {
+                story_id,
+                agent_name,
+            } => {
+                assert_eq!(story_id, "365_story_test");
+                assert_eq!(agent_name, "coder-1");
+            }
+            other => panic!("Expected RateLimitWarning, got: {other:?}"),
+        }
+    }
+
     #[test]
     fn test_emit_event_writes_to_log_writer() {
server/src/agents/runtime/claude_code.rs (new file, 73 lines)
@@ -0,0 +1,73 @@
|
use std::collections::HashMap;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use portable_pty::ChildKiller;
|
||||||
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
|
use crate::agent_log::AgentLogWriter;
|
||||||
|
use crate::io::watcher::WatcherEvent;
|
||||||
|
|
||||||
|
use super::{AgentEvent, AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
|
||||||
|
|
||||||
|
/// Agent runtime that spawns the `claude` CLI in a PTY and streams JSON events.
|
||||||
|
///
|
||||||
|
/// This is the default runtime (`runtime = "claude-code"` in project.toml).
|
||||||
|
/// It wraps the existing PTY-based execution logic, preserving all streaming,
|
||||||
|
/// token tracking, and inactivity timeout behaviour.
|
||||||
|
pub struct ClaudeCodeRuntime {
|
||||||
|
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||||
|
watcher_tx: broadcast::Sender<WatcherEvent>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClaudeCodeRuntime {
|
||||||
|
pub fn new(
|
||||||
|
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||||
|
watcher_tx: broadcast::Sender<WatcherEvent>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
child_killers,
|
||||||
|
watcher_tx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AgentRuntime for ClaudeCodeRuntime {
|
||||||
|
async fn start(
|
||||||
|
&self,
|
||||||
|
ctx: RuntimeContext,
|
||||||
|
tx: broadcast::Sender<AgentEvent>,
|
||||||
|
event_log: Arc<Mutex<Vec<AgentEvent>>>,
|
||||||
|
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||||
|
) -> Result<RuntimeResult, String> {
|
||||||
|
let pty_result = super::super::pty::run_agent_pty_streaming(
|
||||||
|
&ctx.story_id,
|
||||||
|
&ctx.agent_name,
|
||||||
|
&ctx.command,
|
||||||
|
&ctx.args,
|
||||||
|
&ctx.prompt,
|
||||||
|
&ctx.cwd,
|
||||||
|
&tx,
|
||||||
|
&event_log,
|
||||||
|
log_writer,
|
||||||
|
ctx.inactivity_timeout_secs,
|
||||||
|
Arc::clone(&self.child_killers),
|
||||||
|
self.watcher_tx.clone(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(RuntimeResult {
|
||||||
|
session_id: pty_result.session_id,
|
||||||
|
token_usage: pty_result.token_usage,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn stop(&self) {
|
||||||
|
// Stopping is handled externally by the pool via kill_child_for_key().
|
||||||
|
// The ChildKillerGuard in pty.rs deregisters automatically on process exit.
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> RuntimeStatus {
|
||||||
|
// Lifecycle status is tracked by the pool; the runtime itself is stateless.
|
||||||
|
RuntimeStatus::Idle
|
||||||
|
}
|
||||||
|
}
|
||||||
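A usage sketch for the default backend, with assumed channel and handle wiring; the concrete values are placeholders, and only the constructor and start() signature come from the file above.

    // Sketch only: assumes this file's imports plus AgentRuntime in scope.
    async fn run_one_story(
        child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
        watcher_tx: broadcast::Sender<WatcherEvent>,
        tx: broadcast::Sender<AgentEvent>,
        event_log: Arc<Mutex<Vec<AgentEvent>>>,
    ) -> Result<(), String> {
        let runtime = ClaudeCodeRuntime::new(child_killers, watcher_tx);
        let ctx = RuntimeContext {
            story_id: "368_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "claude".to_string(),
            args: vec!["--model".to_string(), "sonnet".to_string()],
            prompt: "Implement the story".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        // Drives the PTY session to completion and returns session id + usage.
        let result = runtime.start(ctx, tx, event_log, None).await?;
        println!("session id: {:?}", result.session_id);
        Ok(())
    }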
server/src/agents/runtime/gemini.rs (new file, 809 lines)
@@ -0,0 +1,809 @@
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
|
use crate::agent_log::AgentLogWriter;
|
||||||
|
use crate::slog;
|
||||||
|
|
||||||
|
use super::super::{AgentEvent, TokenUsage};
|
||||||
|
use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
|
||||||
|
|
||||||
|
// ── Public runtime struct ────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Agent runtime that drives a Gemini model through the Google AI
|
||||||
|
/// `generateContent` REST API.
|
||||||
|
///
|
||||||
|
/// The runtime:
|
||||||
|
/// 1. Fetches MCP tool definitions from storkit's MCP server.
|
||||||
|
/// 2. Converts them to Gemini function-calling format.
|
||||||
|
/// 3. Sends the agent prompt + tools to the Gemini API.
|
||||||
|
/// 4. Executes any requested function calls via MCP `tools/call`.
|
||||||
|
/// 5. Loops until the model produces a text-only response or an error.
|
||||||
|
/// 6. Tracks token usage from the API response metadata.
|
||||||
|
pub struct GeminiRuntime {
|
||||||
|
/// Whether a stop has been requested.
|
||||||
|
cancelled: Arc<AtomicBool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GeminiRuntime {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
cancelled: Arc::new(AtomicBool::new(false)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
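To make the conversation loop below easier to follow, here is the shape of one tool-call round trip as this runtime sees it. The tool name and values are invented; the field names (functionCall, args, functionResponse, response) are the ones the loop reads and writes.

    use serde_json::{json, Value};

    fn example_round_trip() -> (Value, Value) {
        // One model turn requesting a tool call...
        let model_turn = json!({
            "role": "model",
            "parts": [{
                "functionCall": { "name": "get_story", "args": { "story_id": "368_story_test" } }
            }]
        });
        // ...and the reply appended after executing the call via MCP.
        let tool_reply = json!({
            "role": "user",
            "parts": [{
                "functionResponse": { "name": "get_story", "response": { "result": "...story markdown..." } }
            }]
        });
        (model_turn, tool_reply)
    }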
|
|
||||||
|
impl AgentRuntime for GeminiRuntime {
|
||||||
|
async fn start(
|
||||||
|
&self,
|
||||||
|
ctx: RuntimeContext,
|
||||||
|
tx: broadcast::Sender<AgentEvent>,
|
||||||
|
event_log: Arc<Mutex<Vec<AgentEvent>>>,
|
||||||
|
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||||
|
) -> Result<RuntimeResult, String> {
|
||||||
|
let api_key = std::env::var("GOOGLE_AI_API_KEY").map_err(|_| {
|
||||||
|
"GOOGLE_AI_API_KEY environment variable is not set. \
|
||||||
|
Set it to your Google AI API key to use the Gemini runtime."
|
||||||
|
.to_string()
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let model = if ctx.command.starts_with("gemini") {
|
||||||
|
// The pool puts the model into `command` for non-CLI runtimes,
|
||||||
|
// but also check args for a --model flag.
|
||||||
|
ctx.command.clone()
|
||||||
|
} else {
|
||||||
|
// Fall back to args: look for --model <value>
|
||||||
|
ctx.args
|
||||||
|
.iter()
|
||||||
|
.position(|a| a == "--model")
|
||||||
|
.and_then(|i| ctx.args.get(i + 1))
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| "gemini-2.5-pro".to_string())
|
||||||
|
};
|
||||||
|
|
||||||
|
let mcp_port = ctx.mcp_port;
|
||||||
|
let mcp_base = format!("http://localhost:{mcp_port}/mcp");
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
let cancelled = Arc::clone(&self.cancelled);
|
||||||
|
|
||||||
|
// Step 1: Fetch MCP tool definitions and convert to Gemini format.
|
||||||
|
let gemini_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;
|
||||||
|
|
||||||
|
// Step 2: Build the initial conversation contents.
|
||||||
|
let system_instruction = build_system_instruction(&ctx);
|
||||||
|
let mut contents: Vec<Value> = vec![json!({
|
||||||
|
"role": "user",
|
||||||
|
"parts": [{ "text": ctx.prompt }]
|
||||||
|
})];
|
||||||
|
|
||||||
|
let mut total_usage = TokenUsage {
|
||||||
|
input_tokens: 0,
|
||||||
|
output_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
total_cost_usd: 0.0,
|
||||||
|
};
|
||||||
|
|
||||||
|
let emit = |event: AgentEvent| {
|
||||||
|
super::super::pty::emit_event(
|
||||||
|
event,
|
||||||
|
&tx,
|
||||||
|
&event_log,
|
||||||
|
log_writer.as_ref().map(|w| w.as_ref()),
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
emit(AgentEvent::Status {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
status: "running".to_string(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Step 3: Conversation loop.
|
||||||
|
let mut turn = 0u32;
|
||||||
|
let max_turns = 200; // Safety limit
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if cancelled.load(Ordering::Relaxed) {
|
||||||
|
emit(AgentEvent::Error {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
message: "Agent was stopped by user".to_string(),
|
||||||
|
});
|
||||||
|
return Ok(RuntimeResult {
|
||||||
|
session_id: None,
|
||||||
|
token_usage: Some(total_usage),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
turn += 1;
|
||||||
|
if turn > max_turns {
|
||||||
|
emit(AgentEvent::Error {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
message: format!("Exceeded maximum turns ({max_turns})"),
|
||||||
|
});
|
||||||
|
return Ok(RuntimeResult {
|
||||||
|
session_id: None,
|
||||||
|
token_usage: Some(total_usage),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
slog!("[gemini] Turn {turn} for {}:{}", ctx.story_id, ctx.agent_name);
|
||||||
|
|
||||||
|
let request_body = build_generate_content_request(
|
||||||
|
&system_instruction,
|
||||||
|
&contents,
|
||||||
|
&gemini_tools,
|
||||||
|
);
|
||||||
|
|
||||||
|
let url = format!(
|
||||||
|
"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.post(&url)
|
||||||
|
.json(&request_body)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Gemini API request failed: {e}"))?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
let body: Value = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to parse Gemini API response: {e}"))?;
|
||||||
|
|
||||||
|
if !status.is_success() {
|
||||||
|
let error_msg = body["error"]["message"]
|
||||||
|
.as_str()
|
||||||
|
.unwrap_or("Unknown API error");
|
||||||
|
let err = format!("Gemini API error ({status}): {error_msg}");
|
||||||
|
emit(AgentEvent::Error {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
message: err.clone(),
|
||||||
|
});
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate token usage.
|
||||||
|
if let Some(usage) = parse_usage_metadata(&body) {
|
||||||
|
total_usage.input_tokens += usage.input_tokens;
|
||||||
|
total_usage.output_tokens += usage.output_tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the candidate response.
|
||||||
|
let candidate = body["candidates"]
|
||||||
|
.as_array()
|
||||||
|
.and_then(|c| c.first())
|
||||||
|
.ok_or_else(|| "No candidates in Gemini response".to_string())?;
|
||||||
|
|
||||||
|
let parts = candidate["content"]["parts"]
|
||||||
|
.as_array()
|
||||||
|
.ok_or_else(|| "No parts in Gemini response candidate".to_string())?;
|
||||||
|
|
||||||
|
// Check finish reason.
|
||||||
|
let finish_reason = candidate["finishReason"].as_str().unwrap_or("");
|
||||||
|
|
||||||
|
// Separate text parts and function call parts.
|
||||||
|
let mut text_parts: Vec<String> = Vec::new();
|
||||||
|
let mut function_calls: Vec<GeminiFunctionCall> = Vec::new();
|
||||||
|
|
||||||
|
for part in parts {
|
||||||
|
if let Some(text) = part["text"].as_str() {
|
||||||
|
text_parts.push(text.to_string());
|
||||||
|
}
|
||||||
|
if let Some(fc) = part.get("functionCall")
|
||||||
|
&& let (Some(name), Some(args)) =
|
||||||
|
(fc["name"].as_str(), fc.get("args"))
|
||||||
|
{
|
||||||
|
function_calls.push(GeminiFunctionCall {
|
||||||
|
name: name.to_string(),
|
||||||
|
args: args.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit any text output.
|
||||||
|
for text in &text_parts {
|
||||||
|
if !text.is_empty() {
|
||||||
|
emit(AgentEvent::Output {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
text: text.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no function calls, the model is done.
|
||||||
|
if function_calls.is_empty() {
|
||||||
|
emit(AgentEvent::Done {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
session_id: None,
|
||||||
|
});
|
||||||
|
return Ok(RuntimeResult {
|
||||||
|
session_id: None,
|
||||||
|
token_usage: Some(total_usage),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the model's response to the conversation.
|
||||||
|
let model_parts: Vec<Value> = parts.to_vec();
|
||||||
|
contents.push(json!({
|
||||||
|
"role": "model",
|
||||||
|
"parts": model_parts
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Execute function calls via MCP and build response parts.
|
||||||
|
let mut response_parts: Vec<Value> = Vec::new();
|
||||||
|
|
||||||
|
for fc in &function_calls {
|
||||||
|
if cancelled.load(Ordering::Relaxed) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
slog!(
|
||||||
|
"[gemini] Calling MCP tool '{}' for {}:{}",
|
||||||
|
fc.name,
|
||||||
|
ctx.story_id,
|
||||||
|
ctx.agent_name
|
||||||
|
);
|
||||||
|
|
||||||
|
emit(AgentEvent::Output {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
text: format!("\n[Tool call: {}]\n", fc.name),
|
||||||
|
});
|
||||||
|
|
||||||
|
let tool_result =
|
||||||
|
call_mcp_tool(&client, &mcp_base, &fc.name, &fc.args).await;
|
||||||
|
|
||||||
|
let response_value = match &tool_result {
|
||||||
|
Ok(result) => {
|
||||||
|
emit(AgentEvent::Output {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
text: format!(
|
||||||
|
"[Tool result: {} chars]\n",
|
||||||
|
result.len()
|
||||||
|
),
|
||||||
|
});
|
||||||
|
json!({ "result": result })
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
emit(AgentEvent::Output {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
text: format!("[Tool error: {e}]\n"),
|
||||||
|
});
|
||||||
|
json!({ "error": e })
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
response_parts.push(json!({
|
||||||
|
"functionResponse": {
|
||||||
|
"name": fc.name,
|
||||||
|
"response": response_value
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add function responses to the conversation.
|
||||||
|
contents.push(json!({
|
||||||
|
"role": "user",
|
||||||
|
"parts": response_parts
|
||||||
|
}));
|
||||||
|
|
||||||
|
// If the model indicated it's done despite having function calls,
|
||||||
|
// respect the finish reason.
|
||||||
|
if finish_reason == "STOP" && function_calls.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
emit(AgentEvent::Done {
|
||||||
|
story_id: ctx.story_id.clone(),
|
||||||
|
agent_name: ctx.agent_name.clone(),
|
||||||
|
session_id: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(RuntimeResult {
|
||||||
|
session_id: None,
|
||||||
|
token_usage: Some(total_usage),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn stop(&self) {
|
||||||
|
self.cancelled.store(true, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> RuntimeStatus {
|
||||||
|
if self.cancelled.load(Ordering::Relaxed) {
|
||||||
|
RuntimeStatus::Failed
|
||||||
|
} else {
|
||||||
|
RuntimeStatus::Idle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Internal types ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
struct GeminiFunctionCall {
|
||||||
|
name: String,
|
||||||
|
args: Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Gemini API types (for serde) ─────────────────────────────────────
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
struct GeminiFunctionDeclaration {
|
||||||
|
name: String,
|
||||||
|
description: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
parameters: Option<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Helper functions ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Build the system instruction content from the RuntimeContext.
|
||||||
|
fn build_system_instruction(ctx: &RuntimeContext) -> Value {
|
||||||
|
// Use system_prompt from args if provided via --append-system-prompt,
|
||||||
|
// otherwise use a sensible default.
|
||||||
|
let system_text = ctx
|
||||||
|
.args
|
||||||
|
.iter()
|
||||||
|
.position(|a| a == "--append-system-prompt")
|
||||||
|
.and_then(|i| ctx.args.get(i + 1))
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
format!(
|
||||||
|
"You are an AI coding agent working on story {}. \
|
||||||
|
You have access to tools via function calling. \
|
||||||
|
Use them to complete the task. \
|
||||||
|
Work in the directory: {}",
|
||||||
|
ctx.story_id, ctx.cwd
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"parts": [{ "text": system_text }]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the full `generateContent` request body.
|
||||||
|
fn build_generate_content_request(
|
||||||
|
system_instruction: &Value,
|
||||||
|
contents: &[Value],
|
||||||
|
gemini_tools: &[GeminiFunctionDeclaration],
|
||||||
|
) -> Value {
|
||||||
|
let mut body = json!({
|
||||||
|
"system_instruction": system_instruction,
|
||||||
|
"contents": contents,
|
||||||
|
"generationConfig": {
|
||||||
|
"temperature": 0.2,
|
||||||
|
"maxOutputTokens": 65536,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if !gemini_tools.is_empty() {
|
||||||
|
body["tools"] = json!([{
|
||||||
|
"functionDeclarations": gemini_tools
|
||||||
|
}]);
|
||||||
|
}
|
||||||
|
|
||||||
|
body
|
||||||
|
}
|
||||||
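For orientation, the body this helper produces for a single user turn and one registered tool looks roughly like the following; the tool name and prompt text are invented.

    use serde_json::{json, Value};

    fn example_generate_content_body() -> Value {
        json!({
            "system_instruction": { "parts": [{ "text": "You are an AI coding agent working on story 368..." }] },
            "contents": [
                { "role": "user", "parts": [{ "text": "Implement story 368" }] }
            ],
            "generationConfig": { "temperature": 0.2, "maxOutputTokens": 65536 },
            "tools": [{
                "functionDeclarations": [{
                    "name": "get_story",
                    "description": "Fetch a story by id",
                    "parameters": {
                        "type": "object",
                        "properties": { "story_id": { "type": "string" } },
                        "required": ["story_id"]
                    }
                }]
            }]
        })
    }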
|
|
||||||
|
/// Fetch MCP tool definitions from storkit's MCP server and convert
|
||||||
|
/// them to Gemini function declaration format.
|
||||||
|
async fn fetch_and_convert_mcp_tools(
|
||||||
|
client: &Client,
|
||||||
|
mcp_base: &str,
|
||||||
|
) -> Result<Vec<GeminiFunctionDeclaration>, String> {
|
||||||
|
let request = json!({
|
||||||
|
"jsonrpc": "2.0",
|
||||||
|
"id": 1,
|
||||||
|
"method": "tools/list",
|
||||||
|
"params": {}
|
||||||
|
});
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.post(mcp_base)
|
||||||
|
.json(&request)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to fetch MCP tools: {e}"))?;
|
||||||
|
|
||||||
|
let body: Value = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;
|
||||||
|
|
||||||
|
let tools = body["result"]["tools"]
|
||||||
|
.as_array()
|
||||||
|
.ok_or_else(|| "No tools array in MCP response".to_string())?;
|
||||||
|
|
||||||
|
let mut declarations = Vec::new();
|
||||||
|
|
||||||
|
for tool in tools {
|
||||||
|
let name = tool["name"].as_str().unwrap_or("").to_string();
|
||||||
|
let description = tool["description"].as_str().unwrap_or("").to_string();
|
||||||
|
|
||||||
|
if name.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert MCP inputSchema (JSON Schema) to Gemini parameters
|
||||||
|
// (OpenAPI-subset schema). They are structurally compatible for
|
||||||
|
// simple object schemas.
|
||||||
|
let parameters = convert_mcp_schema_to_gemini(tool.get("inputSchema"));
|
||||||
|
|
||||||
|
declarations.push(GeminiFunctionDeclaration {
|
||||||
|
name,
|
||||||
|
description,
|
||||||
|
parameters,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
slog!("[gemini] Loaded {} MCP tools as function declarations", declarations.len());
|
||||||
|
Ok(declarations)
|
||||||
|
}
|
||||||
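The MCP exchange this relies on is plain JSON-RPC over HTTP. Roughly, with an invented tool and abbreviated fields:

    use serde_json::{json, Value};

    // Request posted to http://localhost:<mcp_port>/mcp ...
    fn tools_list_request() -> Value {
        json!({ "jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {} })
    }

    // ...and the response shape the function above walks
    // (result.tools[*].name / description / inputSchema).
    fn tools_list_response_shape() -> Value {
        json!({
            "jsonrpc": "2.0",
            "id": 1,
            "result": {
                "tools": [{
                    "name": "get_story",
                    "description": "Fetch a story by id",
                    "inputSchema": {
                        "type": "object",
                        "properties": { "story_id": { "type": "string" } },
                        "required": ["story_id"]
                    }
                }]
            }
        })
    }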
|
|
||||||
|
/// Convert an MCP inputSchema (JSON Schema) to a Gemini-compatible
|
||||||
|
/// OpenAPI-subset parameter schema.
|
||||||
|
///
|
||||||
|
/// Gemini function calling expects parameters in OpenAPI format, which
|
||||||
|
/// is structurally similar to JSON Schema for simple object types.
|
||||||
|
/// We strip unsupported fields and ensure the type is "object".
|
||||||
|
fn convert_mcp_schema_to_gemini(schema: Option<&Value>) -> Option<Value> {
|
||||||
|
let schema = schema?;
|
||||||
|
|
||||||
|
// If the schema has no properties (empty tool), return None.
|
||||||
|
let properties = schema.get("properties")?;
|
||||||
|
if properties.as_object().is_some_and(|p| p.is_empty()) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut result = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": clean_schema_properties(properties),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Preserve required fields if present.
|
||||||
|
if let Some(required) = schema.get("required") {
|
||||||
|
result["required"] = required.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively clean schema properties to be Gemini-compatible.
|
||||||
|
/// Removes unsupported JSON Schema keywords.
|
||||||
|
fn clean_schema_properties(properties: &Value) -> Value {
|
||||||
|
let Some(obj) = properties.as_object() else {
|
||||||
|
return properties.clone();
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut cleaned = serde_json::Map::new();
|
||||||
|
for (key, value) in obj {
|
||||||
|
let mut prop = value.clone();
|
||||||
|
// Remove JSON Schema keywords not supported by Gemini
|
||||||
|
if let Some(p) = prop.as_object_mut() {
|
||||||
|
p.remove("$schema");
|
||||||
|
p.remove("additionalProperties");
|
||||||
|
|
||||||
|
// Recursively clean nested object properties
|
||||||
|
if let Some(nested_props) = p.get("properties").cloned() {
|
||||||
|
p.insert(
|
||||||
|
"properties".to_string(),
|
||||||
|
clean_schema_properties(&nested_props),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean items schema for arrays
|
||||||
|
if let Some(items) = p.get("items").cloned()
|
||||||
|
&& let Some(items_obj) = items.as_object()
|
||||||
|
{
|
||||||
|
let mut cleaned_items = items_obj.clone();
|
||||||
|
cleaned_items.remove("$schema");
|
||||||
|
cleaned_items.remove("additionalProperties");
|
||||||
|
p.insert("items".to_string(), Value::Object(cleaned_items));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cleaned.insert(key.clone(), prop);
|
||||||
|
}
|
||||||
|
Value::Object(cleaned)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Call an MCP tool via storkit's MCP server.
|
||||||
|
async fn call_mcp_tool(
|
||||||
|
client: &Client,
|
||||||
|
mcp_base: &str,
|
||||||
|
tool_name: &str,
|
||||||
|
args: &Value,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let request = json!({
|
||||||
|
"jsonrpc": "2.0",
|
||||||
|
"id": 1,
|
||||||
|
"method": "tools/call",
|
||||||
|
"params": {
|
||||||
|
"name": tool_name,
|
||||||
|
"arguments": args
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.post(mcp_base)
|
||||||
|
.json(&request)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("MCP tool call failed: {e}"))?;
|
||||||
|
|
||||||
|
let body: Value = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;
|
||||||
|
|
||||||
|
if let Some(error) = body.get("error") {
|
||||||
|
let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
|
||||||
|
return Err(format!("MCP tool '{tool_name}' error: {msg}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// MCP tools/call returns { result: { content: [{ type: "text", text: "..." }] } }
|
||||||
|
let content = &body["result"]["content"];
|
||||||
|
if let Some(arr) = content.as_array() {
|
||||||
|
let texts: Vec<&str> = arr
|
||||||
|
.iter()
|
||||||
|
.filter_map(|c| c["text"].as_str())
|
||||||
|
.collect();
|
||||||
|
if !texts.is_empty() {
|
||||||
|
return Ok(texts.join("\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to serializing the entire result.
|
||||||
|
Ok(body["result"].to_string())
|
||||||
|
}
|
||||||
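The call side mirrors the request already assembled above; for a concrete (invented) tool, the exchange looks like this, with the text entries of result.content joined into the string returned to the model.

    use serde_json::{json, Value};

    fn tools_call_request() -> Value {
        json!({
            "jsonrpc": "2.0",
            "id": 1,
            "method": "tools/call",
            "params": { "name": "get_story", "arguments": { "story_id": "368_story_test" } }
        })
    }

    // Success shape flattened by call_mcp_tool into a single string.
    fn tools_call_response_shape() -> Value {
        json!({
            "jsonrpc": "2.0",
            "id": 1,
            "result": {
                "content": [{ "type": "text", "text": "...story markdown..." }]
            }
        })
    }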
|
|
||||||
|
/// Parse token usage metadata from a Gemini API response.
|
||||||
|
fn parse_usage_metadata(response: &Value) -> Option<TokenUsage> {
|
||||||
|
let metadata = response.get("usageMetadata")?;
|
||||||
|
Some(TokenUsage {
|
||||||
|
input_tokens: metadata
|
||||||
|
.get("promptTokenCount")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.unwrap_or(0),
|
||||||
|
output_tokens: metadata
|
||||||
|
.get("candidatesTokenCount")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.unwrap_or(0),
|
||||||
|
// Gemini doesn't have cache token fields, but we keep the struct uniform.
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
// Google AI API doesn't report cost; leave at 0.
|
||||||
|
total_cost_usd: 0.0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Tests ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_simple_object() {
|
||||||
|
let schema = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"story_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Story identifier"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["story_id"]
|
||||||
|
});
|
||||||
|
|
||||||
|
let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
|
||||||
|
assert_eq!(result["type"], "object");
|
||||||
|
assert!(result["properties"]["story_id"].is_object());
|
||||||
|
assert_eq!(result["required"][0], "story_id");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_empty_properties_returns_none() {
|
||||||
|
let schema = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {}
|
||||||
|
});
|
||||||
|
|
||||||
|
assert!(convert_mcp_schema_to_gemini(Some(&schema)).is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_none_returns_none() {
|
||||||
|
assert!(convert_mcp_schema_to_gemini(None).is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_strips_additional_properties() {
|
||||||
|
let schema = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"additionalProperties": false,
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
|
||||||
|
let name_prop = &result["properties"]["name"];
|
||||||
|
assert!(name_prop.get("additionalProperties").is_none());
|
||||||
|
assert!(name_prop.get("$schema").is_none());
|
||||||
|
assert_eq!(name_prop["type"], "string");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_with_nested_objects() {
|
||||||
|
let schema = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"config": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"key": { "type": "string" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
|
||||||
|
assert!(result["properties"]["config"]["properties"]["key"].is_object());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn convert_mcp_schema_with_array_items() {
|
||||||
|
let schema = json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": { "type": "string" }
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let result = convert_mcp_schema_to_gemini(Some(&schema)).unwrap();
|
||||||
|
let items_schema = &result["properties"]["items"]["items"];
|
||||||
|
assert!(items_schema.get("additionalProperties").is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn build_system_instruction_uses_args() {
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: "42_story_test".to_string(),
|
||||||
|
agent_name: "coder-1".to_string(),
|
||||||
|
command: "gemini-2.5-pro".to_string(),
|
||||||
|
args: vec![
|
||||||
|
"--append-system-prompt".to_string(),
|
||||||
|
"Custom system prompt".to_string(),
|
||||||
|
],
|
||||||
|
prompt: "Do the thing".to_string(),
|
||||||
|
cwd: "/tmp/wt".to_string(),
|
||||||
|
inactivity_timeout_secs: 300,
|
||||||
|
mcp_port: 3001,
|
||||||
|
};
|
||||||
|
|
||||||
|
let instruction = build_system_instruction(&ctx);
|
||||||
|
assert_eq!(instruction["parts"][0]["text"], "Custom system prompt");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn build_system_instruction_default() {
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: "42_story_test".to_string(),
|
||||||
|
agent_name: "coder-1".to_string(),
|
||||||
|
command: "gemini-2.5-pro".to_string(),
|
||||||
|
args: vec![],
|
||||||
|
prompt: "Do the thing".to_string(),
|
||||||
|
cwd: "/tmp/wt".to_string(),
|
||||||
|
inactivity_timeout_secs: 300,
|
||||||
|
mcp_port: 3001,
|
||||||
|
};
|
||||||
|
|
||||||
|
let instruction = build_system_instruction(&ctx);
|
||||||
|
let text = instruction["parts"][0]["text"].as_str().unwrap();
|
||||||
|
assert!(text.contains("42_story_test"));
|
||||||
|
assert!(text.contains("/tmp/wt"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn build_generate_content_request_includes_tools() {
|
||||||
|
let system = json!({"parts": [{"text": "system"}]});
|
||||||
|
let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
|
||||||
|
let tools = vec![GeminiFunctionDeclaration {
|
||||||
|
name: "my_tool".to_string(),
|
||||||
|
description: "A tool".to_string(),
|
||||||
|
parameters: Some(json!({"type": "object", "properties": {"x": {"type": "string"}}})),
|
||||||
|
}];
|
||||||
|
|
||||||
|
let body = build_generate_content_request(&system, &contents, &tools);
|
||||||
|
assert!(body["tools"][0]["functionDeclarations"].is_array());
|
||||||
|
assert_eq!(body["tools"][0]["functionDeclarations"][0]["name"], "my_tool");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn build_generate_content_request_no_tools() {
|
||||||
|
let system = json!({"parts": [{"text": "system"}]});
|
||||||
|
let contents = vec![json!({"role": "user", "parts": [{"text": "hello"}]})];
|
||||||
|
let tools: Vec<GeminiFunctionDeclaration> = vec![];
|
||||||
|
|
||||||
|
let body = build_generate_content_request(&system, &contents, &tools);
|
||||||
|
assert!(body.get("tools").is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_usage_metadata_valid() {
|
||||||
|
let response = json!({
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 100,
|
||||||
|
"candidatesTokenCount": 50,
|
||||||
|
"totalTokenCount": 150
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let usage = parse_usage_metadata(&response).unwrap();
|
||||||
|
assert_eq!(usage.input_tokens, 100);
|
||||||
|
assert_eq!(usage.output_tokens, 50);
|
||||||
|
assert_eq!(usage.cache_creation_input_tokens, 0);
|
||||||
|
assert_eq!(usage.total_cost_usd, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_usage_metadata_missing() {
|
||||||
|
let response = json!({"candidates": []});
|
||||||
|
assert!(parse_usage_metadata(&response).is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn gemini_runtime_stop_sets_cancelled() {
|
||||||
|
let runtime = GeminiRuntime::new();
|
||||||
|
assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
|
||||||
|
runtime.stop();
|
||||||
|
assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn model_extraction_from_command() {
|
||||||
|
// When command starts with "gemini", use it as model name
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: "1".to_string(),
|
||||||
|
agent_name: "coder".to_string(),
|
||||||
|
command: "gemini-2.5-pro".to_string(),
|
||||||
|
args: vec![],
|
||||||
|
prompt: "test".to_string(),
|
||||||
|
cwd: "/tmp".to_string(),
|
||||||
|
inactivity_timeout_secs: 300,
|
||||||
|
mcp_port: 3001,
|
||||||
|
};
|
||||||
|
|
||||||
|
// The model extraction logic is inside start(), but we test the
|
||||||
|
// condition here.
|
||||||
|
assert!(ctx.command.starts_with("gemini"));
|
||||||
|
}
|
||||||
|
}
|
||||||
server/src/agents/runtime/mod.rs (new file, 163 lines)
@@ -0,0 +1,163 @@
|
mod claude_code;
|
||||||
|
mod gemini;
|
||||||
|
mod openai;
|
||||||
|
|
||||||
|
pub use claude_code::ClaudeCodeRuntime;
|
||||||
|
pub use gemini::GeminiRuntime;
|
||||||
|
pub use openai::OpenAiRuntime;
|
||||||
|
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
|
use crate::agent_log::AgentLogWriter;
|
||||||
|
|
||||||
|
use super::{AgentEvent, TokenUsage};
|
||||||
|
|
||||||
|
/// Context passed to a runtime when launching an agent session.
|
||||||
|
pub struct RuntimeContext {
|
||||||
|
pub story_id: String,
|
||||||
|
pub agent_name: String,
|
||||||
|
pub command: String,
|
||||||
|
pub args: Vec<String>,
|
||||||
|
pub prompt: String,
|
||||||
|
pub cwd: String,
|
||||||
|
pub inactivity_timeout_secs: u64,
|
||||||
|
/// Port of the storkit MCP server, used by API-based runtimes (Gemini, OpenAI)
|
||||||
|
/// to call back for tool execution.
|
||||||
|
pub mcp_port: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result returned by a runtime after the agent session completes.
|
||||||
|
pub struct RuntimeResult {
|
||||||
|
pub session_id: Option<String>,
|
||||||
|
pub token_usage: Option<TokenUsage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runtime status reported by the backend.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub enum RuntimeStatus {
|
||||||
|
Idle,
|
||||||
|
Running,
|
||||||
|
Completed,
|
||||||
|
Failed,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Abstraction over different agent execution backends.
|
||||||
|
///
|
||||||
|
/// Implementations:
|
||||||
|
/// - [`ClaudeCodeRuntime`]: spawns the `claude` CLI via a PTY (default, `runtime = "claude-code"`)
|
||||||
|
///
|
||||||
|
/// Future implementations could include OpenAI and Gemini API runtimes.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub trait AgentRuntime: Send + Sync {
|
||||||
|
/// Start the agent and drive it to completion, streaming events through
|
||||||
|
/// the provided broadcast sender and event log.
|
||||||
|
///
|
||||||
|
/// Returns when the agent session finishes (success or error).
|
||||||
|
async fn start(
|
||||||
|
&self,
|
||||||
|
ctx: RuntimeContext,
|
||||||
|
tx: broadcast::Sender<AgentEvent>,
|
||||||
|
event_log: Arc<Mutex<Vec<AgentEvent>>>,
|
||||||
|
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||||
|
) -> Result<RuntimeResult, String>;
|
||||||
|
|
||||||
|
/// Stop the running agent.
|
||||||
|
fn stop(&self);
|
||||||
|
|
||||||
|
/// Get the current runtime status.
|
||||||
|
fn get_status(&self) -> RuntimeStatus;
|
||||||
|
|
||||||
|
/// Return any events buffered outside the broadcast channel.
|
||||||
|
///
|
||||||
|
/// PTY-based runtimes stream directly to the broadcast channel; this
|
||||||
|
/// returns empty by default. API-based runtimes may buffer events here.
|
||||||
|
fn stream_events(&self) -> Vec<AgentEvent> {
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
|
}
|
||||||
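A minimal sketch of what another backend would have to provide, assuming only the trait surface above; EchoRuntime is invented for illustration and simply reflects the prompt back as output before completing.

    // Invented example backend; not part of the diff.
    pub struct EchoRuntime;

    impl AgentRuntime for EchoRuntime {
        async fn start(
            &self,
            ctx: RuntimeContext,
            tx: broadcast::Sender<AgentEvent>,
            event_log: Arc<Mutex<Vec<AgentEvent>>>,
            _log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
        ) -> Result<RuntimeResult, String> {
            // Echo the prompt as a single Output event, then finish.
            let event = AgentEvent::Output {
                story_id: ctx.story_id.clone(),
                agent_name: ctx.agent_name.clone(),
                text: ctx.prompt.clone(),
            };
            let _ = tx.send(event.clone());
            if let Ok(mut log) = event_log.lock() {
                log.push(event);
            }
            Ok(RuntimeResult { session_id: None, token_usage: None })
        }

        fn stop(&self) {}

        fn get_status(&self) -> RuntimeStatus {
            RuntimeStatus::Completed
        }
    }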
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn runtime_context_fields() {
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: "42_story_foo".to_string(),
|
||||||
|
agent_name: "coder-1".to_string(),
|
||||||
|
command: "claude".to_string(),
|
||||||
|
args: vec!["--model".to_string(), "sonnet".to_string()],
|
||||||
|
prompt: "Do the thing".to_string(),
|
||||||
|
cwd: "/tmp/wt".to_string(),
|
||||||
|
inactivity_timeout_secs: 300,
|
||||||
|
mcp_port: 3001,
|
||||||
|
};
|
||||||
|
assert_eq!(ctx.story_id, "42_story_foo");
|
||||||
|
assert_eq!(ctx.agent_name, "coder-1");
|
||||||
|
assert_eq!(ctx.command, "claude");
|
||||||
|
assert_eq!(ctx.args.len(), 2);
|
||||||
|
assert_eq!(ctx.prompt, "Do the thing");
|
||||||
|
assert_eq!(ctx.cwd, "/tmp/wt");
|
||||||
|
assert_eq!(ctx.inactivity_timeout_secs, 300);
|
||||||
|
assert_eq!(ctx.mcp_port, 3001);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn runtime_result_fields() {
|
||||||
|
let result = RuntimeResult {
|
||||||
|
session_id: Some("sess-123".to_string()),
|
||||||
|
token_usage: Some(TokenUsage {
|
||||||
|
input_tokens: 100,
|
||||||
|
output_tokens: 50,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
total_cost_usd: 0.01,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
assert_eq!(result.session_id, Some("sess-123".to_string()));
|
||||||
|
assert!(result.token_usage.is_some());
|
||||||
|
let usage = result.token_usage.unwrap();
|
||||||
|
assert_eq!(usage.input_tokens, 100);
|
||||||
|
assert_eq!(usage.output_tokens, 50);
|
||||||
|
assert_eq!(usage.total_cost_usd, 0.01);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn runtime_result_no_usage() {
|
||||||
|
let result = RuntimeResult {
|
||||||
|
session_id: None,
|
||||||
|
token_usage: None,
|
||||||
|
};
|
||||||
|
assert!(result.session_id.is_none());
|
||||||
|
assert!(result.token_usage.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn runtime_status_variants() {
|
||||||
|
assert_eq!(RuntimeStatus::Idle, RuntimeStatus::Idle);
|
||||||
|
assert_ne!(RuntimeStatus::Running, RuntimeStatus::Completed);
|
||||||
|
assert_ne!(RuntimeStatus::Failed, RuntimeStatus::Idle);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn claude_code_runtime_get_status_returns_idle() {
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use crate::io::watcher::WatcherEvent;
|
||||||
|
let killers = Arc::new(Mutex::new(HashMap::new()));
|
||||||
|
let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
|
||||||
|
let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
|
||||||
|
assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn claude_code_runtime_stream_events_empty() {
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use crate::io::watcher::WatcherEvent;
|
||||||
|
let killers = Arc::new(Mutex::new(HashMap::new()));
|
||||||
|
let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(16);
|
||||||
|
let runtime = ClaudeCodeRuntime::new(killers, watcher_tx);
|
||||||
|
assert!(runtime.stream_events().is_empty());
|
||||||
|
}
|
||||||
|
}
|
||||||
server/src/agents/runtime/openai.rs (new file, 704 lines)
@@ -0,0 +1,704 @@
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
|
use crate::agent_log::AgentLogWriter;
|
||||||
|
use crate::slog;
|
||||||
|
|
||||||
|
use super::super::{AgentEvent, TokenUsage};
|
||||||
|
use super::{AgentRuntime, RuntimeContext, RuntimeResult, RuntimeStatus};
|
||||||
|
|
||||||
|
// ── Public runtime struct ────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Agent runtime that drives an OpenAI model (GPT-4o, o3, etc.) through
|
||||||
|
/// the OpenAI Chat Completions API.
|
||||||
|
///
|
||||||
|
/// The runtime:
|
||||||
|
/// 1. Fetches MCP tool definitions from storkit's MCP server.
|
||||||
|
/// 2. Converts them to OpenAI function-calling format.
|
||||||
|
/// 3. Sends the agent prompt + tools to the Chat Completions API.
|
||||||
|
/// 4. Executes any requested tool calls via MCP `tools/call`.
|
||||||
|
/// 5. Loops until the model produces a response with no tool calls.
|
||||||
|
/// 6. Tracks token usage from the API response.
|
||||||
|
pub struct OpenAiRuntime {
|
||||||
|
/// Whether a stop has been requested.
|
||||||
|
cancelled: Arc<AtomicBool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenAiRuntime {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
cancelled: Arc::new(AtomicBool::new(false)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
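A request-shape sketch for one turn of the loop below. The tool entry uses the standard Chat Completions function-tool wrapper, which is what the MCP conversion further down in this file is expected to emit; the tool itself and the prompt text are invented.

    use serde_json::{json, Value};

    fn example_chat_completions_body() -> Value {
        json!({
            "model": "gpt-4o",
            "messages": [
                { "role": "system", "content": "You are an AI coding agent working on story 368..." },
                { "role": "user", "content": "Implement story 368" }
            ],
            "temperature": 0.2,
            "tools": [{
                "type": "function",
                "function": {
                    "name": "get_story",
                    "description": "Fetch a story by id",
                    "parameters": {
                        "type": "object",
                        "properties": { "story_id": { "type": "string" } }
                    }
                }
            }]
        })
    }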
|
|
||||||
|
impl AgentRuntime for OpenAiRuntime {
|
||||||
|
async fn start(
|
||||||
|
&self,
|
||||||
|
ctx: RuntimeContext,
|
||||||
|
tx: broadcast::Sender<AgentEvent>,
|
||||||
|
event_log: Arc<Mutex<Vec<AgentEvent>>>,
|
||||||
|
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||||
|
) -> Result<RuntimeResult, String> {
|
||||||
|
let api_key = std::env::var("OPENAI_API_KEY").map_err(|_| {
|
||||||
|
"OPENAI_API_KEY environment variable is not set. \
|
||||||
|
Set it to your OpenAI API key to use the OpenAI runtime."
|
||||||
|
.to_string()
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let model = if ctx.command.starts_with("gpt") || ctx.command.starts_with("o") {
|
||||||
|
// The pool puts the model into `command` for non-CLI runtimes.
|
||||||
|
ctx.command.clone()
|
||||||
|
} else {
|
||||||
|
// Fall back to args: look for --model <value>
|
||||||
|
ctx.args
|
||||||
|
.iter()
|
||||||
|
.position(|a| a == "--model")
|
||||||
|
.and_then(|i| ctx.args.get(i + 1))
|
||||||
|
.cloned()
|
                .unwrap_or_else(|| "gpt-4o".to_string())
        };

        let mcp_port = ctx.mcp_port;
        let mcp_base = format!("http://localhost:{mcp_port}/mcp");

        let client = Client::new();
        let cancelled = Arc::clone(&self.cancelled);

        // Step 1: Fetch MCP tool definitions and convert to OpenAI format.
        let openai_tools = fetch_and_convert_mcp_tools(&client, &mcp_base).await?;

        // Step 2: Build the initial conversation messages.
        let system_text = build_system_text(&ctx);
        let mut messages: Vec<Value> = vec![
            json!({ "role": "system", "content": system_text }),
            json!({ "role": "user", "content": ctx.prompt }),
        ];

        let mut total_usage = TokenUsage {
            input_tokens: 0,
            output_tokens: 0,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
            total_cost_usd: 0.0,
        };

        let emit = |event: AgentEvent| {
            super::super::pty::emit_event(
                event,
                &tx,
                &event_log,
                log_writer.as_ref().map(|w| w.as_ref()),
            );
        };

        emit(AgentEvent::Status {
            story_id: ctx.story_id.clone(),
            agent_name: ctx.agent_name.clone(),
            status: "running".to_string(),
        });

        // Step 3: Conversation loop.
        let mut turn = 0u32;
        let max_turns = 200; // Safety limit

        loop {
            if cancelled.load(Ordering::Relaxed) {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: "Agent was stopped by user".to_string(),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }

            turn += 1;
            if turn > max_turns {
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: format!("Exceeded maximum turns ({max_turns})"),
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }

            slog!(
                "[openai] Turn {turn} for {}:{}",
                ctx.story_id,
                ctx.agent_name
            );

            let mut request_body = json!({
                "model": model,
                "messages": messages,
                "temperature": 0.2,
            });

            if !openai_tools.is_empty() {
                request_body["tools"] = json!(openai_tools);
            }

            let response = client
                .post("https://api.openai.com/v1/chat/completions")
                .bearer_auth(&api_key)
                .json(&request_body)
                .send()
                .await
                .map_err(|e| format!("OpenAI API request failed: {e}"))?;

            let status = response.status();
            let body: Value = response
                .json()
                .await
                .map_err(|e| format!("Failed to parse OpenAI API response: {e}"))?;

            if !status.is_success() {
                let error_msg = body["error"]["message"]
                    .as_str()
                    .unwrap_or("Unknown API error");
                let err = format!("OpenAI API error ({status}): {error_msg}");
                emit(AgentEvent::Error {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    message: err.clone(),
                });
                return Err(err);
            }

            // Accumulate token usage.
            if let Some(usage) = parse_usage(&body) {
                total_usage.input_tokens += usage.input_tokens;
                total_usage.output_tokens += usage.output_tokens;
            }

            // Extract the first choice.
            let choice = body["choices"]
                .as_array()
                .and_then(|c| c.first())
                .ok_or_else(|| "No choices in OpenAI response".to_string())?;

            let message = &choice["message"];
            let content = message["content"].as_str().unwrap_or("");

            // Emit any text content.
            if !content.is_empty() {
                emit(AgentEvent::Output {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    text: content.to_string(),
                });
            }

            // Check for tool calls.
            let tool_calls = message["tool_calls"].as_array();

            if tool_calls.is_none() || tool_calls.is_some_and(|tc| tc.is_empty()) {
                // No tool calls — model is done.
                emit(AgentEvent::Done {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    session_id: None,
                });
                return Ok(RuntimeResult {
                    session_id: None,
                    token_usage: Some(total_usage),
                });
            }

            let tool_calls = tool_calls.unwrap();

            // Add the assistant message (with tool_calls) to the conversation.
            messages.push(message.clone());

            // Execute each tool call via MCP and add results.
            for tc in tool_calls {
                if cancelled.load(Ordering::Relaxed) {
                    break;
                }

                let call_id = tc["id"].as_str().unwrap_or("");
                let function = &tc["function"];
                let tool_name = function["name"].as_str().unwrap_or("");
                let arguments_str = function["arguments"].as_str().unwrap_or("{}");

                let args: Value = serde_json::from_str(arguments_str).unwrap_or(json!({}));

                slog!(
                    "[openai] Calling MCP tool '{}' for {}:{}",
                    tool_name,
                    ctx.story_id,
                    ctx.agent_name
                );

                emit(AgentEvent::Output {
                    story_id: ctx.story_id.clone(),
                    agent_name: ctx.agent_name.clone(),
                    text: format!("\n[Tool call: {tool_name}]\n"),
                });

                let tool_result = call_mcp_tool(&client, &mcp_base, tool_name, &args).await;

                let result_content = match &tool_result {
                    Ok(result) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!("[Tool result: {} chars]\n", result.len()),
                        });
                        result.clone()
                    }
                    Err(e) => {
                        emit(AgentEvent::Output {
                            story_id: ctx.story_id.clone(),
                            agent_name: ctx.agent_name.clone(),
                            text: format!("[Tool error: {e}]\n"),
                        });
                        format!("Error: {e}")
                    }
                };

                // OpenAI expects tool results as role=tool messages with
                // the matching tool_call_id.
                messages.push(json!({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": result_content,
                }));
            }
        }
    }

    fn stop(&self) {
        self.cancelled.store(true, Ordering::Relaxed);
    }

    fn get_status(&self) -> RuntimeStatus {
        if self.cancelled.load(Ordering::Relaxed) {
            RuntimeStatus::Failed
        } else {
            RuntimeStatus::Idle
        }
    }
}

// ── Helper functions ─────────────────────────────────────────────────

/// Build the system message text from the RuntimeContext.
fn build_system_text(ctx: &RuntimeContext) -> String {
    ctx.args
        .iter()
        .position(|a| a == "--append-system-prompt")
        .and_then(|i| ctx.args.get(i + 1))
        .cloned()
        .unwrap_or_else(|| {
            format!(
                "You are an AI coding agent working on story {}. \
                 You have access to tools via function calling. \
                 Use them to complete the task. \
                 Work in the directory: {}",
                ctx.story_id, ctx.cwd
            )
        })
}

/// Fetch MCP tool definitions from storkit's MCP server and convert
/// them to OpenAI function-calling format.
async fn fetch_and_convert_mcp_tools(
    client: &Client,
    mcp_base: &str,
) -> Result<Vec<Value>, String> {
    let request = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/list",
        "params": {}
    });

    let response = client
        .post(mcp_base)
        .json(&request)
        .send()
        .await
        .map_err(|e| format!("Failed to fetch MCP tools: {e}"))?;

    let body: Value = response
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tools response: {e}"))?;

    let tools = body["result"]["tools"]
        .as_array()
        .ok_or_else(|| "No tools array in MCP response".to_string())?;

    let mut openai_tools = Vec::new();

    for tool in tools {
        let name = tool["name"].as_str().unwrap_or("").to_string();
        let description = tool["description"].as_str().unwrap_or("").to_string();

        if name.is_empty() {
            continue;
        }

        // OpenAI function calling uses JSON Schema natively for parameters,
        // so the MCP inputSchema can be used with minimal cleanup.
        let parameters = convert_mcp_schema_to_openai(tool.get("inputSchema"));

        openai_tools.push(json!({
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": parameters.unwrap_or_else(|| json!({"type": "object", "properties": {}})),
            }
        }));
    }

    slog!(
        "[openai] Loaded {} MCP tools as function definitions",
        openai_tools.len()
    );
    Ok(openai_tools)
}
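
// For reference, one converted entry produced by the loop above ends up shaped
// roughly like the following. This is an illustrative sketch only — the tool
// name, description, and schema come from whatever the MCP server advertises:
//
// {
//   "type": "function",
//   "function": {
//     "name": "create_story",
//     "description": "Create a new story",
//     "parameters": {
//       "type": "object",
//       "properties": { "name": { "type": "string" } },
//       "required": ["name"],
//       "additionalProperties": false
//     }
//   }
// }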

/// Convert an MCP inputSchema (JSON Schema) to OpenAI-compatible
/// function parameters.
///
/// OpenAI uses JSON Schema natively, so less transformation is needed
/// compared to Gemini. We still strip `$schema` to keep payloads clean.
fn convert_mcp_schema_to_openai(schema: Option<&Value>) -> Option<Value> {
    let schema = schema?;

    let mut result = json!({
        "type": "object",
    });

    if let Some(properties) = schema.get("properties") {
        result["properties"] = clean_schema_properties(properties);
    } else {
        result["properties"] = json!({});
    }

    if let Some(required) = schema.get("required") {
        result["required"] = required.clone();
    }

    // OpenAI recommends additionalProperties: false for strict mode.
    result["additionalProperties"] = json!(false);

    Some(result)
}

/// Recursively clean schema properties, removing unsupported keywords.
fn clean_schema_properties(properties: &Value) -> Value {
    let Some(obj) = properties.as_object() else {
        return properties.clone();
    };

    let mut cleaned = serde_json::Map::new();
    for (key, value) in obj {
        let mut prop = value.clone();
        if let Some(p) = prop.as_object_mut() {
            p.remove("$schema");

            // Recursively clean nested object properties.
            if let Some(nested_props) = p.get("properties").cloned() {
                p.insert(
                    "properties".to_string(),
                    clean_schema_properties(&nested_props),
                );
            }

            // Clean items schema for arrays.
            if let Some(items) = p.get("items").cloned()
                && let Some(items_obj) = items.as_object()
            {
                let mut cleaned_items = items_obj.clone();
                cleaned_items.remove("$schema");
                p.insert("items".to_string(), Value::Object(cleaned_items));
            }
        }
        cleaned.insert(key.clone(), prop);
    }
    Value::Object(cleaned)
}

/// Call an MCP tool via storkit's MCP server.
async fn call_mcp_tool(
    client: &Client,
    mcp_base: &str,
    tool_name: &str,
    args: &Value,
) -> Result<String, String> {
    let request = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": tool_name,
            "arguments": args
        }
    });

    let response = client
        .post(mcp_base)
        .json(&request)
        .send()
        .await
        .map_err(|e| format!("MCP tool call failed: {e}"))?;

    let body: Value = response
        .json()
        .await
        .map_err(|e| format!("Failed to parse MCP tool response: {e}"))?;

    if let Some(error) = body.get("error") {
        let msg = error["message"].as_str().unwrap_or("Unknown MCP error");
        return Err(format!("MCP tool '{tool_name}' error: {msg}"));
    }

    // MCP tools/call returns { result: { content: [{ type: "text", text: "..." }] } }
    let content = &body["result"]["content"];
    if let Some(arr) = content.as_array() {
        let texts: Vec<&str> = arr
            .iter()
            .filter_map(|c| c["text"].as_str())
            .collect();
        if !texts.is_empty() {
            return Ok(texts.join("\n"));
        }
    }

    // Fall back to serializing the entire result.
    Ok(body["result"].to_string())
}
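
// Sketch of the JSON-RPC round trip performed above, with a hypothetical tool
// name and arguments (the real names come from the MCP server's tool list):
//
// -> { "jsonrpc": "2.0", "id": 1, "method": "tools/call",
//      "params": { "name": "move_story", "arguments": { "story_id": "42_story_test" } } }
// <- { "jsonrpc": "2.0", "id": 1,
//      "result": { "content": [ { "type": "text", "text": "Story moved to 3_qa" } ] } }
//
// The text parts are joined with newlines and returned as the tool result.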

/// Parse token usage from an OpenAI API response.
fn parse_usage(response: &Value) -> Option<TokenUsage> {
    let usage = response.get("usage")?;
    Some(TokenUsage {
        input_tokens: usage
            .get("prompt_tokens")
            .and_then(|v| v.as_u64())
            .unwrap_or(0),
        output_tokens: usage
            .get("completion_tokens")
            .and_then(|v| v.as_u64())
            .unwrap_or(0),
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: 0,
        // OpenAI API doesn't report cost directly; leave at 0.
        total_cost_usd: 0.0,
    })
}

// ── Tests ────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn convert_mcp_schema_simple_object() {
        let schema = json!({
            "type": "object",
            "properties": {
                "story_id": {
                    "type": "string",
                    "description": "Story identifier"
                }
            },
            "required": ["story_id"]
        });

        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert_eq!(result["type"], "object");
        assert!(result["properties"]["story_id"].is_object());
        assert_eq!(result["required"][0], "story_id");
        assert_eq!(result["additionalProperties"], false);
    }

    #[test]
    fn convert_mcp_schema_empty_properties() {
        let schema = json!({
            "type": "object",
            "properties": {}
        });

        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert_eq!(result["type"], "object");
        assert!(result["properties"].as_object().unwrap().is_empty());
    }

    #[test]
    fn convert_mcp_schema_none_returns_none() {
        assert!(convert_mcp_schema_to_openai(None).is_none());
    }

    #[test]
    fn convert_mcp_schema_strips_dollar_schema() {
        let schema = json!({
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "$schema": "http://json-schema.org/draft-07/schema#"
                }
            }
        });

        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        let name_prop = &result["properties"]["name"];
        assert!(name_prop.get("$schema").is_none());
        assert_eq!(name_prop["type"], "string");
    }

    #[test]
    fn convert_mcp_schema_with_nested_objects() {
        let schema = json!({
            "type": "object",
            "properties": {
                "config": {
                    "type": "object",
                    "properties": {
                        "key": { "type": "string" }
                    }
                }
            }
        });

        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        assert!(result["properties"]["config"]["properties"]["key"].is_object());
    }

    #[test]
    fn convert_mcp_schema_with_array_items() {
        let schema = json!({
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "name": { "type": "string" }
                        },
                        "$schema": "http://json-schema.org/draft-07/schema#"
                    }
                }
            }
        });

        let result = convert_mcp_schema_to_openai(Some(&schema)).unwrap();
        let items_schema = &result["properties"]["items"]["items"];
        assert!(items_schema.get("$schema").is_none());
    }

    #[test]
    fn build_system_text_uses_args() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![
                "--append-system-prompt".to_string(),
                "Custom system prompt".to_string(),
            ],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };

        assert_eq!(build_system_text(&ctx), "Custom system prompt");
    }

    #[test]
    fn build_system_text_default() {
        let ctx = RuntimeContext {
            story_id: "42_story_test".to_string(),
            agent_name: "coder-1".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![],
            prompt: "Do the thing".to_string(),
            cwd: "/tmp/wt".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };

        let text = build_system_text(&ctx);
        assert!(text.contains("42_story_test"));
        assert!(text.contains("/tmp/wt"));
    }

    #[test]
    fn parse_usage_valid() {
        let response = json!({
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            }
        });

        let usage = parse_usage(&response).unwrap();
        assert_eq!(usage.input_tokens, 100);
        assert_eq!(usage.output_tokens, 50);
        assert_eq!(usage.cache_creation_input_tokens, 0);
        assert_eq!(usage.total_cost_usd, 0.0);
    }

    #[test]
    fn parse_usage_missing() {
        let response = json!({"choices": []});
        assert!(parse_usage(&response).is_none());
    }

    #[test]
    fn openai_runtime_stop_sets_cancelled() {
        let runtime = OpenAiRuntime::new();
        assert_eq!(runtime.get_status(), RuntimeStatus::Idle);
        runtime.stop();
        assert_eq!(runtime.get_status(), RuntimeStatus::Failed);
    }

    #[test]
    fn model_extraction_from_command_gpt() {
        let ctx = RuntimeContext {
            story_id: "1".to_string(),
            agent_name: "coder".to_string(),
            command: "gpt-4o".to_string(),
            args: vec![],
            prompt: "test".to_string(),
            cwd: "/tmp".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert!(ctx.command.starts_with("gpt"));
    }

    #[test]
    fn model_extraction_from_command_o3() {
        let ctx = RuntimeContext {
            story_id: "1".to_string(),
            agent_name: "coder".to_string(),
            command: "o3".to_string(),
            args: vec![],
            prompt: "test".to_string(),
            cwd: "/tmp".to_string(),
            inactivity_timeout_secs: 300,
            mcp_port: 3001,
        };
        assert!(ctx.command.starts_with("o"));
    }
}
@@ -4,6 +4,8 @@
 //! sending and editing messages, allowing the bot logic (commands, htop,
 //! notifications) to work against any chat platform — Matrix, WhatsApp, etc.

+pub mod transport;
+
 use async_trait::async_trait;

 /// A platform-agnostic identifier for a sent message.
@@ -13,9 +15,6 @@ use async_trait::async_trait;
 /// producing and consuming these identifiers.
 pub type MessageId = String;

-/// A platform-agnostic identifier for a chat room / channel / conversation.
-pub type RoomId = String;
-
 /// Abstraction over a chat platform's message-sending capabilities.
 ///
 /// Implementations must be `Send + Sync` so they can be shared across
@@ -65,11 +64,11 @@ mod tests {
     #[test]
     fn whatsapp_transport_satisfies_trait() {
         fn assert_transport<T: ChatTransport>() {}
-        assert_transport::<crate::whatsapp::WhatsAppTransport>();
+        assert_transport::<crate::chat::transport::whatsapp::WhatsAppTransport>();

         // Verify it can be wrapped in Arc<dyn ChatTransport>.
         let _: Arc<dyn ChatTransport> =
-            Arc::new(crate::whatsapp::WhatsAppTransport::new(
+            Arc::new(crate::chat::transport::whatsapp::WhatsAppTransport::new(
                 "test-phone".to_string(),
                 "test-token".to_string(),
                 "pipeline_notification".to_string(),
@@ -81,7 +80,7 @@ mod tests {
     #[test]
     fn matrix_transport_is_send_sync() {
         fn assert_send_sync<T: Send + Sync>() {}
-        assert_send_sync::<crate::matrix::transport_impl::MatrixTransport>();
+        assert_send_sync::<crate::chat::transport::matrix::transport_impl::MatrixTransport>();
     }

     /// Verify that SlackTransport satisfies the ChatTransport trait and
@@ -89,9 +88,24 @@ mod tests {
     #[test]
     fn slack_transport_satisfies_trait() {
         fn assert_transport<T: ChatTransport>() {}
-        assert_transport::<crate::slack::SlackTransport>();
+        assert_transport::<crate::chat::transport::slack::SlackTransport>();

         let _: Arc<dyn ChatTransport> =
-            Arc::new(crate::slack::SlackTransport::new("xoxb-test".to_string()));
+            Arc::new(crate::chat::transport::slack::SlackTransport::new("xoxb-test".to_string()));
+    }
+
+    /// Verify that TwilioWhatsAppTransport satisfies the ChatTransport trait
+    /// and can be used as `Arc<dyn ChatTransport>` (compile-time check).
+    #[test]
+    fn twilio_transport_satisfies_trait() {
+        fn assert_transport<T: ChatTransport>() {}
+        assert_transport::<crate::chat::transport::whatsapp::TwilioWhatsAppTransport>();
+
+        let _: Arc<dyn ChatTransport> =
+            Arc::new(crate::chat::transport::whatsapp::TwilioWhatsAppTransport::new(
+                "ACtest".to_string(),
+                "authtoken".to_string(),
+                "+14155551234".to_string(),
+            ));
     }
 }

server/src/chat/transport/matrix/assign.rs (new file, 537 lines)
@@ -0,0 +1,537 @@
|
|||||||
|
//! Assign command: pre-assign or re-assign a coder model to a story.
//!
//! `{bot_name} assign {number} {model}` finds the story by number, updates the
//! `agent` field in its front matter, and — when a coder is already running on
//! the story — stops the current coder and starts the newly-assigned one.
//!
//! When no coder is running (the story has not been started yet), the command
//! behaves as before: it simply persists the assignment in the front matter so
//! that the next `start` invocation picks it up automatically.
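
// Illustrative example only (the bot name and model here are placeholders):
// sent in a room the bot watches, the message
//
//     Timmy assign 42 opus
//
// finds the work item numbered 42, writes `agent: coder-opus` into its front
// matter, and — when a coder is already running on that story — stops it and
// starts the newly assigned coder.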

use crate::agents::{AgentPool, AgentStatus};
use crate::io::story_metadata::{parse_front_matter, set_front_matter_field};
use std::path::Path;

/// All pipeline stage directories to search when finding a work item by number.
const STAGES: &[&str] = &[
    "1_backlog",
    "2_current",
    "3_qa",
    "4_merge",
    "5_done",
    "6_archived",
];

/// A parsed assign command from a Matrix message body.
#[derive(Debug, PartialEq)]
pub enum AssignCommand {
    /// Assign the story with this number to the given model.
    Assign {
        story_number: String,
        model: String,
    },
    /// The user typed `assign` but without valid arguments.
    BadArgs,
}

/// Parse an assign command from a raw Matrix message body.
///
/// Strips the bot mention prefix and checks whether the first word is `assign`.
/// Returns `None` when the message is not an assign command at all.
pub fn extract_assign_command(
    message: &str,
    bot_name: &str,
    bot_user_id: &str,
) -> Option<AssignCommand> {
    let stripped = strip_mention(message, bot_name, bot_user_id);
    let trimmed = stripped
        .trim()
        .trim_start_matches(|c: char| !c.is_alphanumeric());

    let (cmd, args) = match trimmed.split_once(char::is_whitespace) {
        Some((c, a)) => (c, a.trim()),
        None => (trimmed, ""),
    };

    if !cmd.eq_ignore_ascii_case("assign") {
        return None;
    }

    // Split args into story number and model.
    let (number_str, model_str) = match args.split_once(char::is_whitespace) {
        Some((n, m)) => (n.trim(), m.trim()),
        None => (args, ""),
    };

    if number_str.is_empty()
        || !number_str.chars().all(|c| c.is_ascii_digit())
        || model_str.is_empty()
    {
        return Some(AssignCommand::BadArgs);
    }

    Some(AssignCommand::Assign {
        story_number: number_str.to_string(),
        model: model_str.to_string(),
    })
}

/// Resolve a model name hint (e.g. `"opus"`) to a full agent name
/// (e.g. `"coder-opus"`). If the hint already starts with `"coder-"`,
/// it is returned unchanged to prevent double-prefixing.
pub fn resolve_agent_name(model: &str) -> String {
    if model.starts_with("coder-") {
        model.to_string()
    } else {
        format!("coder-{model}")
    }
}

/// Handle an assign command asynchronously.
///
/// Finds the work item by `story_number` across all pipeline stages, updates
/// the `agent` field in its front matter, and — if a coder is currently
/// running on the story — stops it and starts the newly-assigned agent.
/// Returns a markdown-formatted response string.
pub async fn handle_assign(
    bot_name: &str,
    story_number: &str,
    model_str: &str,
    project_root: &Path,
    agents: &AgentPool,
) -> String {
    // Find the story file across all pipeline stages.
    let mut found: Option<(std::path::PathBuf, String)> = None;
    'outer: for stage in STAGES {
        let dir = project_root.join(".storkit").join("work").join(stage);
        if !dir.exists() {
            continue;
        }
        if let Ok(entries) = std::fs::read_dir(&dir) {
            for entry in entries.flatten() {
                let path = entry.path();
                if path.extension().and_then(|e| e.to_str()) != Some("md") {
                    continue;
                }
                if let Some(stem) = path
                    .file_stem()
                    .and_then(|s| s.to_str())
                    .map(|s| s.to_string())
                {
                    let file_num = stem
                        .split('_')
                        .next()
                        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
                        .unwrap_or("")
                        .to_string();
                    if file_num == story_number {
                        found = Some((path, stem));
                        break 'outer;
                    }
                }
            }
        }
    }

    let (path, story_id) = match found {
        Some(f) => f,
        None => {
            return format!(
                "No story, bug, or spike with number **{story_number}** found."
            );
        }
    };

    // Read the human-readable name from front matter for the response.
    let story_name = std::fs::read_to_string(&path)
        .ok()
        .and_then(|contents| {
            parse_front_matter(&contents)
                .ok()
                .and_then(|m| m.name)
        })
        .unwrap_or_else(|| story_id.clone());

    let agent_name = resolve_agent_name(model_str);

    // Write `agent: <agent_name>` into the story's front matter.
    let write_result = std::fs::read_to_string(&path)
        .map_err(|e| format!("Failed to read story file: {e}"))
        .and_then(|contents| {
            let updated = set_front_matter_field(&contents, "agent", &agent_name);
            std::fs::write(&path, &updated)
                .map_err(|e| format!("Failed to write story file: {e}"))
        });

    if let Err(e) = write_result {
        return format!("Failed to assign model to **{story_name}**: {e}");
    }

    // Check whether a coder is already running on this story.
    let running_coders: Vec<_> = agents
        .list_agents()
        .unwrap_or_default()
        .into_iter()
        .filter(|a| {
            a.story_id == story_id
                && a.agent_name.starts_with("coder")
                && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
        })
        .collect();

    if running_coders.is_empty() {
        // No coder running — just persist the assignment.
        return format!(
            "Assigned **{agent_name}** to **{story_name}** (story {story_number}). \
             The model will be used when the story starts."
        );
    }

    // Stop each running coder, then start the newly assigned one.
    let stopped: Vec<String> = running_coders
        .iter()
        .map(|a| a.agent_name.clone())
        .collect();

    for coder in &running_coders {
        if let Err(e) = agents
            .stop_agent(project_root, &story_id, &coder.agent_name)
            .await
        {
            crate::slog!(
                "[matrix-bot] assign: failed to stop agent {} for {}: {e}",
                coder.agent_name,
                story_id
            );
        }
    }

    crate::slog!(
        "[matrix-bot] assign (bot={bot_name}): stopped {:?} for {}; starting {agent_name}",
        stopped,
        story_id
    );

    match agents
        .start_agent(project_root, &story_id, Some(&agent_name), None)
        .await
    {
        Ok(info) => {
            format!(
                "Reassigned **{story_name}** (story {story_number}): \
                 stopped **{}** and started **{}**.",
                stopped.join(", "),
                info.agent_name
            )
        }
        Err(e) => {
            format!(
                "Assigned **{agent_name}** to **{story_name}** (story {story_number}): \
                 stopped **{}** but failed to start the new agent: {e}",
                stopped.join(", ")
            )
        }
    }
}

/// Strip the bot mention prefix from a raw Matrix message body.
///
/// Mirrors the logic in `commands::strip_bot_mention` and `start::strip_mention`.
fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
    let trimmed = message.trim();
    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
        return rest;
    }
    if let Some(localpart) = bot_user_id.split(':').next()
        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
    {
        return rest;
    }
    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
        return rest;
    }
    trimmed
}

fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    if text.len() < prefix.len() {
        return None;
    }
    if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
        return None;
    }
    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        None => Some(rest),
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        _ => Some(rest),
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // -- extract_assign_command -----------------------------------------------

    #[test]
    fn extract_with_full_user_id() {
        let cmd = extract_assign_command(
            "@timmy:home.local assign 42 opus",
            "Timmy",
            "@timmy:home.local",
        );
        assert_eq!(
            cmd,
            Some(AssignCommand::Assign {
                story_number: "42".to_string(),
                model: "opus".to_string()
            })
        );
    }

    #[test]
    fn extract_with_display_name() {
        let cmd = extract_assign_command("Timmy assign 42 sonnet", "Timmy", "@timmy:home.local");
        assert_eq!(
            cmd,
            Some(AssignCommand::Assign {
                story_number: "42".to_string(),
                model: "sonnet".to_string()
            })
        );
    }

    #[test]
    fn extract_with_localpart() {
        let cmd = extract_assign_command("@timmy assign 7 opus", "Timmy", "@timmy:home.local");
        assert_eq!(
            cmd,
            Some(AssignCommand::Assign {
                story_number: "7".to_string(),
                model: "opus".to_string()
            })
        );
    }

    #[test]
    fn extract_case_insensitive_command() {
        let cmd = extract_assign_command("Timmy ASSIGN 99 opus", "Timmy", "@timmy:home.local");
        assert_eq!(
            cmd,
            Some(AssignCommand::Assign {
                story_number: "99".to_string(),
                model: "opus".to_string()
            })
        );
    }

    #[test]
    fn extract_no_args_is_bad_args() {
        let cmd = extract_assign_command("Timmy assign", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(AssignCommand::BadArgs));
    }

    #[test]
    fn extract_missing_model_is_bad_args() {
        let cmd = extract_assign_command("Timmy assign 42", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(AssignCommand::BadArgs));
    }

    #[test]
    fn extract_non_numeric_number_is_bad_args() {
        let cmd = extract_assign_command("Timmy assign abc opus", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(AssignCommand::BadArgs));
    }

    #[test]
    fn extract_non_assign_command_returns_none() {
        let cmd = extract_assign_command("Timmy help", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }

    // -- resolve_agent_name --------------------------------------------------

    #[test]
    fn resolve_agent_name_prefixes_bare_model() {
        assert_eq!(resolve_agent_name("opus"), "coder-opus");
        assert_eq!(resolve_agent_name("sonnet"), "coder-sonnet");
        assert_eq!(resolve_agent_name("haiku"), "coder-haiku");
    }

    #[test]
    fn resolve_agent_name_does_not_double_prefix() {
        assert_eq!(resolve_agent_name("coder-opus"), "coder-opus");
        assert_eq!(resolve_agent_name("coder-sonnet"), "coder-sonnet");
    }

    // -- handle_assign (no running coder) ------------------------------------

    fn write_story_file(root: &Path, stage: &str, filename: &str, content: &str) {
        let dir = root.join(".storkit/work").join(stage);
        std::fs::create_dir_all(&dir).unwrap();
        std::fs::write(dir.join(filename), content).unwrap();
    }

    #[tokio::test]
    async fn handle_assign_returns_not_found_for_unknown_number() {
        let tmp = tempfile::tempdir().unwrap();
        for stage in STAGES {
            std::fs::create_dir_all(tmp.path().join(".storkit/work").join(stage)).unwrap();
        }
        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        let response = handle_assign("Timmy", "999", "opus", tmp.path(), &agents).await;
        assert!(
            response.contains("No story") && response.contains("999"),
            "unexpected response: {response}"
        );
    }

    #[tokio::test]
    async fn handle_assign_writes_front_matter_when_no_coder_running() {
        let tmp = tempfile::tempdir().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "42_story_test.md",
            "---\nname: Test Feature\n---\n\n# Story 42\n",
        );

        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        let response = handle_assign("Timmy", "42", "opus", tmp.path(), &agents).await;

        assert!(
            response.contains("coder-opus"),
            "response should mention agent: {response}"
        );
        assert!(
            response.contains("Test Feature"),
            "response should mention story name: {response}"
        );
        // Should say "will be used when the story starts" (no restart)
        assert!(
            response.contains("start"),
            "response should indicate assignment for future start: {response}"
        );

        let contents = std::fs::read_to_string(
            tmp.path().join(".storkit/work/1_backlog/42_story_test.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "front matter should contain agent field: {contents}"
        );
    }

    #[tokio::test]
    async fn handle_assign_with_already_prefixed_name_does_not_double_prefix() {
        let tmp = tempfile::tempdir().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "7_story_small.md",
            "---\nname: Small Story\n---\n",
        );

        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        let response = handle_assign("Timmy", "7", "coder-opus", tmp.path(), &agents).await;

        assert!(
            response.contains("coder-opus"),
            "should not double-prefix: {response}"
        );
        assert!(
            !response.contains("coder-coder-opus"),
            "must not double-prefix: {response}"
        );

        let contents = std::fs::read_to_string(
            tmp.path().join(".storkit/work/1_backlog/7_story_small.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "must write coder-opus, not coder-coder-opus: {contents}"
        );
    }

    #[tokio::test]
    async fn handle_assign_overwrites_existing_agent_field() {
        let tmp = tempfile::tempdir().unwrap();
        write_story_file(
            tmp.path(),
            "1_backlog",
            "5_story_existing.md",
            "---\nname: Existing\nagent: coder-sonnet\n---\n",
        );

        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        handle_assign("Timmy", "5", "opus", tmp.path(), &agents).await;

        let contents = std::fs::read_to_string(
            tmp.path().join(".storkit/work/1_backlog/5_story_existing.md"),
        )
        .unwrap();
        assert!(
            contents.contains("agent: coder-opus"),
            "should overwrite old agent: {contents}"
        );
        assert!(
            !contents.contains("coder-sonnet"),
            "old agent should no longer appear: {contents}"
        );
    }

    #[tokio::test]
    async fn handle_assign_finds_story_in_any_stage() {
        let tmp = tempfile::tempdir().unwrap();
        write_story_file(
            tmp.path(),
            "3_qa",
            "99_story_in_qa.md",
            "---\nname: In QA\n---\n",
        );

        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        let response = handle_assign("Timmy", "99", "opus", tmp.path(), &agents).await;
        assert!(
            response.contains("coder-opus"),
            "should find story in qa stage: {response}"
        );
    }

    // -- handle_assign (with running coder) ----------------------------------

    #[tokio::test]
    async fn handle_assign_stops_running_coder_and_reports_reassignment() {
        let tmp = tempfile::tempdir().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "10_story_current.md",
            "---\nname: Current Story\nagent: coder-sonnet\n---\n",
        );

        let agents = std::sync::Arc::new(AgentPool::new_test(3000));
        // Inject a running coder for this story.
        agents.inject_test_agent("10_story_current", "coder-sonnet", AgentStatus::Running);

        let response = handle_assign("Timmy", "10", "opus", tmp.path(), &agents).await;

        // The response should mention both stopped and started agents.
        assert!(
            response.contains("coder-sonnet"),
            "response should mention the stopped agent: {response}"
        );
        // Should indicate a restart occurred (not just "will be used when starts")
        assert!(
            response.to_lowercase().contains("stop") || response.to_lowercase().contains("reassign"),
            "response should indicate stop/reassign: {response}"
        );
    }
}
@@ -2,7 +2,7 @@ use crate::agents::AgentPool
 use crate::http::context::{PermissionDecision, PermissionForward};
 use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
 use crate::slog;
-use crate::transport::ChatTransport;
+use crate::chat::ChatTransport;
 use matrix_sdk::{
     Client,
     config::SyncSettings,
@@ -213,10 +213,11 @@ pub async fn run_bot(
     watcher_rx: tokio::sync::broadcast::Receiver<crate::io::watcher::WatcherEvent>,
     perm_rx: Arc<TokioMutex<mpsc::UnboundedReceiver<PermissionForward>>>,
     agents: Arc<AgentPool>,
+    shutdown_rx: tokio::sync::watch::Receiver<Option<crate::rebuild::ShutdownReason>>,
 ) -> Result<(), String> {
     let store_path = project_root.join(".storkit").join("matrix_store");
     let client = Client::builder()
-        .homeserver_url(&config.homeserver)
+        .homeserver_url(config.homeserver.as_deref().unwrap_or_default())
         .sqlite_store(&store_path, None)
         .build()
         .await
@@ -231,7 +232,10 @@ pub async fn run_bot(

     let mut login_builder = client
         .matrix_auth()
-        .login_username(&config.username, &config.password)
+        .login_username(
+            config.username.as_deref().unwrap_or_default(),
+            config.password.as_deref().unwrap_or_default(),
+        )
         .initial_device_display_name("Storkit Bot");

     if let Some(ref device_id) = saved_device_id {
@@ -264,8 +268,10 @@ pub async fn run_bot(
     {
         use matrix_sdk::ruma::api::client::uiaa;
         let password_auth = uiaa::AuthData::Password(uiaa::Password::new(
-            uiaa::UserIdentifier::UserIdOrLocalpart(config.username.clone()),
-            config.password.clone(),
+            uiaa::UserIdentifier::UserIdOrLocalpart(
+                config.username.clone().unwrap_or_default(),
+            ),
+            config.password.clone().unwrap_or_default(),
         ));
         if let Err(e) = client
             .encryption()
@@ -368,8 +374,16 @@ pub async fn run_bot(
     // Create the transport abstraction based on the configured transport type.
     let transport: Arc<dyn ChatTransport> = match config.transport.as_str() {
         "whatsapp" => {
-            slog!("[matrix-bot] Using WhatsApp transport");
-            Arc::new(crate::whatsapp::WhatsAppTransport::new(
+            if config.whatsapp_provider == "twilio" {
+                slog!("[matrix-bot] Using WhatsApp/Twilio transport");
+                Arc::new(crate::chat::transport::whatsapp::TwilioWhatsAppTransport::new(
+                    config.twilio_account_sid.clone().unwrap_or_default(),
+                    config.twilio_auth_token.clone().unwrap_or_default(),
+                    config.twilio_whatsapp_number.clone().unwrap_or_default(),
+                ))
+            } else {
+                slog!("[matrix-bot] Using WhatsApp/Meta transport");
+                Arc::new(crate::chat::transport::whatsapp::WhatsAppTransport::new(
                 config.whatsapp_phone_number_id.clone().unwrap_or_default(),
                 config.whatsapp_access_token.clone().unwrap_or_default(),
                 config
@@ -378,6 +392,7 @@ pub async fn run_bot(
                     .unwrap_or_else(|| "pipeline_notification".to_string()),
             ))
         }
+            }
         _ => {
             slog!("[matrix-bot] Using Matrix transport");
             Arc::new(super::transport_impl::MatrixTransport::new(client.clone()))
@@ -426,6 +441,30 @@ pub async fn run_bot(
         notif_project_root,
     );

+    // Spawn a shutdown watcher that sends a best-effort goodbye message to all
+    // configured rooms when the server is about to stop (SIGINT/SIGTERM or rebuild).
+    {
+        let shutdown_transport = Arc::clone(&transport);
+        let shutdown_rooms: Vec<String> =
+            announce_room_ids.iter().map(|r| r.to_string()).collect();
+        let shutdown_bot_name = announce_bot_name.clone();
+        let mut rx = shutdown_rx;
+        tokio::spawn(async move {
+            // Wait until the channel holds Some(reason).
+            if rx.wait_for(|v| v.is_some()).await.is_ok() {
+                let reason = rx.borrow().clone();
+                let notifier = crate::rebuild::BotShutdownNotifier::new(
+                    shutdown_transport,
+                    shutdown_rooms,
+                    shutdown_bot_name,
+                );
+                if let Some(r) = reason {
+                    notifier.notify(r).await;
+                }
+            }
+        });
+    }
+
     // Send a startup announcement to each configured room so users know the
     // bot is online. This runs once per process start — the sync loop handles
     // reconnects internally so this code is never reached again on a network
@@ -836,6 +875,46 @@ async fn on_room_message(
         return;
     }

+    // Check for the assign command, which requires async agent ops (stop +
+    // start) and cannot be handled by the sync command registry.
+    if let Some(assign_cmd) = super::assign::extract_assign_command(
+        &user_message,
+        &ctx.bot_name,
+        ctx.bot_user_id.as_str(),
+    ) {
+        let response = match assign_cmd {
+            super::assign::AssignCommand::Assign {
+                story_number,
+                model,
+            } => {
+                slog!(
+                    "[matrix-bot] Handling assign command from {sender}: story {story_number} model={model}"
+                );
+                super::assign::handle_assign(
+                    &ctx.bot_name,
+                    &story_number,
+                    &model,
+                    &ctx.project_root,
+                    &ctx.agents,
+                )
+                .await
+            }
+            super::assign::AssignCommand::BadArgs => {
+                format!(
+                    "Usage: `{} assign <number> <model>` (e.g. `assign 42 opus`)",
+                    ctx.bot_name
+                )
+            }
+        };
+        let html = markdown_to_html(&response);
+        if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
+            && let Ok(event_id) = msg_id.parse()
+        {
+            ctx.bot_sent_event_ids.lock().await.insert(event_id);
+        }
+        return;
+    }
+
     // Check for the htop command, which requires async Matrix access (Room)
     // and cannot be handled by the sync command registry.
     if let Some(htop_cmd) =
@@ -894,6 +973,39 @@ async fn on_room_message(
         return;
     }

+    // Check for the rmtree command, which requires async agent/worktree ops
+    // and cannot be handled by the sync command registry.
+    if let Some(rmtree_cmd) = super::rmtree::extract_rmtree_command(
+        &user_message,
+        &ctx.bot_name,
+        ctx.bot_user_id.as_str(),
+    ) {
+        let response = match rmtree_cmd {
+            super::rmtree::RmtreeCommand::Rmtree { story_number } => {
+                slog!(
+                    "[matrix-bot] Handling rmtree command from {sender}: story {story_number}"
+                );
+                super::rmtree::handle_rmtree(
+                    &ctx.bot_name,
+                    &story_number,
+                    &ctx.project_root,
+                    &ctx.agents,
+                )
+                .await
+            }
+            super::rmtree::RmtreeCommand::BadArgs => {
+                format!("Usage: `{} rmtree <number>`", ctx.bot_name)
+            }
+        };
+        let html = markdown_to_html(&response);
+        if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
+            && let Ok(event_id) = msg_id.parse()
+        {
+            ctx.bot_sent_event_ids.lock().await.insert(event_id);
+        }
+        return;
+    }
+
     // Check for the start command, which requires async agent ops and cannot
     // be handled by the sync command registry.
     if let Some(start_cmd) = super::start::extract_start_command(
@@ -960,6 +1072,39 @@ async fn on_room_message(
         return;
     }

+    // Check for the rebuild command, which requires async agent and process ops
+    // and cannot be handled by the sync command registry.
+    if super::rebuild::extract_rebuild_command(
+        &user_message,
+        &ctx.bot_name,
+        ctx.bot_user_id.as_str(),
+    )
+    .is_some()
+    {
+        slog!("[matrix-bot] Handling rebuild command from {sender}");
+        // Acknowledge immediately — the rebuild may take a while or re-exec.
+        let ack = "Rebuilding server… this may take a moment.";
+        let ack_html = markdown_to_html(ack);
+        if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, ack, &ack_html).await
+            && let Ok(event_id) = msg_id.parse()
+        {
+            ctx.bot_sent_event_ids.lock().await.insert(event_id);
+        }
+        let response = super::rebuild::handle_rebuild(
+            &ctx.bot_name,
+            &ctx.project_root,
+            &ctx.agents,
+        )
+        .await;
+        let html = markdown_to_html(&response);
+        if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
+            && let Ok(event_id) = msg_id.parse()
+        {
+            ctx.bot_sent_event_ids.lock().await.insert(event_id);
+        }
+        return;
+    }
+
     // Spawn a separate task so the Matrix sync loop is not blocked while we
     // wait for the LLM response (which can take several seconds).
     tokio::spawn(async move {
@@ -1472,7 +1617,7 @@ mod tests {
             ambient_rooms: Arc::new(std::sync::Mutex::new(HashSet::new())),
             agents: Arc::new(AgentPool::new_test(3000)),
             htop_sessions: Arc::new(TokioMutex::new(HashMap::new())),
-            transport: Arc::new(crate::whatsapp::WhatsAppTransport::new("test-phone".to_string(), "test-token".to_string(), "pipeline_notification".to_string())),
+            transport: Arc::new(crate::chat::transport::whatsapp::WhatsAppTransport::new("test-phone".to_string(), "test-token".to_string(), "pipeline_notification".to_string())),
         };
         // Clone must work (required by Matrix SDK event handler injection).
         let _cloned = ctx.clone();
@@ -1,7 +1,7 @@
 //! Handler for the `ambient` command.

 use super::CommandContext;
-use crate::matrix::config::save_ambient_rooms;
+use crate::chat::transport::matrix::config::save_ambient_rooms;

 /// Toggle ambient mode for this room.
 ///

server/src/chat/transport/matrix/commands/assign.rs (new file, 57 lines)
@@ -0,0 +1,57 @@
//! Handler stub for the `assign` command.
//!
//! The real implementation lives in `crate::chat::transport::matrix::assign` (async). This
//! stub exists only so that `assign` appears in the help registry — the
//! handler always returns `None` so the bot's message loop falls through to
//! the async handler in `bot.rs`.

use super::CommandContext;

pub(super) fn handle_assign(_ctx: &CommandContext) -> Option<String> {
    // Handled asynchronously in bot.rs / crate::chat::transport::matrix::assign.
    None
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    // -- registration / help ------------------------------------------------

    #[test]
    fn assign_command_is_registered() {
        use super::super::commands;
        let found = commands().iter().any(|c| c.name == "assign");
        assert!(found, "assign command must be in the registry");
    }

    #[test]
    fn assign_command_appears_in_help() {
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
        );
        let output = result.unwrap();
        assert!(
            output.contains("assign"),
            "help should list assign command: {output}"
        );
    }

    #[test]
    fn assign_command_falls_through_to_none_in_registry() {
        // The assign handler in the registry returns None (handled async in bot.rs).
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy assign 42 opus",
        );
        assert!(
            result.is_none(),
            "assign should not produce a sync response (handled async): {result:?}"
        );
    }
}
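The stub-plus-fall-through pattern this file relies on (a registry entry whose handler returns `None` so a later async path can take over) can be sketched in isolation. The types and names below are illustrative only, not the repo's actual `BotCommand` machinery:

```rust
// Illustrative registry where `None` means
// "not handled synchronously; let the async path deal with it".
struct Command {
    name: &'static str,
    handler: fn(&str) -> Option<String>,
}

fn handle_assign(_args: &str) -> Option<String> {
    None // listed in help, but actually handled elsewhere
}

fn handle_help(_args: &str) -> Option<String> {
    Some("available: assign, help".to_string())
}

fn commands() -> &'static [Command] {
    &[
        Command { name: "assign", handler: handle_assign },
        Command { name: "help", handler: handle_help },
    ]
}

fn dispatch(name: &str, args: &str) -> Option<String> {
    commands()
        .iter()
        .find(|c| c.name == name)
        .and_then(|c| (c.handler)(args)) // `None` falls through to the async handler
}

fn main() {
    assert_eq!(dispatch("assign", "42 opus"), None);
    assert!(dispatch("help", "").unwrap().contains("assign"));
}
```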
@@ -4,7 +4,9 @@ use super::{commands, CommandContext};

pub(super) fn handle_help(ctx: &CommandContext) -> Option<String> {
    let mut output = format!("**{} Commands**\n\n", ctx.bot_name);
-   for cmd in commands() {
+   let mut sorted: Vec<_> = commands().iter().collect();
+   sorted.sort_by_key(|c| c.name);
+   for cmd in sorted {
        output.push_str(&format!("- **{}** — {}\n", cmd.name, cmd.description));
    }
    Some(output)
@@ -75,6 +77,26 @@ mod tests {
        assert!(output.contains("status"), "help should list status command: {output}");
    }

+   #[test]
+   fn help_output_is_alphabetical() {
+       let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
+       let output = result.unwrap();
+       // Search for **name** (bold markdown) to avoid substring matches in descriptions.
+       let mut positions: Vec<(usize, &str)> = commands()
+           .iter()
+           .map(|c| {
+               let marker = format!("**{}**", c.name);
+               let pos = output.find(&marker).expect("command must appear in help as **name**");
+               (pos, c.name)
+           })
+           .collect();
+       positions.sort_by_key(|(pos, _)| *pos);
+       let names_in_order: Vec<&str> = positions.iter().map(|(_, n)| *n).collect();
+       let mut sorted = names_in_order.clone();
+       sorted.sort();
+       assert_eq!(names_in_order, sorted, "commands must appear in alphabetical order");
+   }
+
    #[test]
    fn help_output_includes_ambient() {
        let result = try_cmd_addressed("Timmy", "@timmy:homeserver.local", "@timmy help");
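The help change above sorts borrowed entries from the static registry rather than cloning them. A minimal sketch of that idiom, with a simplified `Cmd` type standing in for the real `BotCommand`:

```rust
struct Cmd {
    name: &'static str,
    description: &'static str,
}

static CMDS: &[Cmd] = &[
    Cmd { name: "status", description: "Show pipeline status" },
    Cmd { name: "assign", description: "Pre-assign a model" },
    Cmd { name: "help", description: "Show this list" },
];

fn main() {
    // Collect references, then sort by name; the static slice itself stays untouched.
    let mut sorted: Vec<&Cmd> = CMDS.iter().collect();
    sorted.sort_by_key(|c| c.name);
    for c in &sorted {
        println!("- **{}** — {}", c.name, c.description);
    }
    // Prints assign, help, status in alphabetical order.
}
```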
@@ -6,6 +6,7 @@
//! as they are added.

mod ambient;
+mod assign;
mod cost;
mod git;
mod help;
@@ -13,6 +14,7 @@ mod move_story;
mod overview;
mod show;
mod status;
+mod triage;

use crate::agents::AgentPool;
use std::collections::HashSet;
@@ -38,7 +40,7 @@ pub struct BotCommand {
/// message body.
///
/// All identifiers are platform-agnostic strings so this struct works with
-/// any [`ChatTransport`](crate::transport::ChatTransport) implementation.
+/// any [`ChatTransport`](crate::chat::ChatTransport) implementation.
pub struct CommandDispatch<'a> {
    /// The bot's display name (e.g., "Timmy").
    pub bot_name: &'a str,
@@ -75,6 +77,11 @@ pub struct CommandContext<'a> {
/// Add new commands here — they will automatically appear in `help` output.
pub fn commands() -> &'static [BotCommand] {
    &[
+       BotCommand {
+           name: "assign",
+           description: "Pre-assign a model to a story: `assign <number> <model>` (e.g. `assign 42 opus`)",
+           handler: assign::handle_assign,
+       },
        BotCommand {
            name: "help",
            description: "Show this list of available commands",
@@ -82,7 +89,7 @@ pub fn commands() -> &'static [BotCommand] {
        },
        BotCommand {
            name: "status",
-           description: "Show pipeline status and agent availability",
+           description: "Show pipeline status and agent availability; or `status <number>` for a story triage dump",
            handler: status::handle_status,
        },
        BotCommand {
@@ -130,11 +137,21 @@ pub fn commands() -> &'static [BotCommand] {
            description: "Remove a work item from the pipeline: `delete <number>`",
            handler: handle_delete_fallback,
        },
+       BotCommand {
+           name: "rmtree",
+           description: "Delete the worktree for a story without removing it from the pipeline: `rmtree <number>`",
+           handler: handle_rmtree_fallback,
+       },
        BotCommand {
            name: "reset",
            description: "Clear the current Claude Code session and start fresh",
            handler: handle_reset_fallback,
        },
+       BotCommand {
+           name: "rebuild",
+           description: "Rebuild the server binary and restart",
+           handler: handle_rebuild_fallback,
+       },
    ]
}

@@ -240,6 +257,16 @@ fn handle_start_fallback(_ctx: &CommandContext) -> Option<String> {
    None
}

+/// Fallback handler for the `rmtree` command when it is not intercepted by
+/// the async handler in `on_room_message`. In practice this is never called —
+/// rmtree is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "rmtree" as a prompt.
+fn handle_rmtree_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
/// Fallback handler for the `delete` command when it is not intercepted by
/// the async handler in `on_room_message`. In practice this is never called —
/// delete is detected and handled before `try_handle_command` is invoked.
@@ -260,6 +287,16 @@ fn handle_reset_fallback(_ctx: &CommandContext) -> Option<String> {
    None
}

+/// Fallback handler for the `rebuild` command when it is not intercepted by
+/// the async handler in `on_room_message`. In practice this is never called —
+/// rebuild is detected and handled before `try_handle_command` is invoked.
+/// The entry exists in the registry only so `help` lists it.
+///
+/// Returns `None` to prevent the LLM from receiving "rebuild" as a prompt.
+fn handle_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
+    None
+}
+
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
@@ -7,28 +7,45 @@ use std::collections::{HashMap, HashSet};
use super::CommandContext;

pub(super) fn handle_status(ctx: &CommandContext) -> Option<String> {
+   if ctx.args.trim().is_empty() {
        Some(build_pipeline_status(ctx.project_root, ctx.agents))
+   } else {
+       super::triage::handle_triage(ctx)
+   }
}

/// Format a short display label for a work item.
///
-/// Extracts the leading numeric ID from the file stem (e.g. `"293"` from
-/// `"293_story_register_all_bot_commands"`) and combines it with the human-
-/// readable name from the front matter when available.
+/// Extracts the leading numeric ID and optional type tag from the file stem
+/// (e.g. `"293"` and `"story"` from `"293_story_register_all_bot_commands"`)
+/// and combines them with the human-readable name from the front matter when
+/// available. Known types (`story`, `bug`, `spike`, `refactor`) are shown as
+/// bracketed labels; unknown or missing types are omitted silently.
///
/// Examples:
-/// - `("293_story_foo", Some("Register all bot commands"))` → `"293 — Register all bot commands"`
-/// - `("293_story_foo", None)` → `"293"`
+/// - `("293_story_foo", Some("Register all bot commands"))` → `"293 [story] — Register all bot commands"`
+/// - `("375_bug_foo", None)` → `"375 [bug]"`
+/// - `("293_story_foo", None)` → `"293 [story]"`
/// - `("no_number_here", None)` → `"no_number_here"`
pub(super) fn story_short_label(stem: &str, name: Option<&str>) -> String {
-   let number = stem
-       .split('_')
-       .next()
-       .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
-       .unwrap_or(stem);
-   match name {
-       Some(n) => format!("{number} — {n}"),
+   let mut parts = stem.splitn(3, '_');
+   let first = parts.next().unwrap_or(stem);
+   let (number, type_label) = if !first.is_empty() && first.chars().all(|c| c.is_ascii_digit()) {
+       let t = parts.next().and_then(|t| match t {
+           "story" | "bug" | "spike" | "refactor" => Some(t),
+           _ => None,
+       });
+       (first, t)
+   } else {
+       (stem, None)
+   };
+   let prefix = match type_label {
+       Some(t) => format!("{number} [{t}]"),
        None => number.to_string(),
+   };
+   match name {
+       Some(n) => format!("{prefix} — {n}"),
+       None => prefix,
    }
}

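Based on the examples in the doc comment above, the new labelling can be exercised standalone. This is a self-contained copy of the new `story_short_label` body with the documented behaviours checked as assertions (the `375_bug_...` and `42_task_...` inputs mirror cases from the diff's own docs and tests):

```rust
fn story_short_label(stem: &str, name: Option<&str>) -> String {
    let mut parts = stem.splitn(3, '_');
    let first = parts.next().unwrap_or(stem);
    // Only a purely numeric first segment counts as an ID; otherwise keep the whole stem.
    let (number, type_label) = if !first.is_empty() && first.chars().all(|c| c.is_ascii_digit()) {
        let t = parts.next().and_then(|t| match t {
            "story" | "bug" | "spike" | "refactor" => Some(t),
            _ => None,
        });
        (first, t)
    } else {
        (stem, None)
    };
    let prefix = match type_label {
        Some(t) => format!("{number} [{t}]"),
        None => number.to_string(),
    };
    match name {
        Some(n) => format!("{prefix} — {n}"),
        None => prefix,
    }
}

fn main() {
    assert_eq!(
        story_short_label("293_story_foo", Some("Register all bot commands")),
        "293 [story] — Register all bot commands"
    );
    assert_eq!(story_short_label("375_bug_foo", None), "375 [bug]");
    // Unknown type tags are omitted silently.
    assert_eq!(story_short_label("42_task_do_something", Some("Do something")), "42 — Do something");
    assert_eq!(story_short_label("no_number_here", None), "no_number_here");
}
```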
@@ -196,13 +213,13 @@ mod tests {
    #[test]
    fn short_label_extracts_number_and_name() {
        let label = story_short_label("293_story_register_all_bot_commands", Some("Register all bot commands"));
-       assert_eq!(label, "293 — Register all bot commands");
+       assert_eq!(label, "293 [story] — Register all bot commands");
    }

    #[test]
    fn short_label_number_only_when_no_name() {
        let label = story_short_label("297_story_improve_bot_status_command_formatting", None);
-       assert_eq!(label, "297");
+       assert_eq!(label, "297 [story]");
    }

    #[test]
@@ -220,6 +237,37 @@ mod tests {
        );
    }

+   #[test]
+   fn short_label_shows_bug_type() {
+       let label = story_short_label("375_bug_default_project_toml", Some("Default project.toml issue"));
+       assert_eq!(label, "375 [bug] — Default project.toml issue");
+   }
+
+   #[test]
+   fn short_label_shows_spike_type() {
+       let label = story_short_label("61_spike_filesystem_watcher_architecture", Some("Filesystem watcher architecture"));
+       assert_eq!(label, "61 [spike] — Filesystem watcher architecture");
+   }
+
+   #[test]
+   fn short_label_shows_refactor_type() {
+       let label = story_short_label("260_refactor_upgrade_libsqlite3_sys", Some("Upgrade libsqlite3-sys"));
+       assert_eq!(label, "260 [refactor] — Upgrade libsqlite3-sys");
+   }
+
+   #[test]
+   fn short_label_omits_unknown_type() {
+       let label = story_short_label("42_task_do_something", Some("Do something"));
+       assert_eq!(label, "42 — Do something");
+   }
+
+   #[test]
+   fn short_label_no_type_when_only_id() {
+       // Stem with only a numeric ID and no type segment
+       let label = story_short_label("42", Some("Some item"));
+       assert_eq!(label, "42 — Some item");
+   }
+
    // -- build_pipeline_status formatting -----------------------------------

    #[test]
@@ -244,8 +292,8 @@ mod tests {
        "output must not show full filename stem: {output}"
    );
    assert!(
-       output.contains("293 — Register all bot commands"),
-       "output must show number and title: {output}"
+       output.contains("293 [story] — Register all bot commands"),
+       "output must show number, type, and title: {output}"
    );
}

@@ -284,7 +332,7 @@ mod tests {
    let output = build_pipeline_status(tmp.path(), &agents);

    assert!(
-       output.contains("293 — Register all bot commands — $0.29"),
+       output.contains("293 [story] — Register all bot commands — $0.29"),
        "output must show cost next to story: {output}"
    );
}

@@ -347,7 +395,7 @@ mod tests {
    let output = build_pipeline_status(tmp.path(), &agents);

    assert!(
-       output.contains("293 — Register all bot commands — $0.29"),
+       output.contains("293 [story] — Register all bot commands — $0.29"),
        "output must show aggregated cost: {output}"
    );
}
server/src/chat/transport/matrix/commands/triage.rs (new file, 548 lines)
@@ -0,0 +1,548 @@
//! Handler for the story triage dump subcommand of `status`.
//!
//! Produces a triage dump for a story that is currently in-progress
//! (`work/2_current/`): metadata, acceptance criteria, worktree/branch state,
//! git diff, recent commits, and the tail of the agent log.
//!
//! The command is handled entirely at the bot level — no LLM invocation.

use super::CommandContext;
use std::path::{Path, PathBuf};
use std::process::Command;

/// Handle `{bot_name} status {number}`.
pub(super) fn handle_triage(ctx: &CommandContext) -> Option<String> {
    let num_str = ctx.args.trim();
    if num_str.is_empty() {
        return Some(format!(
            "Usage: `{} status <number>`\n\nShows a triage dump for a story currently in progress.",
            ctx.bot_name
        ));
    }
    if !num_str.chars().all(|c| c.is_ascii_digit()) {
        return Some(format!(
            "Invalid story number: `{num_str}`. Usage: `{} status <number>`",
            ctx.bot_name
        ));
    }

    let current_dir = ctx
        .project_root
        .join(".storkit")
        .join("work")
        .join("2_current");

    match find_story_in_dir(&current_dir, num_str) {
        Some((path, stem)) => Some(build_triage_dump(ctx, &path, &stem, num_str)),
        None => Some(format!(
            "Story **{num_str}** is not currently in progress (not found in `work/2_current/`)."
        )),
    }
}

/// Find a `.md` file whose numeric prefix matches `num_str` in `dir`.
///
/// Returns `(path, file_stem)` for the first match.
fn find_story_in_dir(dir: &Path, num_str: &str) -> Option<(PathBuf, String)> {
    let entries = std::fs::read_dir(dir).ok()?;
    for entry in entries.flatten() {
        let path = entry.path();
        if path.extension().and_then(|e| e.to_str()) != Some("md") {
            continue;
        }
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            let file_num = stem
                .split('_')
                .next()
                .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))
                .unwrap_or("");
            if file_num == num_str {
                return Some((path.clone(), stem.to_string()));
            }
        }
    }
    None
}

/// Build the full triage dump for a story.
fn build_triage_dump(
    ctx: &CommandContext,
    story_path: &Path,
    story_id: &str,
    num_str: &str,
) -> String {
    let contents = match std::fs::read_to_string(story_path) {
        Ok(c) => c,
        Err(e) => return format!("Failed to read story {num_str}: {e}"),
    };

    let meta = crate::io::story_metadata::parse_front_matter(&contents).ok();
    let name = meta.as_ref().and_then(|m| m.name.as_deref()).unwrap_or("(unnamed)");

    let mut out = String::new();

    // ---- Header ----
    out.push_str(&format!("## Story {num_str} — {name}\n"));
    out.push_str("**Stage:** In Progress (`2_current`)\n\n");

    // ---- Front matter fields ----
    if let Some(ref m) = meta {
        let mut fields: Vec<String> = Vec::new();
        if let Some(true) = m.blocked {
            fields.push("**blocked:** true".to_string());
        }
        if let Some(ref agent) = m.agent {
            fields.push(format!("**agent:** {agent}"));
        }
        if let Some(ref qa) = m.qa {
            fields.push(format!("**qa:** {qa}"));
        }
        if let Some(true) = m.review_hold {
            fields.push("**review_hold:** true".to_string());
        }
        if let Some(rc) = m.retry_count
            && rc > 0
        {
            fields.push(format!("**retry_count:** {rc}"));
        }
        if let Some(ref cb) = m.coverage_baseline {
            fields.push(format!("**coverage_baseline:** {cb}"));
        }
        if let Some(ref mf) = m.merge_failure {
            fields.push(format!("**merge_failure:** {mf}"));
        }
        if !fields.is_empty() {
            out.push_str("**Front matter:**\n");
            for f in &fields {
                out.push_str(&format!(" • {f}\n"));
            }
            out.push('\n');
        }
    }

    // ---- Acceptance criteria ----
    let criteria = parse_acceptance_criteria(&contents);
    if !criteria.is_empty() {
        out.push_str("**Acceptance Criteria:**\n");
        for (checked, text) in &criteria {
            let mark = if *checked { "✅" } else { "⬜" };
            out.push_str(&format!(" {mark} {text}\n"));
        }
        let total = criteria.len();
        let done = criteria.iter().filter(|(c, _)| *c).count();
        out.push_str(&format!(" *{done}/{total} complete*\n"));
        out.push('\n');
    }

    // ---- Worktree and branch ----
    let wt_path = crate::worktree::worktree_path(ctx.project_root, story_id);
    let branch = format!("feature/story-{story_id}");
    if wt_path.is_dir() {
        out.push_str(&format!("**Worktree:** `{}`\n", wt_path.display()));
        out.push_str(&format!("**Branch:** `{branch}`\n\n"));

        // ---- git diff --stat ----
        let diff_stat = run_git(
            &wt_path,
            &["diff", "--stat", "master...HEAD"],
        );
        if !diff_stat.is_empty() {
            out.push_str("**Diff stat (vs master):**\n```\n");
            out.push_str(&diff_stat);
            out.push_str("```\n\n");
        } else {
            out.push_str("**Diff stat (vs master):** *(no changes)*\n\n");
        }

        // ---- Last 5 commits on feature branch ----
        let log = run_git(
            &wt_path,
            &[
                "log",
                "master..HEAD",
                "--pretty=format:%h %s",
                "-5",
            ],
        );
        if !log.is_empty() {
            out.push_str("**Recent commits (branch only):**\n```\n");
            out.push_str(&log);
            out.push_str("\n```\n\n");
        } else {
            out.push_str("**Recent commits (branch only):** *(none yet)*\n\n");
        }
    } else {
        out.push_str(&format!("**Branch:** `{branch}`\n"));
        out.push_str("**Worktree:** *(not yet created)*\n\n");
    }

    // ---- Agent log tail ----
    let log_dir = ctx
        .project_root
        .join(".storkit")
        .join("logs")
        .join(story_id);
    match latest_log_file(&log_dir) {
        Some(log_path) => {
            let tail = read_log_tail(&log_path, 20);
            let filename = log_path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("agent.log");
            if tail.is_empty() {
                out.push_str(&format!("**Agent log** (`{filename}`):** *(empty)*\n"));
            } else {
                out.push_str(&format!("**Agent log tail** (`{filename}`):\n```\n"));
                out.push_str(&tail);
                out.push_str("\n```\n");
            }
        }
        None => {
            out.push_str("**Agent log:** *(no log found)*\n");
        }
    }

    out
}

/// Parse acceptance criteria from story markdown.
///
/// Returns a list of `(checked, text)` for every `- [ ] ...` and `- [x] ...` line.
fn parse_acceptance_criteria(contents: &str) -> Vec<(bool, String)> {
    contents
        .lines()
        .filter_map(|line| {
            let trimmed = line.trim();
            if let Some(text) = trimmed.strip_prefix("- [x] ").or_else(|| trimmed.strip_prefix("- [X] ")) {
                Some((true, text.to_string()))
            } else {
                trimmed.strip_prefix("- [ ] ").map(|text| (false, text.to_string()))
            }
        })
        .collect()
}

/// Run a git command in the given directory, returning trimmed stdout (or empty on error).
fn run_git(dir: &Path, args: &[&str]) -> String {
    Command::new("git")
        .args(args)
        .current_dir(dir)
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .unwrap_or_default()
}

/// Find the most recently modified `.log` file in the given directory,
/// regardless of agent name.
fn latest_log_file(log_dir: &Path) -> Option<PathBuf> {
    if !log_dir.is_dir() {
        return None;
    }
    let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
    for entry in std::fs::read_dir(log_dir).ok()?.flatten() {
        let path = entry.path();
        if path.extension().and_then(|e| e.to_str()) != Some("log") {
            continue;
        }
        let modified = match entry.metadata().and_then(|m| m.modified()) {
            Ok(t) => t,
            Err(_) => continue,
        };
        if best.as_ref().is_none_or(|(_, t)| modified > *t) {
            best = Some((path, modified));
        }
    }
    best.map(|(p, _)| p)
}

/// Read the last `n` non-empty lines from a file as a single string.
fn read_log_tail(path: &Path, n: usize) -> String {
    let contents = match std::fs::read_to_string(path) {
        Ok(c) => c,
        Err(_) => return String::new(),
    };
    let lines: Vec<&str> = contents.lines().filter(|l| !l.trim().is_empty()).collect();
    let start = lines.len().saturating_sub(n);
    lines[start..].join("\n")
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::AgentPool;
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};

    use super::super::{CommandDispatch, try_handle_command};

    fn status_triage_cmd(root: &Path, args: &str) -> Option<String> {
        let agents = Arc::new(AgentPool::new_test(3000));
        let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
        let room_id = "!test:example.com".to_string();
        let dispatch = CommandDispatch {
            bot_name: "Timmy",
            bot_user_id: "@timmy:homeserver.local",
            project_root: root,
            agents: &agents,
            ambient_rooms: &ambient_rooms,
            room_id: &room_id,
        };
        try_handle_command(&dispatch, &format!("@timmy status {args}"))
    }

    fn write_story_file(root: &Path, stage: &str, filename: &str, content: &str) {
        let dir = root.join(".storkit/work").join(stage);
        std::fs::create_dir_all(&dir).unwrap();
        std::fs::write(dir.join(filename), content).unwrap();
    }

    // -- registration -------------------------------------------------------

    #[test]
    fn whatsup_command_is_not_registered() {
        let found = super::super::commands().iter().any(|c| c.name == "whatsup");
        assert!(!found, "whatsup command must not be in the registry (renamed to status)");
    }

    #[test]
    fn status_command_appears_in_help() {
        let result = super::super::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
        );
        let output = result.unwrap();
        assert!(
            output.contains("status"),
            "help should list status command: {output}"
        );
    }

    // -- input validation ---------------------------------------------------

    #[test]
    fn whatsup_no_args_returns_usage() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = status_triage_cmd(tmp.path(), "").unwrap();
        assert!(
            output.contains("Pipeline Status"),
            "no args should show pipeline status: {output}"
        );
    }

    #[test]
    fn whatsup_non_numeric_returns_error() {
        let tmp = tempfile::TempDir::new().unwrap();
        let output = status_triage_cmd(tmp.path(), "abc").unwrap();
        assert!(
            output.contains("Invalid"),
            "non-numeric arg should return error: {output}"
        );
    }

    // -- not found ----------------------------------------------------------

    #[test]
    fn whatsup_story_not_in_current_returns_friendly_message() {
        let tmp = tempfile::TempDir::new().unwrap();
        // Create the directory but put the story in backlog, not current
        write_story_file(
            tmp.path(),
            "1_backlog",
            "42_story_not_in_current.md",
            "---\nname: Not in current\n---\n",
        );
        let output = status_triage_cmd(tmp.path(), "42").unwrap();
        assert!(
            output.contains("42"),
            "message should include story number: {output}"
        );
        assert!(
            output.contains("not") || output.contains("Not"),
            "message should say not found/in progress: {output}"
        );
    }

    // -- found in 2_current -------------------------------------------------

    #[test]
    fn whatsup_shows_story_name_and_stage() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "99_story_my_feature.md",
            "---\nname: My Feature\n---\n\n## Acceptance Criteria\n\n- [ ] First thing\n- [x] Done thing\n",
        );
        let output = status_triage_cmd(tmp.path(), "99").unwrap();
        assert!(output.contains("99"), "should show story number: {output}");
        assert!(
            output.contains("My Feature"),
            "should show story name: {output}"
        );
        assert!(
            output.contains("In Progress") || output.contains("2_current"),
            "should show pipeline stage: {output}"
        );
    }

    #[test]
    fn whatsup_shows_acceptance_criteria() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "99_story_criteria_test.md",
            "---\nname: Criteria Test\n---\n\n- [ ] First thing\n- [x] Done thing\n- [ ] Second thing\n",
        );
        let output = status_triage_cmd(tmp.path(), "99").unwrap();
        assert!(
            output.contains("First thing"),
            "should show unchecked criterion: {output}"
        );
        assert!(
            output.contains("Done thing"),
            "should show checked criterion: {output}"
        );
        // 1 of 3 done
        assert!(
            output.contains("1/3"),
            "should show checked/total count: {output}"
        );
    }

    #[test]
    fn whatsup_shows_blocked_field() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "55_story_blocked_story.md",
            "---\nname: Blocked Story\nblocked: true\n---\n",
        );
        let output = status_triage_cmd(tmp.path(), "55").unwrap();
        assert!(
            output.contains("blocked"),
            "should show blocked field: {output}"
        );
    }

    #[test]
    fn whatsup_shows_agent_field() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "55_story_agent_story.md",
            "---\nname: Agent Story\nagent: coder-1\n---\n",
        );
        let output = status_triage_cmd(tmp.path(), "55").unwrap();
        assert!(
            output.contains("coder-1"),
            "should show agent field: {output}"
        );
    }

    #[test]
    fn whatsup_no_worktree_shows_not_created() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "77_story_no_worktree.md",
            "---\nname: No Worktree\n---\n",
        );
        let output = status_triage_cmd(tmp.path(), "77").unwrap();
        // Branch name should still appear
        assert!(
            output.contains("feature/story-77"),
            "should show branch name: {output}"
        );
    }

    #[test]
    fn whatsup_no_log_shows_no_log_message() {
        let tmp = tempfile::TempDir::new().unwrap();
        write_story_file(
            tmp.path(),
            "2_current",
            "77_story_no_log.md",
            "---\nname: No Log\n---\n",
        );
        let output = status_triage_cmd(tmp.path(), "77").unwrap();
        assert!(
            output.contains("no log") || output.contains("No log") || output.contains("*(no log found)*"),
            "should indicate no log exists: {output}"
        );
    }

    // -- parse_acceptance_criteria ------------------------------------------

    #[test]
    fn parse_criteria_mixed() {
        let input = "## AC\n- [ ] First\n- [x] Done\n- [X] Also done\n- [ ] Last\n";
        let result = parse_acceptance_criteria(input);
        assert_eq!(result.len(), 4);
        assert_eq!(result[0], (false, "First".to_string()));
        assert_eq!(result[1], (true, "Done".to_string()));
        assert_eq!(result[2], (true, "Also done".to_string()));
        assert_eq!(result[3], (false, "Last".to_string()));
    }

    #[test]
    fn parse_criteria_empty() {
        let input = "# Story\nNo checkboxes here.\n";
        let result = parse_acceptance_criteria(input);
        assert!(result.is_empty());
    }

    // -- read_log_tail -------------------------------------------------------

    #[test]
    fn read_log_tail_returns_last_n_lines() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("test.log");
        let content = (1..=30).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
        std::fs::write(&path, &content).unwrap();
        let tail = read_log_tail(&path, 5);
        let lines: Vec<&str> = tail.lines().collect();
        assert_eq!(lines.len(), 5);
        assert_eq!(lines[0], "line 26");
        assert_eq!(lines[4], "line 30");
    }

    #[test]
    fn read_log_tail_fewer_lines_than_n() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("short.log");
        std::fs::write(&path, "line A\nline B\n").unwrap();
        let tail = read_log_tail(&path, 20);
        assert!(tail.contains("line A"));
        assert!(tail.contains("line B"));
    }

    // -- latest_log_file ----------------------------------------------------

    #[test]
    fn latest_log_file_returns_none_for_missing_dir() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = latest_log_file(&tmp.path().join("nonexistent"));
        assert!(result.is_none());
    }

    #[test]
    fn latest_log_file_finds_log() {
        let tmp = tempfile::TempDir::new().unwrap();
        let log_path = tmp.path().join("coder-1-sess-abc.log");
        std::fs::write(&log_path, "some log content\n").unwrap();
        let result = latest_log_file(tmp.path());
        assert!(result.is_some());
        assert_eq!(result.unwrap(), log_path);
    }
}
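For reference, the log-tail helper introduced above can be exercised on its own. This is a direct copy of the diff's `read_log_tail` logic with a small demo; the temp-file path used here is just for the example:

```rust
use std::path::Path;

/// Read the last `n` non-empty lines from a file as a single string
/// (same logic as `read_log_tail` in triage.rs above).
fn read_log_tail(path: &Path, n: usize) -> String {
    let contents = match std::fs::read_to_string(path) {
        Ok(c) => c,
        Err(_) => return String::new(),
    };
    let lines: Vec<&str> = contents.lines().filter(|l| !l.trim().is_empty()).collect();
    let start = lines.len().saturating_sub(n);
    lines[start..].join("\n")
}

fn main() {
    let path = std::env::temp_dir().join("triage_example.log");
    std::fs::write(&path, "one\n\ntwo\nthree\nfour\n").unwrap();
    // Blank lines are skipped, so the 3-line tail is "two\nthree\nfour".
    assert_eq!(read_log_tail(&path, 3), "two\nthree\nfour");
    let _ = std::fs::remove_file(&path);
}
```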
Some files were not shown because too many files have changed in this diff.