Bump version to 0.7.0

storkit: merge 428_refactor_split_pool_pipeline_rs_into_submodules
storkit: done 428_refactor_split_pool_pipeline_rs_into_submodules
2026-03-28 12:20:00 +00:00 · 2026-03-28 11:50:15 +00:00 · 2026-03-28 11:50:02 +00:00 · 2026-03-28 11:47:26 +00:00 · 2026-03-28 11:35:03 +00:00 · 2026-03-28 11:33:43 +00:00
196 changed files with 27847 additions and 21111 deletions
@@ -8,6 +8,7 @@
 # App specific (root-level; storkit subdirectory patterns live in .storkit/.gitignore)
 store.json
 .storkit_port
 .storkit/bot.toml.bak
 # Rust stuff
 target
@@ -20,3 +20,6 @@ coverage/
 # Token usage log (generated at runtime, contains cost data)
 token_usage.jsonl
 # Chat service logs
 whatsapp_history.json
@@ -228,7 +228,29 @@ If a user hands you this document and says "Apply this process to my project":
 ---
-## 6. Code Quality
+## 6. Chat Bot Configuration
 Story Kit includes a chat bot that can be connected to one messaging platform at a time. The bot handles commands, LLM conversations, and pipeline notifications.
 **Only one transport can be active at a time.** To configure the bot, copy the appropriate example file to `.storkit/bot.toml`:
 | Transport | Example file | Webhook endpoint |
 |-----------|-------------|-----------------|
 | Matrix | `bot.toml.matrix.example` | *(uses Matrix sync, no webhook)* |
 | WhatsApp (Meta Cloud API) | `bot.toml.whatsapp-meta.example` | `/webhook/whatsapp` |
 | WhatsApp (Twilio) | `bot.toml.whatsapp-twilio.example` | `/webhook/whatsapp` |
 | Slack | `bot.toml.slack.example` | `/webhook/slack` |
 ```bash
 cp .storkit/bot.toml.matrix.example .storkit/bot.toml
 # Edit bot.toml with your credentials
 ```
 The `bot.toml` file is gitignored (it contains secrets). The example files are checked in for reference.
 ---
 ## 7. Code Quality
 **MANDATORY:** Before completing Step 3 (Verification) of any story, you MUST run all applicable linters, formatters, and test suites and fix ALL errors and warnings. Zero tolerance for warnings or errors.
@@ -1,61 +0,0 @@
 homeserver = "https://matrix.example.com"
 username = "@botname:example.com"
 password = "your-bot-password"
 # List one or more rooms to listen in.  Use a single-element list for one room.
 room_ids = ["!roomid:example.com"]
 # Optional: the deprecated single-room key is still accepted for backwards compat.
 # room_id = "!roomid:example.com"
 allowed_users = ["@youruser:example.com"]
 enabled = false
 # Maximum conversation turns to remember per room (default: 20).
 # history_size = 20
 # Rooms where the bot responds to all messages (not just addressed ones).
 # This list is updated automatically when users toggle ambient mode at runtime.
 # ambient_rooms = ["!roomid:example.com"]
 # ── WhatsApp Business API ──────────────────────────────────────────────
 # Set transport = "whatsapp" to use WhatsApp instead of Matrix.
 # The webhook endpoint will be available at /webhook/whatsapp.
 # You must configure this URL in the Meta Developer Dashboard.
 #
 # transport = "whatsapp"
 # whatsapp_phone_number_id = "123456789012345"
 # whatsapp_access_token = "EAAx..."
 # whatsapp_verify_token = "my-secret-verify-token"
 #
 # ── 24-hour messaging window & notification templates ─────────────────
 # WhatsApp only allows free-form text messages within 24 hours of the last
 # inbound message from a user.  For proactive pipeline notifications sent
 # after the window expires, an approved Meta message template is used.
 #
 # Register the template in the Meta Business Manager:
 #   1. Go to Business Settings → WhatsApp → Message Templates → Create.
 #   2. Category: UTILITY
 #   3. Template name: pipeline_notification   (or your chosen name below)
 #   4. Language: English (en_US)
 #   5. Body text (example):
 #        Story *{{1}}* has moved to *{{2}}*.
 #      Where {{1}} = story name, {{2}} = pipeline stage.
 #   6. Submit for review.  Meta typically approves utility templates within
 #      minutes; transactional categories may take longer.
 #
 # Once approved, set the name below (default: "pipeline_notification"):
 # whatsapp_notification_template = "pipeline_notification"
 # ── Slack Bot API ─────────────────────────────────────────────────────
 # Set transport = "slack" to use Slack instead of Matrix.
 # The webhook endpoint will be available at /webhook/slack.
 # Configure this URL in the Slack App → Event Subscriptions → Request URL.
 #
 # Required Slack App scopes: chat:write, chat:update
 # Subscribe to bot events: message.channels, message.groups, message.im
 #
 # transport = "slack"
 # slack_bot_token = "xoxb-..."
 # slack_signing_secret = "your-signing-secret"
 # slack_channel_ids = ["C01ABCDEF"]
@@ -0,0 +1,26 @@
 # Matrix Transport
 # Copy this file to bot.toml and fill in your values.
 # Only one transport can be active at a time.
 enabled = true
 transport = "matrix"
 homeserver = "https://matrix.example.com"
 username = "@botname:example.com"
 password = "your-bot-password"
 # List one or more rooms to listen in.
 room_ids = ["!roomid:example.com"]
 # Users allowed to interact with the bot (fail-closed: empty = nobody).
 allowed_users = ["@youruser:example.com"]
 # Bot display name in chat.
 # display_name = "Assistant"
 # Maximum conversation turns to remember per room (default: 20).
 # history_size = 20
 # Rooms where the bot responds to all messages (not just addressed ones).
 # This list is updated automatically when users toggle ambient mode at runtime.
 # ambient_rooms = ["!roomid:example.com"]
@@ -0,0 +1,23 @@
 # Slack Transport
 # Copy this file to bot.toml and fill in your values.
 # Only one transport can be active at a time.
 #
 # Setup:
 #   1. Create a Slack App at api.slack.com/apps
 #   2. Add OAuth scopes: chat:write, chat:update
 #   3. Subscribe to bot events: message.channels, message.groups, message.im
 #   4. Install the app to your workspace
 #   5. Set your webhook URL in Event Subscriptions: https://your-server/webhook/slack
 enabled = true
 transport = "slack"
 slack_bot_token = "xoxb-..."
 slack_signing_secret = "your-signing-secret"
 slack_channel_ids = ["C01ABCDEF"]
 # Bot display name (used in formatted messages).
 # display_name = "Assistant"
 # Maximum conversation turns to remember per channel (default: 20).
 # history_size = 20
@@ -0,0 +1,33 @@
 # WhatsApp Transport (Meta Cloud API)
 # Copy this file to bot.toml and fill in your values.
 # Only one transport can be active at a time.
 #
 # Setup:
 #   1. Create a Meta Business App at developers.facebook.com
 #   2. Add the WhatsApp product
 #   3. Copy your Phone Number ID and generate a permanent access token
 #   4. Register your webhook URL: https://your-server/webhook/whatsapp
 #   5. Set the verify token below to match what you configure in Meta's dashboard
 enabled = true
 transport = "whatsapp"
 whatsapp_provider = "meta"
 whatsapp_phone_number_id = "123456789012345"
 whatsapp_access_token = "EAAx..."
 whatsapp_verify_token = "my-secret-verify-token"
 # Optional: name of the approved Meta message template used for notifications
 # sent outside the 24-hour messaging window (default: "pipeline_notification").
 # whatsapp_notification_template = "pipeline_notification"
 # Bot display name (used in formatted messages).
 # display_name = "Assistant"
 # Maximum conversation turns to remember per user (default: 20).
 # history_size = 20
 # Optional: restrict which phone numbers can interact with the bot.
 # When set, only listed numbers are processed; all others are silently ignored.
 # When absent or empty, all numbers are allowed (open by default).
 # whatsapp_allowed_phones = ["+15551234567", "+15559876543"]
@@ -0,0 +1,29 @@
 # WhatsApp Transport (Twilio)
 # Copy this file to bot.toml and fill in your values.
 # Only one transport can be active at a time.
 #
 # Setup:
 #   1. Sign up at twilio.com
 #   2. Activate the WhatsApp sandbox (Messaging > Try it out > Send a WhatsApp message)
 #   3. Send the sandbox join code from your WhatsApp to the sandbox number
 #   4. Copy your Account SID, Auth Token, and sandbox number below
 #   5. Set your webhook URL in the Twilio console: https://your-server/webhook/whatsapp
 enabled = true
 transport = "whatsapp"
 whatsapp_provider = "twilio"
 twilio_account_sid = "ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
 twilio_auth_token = "your_auth_token"
 twilio_whatsapp_number = "+14155238886"
 # Bot display name (used in formatted messages).
 # display_name = "Assistant"
 # Maximum conversation turns to remember per user (default: 20).
 # history_size = 20
 # Optional: restrict which phone numbers can interact with the bot.
 # When set, only listed numbers are processed; all others are silently ignored.
 # When absent or empty, all numbers are allowed (open by default).
 # whatsapp_allowed_phones = ["+15551234567", "+15559876543"]
@@ -11,12 +11,17 @@ max_coders = 3
 # Maximum retries per story per pipeline stage before marking as blocked.
 # Set to 0 to disable retry limits.
-max_retries = 2
+max_retries = 3
 # Base branch name for this project. Worktree creation, merges, and agent prompts
 # use this value for {{base_branch}}. When not set, falls back to auto-detection
 # (reads current HEAD branch).
 base_branch = "master"
 [[component]]
 name = "frontend"
 path = "frontend"
-setup = ["npm install", "npm run build"]
+setup = ["npm ci", "npm run build"]
 teardown = []
 [[component]]
@@ -0,0 +1,43 @@
 # Example project.toml — copy to .storkit/project.toml and customise.
 # This file is checked in; project.toml itself is gitignored (it may contain
 # instance-specific settings).
 # Project-wide default QA mode: "server", "agent", or "human".
 # Per-story `qa` front matter overrides this setting.
 default_qa = "server"
 # Default model for coder agents. Only agents with this model are auto-assigned.
 # Opus coders are reserved for explicit per-story `agent:` front matter requests.
 default_coder_model = "sonnet"
 # Maximum concurrent coder agents. Stories wait in 2_current/ when all slots are full.
 max_coders = 3
 # Maximum retries per story per pipeline stage before marking as blocked.
 # Set to 0 to disable retry limits.
 max_retries = 2
 # Base branch name for this project. Worktree creation, merges, and agent prompts
 # use this value for {{base_branch}}. When not set, falls back to auto-detection
 # (reads current HEAD branch).
 base_branch = "main"
 [[component]]
 name = "server"
 path = "."
 setup = ["cargo build"]
 teardown = []
 [[agent]]
 name = "coder-1"
 role = "Full-stack engineer"
 stage = "coder"
 model = "sonnet"
 max_turns = 50
 max_budget_usd = 5.00
 prompt = """
 You are working in a git worktree on story {{story_id}}.
 Read CLAUDE.md first, then .storkit/README.md to understand the dev process.
 Run: cd "{{worktree_path}}" && git difftool {{base_branch}}...HEAD
 Commit all your work before your process exits.
 """
@@ -0,0 +1,24 @@
 ---
 name: "WhatsApp webhook HMAC signature verification"
 retry_count: 3
 blocked: true
 ---
 # Story 388: WhatsApp webhook HMAC signature verification
 ## User Story
 As a bot operator, I want incoming WhatsApp webhook requests to be cryptographically verified, so that forged requests from unauthorized sources are rejected.
 ## Acceptance Criteria
 - [ ] Meta webhooks: validate X-Hub-Signature-256 HMAC-SHA256 header using the app secret before processing
 - [ ] Twilio webhooks: validate request signature using the auth token before processing
 - [ ] Requests with missing or invalid signatures are rejected with 403 Forbidden
 - [ ] Verification is fail-closed: if signature checking is configured, unsigned requests are rejected
 - [ ] Existing bot.toml config is extended with any needed secrets (e.g. Meta app_secret for HMAC verification)
 - [ ] MUST use audited crypto crates (hmac, sha2, sha1, base64) — no hand-rolled cryptographic primitives
 ## Out of Scope
 - TBD
@@ -0,0 +1,40 @@
 ---
 name: "Fly.io Machines API integration for multi-tenant storkit SaaS"
 ---
 # Spike 408: Fly.io Machines API integration for multi-tenant storkit SaaS
 ## Question
 Can we build a working Rust integration that creates and manages per-tenant Fly.io Machines, attaches volumes, injects Claude credentials, and proxies JWT-authenticated HTTP/WebSocket traffic to the right machine?
 ## Hypothesis
 A thin Rust service using `reqwest` for the Machines API and `axum` for the reverse proxy is sufficient. No heavyweight orchestration framework needed.
 ## Prerequisites
 - Fly.io account with API token (set `FLY_API_TOKEN` env var)
 - Spike 407 findings reviewed
 ## Timebox
 4 hours
 ## Investigation Plan
 - [ ] Create a minimal Rust crate in `spikes/fly_machines/` — do not touch production code
 - [ ] Implement machine lifecycle: create, start, stop, destroy via Fly Machines REST API using `reqwest`
 - [ ] Test attaching a persistent volume to a machine and verify it persists across stop/start
 - [ ] Test secret injection — pass a dummy `credentials.json` as a Fly secret and verify it's readable inside the machine
 - [ ] Sketch the auth proxy: JWT validation → machine lookup → reverse proxy to machine's private IP; verify WebSocket proxying works
 - [ ] Measure actual cold start time for a minimal storkit container image
 - [ ] Document any API quirks, rate limits, or sharp edges discovered during testing
 ## Findings
 - TBD
 ## Recommendation
 - TBD
@@ -0,0 +1,22 @@
 ---
 name: "Multi-account OAuth token rotation on rate limit"
 ---
 # Story 411: Multi-account OAuth token rotation on rate limit
 ## User Story
 As a storkit user with multiple Claude Max subscriptions, I want the system to automatically rotate to a different account when one gets rate limited, so that agents and chat don't stall out waiting for limits to reset.
 ## Acceptance Criteria
 - [ ] OAuth login flow stores credentials per-account (keyed by email), not overwriting previous accounts
 - [ ] GET /oauth/status returns all stored accounts and their status (active, rate-limited, expired)
 - [ ] When the active account hits a rate limit, storkit automatically swaps to the next available account's refresh token, refreshes, and retries
 - [ ] The bot sends a notification in Matrix/WhatsApp when it swaps accounts
 - [ ] If all accounts are rate limited, the bot surfaces a clear message with the time until the earliest reset
 - [ ] A new /oauth/authorize login adds to the account pool rather than replacing the current credentials
 ## Out of Scope
 - TBD
@@ -0,0 +1,24 @@
 ---
 name: "Recheck bot command to re-run gates without restarting agent"
 ---
 # Story 412: Recheck bot command to re-run gates without restarting agent
 ## User Story
 As a user, I want to send `recheck <number>` to the bot to re-run acceptance gates on an existing worktree without spawning a new agent, so that I can unblock stories that failed due to environment issues without wasting agent turns.
 ## Acceptance Criteria
 - [ ] recheck command is registered in chat/commands/mod.rs and appears in help output
 - [ ] `recheck <number>` runs run_acceptance_gates on the story's existing worktree
 - [ ] If gates pass, the story advances through the pipeline (same as if a coder completed successfully)
 - [ ] If gates fail, the error output is returned to the user (not silently retried)
 - [ ] If no worktree exists for the story, returns a clear error
 - [ ] Does not spawn a new agent or increment retry_count
 - [ ] Works from all transports (Matrix, WhatsApp, Slack)
 - [ ] Works from web UI slash commands
 ## Out of Scope
 - TBD
@@ -0,0 +1,57 @@
 ---
 name: "Mergemaster pipeline marks story done without verifying code landed on master"
 retry_count: 1
 ---
 # Bug 426: Mergemaster pipeline marks story done without verifying code landed on master
 ## Description
 The mergemaster pipeline can mark a story as done even when the feature code never makes it to master. The cherry-pick step in merge.rs may fail or be skipped, but the pipeline still advances the story to done via the filesystem watcher. There is no post-merge verification that the code actually exists on master before marking done.
 ## How to Reproduce
 Observed on stories 422 and 403. For 422: mergemaster created merge-queue branch, resolved 2 conflicts in chat/commands/mod.rs and http/mcp/mod.rs, passed quality gates, created merge-queue commit cb2ef6b (4 files, 333 insertions including unblock.rs). But the done commit on master (05db012) only moves the story file — zero code changes. There is no 'storkit: merge 422' commit on master at all. The feature branch (db3157f) still has the code but it was never cherry-picked onto master.
 ## Manual Merge Notes
 When manually cherry-picking 422 onto master, two conflicts arose:
 1. `server/src/chat/commands/mod.rs` — both 421 (timer) and 422 (unblock) added entries to the same BotCommand registry. Resolution: keep both.
 2. `server/src/http/mcp/mod.rs` — 420 (loc_file) and 422 (unblock) both bumped the tool count assertion from 49→50. Resolution: keep loc_file assertion, bump count to 51.
 Additionally, the cherry-pick could not proceed at all because master was on the `merge-queue/424` branch with 3 unresolved files (notifications.rs, ws.rs, watcher.rs). A concurrent in-progress merge left the working tree dirty, which likely caused the original cherry-pick to fail silently. This suggests a race condition: the filesystem watcher commits (story file moves) can leave master in a state where the cherry-pick step in merge.rs fails.
 ## Full Audit of Done Stories (2026-03-28)
 Audited all 10 stories in `5_done/` to check whether their code actually landed on master:
 | Story | Merge Commit | Code on Master |
 |-------|-------------|----------------|
 | 417 — Split matrix/bot.rs | `665c036` (9 files, +1973/-1926) | YES |
 | 418 — Split pool/auto_assign.rs | `d375c4b` (7 files, +1901/-1813) | YES |
 | 419 — Matrix bot network error | `1193b7a` (1 file, +121/-3) | YES |
 | 420 — loc file command | `d6f8239` (5 files, +112/-32) | YES |
 | 421 — Timer command | `cf5424f` (7 files, +836) | YES |
 | 422 — Unblock command | `6c6bc35` (4 files, +336) — manual cherry-pick | YES |
 | 423 — Auto-schedule timer on rate limit | `b44f3a3` + `8ab2e19` (6 files, +375/-8) — manual cherry-pick | YES |
 | **424 — Rate limit traffic light** | **None** | **NO — moved back to backlog for redo** |
 | 425 — Chat notification on story block | `98b5475` (5 files, +184/-15) | YES |
 | **427 — Text normalization for line breaks** | **None** | **NO — phantom done, code never landed** |
 **4 out of 10 stories (422, 423, 424, 427) had broken merges.** 422 and 423 were fixed via manual cherry-pick. 424 was moved back to backlog for a fresh run. 427 also hit the same bug — marked done without code on master.
 ## Actual Result
 Story moved to done with no code on master. The merge-queue commit exists on a detached branch but was never applied to master. No merge commit appears in git log on master.
 ## Expected Result
 Pipeline should verify that the cherry-pick produced a merge commit on master before advancing to done. If cherry-pick fails or is missing, the story should remain in merge stage with a merge_failure flag.
 ## Acceptance Criteria
 - [ ] Pipeline must not move a story to done unless a merge commit containing the feature code exists on master
 - [ ] If cherry-pick fails or produces no code diff on master, the merge must be reported as failed
 - [ ] Add a post-merge verification step that checks git log on master for the expected merge commit before advancing to done
 - [ ] When verification fails, emit a merge_failure and leave the story in the merge stage for retry
@@ -0,0 +1,20 @@
 ---
 name: "Server-side text normalization for chat message line breaks"
 ---
 # Story 427: Server-side text normalization for chat message line breaks
 ## User Story
 As a user reading bot messages in Matrix, I want single newlines between sentences to render correctly, so that messages don't show up with words joined together like "sentence one.Sentence two".
 ## Acceptance Criteria
 - [ ] Add a text normalization step before markdown-to-HTML conversion in the Matrix transport that converts single newlines between non-empty prose lines into double newlines
 - [ ] Preserve intentional single-newline formatting in bullet lists, headings, table rows, and code fences
 - [ ] Apply the same normalization in WhatsApp and Slack transports
 - [ ] Unit tests covering prose paragraphs, bullet lists, code blocks, and mixed content
 ## Out of Scope
 - TBD
@@ -0,0 +1,30 @@
 ---
 name: "Split matrix/bot.rs into focused modules"
 ---
 # Refactor 417: Split matrix/bot.rs into focused modules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/chat/transport/matrix/bot.rs (1926 lines) into focused submodules.
 ## Acceptance Criteria
 - [ ] history.rs contains ConversationRole, ConversationEntry, RoomConversation, PersistedHistory, load_history, save_history and their unit tests
 - [ ] context.rs contains BotContext struct
 - [ ] run.rs contains run_bot main event loop
 - [ ] messages.rs contains on_room_message, handle_message, format_user_prompt, is_permission_approval and their unit tests
 - [ ] mentions.rs contains mentions_bot, contains_word, is_reply_to_bot and their unit tests
 - [ ] verification.rs contains check_sender_verified, on_to_device_verification_request, handle_sas_verification and their unit tests
 - [ ] format.rs contains markdown_to_html, format_startup_announcement and their unit tests
 - [ ] mod.rs re-exports all public types
 - [ ] Unit tests live in their respective module files
 - [ ] No public API changes — all existing imports continue to work
 ## Out of Scope
 - TBD
@@ -0,0 +1,28 @@
 ---
 name: "Split pool/auto_assign.rs into submodules"
 ---
 # Refactor 418: Split pool/auto_assign.rs into submodules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/agents/pool/auto_assign.rs (1813 lines) into focused submodules.
 ## Acceptance Criteria
 - [ ] auto_assign.rs contains auto_assign_available_work and its unit tests
 - [ ] reconcile.rs contains reconcile_on_startup and its unit tests
 - [ ] watchdog.rs contains run_watchdog_once, spawn_watchdog, check_orphaned_agents and their unit tests
 - [ ] scan.rs contains scan_stage_items, is_story_assigned_for_stage, count_active_agents_for_stage, find_free_agent_for_stage, is_agent_free and their unit tests
 - [ ] story_checks.rs contains read_story_front_matter_agent, has_review_hold, is_story_blocked, has_merge_failure and their unit tests
 - [ ] mod.rs wires the submodules and re-exports all public items
 - [ ] Unit tests live in their respective module files
 - [ ] No public API changes — all existing imports continue to work
 ## Out of Scope
 - TBD
@@ -0,0 +1,29 @@
 ---
 name: "Matrix bot crashes on transient network error instead of retrying"
 ---
 # Bug 419: Matrix bot crashes on transient network error instead of retrying
 ## Description
 The Matrix bot treats a transient sync error as fatal and stops entirely. A single failed HTTP request to the homeserver kills the bot, requiring a full server rebuild to recover.
 ## How to Reproduce
 1. Run storkit with Matrix bot enabled
 2. Homeserver becomes temporarily unreachable (network blip, DNS hiccup, server restart)
 3. Bot hits sync error and crashes
 ## Actual Result
 Bot logs "Fatal error: Matrix sync error: error sending request for url (...)" and stops responding. No retry, no recovery.
 ## Expected Result
 Bot logs a warning, backs off with exponential delay, and retries the sync. Only crash on unrecoverable errors (invalid credentials, banned, etc).
 ## Acceptance Criteria
 - [ ] Transient network errors (connection refused, timeout, DNS failure) trigger a retry with exponential backoff
 - [ ] Bot logs a warning on each failed retry attempt
 - [ ] Bot resumes normal operation once the homeserver is reachable again
 - [ ] Unrecoverable errors (401, 403) still cause a clean shutdown with a clear error message
 - [ ] Bot sends a notification after recovering from a network outage
@@ -0,0 +1,23 @@
 ---
 name: "loc for a specified file — bot command and web UI slash command"
 ---
 # Story 420: loc for a specified file — bot command and web UI slash command
 ## User Story
 As a developer, I want to send `loc <filepath>` to the bot or use it as a slash command in the web UI to see the line count for a specific file, so I can quickly check how large a file is without leaving my workflow.
 ## Acceptance Criteria
 - [ ] `loc <filepath>` returns the line count for the specified file
 - [ ] Relative paths are resolved against the project root
 - [ ] If the file does not exist, returns a clear error
 - [ ] Works from all transports (Matrix, WhatsApp, Slack)
 - [ ] Works as a slash command in the web UI
 - [ ] loc with no argument retains existing behavior (top files by line count)
 - [ ] Exposed as an MCP tool so agents can query file line counts programmatically
 ## Out of Scope
 - TBD
@@ -0,0 +1,24 @@
 ---
 name: "Timer command for deferred agent start"
 ---
 # Story 421: Timer command for deferred agent start
 ## User Story
 As a ..., I want ..., so that ...
 ## Acceptance Criteria
 - [ ] Bot command `timer <story_id> <HH:MM>` schedules a one-shot deferred start for the given story at the next occurrence of that time (server-local timezone)
 - [ ] Bot command `timer list` shows all pending timers with story ID and scheduled time
 - [ ] Bot command `timer cancel <story_id>` removes the pending timer for that story
 - [ ] Timers are persisted to .storkit/timers.json so they survive server restarts
 - [ ] A 30s tick loop (tokio task, same pattern as watchdog) checks for due timers and calls start_agent when triggered
 - [ ] When a timer fires, the story must already be in current — timer does not move stories between stages
 - [ ] Fired timers are removed after execution (one-shot, not recurring)
 - [ ] Multiple timers for the same time are supported and respect agent slot contention via auto-assign
 ## Out of Scope
 - TBD
@@ -0,0 +1,22 @@
 ---
 name: "Unblock command to reset blocked stories"
 ---
 # Story 422: Unblock command to reset blocked stories
 ## User Story
 As a ..., I want ..., so that ...
 ## Acceptance Criteria
 - [ ] Bot command `unblock <story_id>` clears blocked flag and resets retry_count to 0 on the story front matter
 - [ ] Replies with confirmation including story ID and name
 - [ ] Returns clear error if story is not found or not blocked
 - [ ] Works from all transports (Matrix, WhatsApp, Slack)
 - [ ] Exposed as an MCP tool so agents can unblock stories programmatically
 - [ ] Works as a slash command in the web UI
 ## Out of Scope
 - TBD
@@ -0,0 +1,22 @@
 ---
 name: "Auto-schedule timer on rate limit to resume after reset"
 ---
 # Story 423: Auto-schedule timer on rate limit to resume after reset
 ## User Story
 As a ..., I want ..., so that ...
 ## Acceptance Criteria
 - [ ] When a rate_limit_event with a hard block (not just allowed_warning) is received from the PTY stream, parse the reset time from rate_limit_info
 - [ ] Automatically create a timer (via TimerStore from story 421) for the blocked story at the parsed reset time
 - [ ] If a timer already exists for that story, update it to the later reset time rather than creating a duplicate
 - [ ] Log the auto-scheduled timer with story ID, agent name, and scheduled resume time
 - [ ] Notify chat transports that the story was rate-limited and will auto-resume at the scheduled time
 - [ ] When the timer fires and restarts the agent, the existing worktree and committed work are preserved
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "Rate limit traffic light status and hard block alerts"
 agent: coder-opus
 ---
 # Story 424: Rate limit traffic light status and hard block alerts
 ## User Story
 As a ..., I want ..., so that ...
 ## Acceptance Criteria
 - [ ] Remove repetitive per-message throttle warnings (allowed_warning) from chat transports entirely
 - [ ] Pipeline status messages show a coloured dot next to each work item: green for running normally, yellow for throttled, red for hard blocked, white/grey for idle/no agent
 - [ ] Hard block events (429 / rate_limit_exceeded) still send an individual chat notification with a red icon, including the reset time
 - [ ] Throttle and block state tracked per-agent so the status dot updates in real time
 - [ ] Server-side logging of throttle warnings is preserved for debugging
 - [ ] Traffic light dots in status report should be small/compact, not large emoji
 ## Out of Scope
 - TBD
@@ -0,0 +1,20 @@
 ---
 name: "Chat notification when a story blocks with reason"
 ---
 # Story 425: Chat notification when a story blocks with reason
 ## User Story
 As a project owner monitoring agent progress via chat, I want to receive a notification when a story gets blocked, including the reason, so that I can decide whether to unblock it or investigate the failure.
 ## Acceptance Criteria
 - [ ] When a story transitions to blocked state, send a chat notification to all configured transports
 - [ ] Notification includes the story ID, story name, and the reason for blocking (e.g. gate failure output, max retries exceeded, empty diff)
 - [ ] Notification uses a red or warning icon to distinguish from normal status messages
 - [ ] Works across Matrix, WhatsApp, and Slack transports
 ## Out of Scope
 - TBD
@@ -0,0 +1,26 @@
 ---
 name: "Split pool/pipeline.rs into submodules"
 ---
 # Refactor 428: Split pool/pipeline.rs into submodules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/agents/pool/pipeline.rs (1789 lines) into focused submodules.
 ## Acceptance Criteria
 - [ ] advance.rs contains run_pipeline_advance, spawn_pipeline_advance, should_block_story and their unit tests
 - [ ] completion.rs contains run_server_owned_completion, report_completion and their unit tests
 - [ ] merge.rs contains start_merge_agent_work, run_merge_pipeline, get_merge_status, set_merge_failure_reported and their unit tests
 - [ ] mod.rs re-exports all public items and wires the submodules
 - [ ] Unit tests live in their respective module files
 - [ ] No public API changes — all existing imports continue to work
 ## Out of Scope
 - TBD
@@ -1,6 +1,5 @@
 ---
 name: "Work item titles render too large in expanded view"
 merge_failure: "Merge pipeline infrastructure failure: squash merge committed successfully on merge-queue branch, but cherry-pick onto master failed with 'fatal: bad revision merge-queue/237_bug_work_item_titles_render_too_large_in_expanded_view'. The merge worktree setup also failed (ENOENT for .story_kit/merge_workspace — pnpm install, pnpm build, cargo check all skipped). The merge-queue branch appears to have been cleaned up before the cherry-pick step could reference it. Master is untouched."
 ---
 # Bug 237: Work item titles render too large in expanded view
@@ -1,6 +1,5 @@
 ---
 name: "Add refactor work item type"
 merge_failure: "merge_agent_work tool returned empty output on two attempts. The merge-queue branch (merge-queue/254_story_add_refactor_work_item_type) was created with squash merge commit 27d24b2, and the merge workspace worktree exists at .story_kit/merge_workspace, but the pipeline never completed (no success/failure logged after MERGE-DEBUG calls). The stale merge workspace worktree may be blocking completion. Possibly related to bug 250 (merge pipeline cherry-pick fails with bad revision on merge-queue branch). Human intervention needed to: 1) clean up the merge-queue worktree and branch, 2) investigate why the merge pipeline hangs after creating the squash merge commit, 3) retry the merge."
 ---
 # Story 254: Add refactor work item type
@@ -1,6 +1,5 @@
 ---
 name: "Show agent logs in expanded story popup"
 merge_failure: "merge_agent_work tool returned empty output. The merge pipeline created the merge-queue branch (merge-queue/255_story_show_agent_logs_in_expanded_story_popup) and merge workspace worktree at .story_kit/merge_workspace, but hung without completing. This is the same issue that affected story 254 — likely related to bug 250 (merge pipeline cherry-pick fails with bad revision on merge-queue branch). The stale merge workspace worktree on the merge-queue branch may be blocking completion. Human intervention needed to: 1) clean up the merge workspace worktree and merge-queue branch, 2) investigate the root cause in the merge pipeline (possibly the cherry-pick/fast-forward step after squash merge), 3) retry the merge."
 ---
 # Story 255: Show agent logs in expanded story popup
@@ -1,5 +1,6 @@
 ---
 name: "Web UI OAuth flow for Claude authentication"
 agent: "coder-opus"
 ---
 # Story 368: Web UI OAuth flow for Claude authentication
@@ -0,0 +1,32 @@
 ---
 name: "No-arg storkit in empty directory skips scaffold"
 ---
 # Bug 371: No-arg storkit in empty directory skips scaffold
 ## Description
 When running `storkit` with no path argument from an empty directory (no `.storkit/`), the server starts but never calls `open_project` and never runs the scaffold. The `find_story_kit_root` check fails to find `.storkit/`, so the fallback at main.rs:179-186 just sets `project_root = cwd` without scaffolding. This means no `.storkit/`, no `project.toml`, no `.mcp.json`, no `CLAUDE.md` — the project is non-functional.
 The explicit path branch (`storkit .`) works correctly because it calls `open_project` → `ensure_project_root_with_story_kit` → `scaffold_story_kit`. The no-arg branch should do the same.
 ## How to Reproduce
 1. Create a new empty directory
 2. cd into it
 3. Run `storkit` (no path argument)
 4. Observe that no scaffold is created — `.storkit/`, `CLAUDE.md`, `.mcp.json`, etc. are all missing
 ## Actual Result
 Server starts with project_root set to cwd but no scaffold runs. The project is non-functional — no agent config, no MCP endpoint, no work pipeline directories.
 ## Expected Result
 Running `storkit` with no arguments from a directory without `.storkit/` should scaffold the project the same as `storkit .` does — calling `open_project` and triggering `ensure_project_root_with_story_kit`.
 ## Acceptance Criteria
 - [ ] Running `storkit` with no args from a dir without `.storkit/` calls `open_project` and triggers the full scaffold
 - [ ] The no-arg fallback path in main.rs calls `open_project(cwd)` instead of just setting project_root directly
 - [ ] After `storkit` completes startup, `.storkit/project.toml`, `.mcp.json`, `CLAUDE.md`, and `script/test` all exist
@@ -0,0 +1,24 @@
 ---
 name: "Scaffold auto-detects tech stack and configures script/test"
 ---
 # Story 372: Scaffold auto-detects tech stack and configures script/test
 ## User Story
 As a user setting up a new project with storkit, I want the scaffold to detect my project's tech stack and generate a working `script/test` automatically, so that agents can run tests immediately without manual configuration.
 ## Acceptance Criteria
 - [ ] Scaffold detects Go projects (go.mod) and adds `go test ./...` to script/test
 - [ ] Scaffold detects Node.js projects (package.json) and adds `npm test` to script/test
 - [ ] Scaffold detects Rust projects (Cargo.toml) and adds `cargo test` to script/test
 - [ ] Scaffold detects Python projects (pyproject.toml or requirements.txt) and adds `pytest` to script/test
 - [ ] Scaffold handles multi-stack projects (e.g. Go + Next.js) by combining the relevant test commands
 - [ ] project.toml component entries are generated to match detected tech stack
 - [ ] Falls back to the generic 'No tests configured' stub if no known stack is detected
 - [ ] Coder agent prompt includes instruction to configure `script/test` for the project's test framework if it still contains the generic stub
 ## Out of Scope
 - TBD
@@ -0,0 +1,28 @@
 ---
 name: "Scaffold gitignore missing transient pipeline stage directories"
 ---
 # Bug 373: Scaffold gitignore missing transient pipeline stage directories
 ## Description
 The `write_story_kit_gitignore` function in `server/src/io/fs.rs` does not include the transient pipeline stages (`work/2_current/`, `work/3_qa/`, `work/4_merge/`) in the `.storkit/.gitignore` entries list. These stages are not committed to git (only `1_backlog`, `5_done`, and `6_archived` are commit-worthy per spike 92), so they should be ignored for new projects.
 ## How to Reproduce
 1. Scaffold a new project with storkit
 2. Check `.storkit/.gitignore`
 ## Actual Result
 `.storkit/.gitignore` only contains `bot.toml`, `matrix_store/`, `matrix_device_id`, `worktrees/`, `merge_workspace/`, `coverage/`. The transient pipeline directories are missing.
 ## Expected Result
 `.storkit/.gitignore` also includes `work/2_current/`, `work/3_qa/`, `work/4_merge/`.
 ## Acceptance Criteria
 - [ ] Scaffold writes work/2_current/, work/3_qa/, work/4_merge/ to .storkit/.gitignore
 - [ ] Idempotent — running scaffold again does not duplicate entries
 - [ ] Existing .storkit/.gitignore files get the new entries appended on next scaffold run
@@ -0,0 +1,30 @@
 ---
 name: "Web UI implements all bot commands as slash commands"
 ---
 # Story 374: Web UI implements all bot commands as slash commands
 ## User Story
 As a user working in the storkit web UI, I want to type slash commands (e.g. `/status`, `/start 42`, `/cost`) in the chat input to trigger the same deterministic bot commands available in Matrix, so that I can manage my project entirely from the browser without needing a chat bot.
 ## Acceptance Criteria
 - [ ] /status — shows pipeline status and agent availability; /status <number> shows story triage dump
 - [ ] /assign <number> <model> — pre-assign a model to a story
 - [ ] /start <number> — start a coder on a story; /start <number> opus for specific model
 - [ ] /show <number> — display full text of a work item
 - [ ] /move <number> <stage> — move a work item to a pipeline stage
 - [ ] /delete <number> — remove a work item from the pipeline
 - [ ] /cost — show token spend (24h total, top stories, by agent type, all-time)
 - [ ] /git — show git status (branch, uncommitted changes, ahead/behind)
 - [ ] /overview <number> — show implementation summary for a merged story
 - [ ] /rebuild — rebuild the server binary and restart
 - [ ] /reset — clear the current Claude Code session
 - [ ] /help — list all available slash commands
 - [ ] Slash commands are handled at the frontend/backend level without LLM invocation
 - [ ] Unrecognised slash commands show a helpful error message
 ## Out of Scope
 - TBD
@@ -0,0 +1,43 @@
 ---
 name: "Default project.toml contains Rust-specific setup commands for non-Rust projects"
 ---
 # Bug 375: Default project.toml contains Rust-specific setup commands for non-Rust projects
 ## Description
 When scaffolding a new project where no tech stack is detected, the generated `project.toml` contains Rust-specific setup commands (`cargo check`) as example fallback components. This causes coder agents to try to satisfy Rust gates on non-Rust projects.
 ## Fix
 1. In `detect_components_toml()` fallback (when no stack markers found): replace the Rust/pnpm example components with a single generic `app` component with empty `setup = []`
 2. In the onboarding prompt Step 4: simplify to configure `[[component]]` entries based on what the user told the LLM in Step 2 (tech stack), rather than re-scanning the filesystem independently
 ## Acceptance Criteria
 - [ ] Default project.toml does not contain language-specific setup commands when that language is not detected in the project
 - [ ] If go.mod is present, setup commands use Go tooling
 - [ ] If package.json is present, setup commands use npm/node tooling
 - [ ] If no known stack is detected, setup commands are empty or just echo a placeholder
 ## How to Reproduce
 1. Create a new Go + Next.js project directory with `go.mod` and `package.json`
 2. Run `storkit .` to scaffold
 3. Check `.storkit/project.toml` — the component setup commands reference cargo/Rust
 4. Start a coder agent — it creates a `Cargo.toml` trying to satisfy the Rust setup commands
 ## Actual Result
 The scaffolded `project.toml` has Rust-specific setup commands (`cargo check`) even for non-Rust projects. Agents try to satisfy these and create spurious files.
 ## Expected Result
 The scaffolded `project.toml` should have generic or stack-appropriate setup commands. If no known stack is detected, setup commands should be empty or minimal (not Rust-specific).
 ## Acceptance Criteria
 - [ ] Default project.toml does not contain language-specific setup commands when that language is not detected in the project
 - [ ] If go.mod is present, setup commands use Go tooling
 - [ ] If package.json is present, setup commands use npm/node tooling
 - [ ] If no known stack is detected, setup commands are empty or just echo a placeholder
@@ -0,0 +1,22 @@
 ---
 name: "Rename MCP whatsup tool to status for consistency"
 agent: coder-opus
 ---
 # Story 376: Rename MCP whatsup tool to status for consistency
 ## User Story
 As a developer using storkit's MCP tools, I want the MCP tool to be called `status` instead of `whatsup`, so that the naming is consistent between the bot command (`status`), the web UI slash command (`/status`), and the MCP tool.
 ## Acceptance Criteria
 - [ ] MCP tool is renamed from 'whatsup' to 'status'
 - [ ] MCP tool is discoverable as 'status' via tools/list
 - [ ] The tool still accepts a story_id parameter and returns the same triage data
 - [ ] Old 'whatsup' tool name is removed from the MCP registry
 - [ ] Any internal references to the whatsup tool name are updated
 ## Out of Scope
 - TBD
@@ -0,0 +1,30 @@
 ---
 name: "update_story MCP tool writes front matter values as YAML strings instead of native types"
 ---
 # Bug 377: update_story MCP tool writes front matter values as YAML strings instead of native types
 ## Description
 The `update_story` MCP tool accepts `front_matter` as a `Map<String, String>`, so all values are written as quoted YAML strings. Fields like `retry_count` (expected `u32`) and `blocked` (expected `bool`) end up as `"0"` and `"false"` in the YAML. This causes `parse_front_matter()` to fail because serde_yaml cannot deserialize a quoted string into `u32` or `bool`. When parsing fails, the story `name` comes back as `None`, so the status command shows no title for the story.
 ## How to Reproduce
 1. Call `update_story` with `front_matter: {"blocked": "false", "retry_count": "0"}`
 2. Read the story file — front matter contains `blocked: "false"` and `retry_count: "0"` (quoted strings)
 3. Call `get_pipeline_status` or the bot `status` command
 4. The story shows with no title/name
 ## Actual Result
 Front matter values are written as quoted YAML strings. `parse_front_matter()` fails to deserialize `"false"` as `bool` and `"0"` as `u32`, returning an error. The story name is lost and the status command shows no title.
 ## Expected Result
 The `update_story` tool should write `blocked` and `retry_count` as native YAML types (unquoted `false` and `0`), or `parse_front_matter()` should accept both string and native representations. The story name should always be displayed correctly in the status command.
 ## Acceptance Criteria
 - [ ] update_story with front_matter {"blocked": "false"} writes `blocked: false` (unquoted) in the YAML
 - [ ] update_story with front_matter {"retry_count": "0"} writes `retry_count: 0` (unquoted) in the YAML
 - [ ] Story name is displayed correctly in the status command after update_story modifies front matter fields
@@ -0,0 +1,20 @@
 ---
 name: "Status command shows work item type (story, bug, spike, refactor) next to each item"
 ---
 # Story 378: Status command shows work item type (story, bug, spike, refactor) next to each item
 ## User Story
 As a user viewing the pipeline status, I want to see the type of each work item (story, bug, spike, refactor) so that I can quickly understand what kind of work is in progress without having to open individual files.
 ## Acceptance Criteria
 - [ ] The status command displays the work item type (story, bug, spike, refactor) as a label next to each item — e.g. "375 [bug] — Default project.toml contains Rust-specific setup commands"
 - [ ] The type is extracted from the story_id filename convention ({id}_{type}_{slug})
 - [ ] All known types are supported: story, bug, spike, refactor
 - [ ] Unknown or missing types are omitted gracefully (no crash, no placeholder)
 ## Out of Scope
 - TBD
@@ -0,0 +1,34 @@
 ---
 name: "start_agent ignores story front matter agent assignment"
 ---
 # Bug 379: start_agent ignores story front matter agent assignment
 ## Description
 When a model is pre-assigned to a story via the `assign` command (which writes `agent: coder-opus` to the story's YAML front matter), the MCP `start_agent` tool ignores this field. It only looks at the `agent_name` argument passed directly in the tool call. If none is passed, it auto-selects the first idle coder (usually sonnet), bypassing the user's assignment.
 The auto-assign pipeline (`auto_assign.rs`) correctly reads and respects the front matter `agent` field, but the direct `tool_start_agent` path in `agent_tools.rs` does not.
 Additionally, the `show` (whatsup/triage) command should display the assigned agent from the story's front matter so users can verify their assignment took effect.
 ## How to Reproduce
 1. Run `assign 368 opus` — this writes `agent: coder-opus` to story 368's front matter
 2. Run `start 368` (without specifying a model)
 3. Observe that a sonnet coder is assigned, not coder-opus
 4. Run `show 368` — the assigned agent is not displayed
 ## Actual Result
 The `start_agent` MCP tool ignores the `agent` field in the story's front matter and picks the first idle coder. The `show` command does not display the pre-assigned agent.
 ## Expected Result
 When no explicit `agent_name` is passed to `start_agent`, it should read the story's front matter `agent` field and use that agent if it's available. The `show` command should display the assigned agent from front matter.
 ## Acceptance Criteria
 - [ ] start_agent without an explicit agent_name reads the story's front matter `agent` field and uses it if the agent is idle
 - [ ] If the preferred agent from front matter is busy, start_agent either waits or falls back to auto-selection (matching auto_assign behavior)
 - [ ] The show/triage command displays the assigned agent from story front matter when present
@@ -0,0 +1,20 @@
 ---
 name: "Assign command restarts coder when story is already in progress"
 ---
 # Story 380: Assign command restarts coder when story is already in progress
 ## User Story
 As a user, I want `assign X opus` on a running story to stop the current coder, update the front matter, and start the newly assigned agent, so that I can switch models mid-flight without manually stopping and restarting.
 ## Acceptance Criteria
 - [ ] When assign is called on a story with a running coder, the current coder agent is stopped
 - [ ] The story's front matter `agent` field is updated to the new agent name
 - [ ] The newly assigned agent is started on the story automatically
 - [ ] When assign is called on a story with no running coder, it behaves as before (just updates front matter)
 ## Out of Scope
 - TBD
@@ -0,0 +1,20 @@
 ---
 name: "Bot command to delete a worktree"
 ---
 # Story 381: Bot command to delete a worktree
 ## User Story
 As a user, I want a bot command to delete a worktree so that I can clean up orphaned or unwanted worktrees without SSHing into the server.
 ## Acceptance Criteria
 - [ ] A new bot command (e.g. `rmtree <story_number>`) deletes the worktree for the given story
 - [ ] The command stops any running agent on that story before removing the worktree
 - [ ] The command returns a confirmation message on success
 - [ ] The command returns a helpful error if no worktree exists for the given story
 ## Out of Scope
 - TBD
@@ -0,0 +1,22 @@
 ---
 name: "WhatsApp transport supports Twilio API as alternative to Meta Cloud API"
 ---
 # Story 382: WhatsApp transport supports Twilio API as alternative to Meta Cloud API
 ## User Story
 As a user, I want to use Twilio's WhatsApp API instead of Meta's Cloud API directly, so that I can avoid Meta's painful developer onboarding and use Twilio's simpler signup process.
 ## Acceptance Criteria
 - [ ] bot.toml supports a `whatsapp_provider` field with values `meta` (default, current behavior) or `twilio`
 - [ ] When provider is `twilio`, messages are sent via Twilio's REST API (`api.twilio.com`) using Account SID + Auth Token
 - [ ] When provider is `twilio`, inbound webhooks parse Twilio's form-encoded format instead of Meta's JSON
 - [ ] Twilio config requires `twilio_account_sid`, `twilio_auth_token`, and `twilio_whatsapp_number` in bot.toml
 - [ ] All existing bot commands and LLM passthrough work identically regardless of provider
 - [ ] 24-hour messaging window logic still applies (Twilio enforces this server-side too)
 ## Out of Scope
 - TBD
@@ -0,0 +1,41 @@
 ---
 name: "Reorganize chat system into chat module with transport submodules"
 ---
 # Refactor 383: Reorganize chat system into chat module with transport submodules
 ## Current State
 - TBD
 ## Desired State
 Currently chat-related code is scattered at the top level of `src/`: `transport.rs`, `whatsapp.rs`, `slack.rs`, plus `matrix/` as a directory module. This should be reorganized into a clean module hierarchy:
 ```
 src/
  chat/
    mod.rs          # Generic chat traits, types, ChatTransport etc.
    transport/
      mod.rs
      matrix/       # Existing matrix module moved here
      whatsapp.rs   # Existing whatsapp.rs moved here
      slack.rs      # Existing slack.rs moved here
      twilio.rs     # Future Twilio transport
 ```
 The `ChatTransport` trait and shared chat types should live in `chat/mod.rs`. Each transport implementation becomes a submodule of `chat::transport`.
 ## Acceptance Criteria
 - [ ] ChatTransport trait and shared chat types live in `chat/mod.rs`
 - [ ] Matrix transport lives in `chat/transport/matrix/`
 - [ ] WhatsApp transport lives in `chat/transport/whatsapp.rs`
 - [ ] Slack transport lives in `chat/transport/slack.rs`
 - [ ] Top-level `transport.rs`, `whatsapp.rs`, `slack.rs`, and `matrix/` are removed
 - [ ] All existing tests pass without modification (or with only import path changes)
 - [ ] No functional changes — pure file reorganization and re-exports
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "WhatsApp markdown-to-WhatsApp formatting conversion"
 ---
 # Story 384: WhatsApp markdown-to-WhatsApp formatting conversion
 ## User Story
 As a WhatsApp user, I want bot messages to use WhatsApp-native formatting instead of raw markdown, so that headers, bold text, and links render properly.
 ## Acceptance Criteria
 - [ ] Headers (# ## ### etc.) are converted to bold text (*Header*) in WhatsApp messages
 - [ ] Markdown bold (**text**) is converted to WhatsApp bold (*text*)
 - [ ] Markdown strikethrough (~~text~~) is converted to WhatsApp strikethrough (~text~)
 - [ ] Markdown links [text](url) are converted to readable format: text (url)
 - [ ] Code blocks and inline code are preserved as-is (already compatible)
 - [ ] Matrix bot formatting is completely unaffected (conversion only applied in WhatsApp send paths)
 - [ ] Existing WhatsApp chunking (4096 char limit) still works correctly after conversion
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "Slack markdown-to-mrkdwn formatting conversion"
 ---
 # Story 385: Slack markdown-to-mrkdwn formatting conversion
 ## User Story
 As a Slack user, I want bot messages to use Slack-native mrkdwn formatting instead of raw markdown, so that headers, bold text, and links render properly.
 ## Acceptance Criteria
 - [ ] Headers (# ## ### etc.) are converted to bold text (*Header*) in Slack messages
 - [ ] Markdown bold (**text**) is converted to Slack bold (*text*)
 - [ ] Markdown strikethrough (~~text~~) is converted to Slack strikethrough (~text~)
 - [ ] Markdown links [text](url) are converted to Slack format: <url|text>
 - [ ] Code blocks and inline code are preserved as-is (already compatible)
 - [ ] WhatsApp and Matrix bot formatting are completely unaffected (conversion only applied in Slack send paths)
 - [ ] Conversion is applied to all Slack send paths: command responses, LLM streaming, htop snapshots, delete responses, and slash command responses
 ## Out of Scope
 - TBD
@@ -0,0 +1,22 @@
 ---
 name: "Unreleased command shows list of stories since last release"
 ---
 # Story 386: Unreleased command shows list of stories since last release
 ## User Story
 As a user, I want a bot command and web UI slash command called "unreleased" that shows a list of stories completed since the last release, so that I can see what's ready to ship.
 ## Acceptance Criteria
 - [ ] Bot command `unreleased` returns a list of stories merged to master since the last release tag
 - [ ] Web UI slash command /unreleased returns the same list
 - [ ] Each entry shows story number and name
 - [ ] If there are no unreleased stories, a clear message is shown
 - [ ] Command is registered in the help command output
 - [ ] WhatsApp, Slack, and Matrix transports all support the command via the shared command dispatcher
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "Configurable base branch name in project.toml"
 ---
 # Story 387: Configurable base branch name in project.toml
 ## User Story
 As a project owner, I want to configure the main branch name in project.toml (e.g. "main", "master", "develop"), so that the system doesn't hardcode "master" and works with any branching convention.
 ## Acceptance Criteria
 - [ ] New optional `base_branch` setting in project.toml (e.g. base_branch = "main")
 - [ ] When set, all worktree creation, merge operations, and agent prompts use the configured branch name
 - [ ] When not set, falls back to the existing auto-detection logic (detect_base_branch) which reads the current git branch
 - [ ] The hardcoded "master" fallback in detect_base_branch is replaced by the project.toml setting when available
 - [ ] Agent prompt template {{base_branch}} resolves to the configured value
 - [ ] Existing projects without the setting continue to work unchanged (backwards compatible)
 - [ ] project.toml.example uses base_branch = "main" as the example value; the actual project.toml uses base_branch = "master"
 ## Out of Scope
 - TBD
@@ -0,0 +1,21 @@
 ---
 name: "WhatsApp phone number allowlist authorization"
 ---
 # Story 389: WhatsApp phone number allowlist authorization
 ## User Story
 As a bot operator, I want to restrict which phone numbers can interact with the bot, so that only authorized users can send commands.
 ## Acceptance Criteria
 - [ ] New optional allowed_phones list in bot.toml for WhatsApp (similar to Matrix allowed_users)
 - [ ] When configured, only messages from listed phone numbers are processed; all others are silently ignored
 - [ ] When not configured (empty or absent), all phone numbers are allowed (backwards compatible)
 - [ ] Unauthorized senders are logged but receive no response
 - [ ] The allowlist applies to all message types: commands, LLM conversations, and async commands (htop, delete)
 ## Out of Scope
 - TBD
@@ -0,0 +1,31 @@
 ---
 name: "WhatsApp missing async command handlers for start, rebuild, reset, rmtree, assign"
 ---
 # Bug 390: WhatsApp missing async command handlers for start, rebuild, reset, rmtree, assign
 ## Description
 Five bot commands listed in help don't work in WhatsApp. Matrix's on_room_message pre-dispatches these via extract_*_command() functions before calling try_handle_command(), but WhatsApp's handle_incoming_message only pre-dispatches htop and delete. The missing commands have fallback handlers that return None, so they silently fall through to the LLM instead of executing.
 ## How to Reproduce
 1. Send "rebuild" (or "start 386", "reset", "rmtree 386", "assign 386 opus") to the WhatsApp bot
 2. Observe that the message is forwarded to the LLM instead of executing the command
 ## Actual Result
 The 5 commands (start, rebuild, reset, rmtree, assign) fall through to the LLM and generate a conversational response instead of executing the bot command.
 ## Expected Result
 All commands listed in help should work in WhatsApp, matching Matrix behavior. start should spawn an agent, rebuild should rebuild the server, reset should clear the session, rmtree should remove a worktree, assign should pre-assign a model.
 ## Acceptance Criteria
 - [ ] start command works in WhatsApp (extract_start_command dispatch)
 - [ ] rebuild command works in WhatsApp (extract_rebuild_command dispatch)
 - [ ] reset command works in WhatsApp (extract_reset_command dispatch)
 - [ ] rmtree command works in WhatsApp (extract_rmtree_command dispatch)
 - [ ] assign command works in WhatsApp (extract_assign_command dispatch)
 - [ ] Same 5 commands also work in Slack transport if similarly missing
 - [ ] RETRY: Previous attempt was marked done without any code changes — the mergemaster moved the story to done but no async command handlers were actually added to whatsapp.rs. The fix must add extract_start_command, extract_rebuild_command, extract_reset_command, extract_rmtree_command, and extract_assign_command dispatch blocks to handle_incoming_message in whatsapp.rs, following the existing pattern used for htop and delete. Also check and fix Slack if similarly missing.
@@ -0,0 +1,27 @@
 ---
 name: "strip_prefix_ci panics on multi-byte UTF-8 characters"
 ---
 # Bug 391: strip_prefix_ci panics on multi-byte UTF-8 characters
 ## Description
 strip_prefix_ci in commands/mod.rs slices text by byte offset using prefix.len(), which panics when the slice boundary falls inside a multi-byte UTF-8 character (e.g. right single quote U+2019, emojis). The function assumes ASCII-safe byte boundaries but real WhatsApp/Matrix messages contain Unicode.
 ## How to Reproduce
 1. Send a message to the bot containing a smart quote or emoji within the first N bytes (where N = bot name length)
 2. e.g. "For now let’s just deal with it" where the bot name prefix check slices at byte 12, inside the 3-byte ’ (U+2019) character
 ## Actual Result
 Thread panics: "byte index 12 is not a char boundary; it is inside '’' (bytes 11..14)"
 ## Expected Result
 The function should safely handle multi-byte UTF-8 without panicking. If the slice boundary isn't a char boundary, the prefix doesn't match — return None.
 ## Acceptance Criteria
 - [ ] strip_prefix_ci does not panic on messages containing multi-byte UTF-8 characters (smart quotes, emojis, CJK, etc.)
 - [ ] Use text.get(..prefix.len()) or text.is_char_boundary() instead of direct indexing
 - [ ] Add test cases for messages with emojis and smart quotes
@@ -0,0 +1,27 @@
 ---
 name: "Extract shared transport utilities from matrix module into chat submodule"
 agent: "coder-opus"
 ---
 # Refactor 392: Extract shared transport utilities from matrix module into chat submodule
 ## Current State
 - TBD
 ## Desired State
 Several functions currently living in the matrix transport module are used by all transports (WhatsApp, Slack, Matrix). These should be pulled up into a shared location under the chat module. Candidates include: strip_prefix_ci, strip_bot_mention, try_handle_command, drain_complete_paragraphs, markdown_to_whatsapp (pattern could generalize), chunk_for_whatsapp, and the command dispatch infrastructure. A chat::util or chat::text submodule would be a natural home for string utilities like strip_prefix_ci. The command dispatch (try_handle_command, CommandDispatch, BotCommand registry) could live in chat::commands.
 ## Acceptance Criteria
 - [ ] Shared string utilities (strip_prefix_ci, strip_bot_mention, drain_complete_paragraphs) moved to a chat::util or chat::text submodule
 - [ ] Command dispatch infrastructure (try_handle_command, CommandDispatch, BotCommand, command registry) moved to chat::commands
 - [ ] Per-transport formatting functions (markdown_to_whatsapp, markdown_to_slack) remain in their respective transport modules
 - [ ] All transports import from the new shared location instead of reaching into matrix::
 - [ ] No functional changes — purely structural refactor
 - [ ] All existing tests pass and move with their code
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "Pipeline stage notifications for WhatsApp and Slack transports"
 ---
 # Story 393: Pipeline stage notifications for WhatsApp and Slack transports
 ## User Story
 As a WhatsApp or Slack user, I want to receive pipeline stage transition notifications (e.g. "story moved from Current to QA") just like Matrix users do, so I can track story progress from any transport.
 ## Acceptance Criteria
 - [ ] WhatsApp transport spawns a notification listener at startup using the existing spawn_notification_listener infrastructure
 - [ ] Slack transport spawns a notification listener at startup using the same infrastructure
 - [ ] Notifications are sent to all active ambient senders/channels for the respective transport
 - [ ] Stage transition notifications (story moved between pipeline stages) are delivered
 - [ ] Error notifications (story failures) are delivered
 - [ ] Rate limit warnings are delivered with debouncing
 - [ ] Matrix notification behavior is completely unaffected
 ## Out of Scope
 - TBD
@@ -0,0 +1,23 @@
 ---
 name: "WhatsApp and Slack permission prompt forwarding"
 ---
 # Story 394: WhatsApp and Slack permission prompt forwarding
 ## User Story
 As a WhatsApp or Slack user, I want permission requests from Claude Code to be forwarded to my chat so I can approve or deny them, rather than having them silently fail.
 ## Acceptance Criteria
 - [ ] Permission requests are sent as messages to the WhatsApp sender with tool name and input details
 - [ ] User can reply yes/y/approve or no/n/deny to approve or deny the permission
 - [ ] Permission requests time out and auto-deny (fail-closed) if not answered within the configured timeout
 - [ ] Slack receives the same permission forwarding treatment
 - [ ] Reuses the existing permission channel infrastructure (perm_rx, PermissionForward, PermissionDecision)
 - [ ] Matrix permission handling is completely unaffected
 - [ ] handle_llm_message uses a tokio::select! loop (like Matrix bot.rs) to listen for both LLM output and permission requests concurrently
 ## Out of Scope
 - TBD
@@ -0,0 +1,24 @@
 ---
 name: "Fix npm deprecated module warnings"
 ---
 # Refactor 395: Fix npm deprecated module warnings
 ## Current State
 - TBD
 ## Desired State
 Address npm warnings about deprecated modules in the frontend dependencies. Update or replace deprecated packages to eliminate warnings during npm install.
 ## Acceptance Criteria
 - [ ] npm install runs with zero deprecation warnings
 - [ ] All existing frontend tests (npm test) still pass
 - [ ] npm run build succeeds without errors
 - [ ] No functional regressions in the frontend
 ## Out of Scope
 - TBD
@@ -0,0 +1,21 @@
 ---
 name: "WhatsApp bot startup announcement after restart"
 ---
 # Story 396: WhatsApp bot startup announcement after restart
 ## User Story
 As a WhatsApp user, I want the bot to announce its presence when it starts up or restarts, like it does in Matrix, so I know it's back online and ready.
 ## Acceptance Criteria
 - [ ] Bot sends a startup message to all known WhatsApp senders (from conversation history or ambient rooms) when the server starts
 - [ ] Startup message includes the bot name and indicates it is online/ready
 - [ ] Slack transport gets the same startup announcement treatment
 - [ ] Matrix startup announcement behavior is unaffected
 - [ ] After a rebuild command, the new process sends the announcement on startup
 ## Out of Scope
 - TBD
@@ -0,0 +1,30 @@
 ---
 name: "Selection screen directory picker unreadable in dark mode"
 ---
 # Bug 397: Selection screen directory picker unreadable in dark mode
 ## Description
 The ProjectPathInput component in the selection screen uses hardcoded light-theme inline styles (white backgrounds, dark borders, dark text highlights) that don't adapt to dark mode. When the browser/OS uses dark mode, the global CSS sets text color to #f6f6f6 (white) but the dropdown keeps background: #fff — resulting in white text on a white background, making the directory picker completely unreadable.
 ## How to Reproduce
 1. Run storkit under Docker (or locally) with a browser set to dark mode (prefers-color-scheme: dark).
 2. Open http://localhost:3001 in the browser.
 3. Click into the project path input and start typing a path to trigger the autocomplete dropdown.
 ## Actual Result
 The suggestion dropdown has white background with white/light text inherited from the dark-mode global styles. Match highlights use color: #222 which is barely visible. The close button and header bar also use light-only colors. The entire directory picker is effectively unreadable.
 ## Expected Result
 The directory picker dropdown should be readable in both light and dark mode. Colors for background, text, borders, and highlights should adapt to the active color scheme.
 ## Acceptance Criteria
 - [ ] ProjectPathInput dropdown is readable in dark mode (prefers-color-scheme: dark)
 - [ ] ProjectPathInput dropdown remains readable in light mode
 - [ ] Suggestion highlight text is visible against the dropdown background in both themes
 - [ ] No hardcoded light-only colors remain in ProjectPathInput inline styles
@@ -0,0 +1,31 @@
 ---
 name: "CLI --port flag with project.toml persistence"
 ---
 # Story 399: CLI --port flag with project.toml persistence
 ## User Story
 As a developer, I want to set the server port via a --port CLI flag that persists to project.toml, so that I don't have to remember an environment variable on every run.
 ## Acceptance Criteria
 - [ ] `storkit --help` shows a `--port` option
 - [ ] `storkit --port 4000` starts the server on port 4000
 - [ ] After first run with `--port`, the port is saved to `project.toml`
 - [ ] On subsequent runs without `--port`, the port from `project.toml` is used
 - [ ] CLI `--port` overrides the value in `project.toml`
 - [ ] Default port is 3001 when neither `--port` nor `project.toml` port is set
 - [ ] `STORKIT_PORT` env var is removed — no longer read or respected
 - [ ] `.storkit_port` lock file mechanism is removed (`write_port_file` / `remove_port_file`)
 ## Out of Scope
 - Docker compose changes (can update `STORKIT_PORT` references separately)
 - Adding other CLI flags beyond `--port`
 ## Technical Notes
 Port resolution priority: `--port` flag > `project.toml` `port` field > default 3001
 The port should be written to `project.toml` on startup so subsequent runs remember it. Use the existing `config.rs` / `ProjectConfig` struct — add a `port` field.
@@ -0,0 +1,45 @@
 ---
 name: "WhatsApp and Slack missing reset command handler"
 ---
 # Bug 400: WhatsApp and Slack missing reset command handler
 ## Description
 The reset command has a fallback handler in chat/commands/mod.rs that returns None with a comment saying it's handled before try_handle_command. This is only true for Matrix. WhatsApp and Slack don't have pre-dispatch handling, so None causes fallthrough to LLM. This caused a real outage when stale session IDs couldn't be cleared via the bot after switching from Docker to bare-metal.
 ## Implementation Note
 Follow the **rebuild pattern** established in story 402, with one complication: `handle_reset` in `server/src/chat/transport/matrix/reset.rs` takes a Matrix-specific `ConversationHistory` (`Arc<TokioMutex<HashMap<OwnedRoomId, RoomConversation>>>`), so it cannot be called directly from WhatsApp or Slack.
 **WhatsApp session storage** (`server/src/chat/transport/whatsapp.rs`):
 - Type: `WhatsAppConversationHistory = Arc<TokioMutex<HashMap<String, RoomConversation>>>` (key = sender phone number)
 - Persisted to `.storkit/whatsapp_history.json` via `save_whatsapp_history`
 **Slack session storage** (`server/src/chat/transport/slack.rs`):
 - Type: `SlackConversationHistory = Arc<TokioMutex<HashMap<String, RoomConversation>>>` (key = channel ID)
 - Persisted to `.storkit/slack_history.json` via `save_slack_history`
 **Approach:**
 - Use `extract_reset_command` from `server/src/chat/transport/matrix/reset.rs` to detect the command (it works transport-agnostically)
 - Implement the reset inline in each transport's async message handler: clear `session_id` and `entries` for the sender/channel key, call the transport's own `save_*_history`, reply with confirmation
 - Add async intercepts in `whatsapp.rs` (~line 1107, after the rebuild intercept) and `slack.rs` (~line 845, after the rebuild intercept)
 - The fallback handler in `chat/commands/mod.rs` (`handle_reset_fallback`) stays as-is
 ## How to Reproduce
 1. Configure bot with transport = "whatsapp" or "slack"
 2. Send "reset" to the bot
 3. Check server logs
 ## Actual Result
 Log shows "No command matched, forwarding to LLM" — reset is sent to the LLM as a conversational message instead of clearing the session.
 ## Expected Result
 The bot clears the sender's session_id from conversation history and replies with confirmation like "Session cleared."
 ## Acceptance Criteria
 - [ ] WhatsApp transport handles reset command: clears sender session_id and replies with confirmation
 - [ ] Slack transport handles reset command: clears channel session_id and replies with confirmation
 - [ ] Fallback handler in chat/commands/mod.rs no longer silently swallows the reset command
@@ -0,0 +1,35 @@
 ---
 name: "WhatsApp and Slack missing start command handler"
 ---
 # Bug 401: WhatsApp and Slack missing start command handler
 ## Description
 The start command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
 ## Implementation Note
 Follow the **rebuild pattern** established in story 402.
 - `extract_start_command` and `handle_start` already exist in `server/src/chat/transport/matrix/start.rs`
 - Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
 - Call `crate::chat::transport::matrix::start::extract_start_command` to detect the command, then `crate::chat::transport::matrix::start::handle_start` to execute it
 - The fallback handler in `chat/commands/mod.rs` (`handle_start_fallback`) stays as-is — it exists only so `help` lists the command
 ## How to Reproduce
 1. Configure bot with transport = "whatsapp" or "slack"
 2. Send "start <story_id>" to the bot
 3. Check server logs
 ## Actual Result
 Command falls through to LLM instead of starting an agent.
 ## Expected Result
 The bot starts an agent for the specified story and replies with confirmation.
 ## Acceptance Criteria
 - [ ] WhatsApp transport handles start command: starts agent and replies with confirmation
 - [ ] Slack transport handles start command: starts agent and replies with confirmation
@@ -0,0 +1,26 @@
 ---
 name: "WhatsApp and Slack missing rebuild command handler"
 ---
 # Bug 402: WhatsApp and Slack missing rebuild command handler
 ## Description
 The rebuild command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
 ## How to Reproduce
 1. Configure bot with transport = "whatsapp" or "slack"
 2. Send "rebuild" to the bot
 3. Check server logs
 ## Actual Result
 Command falls through to LLM instead of triggering a server rebuild.
 ## Expected Result
 The bot triggers a server rebuild and replies with confirmation.
 ## Acceptance Criteria
 - [ ] WhatsApp transport handles rebuild command: triggers rebuild and replies with confirmation
 - [ ] Slack transport handles rebuild command: triggers rebuild and replies with confirmation
@@ -0,0 +1,37 @@
 ---
 name: "WhatsApp and Slack missing rmtree command handler"
 retry_count: 2
 blocked: true
 ---
 # Bug 403: WhatsApp and Slack missing rmtree command handler
 ## Description
 The rmtree command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
 ## Implementation Note
 Follow the **rebuild pattern** established in story 402.
 - `extract_rmtree_command` and `handle_rmtree` already exist in `server/src/chat/transport/matrix/rmtree.rs`
 - Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
 - Call `crate::chat::transport::matrix::rmtree::extract_rmtree_command` to detect the command, then `crate::chat::transport::matrix::rmtree::handle_rmtree` to execute it
 - The fallback handler in `chat/commands/mod.rs` (`handle_rmtree_fallback`) stays as-is — it exists only so `help` lists the command
 ## How to Reproduce
 1. Configure bot with transport = "whatsapp" or "slack"
 2. Send "rmtree <story_id>" to the bot
 3. Check server logs
 ## Actual Result
 Command falls through to LLM instead of removing the worktree.
 ## Expected Result
 The bot removes the worktree for the specified story and replies with confirmation.
 ## Acceptance Criteria
 - [ ] WhatsApp transport handles rmtree command: removes worktree and replies with confirmation
 - [ ] Slack transport handles rmtree command: removes worktree and replies with confirmation
@@ -0,0 +1,36 @@
 ---
 name: "WhatsApp and Slack missing assign command handler"
 ---
 # Bug 404: WhatsApp and Slack missing assign command handler
 ## Description
 The assign command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
 ## Implementation Note
 Follow the **rebuild pattern** established in story 402.
 - `extract_assign_command` and `handle_assign` already exist in `server/src/chat/transport/matrix/assign.rs`
 - Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
 - Call `crate::chat::transport::matrix::assign::extract_assign_command` to detect the command, then `crate::chat::transport::matrix::assign::handle_assign` to execute it
 - The fallback handler in `chat/commands/mod.rs` (`handle_assign_fallback`) stays as-is for the `help` listing. Note: the registry entry for `assign` currently calls `assign::handle_assign` synchronously; verify this doesn't conflict with the new async intercept.
 - The fallback in `chat/commands/assign.rs` may need to return `None` instead of a real response once the async path handles it
 ## How to Reproduce
 1. Configure bot with transport = "whatsapp" or "slack"
 2. Send "assign <story_id> <agent>" to the bot
 3. Check server logs
 ## Actual Result
 Command falls through to LLM instead of assigning the agent.
 ## Expected Result
 The bot assigns the specified agent to the story and replies with confirmation.
 ## Acceptance Criteria
 - [ ] WhatsApp transport handles assign command: assigns agent and replies with confirmation
 - [ ] Slack transport handles assign command: assigns agent and replies with confirmation
@@ -0,0 +1,30 @@
 ---
 name: "Auto-refresh expired OAuth token for Claude Code PTY"
 ---
 # Story 405: Auto-refresh expired OAuth token for Claude Code PTY
 ## User Story
 As a storkit user with a Claude Max subscription, I want the server to automatically refresh my expired OAuth token so that chat, Matrix, and WhatsApp integrations don't stop working when the token expires.
 ## Acceptance Criteria
 ### Detection
 - [ ] When the Claude Code PTY returns an `authentication_failed` error, storkit detects it instead of passing the raw 401 JSON to the user
 ### Auto-refresh (credentials exist, refresh token valid)
 - [ ] Storkit reads the OAuth refresh token from `~/.claude/.credentials.json`
 - [ ] Storkit calls the Anthropic OAuth token refresh endpoint (`https://console.anthropic.com/v1/oauth/token` with `grant_type=refresh_token`) to obtain a new access token
 - [ ] Storkit writes the refreshed access token (and new expiresAt) back to `~/.claude/.credentials.json`
 - [ ] After a successful refresh, storkit automatically retries the original chat request
 - [ ] The refresh+retry is transparent to the user — they see no error
 ### Full login required (no credentials, or refresh token also expired)
 - [ ] If `.credentials.json` doesn't exist or the refresh call itself fails, storkit surfaces a clear error: "OAuth session expired. Please run `claude login` to re-authenticate."
 - [ ] The error message is surfaced through the normal chat stream (not just server logs)
 ## Out of Scope
 - Implementing the full interactive `claude login` browser OAuth flow inside storkit
 - Proactive token refresh before expiry (refreshing on demand when the error occurs is sufficient)
@@ -0,0 +1,21 @@
 ---
 name: "Browser-based OAuth login flow from web UI and chat integrations"
 ---
 # Story 406: Browser-based OAuth login flow from web UI and chat integrations
 ## User Story
 As a new storkit user (or one whose refresh token has expired), I want to complete the full Claude OAuth login flow from the web UI, Matrix, or WhatsApp so that I don't need terminal access to run `claude login`.
 ## Acceptance Criteria
 - [ ] From the web UI, the user can initiate OAuth login — storkit generates the Anthropic authorize URL and opens it in a new tab
 - [ ] After the user authenticates in the browser, the OAuth callback writes accessToken, refreshToken, and expiresAt to ~/.claude/.credentials.json
 - [ ] From Matrix or WhatsApp, storkit sends the user a clickable OAuth authorize link when credentials are missing or fully expired
 - [ ] After successful login, the user can immediately start chatting without restarting storkit
 - [ ] If the OAuth callback fails or the user cancels, a clear error is shown
 ## Out of Scope
 - TBD
@@ -0,0 +1,195 @@
 ---
 name: "Fly.io Machines for multi-tenant storkit SaaS — docs, security & pricing"
 retry_count: 2
 blocked: true
 ---
 # Spike 407: Fly.io Machines for multi-tenant storkit SaaS — docs, security & pricing
 ## Question
 What do Fly.io's published docs, security claims, and pricing say about using Machines as the isolation layer for a multi-tenant storkit SaaS? Is there anything that rules it out before we write code?
 ## Hypothesis
 Fly.io Machines (Firecracker-based microVMs) are a viable isolation primitive for tenants running arbitrary shell commands, and the pricing model is workable at early SaaS scale.
 ## Timebox
 2 hours
 ## Investigation Plan
 - [x] Read Fly.io Machines API docs — what are the core primitives (machine lifecycle, networking, volumes, secrets)?
 - [x] Research Fly.io's published isolation model — what security guarantees do they document for Firecracker microVMs? Summarise claims and explicitly flag what would require independent security review before production use.
 - [x] Research cold start time — what do Fly.io docs and community benchmarks claim? Note that real numbers require a test account (covered in spike 408).
 - [x] Research persistent volume support — can a volume be attached per-tenant? What are the size/count limits?
 - [x] Research secret injection options — env vars, Fly Secrets API, volume mounts. What's the right approach for per-tenant `~/.claude/.credentials.json`?
 - [x] Research machine count and org limits — any hard caps that would block SaaS growth?
 - [x] Research pricing — always-on vs stop-on-idle machine costs at 10, 100, 1000 tenants. Include volume and egress costs.
 - [x] Identify any documented showstoppers.
 ## Findings
 ### 1. Core API Primitives
 Base URL: `https://api.machines.dev` (or `http://_api.internal:4280` from within 6PN).
 Auth: `Authorization: Bearer <fly_api_token>`.
 **Machine lifecycle** — full REST API:
 - `POST /v1/apps/{app}/machines` — create (+ optionally start via `skip_launch: false`)
 - `POST /v1/apps/{app}/machines/{id}/start` — start stopped machine (~10ms same-region)
 - `POST /v1/apps/{app}/machines/{id}/stop` — stop (SIGINT/SIGKILL, retains disk)
 - `POST /v1/apps/{app}/machines/{id}/suspend` — snapshot RAM to disk for fast resume
 - `DELETE /v1/apps/{app}/machines/{id}` — destroy (irreversible)
 - `GET /v1/apps/{app}/machines/{id}/wait?state=started` — synchronize on state transitions
 Machine states: `created → started → stopped/suspended → destroyed`.
 Leases (`POST .../lease`) provide exclusive mutation locks — useful for orchestration.
 **Rate limits**: 1 req/s per action per machine/app ID (burst to 3). Matters for rapid tenant provisioning.
 ### 2. Isolation Model
 Each Fly Machine is a **Firecracker microVM** — a separate Linux kernel, not a container. Defense in depth:
 1. KVM hardware-enforced memory and CPU isolation
 2. Minimal device model (5 virtual devices vs QEMU's hundreds)
 3. Rust VMM implementation (no C memory-safety bugs in VMM)
 4. `seccomp-bpf` limits Firecracker process to ~40 syscalls with argument filters
 5. Jailer chroots + namespaces + drops privileges around the Firecracker process
 From official docs: *"MicroVMs provide strong hardware-virtualization-based security and workload isolation, which allows us to safely run applications from different customers on shared hardware."* Full VM isolation prevents kernel sharing between apps.
 Tenants have full root inside their VM by design — the kernel boundary contains blast radius.
 **Claims requiring independent verification before production use:**
 - Whether SMT/hyperthreading is disabled on hosts (directly relevant to Spectre/MDS side-channel attacks — Firecracker's own docs recommend disabling SMT for strict multi-tenancy, but Fly.io does not publicly document this)
 - CPU dedication is explicitly described as "best-effort", not a hard guarantee
 - Pentest scope/dates/findings for three named firms (Atredis Partners, Doyensec, Tetrel) are not published
 - Whether the SOC 2 Type II report scope covers the Firecracker isolation layer specifically
 **Compliance**: SOC 2 Type II certified (report available on request), ISO 27001 datacenters (Equinix), HIPAA BAA available, GDPR DPA available.
 ### 3. Network Isolation
 Each machine gets a private IPv6 (6PN) address. Key isolation controls:
 - Cross-organization: Fly.io platform blocks all cross-org traffic at the platform level — strong boundary
 - Intra-organization: **open by default** — any machine in the same org can reach any other
 For multi-tenant SaaS, this means tenant machines in the same Fly.io org are NOT network-isolated from each other unless you use **Custom Private Networks (6PNs)**:
 - `POST /v1/apps` with a `network` field assigns that app to an isolated 6PN
 - Apps on different 6PNs cannot reach each other via private networking (only via public IPs)
 - **Assignment is permanent** — cannot be changed after app creation; plan upfront
 Stable machine addressing: `<machine_id>.vm.<appname>.internal` (6PN addresses change on migration).
 ### 4. Cold Start Times
 | Scenario | Documented Latency |
 |---|---|
 | Cold boot (create + start, same region) | ~300 ms |
 | Start existing stopped machine (same region) | ~10 ms |
 | Start stopped machine (cross-region) | ~150 ms |
 | Resume from suspend (same region) | Sub-100ms (implied) |
 Community-observed: 400–600ms end-to-end (including app init) for stopped machine cold starts.
 FLAME workloads report 3–8s in some restart-race conditions.
 Real latency numbers with our actual image size require a test account — covered by spike 408.
 ### 5. Persistent Volume Support
 - Volumes are created via `POST /v1/apps/{app}/volumes` with `size_gb` (default 3 GB), region, encryption flag
 - Attached to machine via `config.mounts[].volume` at create/update time
 - **1:1 constraint**: one volume per machine, one machine per volume, same region required
 - Volumes persist across machine stop/start/suspend/destroy — they are a separate resource
 - Can extend volume online (`PUT .../volumes/{id}/extend`)
 - Volume snapshots available (billed at $0.08/GB/month as of Jan 2026)
 - No documented per-org volume count cap (separate from machine cap)
 For per-tenant `~/.claude/` home directories, attach one volume per tenant machine — straightforward.
 ### 6. Secret Injection
 Four methods, in order of recommendation for sensitive credentials:
 1. **Fly Secrets** (`fly secrets set KEY=value`) — encrypted at rest, injected as env vars at boot to all machines in the app. **Secrets are per-app, not per-machine** — all machines in an app share the same secret set. For per-tenant isolated secrets, each tenant needs their own app (or use method 2, `config.files` with `secret_name`).
 2. **`config.files` with `secret_name`** — writes a named secret to a file path inside the machine at start time:
   ```json
   {"guest_path": "/root/.claude/.credentials.json", "secret_name": "TENANT_CREDENTIALS"}
   ```
   This is the right approach for per-tenant `~/.claude/.credentials.json` if tenants share an app — pair with `ignore_app_secrets: true` and per-process secret scoping.
 3. **`config.env`** — plain env vars in machine config, not encrypted at rest. Non-sensitive config only.
 4. **`config.processes[].secrets`** — inject named secrets only to specific process groups; `ignore_app_secrets: true` prevents inheritance of app-level secrets.
 **Recommended architecture**: One app per tenant (isolated 6PN + isolated secrets) is the cleanest security model. Secrets stored per app via Fly Secrets, credentials file written via `config.files` at boot.
 ### 7. Machine Count and Org Limits
 | Limit | Default | Hard Cap |
 |---|---|---|
 | Machines per org (all states) | 50 | None architectural |
 - The 50-machine default is a **fail-safe**, not an architectural limit. Fly.io runs customers with 100,000+ machines.
 - To raise: email `billing@fly.io` with requirements.
 - **This limit will be hit immediately in any real multi-tenant deployment** — must budget for an early limit-raise request before launching.
 - API rate limit of 1 req/s per action also needs consideration for bulk tenant provisioning scripts.
 ### 8. Pricing (as of March 2026)
 **Compute (per second, billed only while running):**
 | Preset | Per Month always-on |
 |---|---|
 | shared-cpu-1x (256 MB) | $2.05 |
 | shared-cpu-2x (512 MB) | $4.10 |
 | performance-1x (2 GB) | $32.64 |
 **Storage**: $0.15/GB/month (provisioned, regardless of machine state)
 **Egress**: $0.02/GB (North America/Europe), $0.04/GB (APAC/SA), $0.12/GB (Africa/India)
 **Dedicated IPv4**: $2.00/month per app (shared IPv6 is free)
 **No free tier** for new orgs (eliminated 2024). No minimum spend, no base fee.
 **Monthly cost estimates** (1x shared-cpu-1x, 1 GB volume, 1 GB egress/tenant, US East):
 | Scenario | Per Tenant | 10 Tenants | 100 Tenants | 1,000 Tenants |
 |---|---|---|---|---|
 | Always-on (730h/month) | $2.22 | $22 | $222 | $2,220 |
 | Autostop, 8h/day active | $0.92 | $9 | $92 | $920 |
 | Autostop, 2h/day active | $0.53 | $5 | $53 | $530 |
 At scale, volume storage becomes the dominant cost when machines are idle. At 1,000 tenants autostopped, storage is ~$150/month vs compute of $170–$370/month.
 ### 9. Showstoppers
 **None identified** that rule it out. The following require action before launch:
 | Risk | Severity | Mitigation |
 |---|---|---|
 | Default 50-machine org cap | High (blocks launch) | Email billing@fly.io early; no architectural cap |
 | SMT/hyperthreading not documented | Medium (security) | Request confirmation from Fly.io support before production; mitigated by VM-level isolation |
 | Intra-org network open by default | Medium (security) | Use one app per tenant with custom 6PNs |
 | Secrets are per-app not per-machine | Low | Use one app per tenant or `config.files` with `secret_name` |
 | Volume and machine must be same region | Low (ops) | Enforce region consistency in provisioning code |
 | API rate limit 1 req/s per machine | Low | Throttle bulk provisioning loops |
 ## Recommendation
 **Proceed.** Fly.io Machines are a viable isolation layer for multi-tenant storkit SaaS.
 **Architecture to validate in spike 408:**
 - One Fly.io app per tenant (provides 6PN network isolation + isolated secrets)
 - One Firecracker microVM per tenant app (shared-cpu-1x 256 MB baseline; adjust per observed usage)
 - One persistent volume per tenant (1 GB baseline for `~/.claude/`, repos, storkit state)
 - Autostop/autoresume enabled — 70–92% compute cost reduction vs always-on for typical dev tool usage
 - Tenant credentials injected via `config.files` + Fly Secrets at machine start
 **Pricing verdict**: Workable at early SaaS scale. At 100 tenants with autostop (8h/day), costs ~$92/month; at 1,000 tenants ~$920/month. Margins are viable if per-tenant pricing is $5–$20/month.
 **Before production**: Confirm with Fly.io support whether SMT is disabled on worker hosts. Request org machine limit raised to 200–500 during private beta.
 **Spike 408 scope**: Validate cold start latency, autostop resume behavior, and volume persistence with a real test machine running the storkit container image.
@@ -0,0 +1,69 @@
 ---
 name: "Split whatsapp.rs into focused modules"
 retry_count: 2
 blocked: true
 ---
 # Refactor 409: Split whatsapp.rs into focused modules
 ## Current State
 - TBD
 ## Desired State
 whatsapp.rs is 2000+ lines, which makes it expensive for agents to navigate and edit. Split it into focused modules under chat/transport/whatsapp/.
 ## Acceptance Criteria
 - [x] mod.rs contains webhook handlers, WebhookContext, and re-exports
 - [x] meta.rs contains WhatsAppTransport, ChatTransport impl, and Graph API structs/calls
 - [x] twilio.rs contains TwilioWhatsAppTransport, ChatTransport impl, and Twilio structs/calls
 - [x] history.rs contains WhatsAppConversationHistory, load/save_whatsapp_history, and MessagingWindowTracker
 - [x] commands.rs contains handle_incoming_message, handle_llm_message, and all async command dispatch
 - [x] format.rs contains markdown_to_whatsapp and chunk_for_whatsapp
 - [x] All existing tests pass
 - [x] No behaviour changes — pure structural refactor
 ## Out of Scope
 - TBD
 ## Test Results
 <!-- storkit-test-results: {"unit":[{"name":"whatsapp::format::tests::chunk_short_message_returns_single_chunk","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_exactly_at_limit_returns_single_chunk","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_splits_on_paragraph_boundary","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_splits_on_line_boundary_when_no_paragraph_break","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_hard_splits_continuous_text","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_empty_string_returns_single_empty","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_headers_to_bold","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_bold","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_bold_italic","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_strikethrough","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_links","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_removes_horizontal_rules","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_preserves_inline_code","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_preserves_code_blocks","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_mixed_message","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_passthrough_plain_text","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_basics","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_expiry","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_reset","status":"pass","details":null},{"name":"whatsapp::history::tests::load_empty_history","status":"pass","details":null},{"name":"whatsapp::history::tests::save_and_load_history","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_valid","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_missing_body","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_missing_from","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_help","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_status","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_unknown","status":"pass","details":null},{"name":"whatsapp::mod::tests::webhook_context_basics","status":"pass","details":null}],"integration":[]} -->
 ### Unit Tests (28 passed, 0 failed)
 - ✅ whatsapp::format::tests::chunk_short_message_returns_single_chunk
 - ✅ whatsapp::format::tests::chunk_exactly_at_limit_returns_single_chunk
 - ✅ whatsapp::format::tests::chunk_splits_on_paragraph_boundary
 - ✅ whatsapp::format::tests::chunk_splits_on_line_boundary_when_no_paragraph_break
 - ✅ whatsapp::format::tests::chunk_hard_splits_continuous_text
 - ✅ whatsapp::format::tests::chunk_empty_string_returns_single_empty
 - ✅ whatsapp::format::tests::md_to_wa_converts_headers_to_bold
 - ✅ whatsapp::format::tests::md_to_wa_converts_bold
 - ✅ whatsapp::format::tests::md_to_wa_converts_bold_italic
 - ✅ whatsapp::format::tests::md_to_wa_converts_strikethrough
 - ✅ whatsapp::format::tests::md_to_wa_converts_links
 - ✅ whatsapp::format::tests::md_to_wa_removes_horizontal_rules
 - ✅ whatsapp::format::tests::md_to_wa_preserves_inline_code
 - ✅ whatsapp::format::tests::md_to_wa_preserves_code_blocks
 - ✅ whatsapp::format::tests::md_to_wa_mixed_message
 - ✅ whatsapp::format::tests::md_to_wa_passthrough_plain_text
 - ✅ whatsapp::history::tests::messaging_window_tracker_basics
 - ✅ whatsapp::history::tests::messaging_window_tracker_expiry
 - ✅ whatsapp::history::tests::messaging_window_tracker_reset
 - ✅ whatsapp::history::tests::load_empty_history
 - ✅ whatsapp::history::tests::save_and_load_history
 - ✅ whatsapp::twilio::tests::parse_twilio_form_valid
 - ✅ whatsapp::twilio::tests::parse_twilio_form_missing_body
 - ✅ whatsapp::twilio::tests::parse_twilio_form_missing_from
 - ✅ whatsapp::commands::tests::parse_command_help
 - ✅ whatsapp::commands::tests::parse_command_status
 - ✅ whatsapp::commands::tests::parse_command_unknown
 - ✅ whatsapp::mod::tests::webhook_context_basics
 ### Integration Tests (0 passed, 0 failed)
 *No integration tests recorded.*
@@ -0,0 +1,22 @@
 ---
 name: "loc bot command — top files by line count"
 ---
 # Story 410: loc bot command — top files by line count
 ## User Story
 As a developer, I want to send `loc` to the bot and see the top files by line count, so I can spot files that are getting too large before they become a problem for agents.
 ## Acceptance Criteria
 - [ ] loc command is registered in chat/commands/mod.rs and appears in help output
 - [ ] `loc` returns the top 10 source files by line count (excluding generated files, node_modules, target/, .storkit/worktrees/)
 - [ ] `loc 5` returns the top 5 files
 - [ ] `loc 20` returns the top 20 files
 - [ ] Output includes file path, line count, and rank
 - [ ] Command works from all transports (Matrix, WhatsApp, Slack)
 ## Out of Scope
 - TBD
@@ -0,0 +1,29 @@
 ---
 name: "Split slack.rs into focused modules"
 ---
 # Refactor 413: Split slack.rs into focused modules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/chat/transport/slack.rs (1902 lines) into a slack/ directory with focused modules, mirroring the whatsapp/ module structure from story 409.
 ## Acceptance Criteria
 - [ ] slack.rs is replaced by a slack/ directory with mod.rs re-exporting all public types
 - [ ] meta.rs contains SlackTransport struct, ChatTransport trait impl, and Slack API request/response types
 - [ ] commands.rs contains incoming message dispatch, permission logic, and slash command handling
 - [ ] format.rs contains markdown_to_slack() conversion
 - [ ] history.rs contains load_slack_history(), save_slack_history(), and SlackHistoryDump
 - [ ] verify.rs contains verify_slack_signature(), sha256(), and constant_time_eq()
 - [ ] mod.rs contains Slack event types, webhook handlers, and SlackWebhookContext
 - [ ] All existing tests are preserved and pass in their respective modules
 - [ ] No public API changes — all existing imports from other crates continue to work
 ## Out of Scope
 - TBD
@@ -0,0 +1,19 @@
 ---
 name: "loc command filters out known-huge files"
 ---
 # Story 414: loc command filters out known-huge files
 ## User Story
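 As a developer, I want the `loc` command to exclude lockfiles and generated files, so that the results highlight real source files that are getting too large.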
 ## Acceptance Criteria
 - [ ] loc command excludes lockfiles and generated files (e.g. package-lock.json, Cargo.lock, frontend/package-lock.json) from results
 - [ ] Exclusion list is defined as a constant, easy to extend
 - [ ] Excluded files do not count toward line totals
 ## Out of Scope
 - TBD
@@ -0,0 +1,29 @@
 ---
 name: "Split agents/pool/mod.rs into submodules"
 ---
 # Refactor 415: Split agents/pool/mod.rs into submodules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/agents/pool/mod.rs (2407 lines) into focused submodules within the pool/ directory.
 ## Acceptance Criteria
 - [ ] types.rs contains StoryAgent, PendingGuard, AgentInfo, composite_key, and related helper structs
 - [ ] lifecycle.rs contains start_agent, stop_agent, wait_for_agent and their unit tests
 - [ ] worktree.rs contains create_worktree, get_project_root, find_active_story_stage and their unit tests
 - [ ] query.rs contains list_agents, available_agents_for_stage, get_log_info, subscribe, drain_events and their unit tests
 - [ ] process.rs contains kill_all_children, kill_child_for_key, ChildKiller registry methods and their unit tests
 - [ ] test_helpers.rs contains inject_test_agent and its variants (4 methods)
 - [ ] mod.rs contains AgentPool struct, new(), and re-exports all public types
 - [ ] Unit tests live in their respective module files, not in a separate tests module
 - [ ] No public API changes — all existing imports continue to work
 ## Out of Scope
 - TBD
@@ -0,0 +1,28 @@
 ---
 name: "Split io/fs.rs into submodules"
 ---
 # Refactor 416: Split io/fs.rs into submodules
 ## Current State
 - TBD
 ## Desired State
 Refactor the monolithic server/src/io/fs.rs (2007 lines) into focused submodules within an fs/ directory.
 ## Acceptance Criteria
 - [ ] scaffold.rs contains scaffold_story_kit, write_file_if_missing, write_script_if_missing, write_story_kit_gitignore, append_root_gitignore_entries, detect_components_toml, detect_script_test, generate_project_toml and their unit tests
 - [ ] project.rs contains open_project, close_project, get_current_project, get_known_projects, forget_known_project, ensure_project_root_with_story_kit, validate_project_path and their unit tests
 - [ ] files.rs contains read_file, write_file, list_directory, list_project_files, FileEntry, create_directory_absolute and their unit tests
 - [ ] paths.rs contains resolve_cli_path, resolve_path, resolve_path_impl, find_story_kit_root, get_home_directory and their unit tests
 - [ ] preferences.rs contains get_model_preference, set_model_preference and their unit tests
 - [ ] mod.rs re-exports all public types and functions
 - [ ] Unit tests live in their respective module files
 - [ ] No public API changes — all existing imports continue to work
 ## Out of Scope
 - TBD
@@ -1 +0,0 @@
 3001
@@ -209,9 +209,9 @@ dependencies = [
 [[package]]
 name = "aws-lc-sys"
-version = "0.39.0"
+version = "0.39.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a"
+checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399"
 dependencies = [
 "cc",
 "cmake",
@@ -349,9 +349,9 @@ dependencies = [
 [[package]]
 name = "cc"
-version = "1.2.57"
+version = "1.2.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
+checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
 dependencies = [
 "find-msvc-tools",
 "jobserver",
@@ -434,9 +434,9 @@ dependencies = [
 [[package]]
 name = "cmake"
-version = "0.1.57"
+version = "0.1.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
+checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
 dependencies = [
 "cc",
 ]
@@ -1862,10 +1862,12 @@ dependencies = [
 [[package]]
 name = "js-sys"
-version = "0.3.91"
+version = "0.3.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
+checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
 dependencies = [
 "cfg-if",
 "futures-util",
 "once_cell",
 "wasm-bindgen",
 ]
@@ -1954,9 +1956,9 @@ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
 [[package]]
 name = "libredox"
-version = "0.1.14"
+version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
+checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
 dependencies = [
 "bitflags 2.11.0",
 "libc",
@@ -2428,9 +2430,9 @@ dependencies = [
 [[package]]
 name = "mio"
-version = "1.1.1"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
 dependencies = [
 "libc",
 "log",
@@ -2559,9 +2561,9 @@ dependencies = [
 [[package]]
 name = "num-conv"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
+checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
 [[package]]
 name = "num-traits"
@@ -3274,6 +3276,7 @@ dependencies = [
 "rustls-platform-verifier",
 "serde",
 "serde_json",
 "serde_urlencoded",
 "sync_wrapper",
 "tokio",
 "tokio-rustls",
@@ -3555,9 +3558,9 @@ dependencies = [
 [[package]]
 name = "rustc-hash"
-version = "2.1.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
 [[package]]
 name = "rustc_version"
@@ -3823,9 +3826,9 @@ dependencies = [
 [[package]]
 name = "serde_spanned"
-version = "1.0.4"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
+checksum = "876ac351060d4f882bb1032b6369eb0aef79ad9df1ea8bc404874d8cc3d0cd98"
 dependencies = [
 "serde_core",
 ]
@@ -3940,9 +3943,9 @@ dependencies = [
 [[package]]
 name = "simd-adler32"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
 [[package]]
 name = "similar"
@@ -4016,7 +4019,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
 [[package]]
 name = "storkit"
-version = "0.5.0"
+version = "0.7.0"
 dependencies = [
 "async-stream",
 "async-trait",
@@ -4036,17 +4039,19 @@ dependencies = [
 "poem-openapi",
 "portable-pty",
 "pulldown-cmark",
 "regex",
 "reqwest 0.13.2",
 "rust-embed",
 "serde",
 "serde_json",
 "serde_urlencoded",
 "serde_yaml",
 "sha2",
 "strip-ansi-escapes",
 "tempfile",
 "tokio",
 "tokio-tungstenite 0.29.0",
- "toml 1.0.7+spec-1.1.0",
+ "toml 1.1.0+spec-1.1.0",
 "uuid",
 "wait-timeout",
 "walkdir",
@@ -4393,14 +4398,14 @@ dependencies = [
 [[package]]
 name = "toml"
-version = "1.0.7+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd28d57d8a6f6e458bc0b8784f8fdcc4b99a437936056fa122cb234f18656a96"
+checksum = "f8195ca05e4eb728f4ba94f3e3291661320af739c4e43779cbdfae82ab239fcc"
 dependencies = [
 "indexmap",
 "serde_core",
 "serde_spanned",
- "toml_datetime 1.0.1+spec-1.1.0",
+ "toml_datetime 1.1.0+spec-1.1.0",
 "toml_parser",
 "toml_writer",
 "winnow 1.0.0",
@@ -4417,39 +4422,39 @@ dependencies = [
 [[package]]
 name = "toml_datetime"
-version = "1.0.1+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9"
+checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f"
 dependencies = [
 "serde_core",
 ]
 [[package]]
 name = "toml_edit"
-version = "0.25.5+spec-1.1.0"
+version = "0.25.8+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1"
+checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c"
 dependencies = [
 "indexmap",
- "toml_datetime 1.0.1+spec-1.1.0",
+ "toml_datetime 1.1.0+spec-1.1.0",
 "toml_parser",
 "winnow 1.0.0",
 ]
 [[package]]
 name = "toml_parser"
-version = "1.0.10+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420"
+checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011"
 dependencies = [
 "winnow 1.0.0",
 ]
 [[package]]
 name = "toml_writer"
-version = "1.0.7+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f17aaa1c6e3dc22b1da4b6bba97d066e354c7945cac2f7852d4e4e7ca7a6b56d"
+checksum = "d282ade6016312faf3e41e57ebbba0c073e4056dab1232ab1cb624199648f8ed"
 [[package]]
 name = "tower"
@@ -4660,9 +4665,9 @@ dependencies = [
 [[package]]
 name = "unicode-segmentation"
-version = "1.12.0"
+version = "1.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
 [[package]]
 name = "unicode-xid"
@@ -4725,9 +4730,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 [[package]]
 name = "uuid"
-version = "1.22.0"
+version = "1.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
+checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
 dependencies = [
 "getrandom 0.4.2",
 "js-sys",
@@ -4846,9 +4851,9 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.114"
+version = "0.2.115"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
+checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
 dependencies = [
 "cfg-if",
 "once_cell",
@@ -4859,23 +4864,19 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.64"
+version = "0.4.65"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
+checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0"
 dependencies = [
 "cfg-if",
 "futures-util",
 "js-sys",
 "once_cell",
 "wasm-bindgen",
 "web-sys",
 ]
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.114"
+version = "0.2.115"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
+checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
 dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
@@ -4883,9 +4884,9 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.114"
+version = "0.2.115"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
+checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
 dependencies = [
 "bumpalo",
 "proc-macro2",
@@ -4896,9 +4897,9 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.114"
+version = "0.2.115"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
+checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
 dependencies = [
 "unicode-ident",
 ]
@@ -4983,9 +4984,9 @@ dependencies = [
 [[package]]
 name = "web-sys"
-version = "0.3.91"
+version = "0.3.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
+checksum = "84cde8507f4d7cfcb1185b8cb5890c494ffea65edbe1ba82cfd63661c805ed94"
 dependencies = [
 "js-sys",
 "wasm-bindgen",
@@ -21,11 +21,12 @@ rust-embed = "8"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 serde_urlencoded = "0.7"
 sha2 = "0.10"
 serde_yaml = "0.9"
 strip-ansi-escapes = "0.2"
 tempfile = "3"
 tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
-toml = "1.0.7"
+toml = "1.1.0"
 uuid = { version = "1.22.0", features = ["v4", "serde"] }
 tokio-tungstenite = "0.29.0"
 walkdir = "2.5.0"
@@ -35,6 +36,7 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [
    "sqlite",
    "e2e-encryption",
 ] }
-pulldown-cmark = { version = "0.13.1", default-features = false, features = [
+pulldown-cmark = { version = "0.13.3", default-features = false, features = [
    "html",
 ] }
 regex = "1"
@@ -1,182 +1,55 @@
-# Story Kit
+# Storkit
-This app runs as a single Rust web server binary that serves the Vite/React frontend and exposes APIs.
+A story-driven development server that manages work items, spawns coding agents, and runs them through a pipeline from backlog to done. Ships as a single Rust binary with an embedded React frontend. Communicates via Matrix, WhatsApp, and Slack bot transports, and exposes MCP tools for programmatic access.
 The frontend lives in the `frontend/` directory.
-You can also run the frontend and backend separately in development (Vite dev server + Rust API).
+## Prerequisites
-## Running it in development
+- Rust (2024 edition)
 - Node.js and npm
 - Docker (for Linux cross-compilation and container deployment)
 - `cross` (`cargo install cross`) for Linux static builds
 ## Building for production
 ```bash
 # Build the frontend
 cd frontend
 npm install
 npm run dev
 # In another terminal - run the server (serves embedded frontend/dist/)
 cargo run
 ```
 ## Production
 ```bash
 # Build the release binary (also builds the frontend via build.rs)
 cargo build --release
 # Run the server (serves embedded frontend/dist/)
 ./target/release/storkit
 ```
-## Cross-Platform Distribution
+The release binary embeds the frontend via `rust-embed`. Output: `target/release/storkit`.
-Story Kit ships as a **single self-contained binary** with the React frontend embedded via
+For a static Linux binary (musl, zero dynamic deps):
 `rust-embed`. No Rust toolchain, Node.js, or extra libraries are required on the target machine.
 ### macOS
 ```bash
-# Native build – no extra tools required beyond Rust + npm
+cross build --release --target x86_64-unknown-linux-musl
 make build-macos
 # Output: target/release/storkit
 # Verify only system frameworks are linked (Security.framework, libSystem.B.dylib, etc.)
 otool -L target/release/storkit
 ```
-### Linux (static x86_64, zero dynamic deps)
+Docker:
 The Linux build uses the `x86_64-unknown-linux-musl` target to produce a fully static binary.
 **Prerequisites:**
 ```bash
-# Install cross – a Rust cross-compilation tool backed by Docker
+docker compose -f docker/docker-compose.yml build
 cargo install cross
 # Ensure Docker Desktop (or Docker Engine) is running
 ```
-**Build:**
+## Running in development
 ```bash
-make build-linux
+# Run tests
-# Output: target/x86_64-unknown-linux-musl/release/storkit
+script/test
-# Verify the binary is statically linked
+# Run the server
-file target/x86_64-unknown-linux-musl/release/storkit
+cargo run -- --port 3000
 # Expected: ELF 64-bit LSB executable, x86-64, statically linked
-ldd target/x86_64-unknown-linux-musl/release/storkit
+# In another terminal, run the frontend dev server
-# Expected: not a dynamic executable
+cd frontend && npm install && npm run dev
 ```
-**Running on any Linux x86_64 machine:**
+Configuration lives in `.storkit/project.toml`. See `.storkit/bot.toml.*.example` for transport setup.
 ```bash
 # No Rust, Node, glibc, or any other library needed – just copy and run
 ./storkit
 ```
 ## Releasing
-Builds both macOS and Linux binaries locally, tags the repo, and publishes a Gitea release with a changelog.
+Requires a Gitea API token in `.env` (`GITEA_TOKEN=your_token`).
 **One-time setup:**
 1. Create a Gitea API token at `https://code.crashlabs.io/user/settings/applications` (needs repository read/write)
 2. Add it to `.env` (gitignored): `GITEA_TOKEN=your_token`
 3. Ensure `cross` is installed (`cargo install cross`) and Docker is running
 **To release:**
 ```bash
-make release V=0.2.0
+script/release 0.6.1
 ```
-This will:
+This bumps version in `Cargo.toml` and `package.json`, builds macOS arm64 and Linux amd64 binaries, tags the repo, and publishes a Gitea release with changelog and binaries attached.
 - Build macOS arm64 (native) and Linux amd64 (static musl via cross/Docker)
 - Generate a changelog from commits since the last tag
 - Tag the repo as `v0.2.0` and push the tag
 - Create a Gitea release with both binaries and the changelog attached
 ## Testing
 ### Frontend Tests
 The frontend uses **Vitest** for unit tests and **Playwright** for end-to-end tests.
 ```bash
 cd frontend
 # Run unit tests
 npm test
 # Run end-to-end tests
 npm run test:e2e
 ```
 ### Backend Tests
 This project uses **nextest** for running tests and **cargo-llvm-cov** for code coverage.
 ### Install Tools
 ```bash
 cargo install cargo-nextest cargo-llvm-cov
 ```
 ### Run Tests
 ```bash
 # Run all tests
 cargo nextest run
 # Run specific module
 cargo nextest run search_files
 # Run with verbose output
 cargo nextest run --no-capture
 ```
 ### Generate Coverage
 ```bash
 # HTML report (opens in browser)
 cargo llvm-cov nextest --html --open
 # Terminal output
 cargo llvm-cov nextest
 # LCOV format (for CI)
 cargo llvm-cov nextest --lcov --output-path lcov.info
 # Clean coverage data
 cargo llvm-cov clean
 ```
 ### Configuration
 - **Nextest config**: `.config/nextest.toml`
 - **Coverage output**: `target/llvm-cov/html/index.html`
 ## Current Coverage (search_files module)
 ```
 Module: commands/search.rs
 ├── Region Coverage:   75.36%
 ├── Function Coverage: 69.05%
 └── Line Coverage:     72.55%
 ```
 ### Available Test Profiles
 ```bash
 # Development (default)
 cargo nextest run
 # CI with retries
 cargo nextest run --profile ci
 # Coverage optimized
 cargo nextest run --profile coverage
 ```
@@ -27,6 +27,8 @@ services:
      - GIT_USER_EMAIL=${GIT_USER_EMAIL:?Set GIT_USER_EMAIL}
      # Optional: override the server port (default 3001)
      - STORKIT_PORT=3001
      # Bind to all interfaces so Docker port forwarding works.
      - STORKIT_HOST=0.0.0.0
      # Optional: Matrix bot credentials (if using Matrix integration)
      - MATRIX_HOMESERVER=${MATRIX_HOMESERVER:-}
      - MATRIX_USER=${MATRIX_USER:-}
@@ -89,7 +91,6 @@ services:
      - no-new-privileges:true
    # Resource limits – cap the whole system.
    # Adjust based on your machine. These are conservative defaults.
    deploy:
      resources:
        limits:
@@ -14,8 +14,12 @@ if [ -z "$GIT_USER_EMAIL" ]; then
    exit 1
 fi
-# Use GIT_AUTHOR/COMMITTER env vars instead of git config --global,
+# Set git identity globally so it persists for all shells (docker exec, etc.),
-# so the root filesystem can stay read-only (no ~/.gitconfig write).
+# not just the entrypoint process tree.
 git config --global user.name "$GIT_USER_NAME"
 git config --global user.email "$GIT_USER_EMAIL"
 # Also set env vars for backwards compatibility.
 export GIT_AUTHOR_NAME="$GIT_USER_NAME"
 export GIT_COMMITTER_NAME="$GIT_USER_NAME"
 export GIT_AUTHOR_EMAIL="$GIT_USER_EMAIL"
@@ -27,7 +31,7 @@ export GIT_COMMITTER_EMAIL="$GIT_USER_EMAIL"
 # binaries on a Linux container). Reinstall to get the right ones.
 if [ -d /workspace/frontend ] && [ -f /workspace/frontend/package.json ]; then
    echo "Installing frontend dependencies for container platform..."
-    cd /workspace/frontend && npm install --prefer-offline 2>/dev/null || true
+    cd /workspace/frontend && npm ci --prefer-offline 2>/dev/null || true
    cd /workspace
 fi
@@ -1,7 +1,7 @@
 {
 	"name": "living-spec-standalone",
 	"private": true,
-  "version": "0.5.0",
+	"version": "0.7.0",
 	"type": "module",
 	"scripts": {
 		"dev": "vite",
@@ -20,6 +20,9 @@
 		"react-markdown": "^10.1.0",
 		"react-syntax-highlighter": "^16.1.0"
 	},
 	"overrides": {
 		"glob": "^13.0.0"
 	},
 	"devDependencies": {
 		"@biomejs/biome": "^2.4.2",
 		"@playwright/test": "^1.47.2",
@@ -31,9 +34,7 @@
 		"@types/react-dom": "^19.1.6",
 		"@vitejs/plugin-react": "^4.6.0",
 		"@vitest/coverage-v8": "^2.1.9",
    "jest": "^29.0.0",
 		"jsdom": "^28.1.0",
    "ts-jest": "^29.0.0",
 		"typescript": "~5.8.3",
 		"vite": "^5.4.21",
 		"vitest": "^2.1.4"
@@ -1,6 +1,6 @@
 import { defineConfig } from "@playwright/test";
 import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 import { defineConfig } from "@playwright/test";
 const configDir = dirname(fileURLToPath(new URL(import.meta.url)));
 const frontendRoot = resolve(configDir, ".");
@@ -127,13 +127,13 @@ details summary::-webkit-details-marker {
 	display: none;
 }
-details[open] summary span:first-child {
+details summary span:first-child {
 	transform: rotate(90deg);
 	display: inline-block;
 	transition: transform 0.2s ease;
 }
-details summary span:first-child {
+details[open] summary span:first-child {
 	transform: rotate(90deg);
 	display: inline-block;
 	transition: transform 0.2s ease;
 }
@@ -236,3 +236,49 @@ body,
 		opacity: 0;
 	}
 }
 /* ProjectPathInput dropdown theming: light-scheme defaults first; dark-scheme overrides follow in the media query */
 .path-dropdown {
 	border: 1px solid #ddd;
 	background: #fff;
 	color: #0f0f0f;
 }
 .path-dropdown-header {
 	border-bottom: 1px solid #eee;
 	background: #fafafa;
 }
 .path-dropdown-item {
 	background: transparent;
 }
 .path-dropdown-item--selected {
 	background: #f0f0f0;
 }
 .path-match-highlight {
 	font-weight: 600;
 	color: #222;
 }
@media (prefers-color-scheme: dark) {
 	.path-dropdown {
 		border-color: #555;
 		background: #1e1e1e;
 		color: #f6f6f6;
 	}
 	.path-dropdown-header {
 		border-bottom-color: #444;
 		background: #2a2a2a;
 	}
 	.path-dropdown-item--selected {
 		background: #3a3a3a;
 	}
 	.path-match-highlight {
 		color: #f6f6f6;
 	}
 }
@@ -382,6 +382,14 @@ export const api = {
 	deleteStory(storyId: string) {
 		return callMcpTool("delete_story", { story_id: storyId });
 	},
 	/**
 	 * Execute a bot slash command without LLM invocation.
 	 *
 	 * Sends a POST to `/bot/command` whose JSON body is `{ command, args }`.
 	 *
 	 * @param command - Command name placed in the request body.
 	 * @param args - Argument string placed in the request body.
 	 * @param baseUrl - Optional base URL, forwarded unchanged to `requestJson`.
 	 * @returns The result of `requestJson`, typed as `{ response: string }`;
 	 *   `response` carries the markdown response text.
 	 */
 	botCommand(command: string, args: string, baseUrl?: string) {
 		return requestJson<{ response: string }>(
 			"/bot/command",
 			{ method: "POST", body: JSON.stringify({ command, args }) },
 			baseUrl,
 		);
 	},
 };
 async function callMcpTool(
@@ -40,6 +40,7 @@ vi.mock("../api/client", () => {
 		setAnthropicApiKey: vi.fn(),
 		readFile: vi.fn(),
 		listProjectFiles: vi.fn(),
 		botCommand: vi.fn(),
 	};
 	class ChatWebSocket {
 		connect(handlers: WsHandlers) {
@@ -64,6 +65,7 @@ const mockedApi = {
 	setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
 	readFile: vi.mocked(api.readFile),
 	listProjectFiles: vi.mocked(api.listProjectFiles),
 	botCommand: vi.mocked(api.botCommand),
 };
 function setupMocks() {
@@ -76,6 +78,7 @@ function setupMocks() {
 	mockedApi.listProjectFiles.mockResolvedValue([]);
 	mockedApi.cancelChat.mockResolvedValue(true);
 	mockedApi.setAnthropicApiKey.mockResolvedValue(true);
 	mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
 }
 describe("Default provider selection (Story 206)", () => {
@@ -1457,3 +1460,204 @@ describe("File reference expansion (Story 269 AC4)", () => {
 		expect(mockedApi.readFile).not.toHaveBeenCalled();
 	});
 });
 // Slash command handling: each test types a "/..." message into the chat input
 // and presses Enter. Backend commands are expected to be routed through
 // api.botCommand; /reset, /help and unknown commands are expected to be
 // resolved without calling botCommand. Where asserted, lastSendChatArgs must
 // stay null, i.e. nothing was sent to the LLM.
 describe("Slash command handling (Story 374)", () => {
 	// Clear the captured WebSocket handlers and recorded sendChat args, then
 	// re-install the default mock return values before every test.
 	beforeEach(() => {
 		capturedWsHandlers = null;
 		lastSendChatArgs = null;
 		setupMocks();
 	});
 	afterEach(() => {
 		vi.clearAllMocks();
 	});
 	it("AC: /status calls botCommand and displays response", async () => {
 		mockedApi.botCommand.mockResolvedValue({ response: "Pipeline: 3 active" });
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		// Wait until the component has connected and registered its WS handlers.
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/status" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		// Expected call shape: command name without the leading slash, an empty
 		// args string, and an undefined third (baseUrl) argument.
 		await waitFor(() => {
 			expect(mockedApi.botCommand).toHaveBeenCalledWith(
 				"status",
 				"",
 				undefined,
 			);
 		});
 		expect(await screen.findByText("Pipeline: 3 active")).toBeInTheDocument();
 		// Should NOT go to LLM
 		expect(lastSendChatArgs).toBeNull();
 	});
 	it("AC: /status <number> passes args to botCommand", async () => {
 		mockedApi.botCommand.mockResolvedValue({ response: "Story 42 details" });
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/status 42" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		await waitFor(() => {
 			expect(mockedApi.botCommand).toHaveBeenCalledWith(
 				"status",
 				"42",
 				undefined,
 			);
 		});
 	});
 	it("AC: /start <number> calls botCommand", async () => {
 		mockedApi.botCommand.mockResolvedValue({ response: "Started agent" });
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/start 42 opus" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		// Everything after the command name is expected as one args string.
 		await waitFor(() => {
 			expect(mockedApi.botCommand).toHaveBeenCalledWith(
 				"start",
 				"42 opus",
 				undefined,
 			);
 		});
 		expect(await screen.findByText("Started agent")).toBeInTheDocument();
 	});
 	it("AC: /git calls botCommand", async () => {
 		mockedApi.botCommand.mockResolvedValue({ response: "On branch main" });
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/git" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		await waitFor(() => {
 			expect(mockedApi.botCommand).toHaveBeenCalledWith("git", "", undefined);
 		});
 	});
 	it("AC: /cost calls botCommand", async () => {
 		mockedApi.botCommand.mockResolvedValue({ response: "$1.23 today" });
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/cost" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		await waitFor(() => {
 			expect(mockedApi.botCommand).toHaveBeenCalledWith("cost", "", undefined);
 		});
 	});
 	it("AC: /reset clears messages and session without LLM", async () => {
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		// First add a message so there is history to clear
 		act(() => {
 			capturedWsHandlers?.onUpdate([
 				{ role: "user", content: "hello" },
 				{ role: "assistant", content: "world" },
 			]);
 		});
 		expect(await screen.findByText("world")).toBeInTheDocument();
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/reset" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		// LLM must NOT be invoked
 		expect(lastSendChatArgs).toBeNull();
 		// botCommand must NOT be invoked (reset is frontend-only)
 		expect(mockedApi.botCommand).not.toHaveBeenCalled();
 		// Confirmation message should appear
 		expect(await screen.findByText(/Session reset/)).toBeInTheDocument();
 	});
 	it("AC: unrecognised slash command shows error message", async () => {
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/foobar" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		expect(await screen.findByText(/Unknown command/)).toBeInTheDocument();
 		// Should NOT go to LLM
 		expect(lastSendChatArgs).toBeNull();
 		// Should NOT call botCommand
 		expect(mockedApi.botCommand).not.toHaveBeenCalled();
 	});
 	it("AC: /help shows help overlay", async () => {
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/help" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		expect(await screen.findByTestId("help-overlay")).toBeInTheDocument();
 		// Neither the LLM nor the backend command endpoint should be used.
 		expect(lastSendChatArgs).toBeNull();
 		expect(mockedApi.botCommand).not.toHaveBeenCalled();
 	});
 	it("AC: botCommand API error shows error message in chat", async () => {
 		// Make the backend call reject so the error path is exercised.
 		mockedApi.botCommand.mockRejectedValue(new Error("Server error"));
 		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
 		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
 		const input = screen.getByPlaceholderText("Send a message...");
 		await act(async () => {
 			fireEvent.change(input, { target: { value: "/git" } });
 		});
 		await act(async () => {
 			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
 		});
 		expect(
 			await screen.findByText(/Error running command/),
 		).toBeInTheDocument();
 	});
 });
@@ -612,6 +612,81 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
 			return;
 		}
 		// /reset — clear session and message history without LLM
 		if (/^\/reset\s*$/i.test(messageText)) {
 			setMessages([]);
 			setClaudeSessionId(null);
 			setStreamingContent("");
 			setStreamingThinking("");
 			setActivityStatus(null);
 			setMessages([
 				{
 					role: "assistant",
 					content: "Session reset. Starting a fresh conversation.",
 				},
 			]);
 			return;
 		}
 		// Slash commands forwarded to the backend bot command endpoint
 		const slashMatch = messageText.match(/^\/(\S+)(?:\s+([\s\S]*))?$/);
 		if (slashMatch) {
 			const cmd = slashMatch[1].toLowerCase();
 			const args = (slashMatch[2] ?? "").trim();
 			// /btw is handled by its own branch further below, so let it fall through
 			if (cmd !== "btw") {
 				const knownCommands = new Set([
 					"status",
 					"assign",
 					"start",
 					"show",
 					"move",
 					"delete",
 					"cost",
 					"git",
 					"overview",
 					"rebuild",
 					"loc",
 				]);
 				if (knownCommands.has(cmd)) {
 					// Show the slash command in chat as a user message (display only)
 					setMessages((prev: Message[]) => [
 						...prev,
 						{ role: "user", content: messageText },
 					]);
 					try {
 						const result = await api.botCommand(cmd, args, undefined);
 						setMessages((prev: Message[]) => [
 							...prev,
 							{ role: "assistant", content: result.response },
 						]);
 					} catch (e) {
 						setMessages((prev: Message[]) => [
 							...prev,
 							{
 								role: "assistant",
 								content: `**Error running command:** ${e}`,
 							},
 						]);
 					}
 					return;
 				}
 				// Unknown slash command
 				setMessages((prev: Message[]) => [
 					...prev,
 					{ role: "user", content: messageText },
 					{
 						role: "assistant",
 						content: `Unknown command: \`/${cmd}\`. Type \`/help\` to see available commands.`,
 					},
 				]);
 				return;
 			}
 		}
 		// /btw <question> — answered from context without disrupting main chat
 		const btwMatch = messageText.match(/^\/btw\s+(.+)/s);
 		if (btwMatch) {
@@ -12,6 +12,57 @@ const SLASH_COMMANDS: SlashCommand[] = [
 		name: "/help",
 		description: "Show this list of available slash commands.",
 	},
 	{
 		name: "/status",
 		description:
 			"Show pipeline status and agent availability. `/status <number>` shows a story triage dump.",
 	},
 	{
 		name: "/assign <number> <model>",
 		description: "Pre-assign a model to a story (e.g. `/assign 42 opus`).",
 	},
 	{
 		name: "/start <number>",
 		description:
 			"Start a coder on a story. Optionally specify a model: `/start <number> opus`.",
 	},
 	{
 		name: "/show <number>",
 		description: "Display the full text of a work item.",
 	},
 	{
 		name: "/move <number> <stage>",
 		description:
 			"Move a work item to a pipeline stage (backlog, current, qa, merge, done).",
 	},
 	{
 		name: "/delete <number>",
 		description:
 			"Remove a work item from the pipeline and stop any running agent.",
 	},
 	{
 		name: "/cost",
 		description:
 			"Show token spend: 24h total, top stories, breakdown by agent type, and all-time total.",
 	},
 	{
 		name: "/git",
 		description:
 			"Show git status: branch, uncommitted changes, and ahead/behind remote.",
 	},
 	{
 		name: "/overview <number>",
 		description: "Show the implementation summary for a merged story.",
 	},
 	{
 		name: "/rebuild",
 		description: "Rebuild the server binary and restart.",
 	},
 	{
 		name: "/reset",
 		description:
 			"Clear the current Claude Code session and start fresh (messages and session ID are cleared locally).",
 	},
 	{
 		name: "/btw <question>",
 		description:
@@ -32,7 +32,7 @@ function renderHighlightedMatch(text: string, query: string) {
 		return (
 			<span
 				key={`${char}-${count}`}
-				style={isMatch ? { fontWeight: 600, color: "#222" } : undefined}
+				className={isMatch ? "path-match-highlight" : undefined}
 			>
 				{char}
 			</span>
@@ -93,16 +93,15 @@ export function ProjectPathInput({
 			/>
 			{matchList.length > 0 && (
 				<div
 					className="path-dropdown"
 					style={{
 						position: "absolute",
 						top: "100%",
 						left: 0,
 						right: 0,
 						marginTop: "6px",
 						border: "1px solid #ddd",
 						borderRadius: "6px",
 						overflow: "hidden",
 						background: "#fff",
 						fontFamily: "monospace",
 						height: "160px",
 						overflowY: "auto",
@@ -111,13 +110,12 @@ export function ProjectPathInput({
 					}}
 				>
 					<div
 						className="path-dropdown-header"
 						style={{
 							display: "flex",
 							justifyContent: "flex-end",
 							alignItems: "center",
 							padding: "4px 6px",
 							borderBottom: "1px solid #eee",
 							background: "#fafafa",
 						}}
 					>
 						<button
@@ -128,8 +126,6 @@ export function ProjectPathInput({
 								width: "24px",
 								height: "24px",
 								borderRadius: "4px",
 								border: "1px solid #ddd",
 								background: "#fff",
 								cursor: "pointer",
 								lineHeight: 1,
 							}}
@@ -143,6 +139,7 @@ export function ProjectPathInput({
 							<button
 								key={match.path}
 								type="button"
 								className={`path-dropdown-item${isSelected ? " path-dropdown-item--selected" : ""}`}
 								onMouseEnter={() => onSelectMatch(index)}
 								onMouseDown={(event) => {
 									event.preventDefault();
@@ -154,7 +151,6 @@ export function ProjectPathInput({
 									textAlign: "left",
 									padding: "6px 8px",
 									border: "none",
 									background: isSelected ? "#f0f0f0" : "transparent",
 									cursor: "pointer",
 									fontFamily: "inherit",
 								}}
@@ -70,11 +70,11 @@ export type WsResponse =
 // Re-export API client types for convenience
 export type {
 	CommandOutput as ApiCommandOutput,
 	FileEntry as ApiFileEntry,
 	Message as ApiMessage,
 	ProviderConfig as ApiProviderConfig,
 	FileEntry as ApiFileEntry,
 	SearchResult as ApiSearchResult,
 	CommandOutput as ApiCommandOutput,
 	WsRequest as ApiWsRequest,
 	WsResponse as ApiWsResponse,
 };
@@ -1,6 +1,6 @@
 [package]
 name = "storkit"
-version = "0.5.0"
+version = "0.7.0"
 edition = "2024"
 build = "build.rs"
@@ -18,11 +18,12 @@ notify = { workspace = true }
 poem = { workspace = true, features = ["websocket"] }
 poem-openapi = { workspace = true, features = ["swagger-ui"] }
 portable-pty = { workspace = true }
-reqwest = { workspace = true, features = ["json", "stream"] }
+reqwest = { workspace = true, features = ["json", "stream", "form"] }
 rust-embed = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 serde_urlencoded = { workspace = true }
 sha2 = { workspace = true }
 serde_yaml = { workspace = true }
 strip-ansi-escapes = { workspace = true }
 tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync", "process"] }
@@ -31,6 +32,7 @@ uuid = { workspace = true, features = ["v4", "serde"] }
 walkdir = { workspace = true }
 matrix-sdk = { workspace = true }
 pulldown-cmark = { workspace = true }
 regex = { workspace = true }
 # Force bundled SQLite so static musl builds don't need a system libsqlite3
 libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
@@ -102,13 +102,29 @@ fn run_command_with_timeout(
    args: &[&str],
    dir: &Path,
 ) -> Result<(bool, String), String> {
-    let mut child = Command::new(program)
+    // On Linux, execve can return ETXTBSY (26) briefly after a file is written
-        .args(args)
+    // before the kernel releases its "write open" state. Retry once after a
    // short pause to handle this race condition.
    let mut last_err = None;
    let mut cmd = Command::new(&program);
    cmd.args(args)
        .current_dir(dir)
        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped());
-        .spawn()
+    let mut child = loop {
-        .map_err(|e| format!("Failed to spawn command: {e}"))?;
+        match cmd.spawn() {
            Ok(c) => break c,
            Err(e) if e.raw_os_error() == Some(26) => {
                // ETXTBSY — wait briefly and retry once
                if last_err.is_some() {
                    return Err(format!("Failed to spawn command: {e}"));
                }
                last_err = Some(e);
                std::thread::sleep(std::time::Duration::from_millis(50));
            }
            Err(e) => return Err(format!("Failed to spawn command: {e}")),
        }
    };
    // Drain stdout/stderr in background threads so the pipe buffers never fill.
    let stdout_handle = child.stdout.take().map(|r| {
@@ -188,6 +188,8 @@ pub struct AgentInfo {
    pub completion: Option<CompletionReport>,
    /// UUID identifying the persistent log file for this session.
    pub log_session_id: Option<String>,
    /// True when a rate-limit throttle warning was received for this agent.
    pub throttled: bool,
 }
 #[cfg(test)]
@@ -0,0 +1,482 @@
 //! Auto-assign: scan pipeline stages and dispatch free agents to unassigned stories.
 use crate::config::ProjectConfig;
 use crate::slog;
 use crate::slog_error;
 use crate::slog_warn;
 use crate::worktree;
 use std::path::Path;
 use super::super::super::PipelineStage;
 use super::super::AgentPool;
 use super::scan::{
    count_active_agents_for_stage, find_free_agent_for_stage, is_agent_free,
    is_story_assigned_for_stage, scan_stage_items,
 };
 use super::story_checks::{
    has_merge_failure, has_review_hold, is_story_blocked, read_story_front_matter_agent,
 };
 impl AgentPool {
    /// Scan the active pipeline stages and start free agents on unassigned stories.
    ///
    /// Loads the project configuration from `project_root`; if loading fails a
    /// warning is logged and nothing is assigned. The stage directories
    /// `2_current/` (coder), `3_qa/` (QA) and `4_merge/` (mergemaster) are then
    /// processed in that order.
    ///
    /// A story is skipped when it has a review hold, is blocked, or (in the
    /// merge stage) already has a recorded merge failure. A merge-stage story
    /// whose worktree has no committed work gets a merge failure written, is
    /// marked blocked, and a `StoryBlocked` watcher event is sent.
    ///
    /// For the remaining stories an agent is chosen as follows:
    /// - If the story names a preferred agent whose configured stage matches
    ///   the pipeline stage, only that agent is used; when it is busy the
    ///   story waits.
    /// - If the preferred agent's stage does not match (or the agent is not
    ///   found in the config), any free agent for the stage is used instead.
    /// - Without a preference, any free agent for the stage is used.
    ///
    /// Scanning of a stage stops early when the coder pool has reached
    /// `max_coders`, when no free agent is available, or when the agent lock
    /// cannot be acquired. Errors returned by `start_agent` are logged and do
    /// not abort the pass.
    pub async fn auto_assign_available_work(&self, project_root: &Path) {
        let config = match ProjectConfig::load(project_root) {
            Ok(c) => c,
            Err(e) => {
                slog_warn!("[auto-assign] Failed to load project config: {e}");
                return;
            }
        };
        // Process each active pipeline stage in order.
        let stages: [(&str, PipelineStage); 3] = [
            ("2_current", PipelineStage::Coder),
            ("3_qa", PipelineStage::Qa),
            ("4_merge", PipelineStage::Mergemaster),
        ];
        for (stage_dir, stage) in &stages {
            let items = scan_stage_items(project_root, stage_dir);
            if items.is_empty() {
                continue;
            }
            for story_id in &items {
                // Items marked with review_hold (e.g. spikes after QA passes) stay
                // in their current stage for human review — don't auto-assign agents.
                if has_review_hold(project_root, stage_dir, story_id) {
                    continue;
                }
                // Skip blocked stories (retry limit exceeded).
                if is_story_blocked(project_root, stage_dir, story_id) {
                    continue;
                }
                // Skip stories in 4_merge/ that already have a reported merge failure.
                // These need human intervention — auto-assigning a new mergemaster
                // would just waste tokens on the same broken merge.
                if *stage == PipelineStage::Mergemaster
                    && has_merge_failure(project_root, stage_dir, story_id)
                {
                    continue;
                }
                // AC6: Detect empty-diff stories in 4_merge/ before starting a
                // mergemaster. If the worktree has no commits on the feature branch,
                // write a merge_failure and block the story immediately.
                // Stories without a worktree fall through to normal assignment.
                if *stage == PipelineStage::Mergemaster
                    && let Some(wt_path) = worktree::find_worktree_path(project_root, story_id)
                    && !crate::agents::gates::worktree_has_committed_work(&wt_path)
                {
                    slog_warn!(
                        "[auto-assign] Story '{story_id}' in 4_merge/ has no commits \
                         on feature branch. Writing merge_failure and blocking."
                    );
                    let story_path = project_root
                        .join(".storkit/work")
                        .join(stage_dir)
                        .join(format!("{story_id}.md"));
                    let empty_diff_reason = "Feature branch has no code changes — the coder agent \
                         did not produce any commits.";
                    // Best-effort: errors from the metadata writes and from the
                    // watcher notification are deliberately discarded.
                    let _ = crate::io::story_metadata::write_merge_failure(
                        &story_path,
                        empty_diff_reason,
                    );
                    let _ = crate::io::story_metadata::write_blocked(&story_path);
                    let _ = self.watcher_tx.send(crate::io::watcher::WatcherEvent::StoryBlocked {
                        story_id: story_id.to_string(),
                        reason: empty_diff_reason.to_string(),
                    });
                    continue;
                }
                // Optional agent preference for this story, as returned by
                // `read_story_front_matter_agent`.
                let preferred_agent =
                    read_story_front_matter_agent(project_root, stage_dir, story_id);
                // Check max_coders limit for the Coder stage before agent selection.
                // If the pool is full, all remaining items in this stage wait.
                if *stage == PipelineStage::Coder
                    && let Some(max) = config.max_coders
                {
                    // This guard is dropped at the end of the `if` block, before
                    // the selection block below takes the lock again.
                    let agents_lock = match self.agents.lock() {
                        Ok(a) => a,
                        Err(e) => {
                            slog_error!("[auto-assign] Failed to lock agents: {e}");
                            break;
                        }
                    };
                    let active = count_active_agents_for_stage(&config, &agents_lock, stage);
                    if active >= max {
                        slog!(
                            "[auto-assign] Coder pool full ({active}/{max}); remaining items in {stage_dir}/ will wait."
                        );
                        break;
                    }
                }
                // Outcome: (already_assigned, chosen_agent, preferred_busy, stage_mismatch)
                // preferred_busy=true means the story has a specific agent requested but it is
                // currently occupied — the story should wait rather than fall back.
                // stage_mismatch=true means the preferred agent's stage doesn't match the
                // pipeline stage, so we fell back to a generic stage agent.
                //
                // Re-acquire the lock on each iteration to see state changes
                // from previous start_agent calls in the same pass. The guard is
                // scoped to this block, so it is released before `start_agent`
                // is awaited below.
                let (already_assigned, free_agent, preferred_busy, stage_mismatch) = {
                    let agents = match self.agents.lock() {
                        Ok(a) => a,
                        Err(e) => {
                            slog_error!("[auto-assign] Failed to lock agents: {e}");
                            break;
                        }
                    };
                    let assigned = is_story_assigned_for_stage(&config, &agents, story_id, stage);
                    if assigned {
                        (true, None, false, false)
                    } else if let Some(ref pref) = preferred_agent {
                        // Story has a front-matter agent preference.
                        // Verify the preferred agent's stage matches the current
                        // pipeline stage — a coder shouldn't be assigned to QA.
                        // A preferred agent missing from the config counts as a mismatch.
                        let pref_stage_matches = config
                            .find_agent(pref)
                            .map(|cfg| super::super::super::agent_config_stage(cfg) == *stage)
                            .unwrap_or(false);
                        if !pref_stage_matches {
                            // Stage mismatch — fall back to any free agent for this stage.
                            let free = find_free_agent_for_stage(&config, &agents, stage)
                                .map(|s| s.to_string());
                            (false, free, false, true)
                        } else if is_agent_free(&agents, pref) {
                            (false, Some(pref.clone()), false, false)
                        } else {
                            (false, None, true, false)
                        }
                    } else {
                        let free = find_free_agent_for_stage(&config, &agents, stage)
                            .map(|s| s.to_string());
                        (false, free, false, false)
                    }
                };
                if already_assigned {
                    // Story already has an active agent — skip silently.
                    continue;
                }
                if preferred_busy {
                    // The story requests a specific agent that is currently busy.
                    // Do not fall back to a different agent; let this story wait.
                    slog!(
                        "[auto-assign] Preferred agent '{}' busy for '{story_id}'; story will wait.",
                        preferred_agent.as_deref().unwrap_or("?")
                    );
                    continue;
                }
                if stage_mismatch {
                    // Informational only: assignment continues with the fallback
                    // agent (if any) chosen above.
                    slog!(
                        "[auto-assign] Preferred agent '{}' stage mismatch for '{story_id}' in {stage_dir}/; falling back to stage-appropriate agent.",
                        preferred_agent.as_deref().unwrap_or("?")
                    );
                }
                match free_agent {
                    Some(agent_name) => {
                        slog!(
                            "[auto-assign] Assigning '{agent_name}' to '{story_id}' in {stage_dir}/"
                        );
                        // A start failure is logged and the scan moves on to the
                        // next story.
                        if let Err(e) = self
                            .start_agent(project_root, story_id, Some(&agent_name), None)
                            .await
                        {
                            slog!(
                                "[auto-assign] Failed to start '{agent_name}' for '{story_id}': {e}"
                            );
                        }
                    }
                    None => {
                        // No free agents of this type — stop scanning this stage.
                        slog!(
                            "[auto-assign] All {:?} agents busy; remaining items in {stage_dir}/ will wait.",
                            stage
                        );
                        break;
                    }
                }
            }
        }
    }
 }
 // ── Tests ──────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::super::super::AgentPool;
    use crate::agents::AgentStatus;
    use crate::io::watcher::WatcherEvent;
    use tokio::sync::broadcast;
    /// Story 203: a story sitting in 2_current/ without an active agent must
    /// be picked up by auto_assign_available_work, which starts an agent for it.
    #[tokio::test]
    async fn auto_assign_picks_up_story_queued_in_current() {
        let workspace = tempfile::tempdir().unwrap();
        let storkit_dir = workspace.path().join(".storkit");
        let current_dir = storkit_dir.join("work/2_current");
        std::fs::create_dir_all(&current_dir).unwrap();
        // One coder-stage agent, free because nothing is running yet.
        let project_toml = "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n";
        std::fs::write(storkit_dir.join("project.toml"), project_toml).unwrap();
        // The story file in 2_current/ simulates the "queued" state.
        std::fs::write(current_dir.join("story-3.md"), "---\nname: Story 3\n---\n").unwrap();
        let pool = AgentPool::new_test(3001);
        // start_agent will try to create a worktree, which fails without a git
        // repo. That is acceptable: the test only checks that the agent was
        // registered (Pending or Running) before the background task fails.
        pool.auto_assign_available_work(workspace.path()).await;
        let registry = pool.agents.lock().unwrap();
        let coder_started = registry
            .values()
            .filter(|entry| entry.agent_name == "coder-1")
            .any(|entry| matches!(entry.status, AgentStatus::Pending | AgentStatus::Running));
        assert!(
            coder_started,
            "auto_assign should have started coder-1 for story-3, but pool is empty"
        );
    }
    /// Story 265: auto_assign_available_work must skip spikes in 3_qa/ that
    /// have review_hold: true set in their front matter.
    ///
    /// The project config declares a QA-stage agent with the same
    /// `[[agent]]` / `stage` schema used by the other tests in this module.
    /// With a free QA agent configured, the final "no agents" assertion can
    /// only hold because the review hold caused the story to be skipped.
    #[tokio::test]
    async fn auto_assign_skips_spikes_with_review_hold() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Create project.toml with a QA agent.
        let sk = root.join(".storkit");
        std::fs::create_dir_all(&sk).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            "[[agent]]\nname = \"qa\"\nstage = \"qa\"\n",
        )
        .unwrap();
        // Put a spike in 3_qa/ with review_hold: true.
        let qa_dir = root.join(".storkit/work/3_qa");
        std::fs::create_dir_all(&qa_dir).unwrap();
        std::fs::write(
            qa_dir.join("20_spike_test.md"),
            "---\nname: Test Spike\nreview_hold: true\n---\n# Spike\n",
        )
        .unwrap();
        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(4);
        let pool = AgentPool::new(3001, watcher_tx);
        pool.auto_assign_available_work(root).await;
        // No agent should have been started for the spike.
        let agents = pool.agents.lock().unwrap();
        assert!(
            agents.is_empty(),
            "No agents should be assigned to a spike with review_hold"
        );
    }
    // ── Story 279: auto-assign respects agent stage from front matter ──────────
    /// A story in 3_qa/ whose front matter says `agent: coder-1` must not get
    /// coder-1, because coder-1 is a coder-stage agent. Auto-assign is expected
    /// to fall back to a free QA-stage agent instead.
    #[tokio::test]
    async fn auto_assign_ignores_coder_preference_when_story_is_in_qa_stage() {
        let workspace = tempfile::tempdir().unwrap();
        let storkit_dir = workspace.path().join(".storkit");
        let qa_stage_dir = storkit_dir.join("work/3_qa");
        std::fs::create_dir_all(&qa_stage_dir).unwrap();
        // One coder-stage agent and one QA-stage agent.
        let project_toml = "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
             [[agent]]\nname = \"qa-1\"\nstage = \"qa\"\n";
        std::fs::write(storkit_dir.join("project.toml"), project_toml).unwrap();
        // The QA-stage story asks for the coder-stage agent.
        let story_text = "---\nname: QA Story\nagent: coder-1\n---\n";
        std::fs::write(qa_stage_dir.join("story-qa1.md"), story_text).unwrap();
        let pool = AgentPool::new_test(3001);
        pool.auto_assign_available_work(workspace.path()).await;
        let registry = pool.agents.lock().unwrap();
        // True when an agent with the given name is Pending or Running.
        let is_active = |name: &str| {
            registry.values().any(|entry| {
                entry.agent_name == name
                    && matches!(entry.status, AgentStatus::Pending | AgentStatus::Running)
            })
        };
        // Wrong stage for 3_qa/: coder-1 must stay unassigned.
        assert!(
            !is_active("coder-1"),
            "coder-1 should not be assigned to a QA-stage story"
        );
        // The QA-stage agent takes the story instead.
        assert!(
            is_active("qa-1"),
            "qa-1 should be assigned as fallback for the QA-stage story"
        );
    }
    /// A story in 2_current/ whose front matter says `agent: coder-1` must be
    /// given coder-1 when that agent is a coder-stage agent; auto-assign must
    /// not hand the story to another coder.
    #[tokio::test]
    async fn auto_assign_respects_coder_preference_when_story_is_in_current_stage() {
        let workspace = tempfile::tempdir().unwrap();
        let storkit_dir = workspace.path().join(".storkit");
        let stage_dir = storkit_dir.join("work/2_current");
        std::fs::create_dir_all(&stage_dir).unwrap();
        // Two coder-stage agents: the preferred one and a would-be fallback.
        let project_toml = "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
             [[agent]]\nname = \"coder-2\"\nstage = \"coder\"\n";
        std::fs::write(storkit_dir.join("project.toml"), project_toml).unwrap();
        // The story explicitly requests coder-1.
        let story_text = "---\nname: Coder Story\nagent: coder-1\n---\n";
        std::fs::write(stage_dir.join("story-pref.md"), story_text).unwrap();
        let pool = AgentPool::new_test(3001);
        pool.auto_assign_available_work(workspace.path()).await;
        let registry = pool.agents.lock().unwrap();
        // True when an agent with the given name is Pending or Running.
        let is_active = |name: &str| {
            registry.values().any(|entry| {
                entry.agent_name == name
                    && matches!(entry.status, AgentStatus::Pending | AgentStatus::Running)
            })
        };
        // The preferred agent matches the stage, so it gets the story.
        assert!(
            is_active("coder-1"),
            "coder-1 should be assigned when it matches the stage and is preferred"
        );
        // The other coder was not requested and must remain idle.
        assert!(
            !is_active("coder-2"),
            "coder-2 should not be assigned when coder-1 is explicitly preferred"
        );
    }
    /// If the preferred agent belongs to the wrong stage and no agent of the
    /// right stage exists, auto-assign must start nothing for the story, and
    /// must do so without panicking or erroring.
    #[tokio::test]
    async fn auto_assign_stage_mismatch_with_no_fallback_starts_no_agent() {
        let workspace = tempfile::tempdir().unwrap();
        let storkit_dir = workspace.path().join(".storkit");
        let qa_stage_dir = storkit_dir.join("work/3_qa");
        std::fs::create_dir_all(&qa_stage_dir).unwrap();
        // The config knows a single coder agent; there is no QA agent at all.
        let project_toml = "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n";
        std::fs::write(storkit_dir.join("project.toml"), project_toml).unwrap();
        // The 3_qa/ story asks for coder-1, which is the wrong stage.
        let story_text = "---\nname: QA Story No Agent\nagent: coder-1\n---\n";
        std::fs::write(qa_stage_dir.join("story-noqa.md"), story_text).unwrap();
        let pool = AgentPool::new_test(3001);
        // The call itself must complete without panicking.
        pool.auto_assign_available_work(workspace.path()).await;
        let registry = pool.agents.lock().unwrap();
        assert!(
            registry.is_empty(),
            "No agent should be started when no stage-appropriate agent is available"
        );
    }
    /// Two concurrent auto_assign_available_work calls must not assign the same
    /// agent to two stories simultaneously.  After both complete, at most one
    /// Pending/Running entry must exist per agent name.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn toctou_concurrent_auto_assign_no_duplicate_agent_assignments() {
        use std::fs;
        use std::sync::Arc;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let sk_dir = root.join(".storkit");
        // Two stories waiting in 2_current, one coder agent.
        fs::create_dir_all(sk_dir.join("work/2_current")).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\n",
        )
        .unwrap();
        fs::write(
            sk_dir.join("work/2_current/86_story_foo.md"),
            "---\nname: Foo\n---\n",
        )
        .unwrap();
        fs::write(
            sk_dir.join("work/2_current/130_story_bar.md"),
            "---\nname: Bar\n---\n",
        )
        .unwrap();
        let pool = Arc::new(AgentPool::new_test(3099));
        // Run two concurrent auto_assign calls.
        let pool1 = pool.clone();
        let root1 = root.clone();
        let t1 = tokio::spawn(async move { pool1.auto_assign_available_work(&root1).await });
        let pool2 = pool.clone();
        let root2 = root.clone();
        let t2 = tokio::spawn(async move { pool2.auto_assign_available_work(&root2).await });
        let _ = tokio::join!(t1, t2);
        // At most one Pending/Running entry should exist for coder-1.
        let agents = pool.agents.lock().unwrap();
        let active_coder_count = agents
            .values()
            .filter(|a| {
                a.agent_name == "coder-1"
                    && matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
            })
            .count();
        assert!(
            active_coder_count <= 1,
            "coder-1 must not be assigned to more than one story simultaneously; \
             found {active_coder_count} active entries"
        );
    }
 }
@@ -0,0 +1,12 @@
 //! Auto-assign submodules: wires focused sub-files and re-exports public items.
 #[allow(clippy::module_inception)]
 mod auto_assign;
 mod reconcile;
 mod scan;
 mod story_checks;
 mod watchdog;
 // Re-export items that were pub(super) in the original monolithic auto_assign.rs
 // so that pool::lifecycle and pool::pipeline continue to access them unchanged.
 pub(super) use scan::{find_free_agent_for_stage, is_agent_free};
@@ -0,0 +1,527 @@
 //! Startup reconciliation: detect stories with committed work and advance the pipeline.
 use std::path::Path;
 use tokio::sync::broadcast;
 use crate::worktree;
 use super::super::super::ReconciliationEvent;
 use super::super::{AgentPool, find_active_story_stage};
 impl AgentPool {
    /// Reconcile stories whose agent work was committed while the server was offline.
    ///
    /// On server startup the in-memory agent pool is empty, so any story that an agent
    /// completed during a previous session is stuck: the worktree has committed work but
    /// the pipeline never advanced.  This method detects those stories, re-runs the
    /// acceptance gates, and advances the pipeline stage so that `auto_assign_available_work`
    /// (called immediately after) picks up the right next-stage agents.
    ///
    /// Algorithm:
    /// 1. List worktrees via `worktree::list_worktrees(project_root)`.  If listing fails,
    ///    a single `"done"` event carrying the error text is sent and the method returns.
    /// 2. For each worktree whose story is in an active stage other than `4_merge/`, ask
    ///    `gates::worktree_has_committed_work` whether the worktree holds committed work.
    ///    Stories without committed work are skipped.
    /// 3. Run `check_uncommitted_changes` followed by `run_acceptance_gates` on the worktree.
    ///    - On gate failure, gate error, or a panicked gate task: leave the story where it
    ///      is so `auto_assign_available_work` can start a fresh agent to retry.
    ///    - On pass + `2_current/`: resolve the QA mode (spikes are always `Human`;
    ///      otherwise `resolve_qa_mode` with the project's default) and then
    ///        - `Server`: move the story straight to `4_merge/`;
    ///        - `Agent`:  move the story to `3_qa/`;
    ///        - `Human`:  move the story to `3_qa/` and write a review hold on it.
    ///    - On pass + `3_qa/`: run the coverage gate.  If it passes, either write a review
    ///      hold (spikes, or QA mode `Human`) or move the story to `4_merge/`.  If it
    ///      fails, leave the story in `3_qa/`.
    /// 4. Stories in `4_merge/` are left for `auto_assign_available_work` to handle via a
    ///    fresh mergemaster (squash-merge must be re-executed by the mergemaster agent).
    ///
    /// Progress is published on `progress_tx` as `ReconciliationEvent`s with statuses
    /// `"checking"`, `"skipped"`, `"gates_running"`, `"failed"`, `"advanced"` and
    /// `"review_hold"`.  The final event always has status `"done"` and an empty
    /// `story_id`.  Send errors (e.g. no subscribers) are deliberately ignored.
    pub async fn reconcile_on_startup(
        &self,
        project_root: &Path,
        progress_tx: &broadcast::Sender<ReconciliationEvent>,
    ) {
        let worktrees = match worktree::list_worktrees(project_root) {
            Ok(wt) => wt,
            Err(e) => {
                eprintln!("[startup:reconcile] Failed to list worktrees: {e}");
                // Still emit the terminal "done" event so listeners are not left waiting.
                let _ = progress_tx.send(ReconciliationEvent {
                    story_id: String::new(),
                    status: "done".to_string(),
                    message: format!("Reconciliation failed: {e}"),
                });
                return;
            }
        };
        for wt_entry in &worktrees {
            let story_id = &wt_entry.story_id;
            let wt_path = wt_entry.path.clone();
            // Determine which active stage the story is in.
            let stage_dir = match find_active_story_stage(project_root, story_id) {
                Some(s) => s,
                None => continue, // Not in any active stage (backlog/archived or unknown).
            };
            // 4_merge/ is left for auto_assign to handle with a fresh mergemaster.
            if stage_dir == "4_merge" {
                continue;
            }
            let _ = progress_tx.send(ReconciliationEvent {
                story_id: story_id.clone(),
                status: "checking".to_string(),
                message: format!("Checking for committed work in {stage_dir}/"),
            });
            // Check whether the worktree has commits ahead of the base branch.
            // The check runs on the blocking pool; the path is cloned because the
            // closure must own its captures.
            let wt_path_for_check = wt_path.clone();
            let has_work = tokio::task::spawn_blocking(move || {
                crate::agents::gates::worktree_has_committed_work(&wt_path_for_check)
            })
            .await
            // A join error (the blocking task panicked or was cancelled) is treated the
            // same as "no committed work": the story is skipped rather than advanced.
            .unwrap_or(false);
            if !has_work {
                eprintln!(
                    "[startup:reconcile] No committed work for '{story_id}' in {stage_dir}/; skipping."
                );
                let _ = progress_tx.send(ReconciliationEvent {
                    story_id: story_id.clone(),
                    status: "skipped".to_string(),
                    message: "No committed work found; skipping.".to_string(),
                });
                continue;
            }
            eprintln!(
                "[startup:reconcile] Found committed work for '{story_id}' in {stage_dir}/. Running acceptance gates."
            );
            let _ = progress_tx.send(ReconciliationEvent {
                story_id: story_id.clone(),
                status: "gates_running".to_string(),
                message: "Running acceptance gates…".to_string(),
            });
            // Run acceptance gates on the worktree.
            // `?` short-circuits: if the uncommitted-changes check errors, the
            // acceptance gates are not run at all.
            let wt_path_for_gates = wt_path.clone();
            let gates_result = tokio::task::spawn_blocking(move || {
                crate::agents::gates::check_uncommitted_changes(&wt_path_for_gates)?;
                crate::agents::gates::run_acceptance_gates(&wt_path_for_gates)
            })
            .await;
            // Outer `Result` is the task join result; inner `Result` is the gate outcome.
            let (gates_passed, gate_output) = match gates_result {
                Ok(Ok(pair)) => pair,
                Ok(Err(e)) => {
                    eprintln!("[startup:reconcile] Gate check error for '{story_id}': {e}");
                    let _ = progress_tx.send(ReconciliationEvent {
                        story_id: story_id.clone(),
                        status: "failed".to_string(),
                        message: format!("Gate error: {e}"),
                    });
                    continue;
                }
                Err(e) => {
                    eprintln!("[startup:reconcile] Gate check task panicked for '{story_id}': {e}");
                    let _ = progress_tx.send(ReconciliationEvent {
                        story_id: story_id.clone(),
                        status: "failed".to_string(),
                        message: format!("Gate task panicked: {e}"),
                    });
                    continue;
                }
            };
            if !gates_passed {
                // The full gate output goes to stderr only; the event carries a short summary.
                eprintln!(
                    "[startup:reconcile] Gates failed for '{story_id}': {gate_output}\n\
                     Leaving in {stage_dir}/ for auto-assign to restart the agent."
                );
                let _ = progress_tx.send(ReconciliationEvent {
                    story_id: story_id.clone(),
                    status: "failed".to_string(),
                    message: "Gates failed; will be retried by auto-assign.".to_string(),
                });
                continue;
            }
            eprintln!("[startup:reconcile] Gates passed for '{story_id}' (stage: {stage_dir}/).");
            // Only `2_current` and `3_qa` are advanced below; any other stage value falls
            // through without moving the story or emitting a further event.
            if stage_dir == "2_current" {
                // Coder stage — determine qa mode to decide next step.
                let qa_mode = {
                    let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
                    if item_type == "spike" {
                        // Spikes always go to human review, regardless of configuration.
                        crate::io::story_metadata::QaMode::Human
                    } else {
                        // A config that fails to load falls back to `ProjectConfig::default()`.
                        let default_qa = crate::config::ProjectConfig::load(project_root)
                            .unwrap_or_default()
                            .default_qa_mode();
                        let story_path = project_root
                            .join(".storkit/work/2_current")
                            .join(format!("{story_id}.md"));
                        crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa)
                    }
                };
                match qa_mode {
                    // Server mode: no QA stage; the story goes straight to merge.
                    crate::io::story_metadata::QaMode::Server => {
                        if let Err(e) =
                            crate::agents::move_story_to_merge(project_root, story_id)
                        {
                            eprintln!("[startup:reconcile] Failed to move '{story_id}' to 4_merge/: {e}");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "failed".to_string(),
                                message: format!("Failed to advance to merge: {e}"),
                            });
                        } else {
                            eprintln!("[startup:reconcile] Moved '{story_id}' → 4_merge/ (qa: server).");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "advanced".to_string(),
                                message: "Gates passed — moved to merge (qa: server).".to_string(),
                            });
                        }
                    }
                    // Agent mode: hand the story to the QA stage.
                    crate::io::story_metadata::QaMode::Agent => {
                        if let Err(e) =
                            crate::agents::move_story_to_qa(project_root, story_id)
                        {
                            eprintln!("[startup:reconcile] Failed to move '{story_id}' to 3_qa/: {e}");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "failed".to_string(),
                                message: format!("Failed to advance to QA: {e}"),
                            });
                        } else {
                            eprintln!("[startup:reconcile] Moved '{story_id}' → 3_qa/.");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "advanced".to_string(),
                                message: "Gates passed — moved to QA.".to_string(),
                            });
                        }
                    }
                    // Human mode: move to 3_qa/ and mark the story as held for review.
                    crate::io::story_metadata::QaMode::Human => {
                        if let Err(e) =
                            crate::agents::move_story_to_qa(project_root, story_id)
                        {
                            eprintln!("[startup:reconcile] Failed to move '{story_id}' to 3_qa/: {e}");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "failed".to_string(),
                                message: format!("Failed to advance to QA: {e}"),
                            });
                        } else {
                            // The story file now lives under 3_qa/, so the hold is written there.
                            let story_path = project_root
                                .join(".storkit/work/3_qa")
                                .join(format!("{story_id}.md"));
                            // A failed hold write is only logged; the "review_hold" event
                            // below is still emitted.
                            if let Err(e) =
                                crate::io::story_metadata::write_review_hold(&story_path)
                            {
                                eprintln!(
                                    "[startup:reconcile] Failed to set review_hold on '{story_id}': {e}"
                                );
                            }
                            eprintln!("[startup:reconcile] Moved '{story_id}' → 3_qa/ (qa: human — holding for review).");
                            let _ = progress_tx.send(ReconciliationEvent {
                                story_id: story_id.clone(),
                                status: "review_hold".to_string(),
                                message: "Gates passed — holding for human review.".to_string(),
                            });
                        }
                    }
                }
            } else if stage_dir == "3_qa" {
                // QA stage → run coverage gate before advancing to merge.
                let wt_path_for_cov = wt_path.clone();
                let coverage_result = tokio::task::spawn_blocking(move || {
                    crate::agents::gates::run_coverage_gate(&wt_path_for_cov)
                })
                .await;
                // Same nesting as the acceptance gates: join result outside, gate result inside.
                let (coverage_passed, coverage_output) = match coverage_result {
                    Ok(Ok(pair)) => pair,
                    Ok(Err(e)) => {
                        eprintln!("[startup:reconcile] Coverage gate error for '{story_id}': {e}");
                        let _ = progress_tx.send(ReconciliationEvent {
                            story_id: story_id.clone(),
                            status: "failed".to_string(),
                            message: format!("Coverage gate error: {e}"),
                        });
                        continue;
                    }
                    Err(e) => {
                        eprintln!(
                            "[startup:reconcile] Coverage gate panicked for '{story_id}': {e}"
                        );
                        let _ = progress_tx.send(ReconciliationEvent {
                            story_id: story_id.clone(),
                            status: "failed".to_string(),
                            message: format!("Coverage gate panicked: {e}"),
                        });
                        continue;
                    }
                };
                if coverage_passed {
                    // Check whether this item needs human review before merging.
                    // Spikes always do; other items do when their resolved QA mode is Human.
                    let needs_human_review = {
                        let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
                        if item_type == "spike" {
                            true
                        } else {
                            let story_path = project_root
                                .join(".storkit/work/3_qa")
                                .join(format!("{story_id}.md"));
                            let default_qa = crate::config::ProjectConfig::load(project_root)
                                .unwrap_or_default()
                                .default_qa_mode();
                            matches!(
                                crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa),
                                crate::io::story_metadata::QaMode::Human
                            )
                        }
                    };
                    if needs_human_review {
                        // The story stays in 3_qa/; only the review hold is written.
                        let story_path = project_root
                            .join(".storkit/work/3_qa")
                            .join(format!("{story_id}.md"));
                        // As above, a failed hold write is logged but does not change the event.
                        if let Err(e) =
                            crate::io::story_metadata::write_review_hold(&story_path)
                        {
                            eprintln!(
                                "[startup:reconcile] Failed to set review_hold on '{story_id}': {e}"
                            );
                        }
                        eprintln!(
                            "[startup:reconcile] '{story_id}' passed QA — holding for human review."
                        );
                        let _ = progress_tx.send(ReconciliationEvent {
                            story_id: story_id.clone(),
                            status: "review_hold".to_string(),
                            message: "Passed QA — waiting for human review.".to_string(),
                        });
                    } else if let Err(e) =
                        crate::agents::move_story_to_merge(project_root, story_id)
                    {
                        eprintln!(
                            "[startup:reconcile] Failed to move '{story_id}' to 4_merge/: {e}"
                        );
                        let _ = progress_tx.send(ReconciliationEvent {
                            story_id: story_id.clone(),
                            status: "failed".to_string(),
                            message: format!("Failed to advance to merge: {e}"),
                        });
                    } else {
                        eprintln!("[startup:reconcile] Moved '{story_id}' → 4_merge/.");
                        let _ = progress_tx.send(ReconciliationEvent {
                            story_id: story_id.clone(),
                            status: "advanced".to_string(),
                            message: "Gates passed — moved to merge.".to_string(),
                        });
                    }
                } else {
                    eprintln!(
                        "[startup:reconcile] Coverage gate failed for '{story_id}': {coverage_output}\n\
                         Leaving in 3_qa/ for auto-assign to restart the QA agent."
                    );
                    let _ = progress_tx.send(ReconciliationEvent {
                        story_id: story_id.clone(),
                        status: "failed".to_string(),
                        message: "Coverage gate failed; will be retried.".to_string(),
                    });
                }
            }
        }
        // Signal that reconciliation is complete.
        let _ = progress_tx.send(ReconciliationEvent {
            story_id: String::new(),
            status: "done".to_string(),
            message: "Startup reconciliation complete.".to_string(),
        });
    }
 }
 // ── Tests ──────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use std::process::Command;
    use tokio::sync::broadcast;
    use super::super::super::AgentPool;
    use crate::agents::ReconciliationEvent;

    /// Run `git <args>` inside `dir`.
    ///
    /// Panics if git cannot be spawned or exits with a non-zero status, so that a broken
    /// fixture fails loudly instead of letting a test pass against a half-built repo.
    fn git(dir: &std::path::Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap();
        assert!(
            output.status.success(),
            "test setup: `git {}` failed in {}: {}",
            args.join(" "),
            dir.display(),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Initialise a git repository in `repo` with a local identity and one initial commit.
    fn init_git_repo(repo: &std::path::Path) {
        git(repo, &["init"]);
        git(repo, &["config", "user.email", "test@test.com"]);
        git(repo, &["config", "user.name", "Test"]);
        // Create an initial commit so the default branch exists.
        std::fs::write(repo.join("README.md"), "# test\n").unwrap();
        git(repo, &["add", "."]);
        git(repo, &["commit", "-m", "initial"]);
    }

    /// With no worktrees present, reconciliation must complete without panicking.
    #[tokio::test]
    async fn reconcile_on_startup_noop_when_no_worktrees() {
        let tmp = tempfile::tempdir().unwrap();
        let pool = AgentPool::new_test(3001);
        let (tx, _rx) = broadcast::channel(16);
        // Should not panic; no worktrees to reconcile.
        pool.reconcile_on_startup(tmp.path(), &tx).await;
    }

    /// Reconciliation must always finish by emitting a `"done"` event.
    #[tokio::test]
    async fn reconcile_on_startup_emits_done_event() {
        let tmp = tempfile::tempdir().unwrap();
        let pool = AgentPool::new_test(3001);
        let (tx, mut rx) = broadcast::channel::<ReconciliationEvent>(16);
        pool.reconcile_on_startup(tmp.path(), &tx).await;
        // Drain every buffered event; the last one must be "done".
        let statuses: Vec<String> = std::iter::from_fn(|| rx.try_recv().ok())
            .map(|evt| evt.status)
            .collect();
        assert_eq!(
            statuses.last().map(String::as_str),
            Some("done"),
            "reconcile_on_startup must emit 'done' as its final event; got: {statuses:?}"
        );
    }

    /// A worktree with no commits ahead of its base must leave the story untouched.
    #[tokio::test]
    async fn reconcile_on_startup_skips_story_without_committed_work() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Set up story in 2_current/.
        let current = root.join(".storkit/work/2_current");
        fs::create_dir_all(&current).unwrap();
        fs::write(current.join("60_story_test.md"), "test").unwrap();
        // Create a worktree directory that is a fresh git repo with no commits
        // ahead of its own base branch (simulates a worktree where no work was done).
        let wt_dir = root.join(".storkit/worktrees/60_story_test");
        fs::create_dir_all(&wt_dir).unwrap();
        init_git_repo(&wt_dir);
        let pool = AgentPool::new_test(3001);
        let (tx, _rx) = broadcast::channel(16);
        pool.reconcile_on_startup(root, &tx).await;
        // Story should still be in 2_current/ — nothing was reconciled.
        assert!(
            current.join("60_story_test.md").exists(),
            "story should stay in 2_current/ when worktree has no committed work"
        );
    }

    /// A worktree with committed work must reach the gate stage and leave the story in a
    /// consistent location afterwards.
    #[tokio::test]
    async fn reconcile_on_startup_runs_gates_on_worktree_with_committed_work() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Set up a git repo for the project root.
        init_git_repo(root);
        // Set up story in 2_current/ and commit it so the project root is clean.
        let current = root.join(".storkit/work/2_current");
        fs::create_dir_all(&current).unwrap();
        fs::write(current.join("61_story_test.md"), "test").unwrap();
        git(root, &["add", "."]);
        git(
            root,
            &[
                "-c",
                "user.email=test@test.com",
                "-c",
                "user.name=Test",
                "commit",
                "-m",
                "add story",
            ],
        );
        // Create a real git worktree for the story.
        let wt_dir = root.join(".storkit/worktrees/61_story_test");
        fs::create_dir_all(wt_dir.parent().unwrap()).unwrap();
        let wt_dir_arg = wt_dir.to_string_lossy();
        git(
            root,
            &[
                "worktree",
                "add",
                &*wt_dir_arg,
                "-b",
                "feature/story-61_story_test",
            ],
        );
        // Add a commit to the feature branch (simulates coder completing work).
        fs::write(wt_dir.join("implementation.txt"), "done").unwrap();
        git(&wt_dir, &["add", "."]);
        git(
            &wt_dir,
            &[
                "-c",
                "user.email=test@test.com",
                "-c",
                "user.name=Test",
                "commit",
                "-m",
                "implement story",
            ],
        );
        assert!(
            crate::agents::gates::worktree_has_committed_work(&wt_dir),
            "test setup: worktree should have committed work"
        );
        let pool = AgentPool::new_test(3001);
        let (tx, _rx) = broadcast::channel(16);
        pool.reconcile_on_startup(root, &tx).await;
        // In the test env, cargo clippy will fail (no Cargo.toml) so gates fail
        // and the story stays in 2_current/.  The important assertion is that
        // reconcile ran without panicking and the story is in a consistent state.
        let in_current = current.join("61_story_test.md").exists();
        let in_qa = root.join(".storkit/work/3_qa/61_story_test.md").exists();
        assert!(
            in_current || in_qa,
            "story should be in 2_current/ or 3_qa/ after reconciliation"
        );
    }
 }
@@ -0,0 +1,553 @@
 //! Scanning pipeline stages for work items and querying agent pool state.
 use crate::config::ProjectConfig;
 use std::collections::HashMap;
 use std::path::Path;
 use super::super::super::{AgentStatus, PipelineStage, agent_config_stage, pipeline_stage};
 use super::super::StoryAgent;
 /// Report whether `agent_name` is idle: no pool entry carrying that name is currently
 /// `Pending` or `Running`. An empty pool therefore reports every agent as free.
 pub(in crate::agents::pool) fn is_agent_free(
    agents: &HashMap<String, StoryAgent>,
    agent_name: &str,
 ) -> bool {
    agents.values().all(|entry| {
        let is_active = matches!(entry.status, AgentStatus::Pending | AgentStatus::Running);
        !(is_active && entry.agent_name == agent_name)
    })
 }
 pub(super) fn scan_stage_items(project_root: &Path, stage_dir: &str) -> Vec<String> {
    let dir = project_root.join(".storkit").join("work").join(stage_dir);
    if !dir.is_dir() {
        return Vec::new();
    }
    let mut items = Vec::new();
    if let Ok(entries) = std::fs::read_dir(&dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if path.extension().and_then(|e| e.to_str()) == Some("md")
                && let Some(stem) = path.file_stem().and_then(|s| s.to_str())
            {
                items.push(stem.to_string());
            }
        }
    }
    items.sort();
    items
 }
 /// Return `true` if `story_id` has any active (pending/running) agent matching `stage`.
 ///
 /// Uses the explicit `stage` config field when the agent is found in `config`;
 /// falls back to the legacy name-based heuristic for unlisted agents.
 pub(super) fn is_story_assigned_for_stage(
    config: &ProjectConfig,
    agents: &HashMap<String, StoryAgent>,
    story_id: &str,
    stage: &PipelineStage,
 ) -> bool {
    agents.iter().any(|(key, agent)| {
        // Composite key format: "{story_id}:{agent_name}"
        let key_story_id = key.rsplit_once(':').map(|(sid, _)| sid).unwrap_or(key);
        let agent_stage = config
            .find_agent(&agent.agent_name)
            .map(agent_config_stage)
            .unwrap_or_else(|| pipeline_stage(&agent.agent_name));
        key_story_id == story_id
            && agent_stage == *stage
            && matches!(agent.status, AgentStatus::Running | AgentStatus::Pending)
    })
 }
 /// Number of pool entries that are pending or running and belong to `stage`.
 ///
 /// Stage membership is taken from the agent's `config` entry when present, otherwise
 /// from the name-based `pipeline_stage` heuristic.
 pub(super) fn count_active_agents_for_stage(
    config: &ProjectConfig,
    agents: &HashMap<String, StoryAgent>,
    stage: &PipelineStage,
 ) -> usize {
    agents
        .values()
        .filter(|entry| {
            if !matches!(entry.status, AgentStatus::Running | AgentStatus::Pending) {
                return false;
            }
            match config.find_agent(&entry.agent_name) {
                Some(agent_cfg) => agent_config_stage(agent_cfg) == *stage,
                None => pipeline_stage(&entry.agent_name) == *stage,
            }
        })
        .count()
 }
 /// Find the first configured agent for `stage` that has no active (pending/running) assignment.
 /// Returns `None` if all agents for that stage are busy, none are configured,
 /// or the `max_coders` limit has been reached (for the Coder stage).
 ///
 /// For the Coder stage, when `default_coder_model` is set, only considers agents whose
 /// model matches the default. This ensures opus-class agents are reserved for explicit
 /// front-matter requests.
 pub(in crate::agents::pool) fn find_free_agent_for_stage<'a>(
    config: &'a ProjectConfig,
    agents: &HashMap<String, StoryAgent>,
    stage: &PipelineStage,
 ) -> Option<&'a str> {
    // Enforce max_coders limit for the Coder stage.
    if *stage == PipelineStage::Coder
        && let Some(max) = config.max_coders
    {
        let active = count_active_agents_for_stage(config, agents, stage);
        if active >= max {
            return None;
        }
    }
    for agent_config in &config.agent {
        if agent_config_stage(agent_config) != *stage {
            continue;
        }
        // When default_coder_model is set, only auto-assign coder agents whose
        // model matches. This keeps opus agents reserved for explicit requests.
        if *stage == PipelineStage::Coder
            && let Some(ref default_model) = config.default_coder_model
        {
            let agent_model = agent_config.model.as_deref().unwrap_or("");
            if agent_model != default_model {
                continue;
            }
        }
        let is_busy = agents.values().any(|a| {
            a.agent_name == agent_config.name
                && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
        });
        if !is_busy {
            return Some(&agent_config.name);
        }
    }
    None
 }
 // ── Tests ──────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::config::ProjectConfig;
    use std::sync::{Arc, Mutex};
    use tokio::sync::broadcast;
    use super::super::super::AgentPool;
    /// Parse `toml_str` into a `ProjectConfig`, panicking if the TOML is rejected.
    fn make_config(toml_str: &str) -> ProjectConfig {
        let parsed = ProjectConfig::parse(toml_str);
        parsed.unwrap()
    }
    /// Build a bare `StoryAgent` with the given name and status; every optional field is
    /// empty and the broadcast sender has no live receiver.
    fn make_test_story_agent(agent_name: &str, status: AgentStatus) -> StoryAgent {
        let (tx, _) = broadcast::channel(1);
        let event_log = Arc::new(Mutex::new(Vec::new()));
        StoryAgent {
            agent_name: agent_name.to_string(),
            status,
            tx,
            event_log,
            worktree_info: None,
            session_id: None,
            task_handle: None,
            completion: None,
            project_root: None,
            log_session_id: None,
            merge_failure_reported: false,
            throttled: false,
        }
    }
    /// A project root without the stage directory yields no items.
    #[test]
    fn scan_stage_items_returns_empty_for_missing_dir() {
        let tmp = tempfile::tempdir().unwrap();
        assert!(scan_stage_items(tmp.path(), "2_current").is_empty());
    }
    /// Only `.md` files are reported, as stems, in lexicographic order.
    #[test]
    fn scan_stage_items_returns_sorted_story_ids() {
        let tmp = tempfile::tempdir().unwrap();
        let stage_dir = tmp.path().join(".storkit").join("work").join("2_current");
        std::fs::create_dir_all(&stage_dir).unwrap();
        let fixtures = [
            ("42_story_foo.md", "---\nname: foo\n---"),
            ("10_story_bar.md", "---\nname: bar\n---"),
            ("5_story_baz.md", "---\nname: baz\n---"),
            // non-md file should be ignored
            ("README.txt", "ignore me"),
        ];
        for (file_name, contents) in fixtures {
            std::fs::write(stage_dir.join(file_name), contents).unwrap();
        }
        let items = scan_stage_items(tmp.path(), "2_current");
        assert_eq!(items, vec!["10_story_bar", "42_story_foo", "5_story_baz"]);
    }
    /// A running coder counts as an assignment only for its own story and the Coder stage.
    #[test]
    fn is_story_assigned_returns_true_for_running_coder() {
        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running);
        let config = ProjectConfig::default();
        let agents = pool.agents.lock().unwrap();
        let assigned = |story_id: &str, stage: PipelineStage| {
            is_story_assigned_for_stage(&config, &agents, story_id, &stage)
        };
        assert!(assigned("42_story_foo", PipelineStage::Coder));
        // Same story but wrong stage — should be false
        assert!(!assigned("42_story_foo", PipelineStage::Qa));
        // Different story — should be false
        assert!(!assigned("99_story_other", PipelineStage::Coder));
    }
    /// A completed agent no longer counts as an assignment for its story.
    #[test]
    fn is_story_assigned_returns_false_for_completed_agent() {
        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Completed);
        let config = ProjectConfig::default();
        let agents = pool.agents.lock().unwrap();
        // Completed agents don't count as assigned
        let still_assigned =
            is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Coder);
        assert!(!still_assigned);
    }
    /// The explicit `stage` field in config decides an agent's stage.
    #[test]
    fn is_story_assigned_uses_config_stage_field_for_nonstandard_names() {
        let toml = r#"
 [[agent]]
 name = "qa-2"
 stage = "qa"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("42_story_foo", "qa-2", AgentStatus::Running);
        let agents = pool.agents.lock().unwrap();
        // qa-2 with stage=qa should be recognised as a QA agent
        let seen_as_qa =
            is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Qa);
        assert!(seen_as_qa, "qa-2 should be detected as assigned to QA stage");
        // Should NOT appear as a coder
        let seen_as_coder =
            is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Coder);
        assert!(!seen_as_coder, "qa-2 should not be detected as a coder");
    }
    #[test]
    fn find_free_agent_returns_none_when_all_busy() {
        // Two coders are configured and each one is Running on its own story.
        let toml = r#"
 [[agent]]
 name = "coder-1"
 [[agent]]
 name = "coder-2"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let pool = AgentPool::new_test(3001);
        for (story, coder) in [("s1", "coder-1"), ("s2", "coder-2")] {
            pool.inject_test_agent(story, coder, AgentStatus::Running);
        }
        let agents = pool.agents.lock().unwrap();
        let candidate = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(candidate, None, "no free coders should be available");
    }
    #[test]
    fn find_free_agent_returns_first_free_coder() {
        let toml = r#"
 [[agent]]
 name = "coder-1"
 [[agent]]
 name = "coder-2"
 [[agent]]
 name = "coder-3"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let pool = AgentPool::new_test(3001);
        // Only coder-1 is occupied; coder-2 and coder-3 have no entry in the pool.
        pool.inject_test_agent("s1", "coder-1", AgentStatus::Running);
        let agents = pool.agents.lock().unwrap();
        let picked = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(
            picked,
            Some("coder-2"),
            "coder-2 should be the first free coder"
        );
    }
    #[test]
    fn find_free_agent_ignores_completed_agents() {
        let toml = r#"
 [[agent]]
 name = "coder-1"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let pool = AgentPool::new_test(3001);
        // A Completed entry from an earlier story must not make coder-1 look busy.
        pool.inject_test_agent("s1", "coder-1", AgentStatus::Completed);
        let agents = pool.agents.lock().unwrap();
        let picked = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(picked, Some("coder-1"), "completed coder-1 should be free");
    }
    #[test]
    fn find_free_agent_returns_none_for_wrong_stage() {
        let toml = r#"
 [[agent]]
 name = "qa"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let no_agents: HashMap<String, StoryAgent> = HashMap::new();
        // Only a QA agent is configured, so a Coder lookup comes back empty.
        let coder = find_free_agent_for_stage(&config, &no_agents, &PipelineStage::Coder);
        assert_eq!(coder, None);
        // The same configuration does satisfy a QA lookup.
        let qa = find_free_agent_for_stage(&config, &no_agents, &PipelineStage::Qa);
        assert_eq!(qa, Some("qa"));
    }
    #[test]
    fn find_free_agent_uses_config_stage_field_not_name() {
        // "qa-2" and "coder-opus" are not covered by the legacy name heuristic;
        // they have to be resolved through the explicit `stage` field instead.
        let toml = r#"
 [[agent]]
 name = "qa-2"
 stage = "qa"
 [[agent]]
 name = "coder-opus"
 stage = "coder"
 "#;
        let config = ProjectConfig::parse(toml).unwrap();
        let no_agents: HashMap<String, StoryAgent> = HashMap::new();
        let lookup = |stage: PipelineStage| find_free_agent_for_stage(&config, &no_agents, &stage);
        // Each agent is found for the stage named in its config entry.
        assert_eq!(
            lookup(PipelineStage::Qa),
            Some("qa-2"),
            "qa-2 with stage=qa should be found"
        );
        assert_eq!(
            lookup(PipelineStage::Coder),
            Some("coder-opus"),
            "coder-opus with stage=coder should be found"
        );
        // Nothing is configured for the mergemaster stage.
        assert!(lookup(PipelineStage::Mergemaster).is_none());
    }
    // ── find_free_agent_for_stage: default_coder_model filtering ─────────
    #[test]
    fn find_free_agent_skips_opus_when_default_coder_model_set() {
        // With `default_coder_model = "sonnet"`, the sonnet coder is the one handed out.
        let toml = r#"
 default_coder_model = "sonnet"
 [[agent]]
 name = "coder-1"
 stage = "coder"
 model = "sonnet"
 [[agent]]
 name = "coder-opus"
 stage = "coder"
 model = "opus"
 "#;
        let config = make_config(toml);
        let no_agents = HashMap::new();
        let picked = find_free_agent_for_stage(&config, &no_agents, &PipelineStage::Coder);
        assert_eq!(picked, Some("coder-1"));
    }
    #[test]
    fn find_free_agent_returns_opus_when_no_default_coder_model() {
        // No `default_coder_model` is configured, so the opus coder is eligible.
        let toml = r#"
 [[agent]]
 name = "coder-opus"
 stage = "coder"
 model = "opus"
 "#;
        let config = make_config(toml);
        let no_agents = HashMap::new();
        let picked = find_free_agent_for_stage(&config, &no_agents, &PipelineStage::Coder);
        assert_eq!(picked, Some("coder-opus"));
    }
    #[test]
    fn find_free_agent_returns_none_when_all_sonnet_coders_busy() {
        let toml = r#"
 default_coder_model = "sonnet"
 [[agent]]
 name = "coder-1"
 stage = "coder"
 model = "sonnet"
 [[agent]]
 name = "coder-opus"
 stage = "coder"
 model = "opus"
 "#;
        let config = make_config(toml);
        // The only sonnet coder is Running; the opus coder is idle.
        let agents = HashMap::from([(
            "story1:coder-1".to_string(),
            make_test_story_agent("coder-1", AgentStatus::Running),
        )]);
        let picked = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(picked, None, "opus agent should not be auto-assigned");
    }
    // ── find_free_agent_for_stage: max_coders limit ─────────────────────
    #[test]
    fn find_free_agent_respects_max_coders() {
        let toml = r#"
 max_coders = 1
 [[agent]]
 name = "coder-1"
 stage = "coder"
 model = "sonnet"
 [[agent]]
 name = "coder-2"
 stage = "coder"
 model = "sonnet"
 "#;
        let config = make_config(toml);
        // One coder is already Running, which uses up the whole `max_coders = 1` budget.
        let agents = HashMap::from([(
            "story1:coder-1".to_string(),
            make_test_story_agent("coder-1", AgentStatus::Running),
        )]);
        let picked = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(picked, None, "max_coders=1 should block second coder");
    }
    #[test]
    fn find_free_agent_allows_within_max_coders() {
        let toml = r#"
 max_coders = 2
 [[agent]]
 name = "coder-1"
 stage = "coder"
 model = "sonnet"
 [[agent]]
 name = "coder-2"
 stage = "coder"
 model = "sonnet"
 "#;
        let config = make_config(toml);
        // One of the two permitted coder slots is taken, so coder-2 is still available.
        let agents = HashMap::from([(
            "story1:coder-1".to_string(),
            make_test_story_agent("coder-1", AgentStatus::Running),
        )]);
        let picked = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(picked, Some("coder-2"));
    }
    #[test]
    fn max_coders_does_not_affect_qa_stage() {
        // `max_coders` is set, but the lookup below is for the QA stage.
        let toml = r#"
 max_coders = 1
 [[agent]]
 name = "qa"
 stage = "qa"
 model = "sonnet"
 "#;
        let config = make_config(toml);
        let no_agents = HashMap::new();
        let picked = find_free_agent_for_stage(&config, &no_agents, &PipelineStage::Qa);
        assert_eq!(picked, Some("qa"));
    }
    // ── count_active_agents_for_stage ────────────────────────────────────
    #[test]
    fn count_active_agents_counts_running_and_pending() {
        // NOTE(review): the test name mentions Pending, but only Running and Completed
        // agents are exercised here — consider adding a Pending case.
        let toml = r#"
 [[agent]]
 name = "coder-1"
 stage = "coder"
 [[agent]]
 name = "coder-2"
 stage = "coder"
 "#;
        let config = make_config(toml);
        let agents = HashMap::from([
            (
                "s1:coder-1".to_string(),
                make_test_story_agent("coder-1", AgentStatus::Running),
            ),
            (
                "s2:coder-2".to_string(),
                make_test_story_agent("coder-2", AgentStatus::Completed),
            ),
        ]);
        let active = count_active_agents_for_stage(&config, &agents, &PipelineStage::Coder);
        assert_eq!(active, 1, "Only Running coder should be counted, not Completed");
    }
 }
@@ -0,0 +1,113 @@
 //! Front-matter checks for story files: agent assignment, review holds, blocked state, and merge failures.
 use std::path::Path;
 /// Load the raw text of the story file at
 /// `<project_root>/.storkit/work/<stage_dir>/<story_id>.md`.
 ///
 /// Returns `None` when the file cannot be read, so that every front-matter
 /// check in this module treats a missing or unreadable story the same way.
 fn read_story_file(project_root: &Path, stage_dir: &str, story_id: &str) -> Option<String> {
    let path = project_root
        .join(".storkit")
        .join("work")
        .join(stage_dir)
        .join(format!("{story_id}.md"));
    std::fs::read_to_string(path).ok()
 }
 /// Read the optional `agent:` field from the front matter of a story file.
 ///
 /// Returns `Some(agent_name)` if the front matter specifies an agent, or `None`
 /// if the field is absent or the file cannot be read / parsed.
 pub(super) fn read_story_front_matter_agent(
    project_root: &Path,
    stage_dir: &str,
    story_id: &str,
 ) -> Option<String> {
    use crate::io::story_metadata::parse_front_matter;
    let contents = read_story_file(project_root, stage_dir, story_id)?;
    parse_front_matter(&contents).ok()?.agent
 }
 /// Return `true` if the story file in the given stage has `review_hold: true` in its front matter.
 ///
 /// A missing file, unparseable front matter, or an absent field all yield `false`.
 pub(super) fn has_review_hold(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
    use crate::io::story_metadata::parse_front_matter;
    read_story_file(project_root, stage_dir, story_id)
        .and_then(|contents| parse_front_matter(&contents).ok())
        .and_then(|meta| meta.review_hold)
        .unwrap_or(false)
 }
 /// Return `true` if the story file has `blocked: true` in its front matter.
 ///
 /// A missing file, unparseable front matter, or an absent field all yield `false`.
 pub(super) fn is_story_blocked(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
    use crate::io::story_metadata::parse_front_matter;
    read_story_file(project_root, stage_dir, story_id)
        .and_then(|contents| parse_front_matter(&contents).ok())
        .and_then(|meta| meta.blocked)
        .unwrap_or(false)
 }
 /// Return `true` if the story file has a `merge_failure` field in its front matter.
 ///
 /// Only the presence of the field is checked; its value is ignored. A missing
 /// file or unparseable front matter yields `false`.
 pub(super) fn has_merge_failure(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
    use crate::io::story_metadata::parse_front_matter;
    read_story_file(project_root, stage_dir, story_id)
        .and_then(|contents| parse_front_matter(&contents).ok())
        .and_then(|meta| meta.merge_failure)
        .is_some()
 }
 // ── Tests ──────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Create `.storkit/work/3_qa/10_spike_research.md` under `root` with the given contents.
    fn write_qa_spike(root: &std::path::Path, contents: &str) {
        let qa_dir = root.join(".storkit/work/3_qa");
        std::fs::create_dir_all(&qa_dir).unwrap();
        std::fs::write(qa_dir.join("10_spike_research.md"), contents).unwrap();
    }
    #[test]
    fn has_review_hold_returns_true_when_set() {
        let tmp = tempfile::tempdir().unwrap();
        write_qa_spike(
            tmp.path(),
            "---\nname: Research spike\nreview_hold: true\n---\n# Spike\n",
        );
        let held = has_review_hold(tmp.path(), "3_qa", "10_spike_research");
        assert!(held);
    }
    #[test]
    fn has_review_hold_returns_false_when_not_set() {
        let tmp = tempfile::tempdir().unwrap();
        // Front matter is present but carries no `review_hold` key.
        write_qa_spike(tmp.path(), "---\nname: Research spike\n---\n# Spike\n");
        let held = has_review_hold(tmp.path(), "3_qa", "10_spike_research");
        assert!(!held);
    }
    #[test]
    fn has_review_hold_returns_false_when_file_missing() {
        // Nothing is written into the temp dir, so the story file does not exist.
        let tmp = tempfile::tempdir().unwrap();
        let held = has_review_hold(tmp.path(), "3_qa", "99_spike_missing");
        assert!(!held);
    }
 }
@@ -0,0 +1,220 @@
 //! Watchdog task: detects orphaned agents and triggers auto-assign.
 use std::collections::HashMap;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
 use tokio::sync::broadcast;
 use crate::slog;
 use super::super::super::{AgentEvent, AgentStatus};
 use super::super::{AgentPool, StoryAgent};
 /// Scan the agent pool for Running or Pending entries whose backing tokio task has
 /// already finished, mark each of them as Failed, and return how many were found.
 ///
 /// This handles the case where the PTY read loop or the spawned task exits
 /// without updating the agent status — for example when the process is killed
 /// externally and the PTY master fd returns EOF before our inactivity timeout
 /// fires, but some other edge case prevents the normal cleanup path from running.
 /// Pending agents can be orphaned if worktree creation panics before setting status.
 ///
 /// For every orphan an `AgentEvent::Error` is sent on the agent's broadcast channel;
 /// a failed send is ignored. Entries without a task handle are never treated as
 /// orphaned. Returns 0 without scanning if the mutex cannot be locked.
 pub(super) fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) -> usize {
    let Ok(mut lock) = agents.lock() else {
        return 0;
    };
    let mut count = 0;
    // Single pass: each entry is inspected and, if orphaned, updated in place, so no
    // intermediate list of cloned keys / senders is needed.
    for (key, agent) in lock.iter_mut() {
        if matches!(agent.status, AgentStatus::Running | AgentStatus::Pending)
            && let Some(handle) = &agent.task_handle
            && handle.is_finished()
        {
            // Swap in the terminal status and keep the old one for the log line.
            let prev_status = std::mem::replace(&mut agent.status, AgentStatus::Failed);
            slog!(
                "[watchdog] Orphaned agent '{key}': task finished but status was {prev_status}. \
                 Marking Failed."
            );
            // The story id is everything before the last ':' of the map key; a key
            // without ':' is used as-is.
            let story_id = key
                .rsplit_once(':')
                .map_or(key.as_str(), |(story, _)| story)
                .to_string();
            let tx: &broadcast::Sender<AgentEvent> = &agent.tx;
            let _ = tx.send(AgentEvent::Error {
                story_id,
                agent_name: agent.agent_name.clone(),
                message: "Agent process terminated unexpectedly (watchdog detected orphan)"
                    .to_string(),
            });
            count += 1;
        }
    }
    count
 }
 impl AgentPool {
    /// Test helper: perform exactly one orphan scan on the calling thread and
    /// discard the resulting count.
    #[cfg(test)]
    pub fn run_watchdog_once(&self) {
        let _orphans = check_orphaned_agents(&self.agents);
    }
    /// Start a detached background task that runs `check_orphaned_agents` on a
    /// 30-second interval.
    ///
    /// Each pass marks agents whose underlying task has already finished as Failed
    /// and emits an Error event for them so that `wait_for_agent` unblocks.  This is
    /// a safety net for edge cases where the PTY read loop exits without updating
    /// the agent status (e.g. a panic in the spawn_blocking task, or an external
    /// SIGKILL that closes the PTY fd immediately).
    ///
    /// If a pass finds at least one orphan and `project_root` is `Some`, auto-assign
    /// is triggered so that free agents can pick up unassigned work.  With
    /// `project_root == None` the watchdog only marks orphans.
    pub fn spawn_watchdog(pool: Arc<AgentPool>, project_root: Option<PathBuf>) {
        let period = std::time::Duration::from_secs(30);
        tokio::spawn(async move {
            let mut ticker = tokio::time::interval(period);
            loop {
                ticker.tick().await;
                let found = check_orphaned_agents(&pool.agents);
                if found == 0 {
                    continue;
                }
                let Some(root) = project_root.as_ref() else {
                    continue;
                };
                slog!("[watchdog] {found} orphaned agent(s) detected; triggering auto-assign.");
                pool.auto_assign_available_work(root).await;
            }
        });
    }
 }
 // ── Tests ──────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::*;
    use super::super::super::{AgentPool, composite_key};
    /// Spawn a no-op task and sleep 20 ms so the runtime can drive it to completion.
    async fn finished_task() -> tokio::task::JoinHandle<()> {
        let handle = tokio::spawn(async {});
        tokio::time::sleep(std::time::Duration::from_millis(20)).await;
        handle
    }
    /// Look up the current status of the pool entry for `story_id` / `agent_name`.
    fn status_of(pool: &AgentPool, story_id: &str, agent_name: &str) -> AgentStatus {
        let agents = pool.agents.lock().unwrap();
        let key = composite_key(story_id, agent_name);
        agents.get(&key).unwrap().status.clone()
    }
    // ── check_orphaned_agents return value tests (bug 161) ──────────────────
    #[tokio::test]
    async fn check_orphaned_agents_returns_count_of_orphaned_agents() {
        let pool = AgentPool::new_test(3001);
        // Two tasks that have already run to completion.
        let first = finished_task().await;
        let second = finished_task().await;
        assert!(first.is_finished());
        assert!(second.is_finished());
        pool.inject_test_agent_with_handle("story_a", "coder", AgentStatus::Running, first);
        pool.inject_test_agent_with_handle("story_b", "coder", AgentStatus::Running, second);
        let orphans = check_orphaned_agents(&pool.agents);
        assert_eq!(orphans, 2, "should detect both orphaned agents");
    }
    #[test]
    fn check_orphaned_agents_returns_zero_when_no_orphans() {
        let pool = AgentPool::new_test(3001);
        // Completed / Failed entries are terminal and therefore never orphaned.
        pool.inject_test_agent("story_a", "coder", AgentStatus::Completed);
        pool.inject_test_agent("story_b", "qa", AgentStatus::Failed);
        let orphans = check_orphaned_agents(&pool.agents);
        assert_eq!(
            orphans, 0,
            "no orphans should be detected for terminal agents"
        );
    }
    #[tokio::test]
    async fn watchdog_detects_orphaned_running_agent() {
        let pool = AgentPool::new_test(3001);
        let handle = finished_task().await;
        assert!(
            handle.is_finished(),
            "task should be finished before injection"
        );
        let tx = pool.inject_test_agent_with_handle(
            "orphan_story",
            "coder",
            AgentStatus::Running,
            handle,
        );
        // Subscribe before the pass so the emitted event is observable.
        let mut rx = tx.subscribe();
        pool.run_watchdog_once();
        assert_eq!(
            status_of(&pool, "orphan_story", "coder"),
            AgentStatus::Failed,
            "watchdog must mark an orphaned Running agent as Failed"
        );
        let event = rx.try_recv().expect("watchdog must emit an Error event");
        assert!(
            matches!(event, AgentEvent::Error { .. }),
            "expected AgentEvent::Error, got: {event:?}"
        );
    }
    #[tokio::test]
    async fn watchdog_orphan_detection_returns_nonzero_enabling_auto_assign() {
        // Contract under test: `check_orphaned_agents` reports a non-zero count when
        // orphans exist; the watchdog relies on that count to decide whether to
        // trigger auto-assign (bug 161).
        let pool = AgentPool::new_test(3001);
        let handle = finished_task().await;
        pool.inject_test_agent_with_handle("orphan_story", "coder", AgentStatus::Running, handle);
        // Precondition: the entry is still Running.
        assert_eq!(
            status_of(&pool, "orphan_story", "coder"),
            AgentStatus::Running
        );
        // A single pass must report exactly one orphan.
        let orphans = check_orphaned_agents(&pool.agents);
        assert_eq!(
            orphans, 1,
            "watchdog must return 1 for a single orphaned agent"
        );
        // Postcondition: the entry has been flipped to Failed.
        assert_eq!(
            status_of(&pool, "orphan_story", "coder"),
            AgentStatus::Failed,
            "orphaned agent must be marked Failed"
        );
    }
 }
--- a/Show More
+++ b/Show More