Compare commits

..

196 Commits

Author SHA1 Message Date
Timmy c2f477dde6 Bump version to 0.7.0 2026-03-28 12:20:00 +00:00
dave b098c8ff9f storkit: merge 428_refactor_split_pool_pipeline_rs_into_submodules 2026-03-28 11:50:15 +00:00
dave 7fea543f60 storkit: done 428_refactor_split_pool_pipeline_rs_into_submodules 2026-03-28 11:50:02 +00:00
dave f8bb23a6d4 fix(424): correct WatcherEvent::HardBlock to RateLimitHardBlock in pool/mod.rs
Story 424's merge used the wrong variant name HardBlock instead of
RateLimitHardBlock, breaking master compilation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 11:47:26 +00:00
dave 0016841770 storkit: create 426_bug_mergemaster_pipeline_marks_story_done_without_verifying_code_landed_on_master 2026-03-28 11:35:03 +00:00
dave 3639d64da6 fix(424): add throttled field to all StoryAgent ctors and handle HardBlock in ws.rs
The initial commit added the `throttled` field to `StoryAgent` but missed
several construction sites in lifecycle.rs, test_helpers.rs, and scan.rs.
Also adds the `HardBlock` match arm in the WebSocket event conversion and
minor CSS/import ordering fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 11:33:43 +00:00
dave ebdcf18134 feat(424): rate-limit traffic-light dots and hard-block alerts
- Add HardBlock variant to WatcherEvent (story_id, agent_name, reset_time)
- In pty.rs, distinguish allowed_warning (throttle) from hard blocks;
  emit RateLimitWarning for throttles, HardBlock for actual 429s
- Add `throttled: bool` field to StoryAgent / AgentInfo
- Pool spawns a background listener that sets throttled=true on
  RateLimitWarning or HardBlock events and fires AgentStateChanged
- Status command shows traffic-light dots: ○ idle, ● running, ◑ throttled, ✗ blocked
- Read blocked flag from story front matter for the ✗ dot
- Notifications: RateLimitWarning silenced (too noisy); HardBlock sends
  urgent chat notification with optional reset time
- Tests added for traffic_light_dot, read_story_blocked, status output,
  and all notification paths
2026-03-28 11:33:01 +00:00
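The dot-selection logic described in this commit can be sketched as a small pure function. This is a minimal illustration, not the actual storkit source: `AgentState` and `traffic_light_dot` are assumed names, and the precedence (blocked over throttled over the base state) is inferred from the commit message.

```rust
#[derive(Clone, Copy)]
enum AgentState {
    Idle,
    Running,
}

/// Pick the traffic-light dot for one agent row.
/// Blocked wins over throttled; throttled wins over the base state.
/// (Hypothetical sketch; names are not from the real codebase.)
fn traffic_light_dot(state: AgentState, throttled: bool, blocked: bool) -> char {
    if blocked {
        '✗'
    } else if throttled {
        '◑'
    } else {
        match state {
            AgentState::Idle => '○',
            AgentState::Running => '●',
        }
    }
}

fn main() {
    // A running agent that has seen a RateLimitWarning shows the half dot.
    println!("{}", traffic_light_dot(AgentState::Running, true, false));
}
```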
dave d83f2ae4c1 storkit: done 424_story_rate_limit_traffic_light_status_and_hard_block_alerts 2026-03-28 11:27:19 +00:00
dave f6c0d35f11 storkit: create 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 11:21:11 +00:00
dave facbf51f05 storkit: create 426_bug_mergemaster_pipeline_marks_story_done_without_verifying_code_landed_on_master 2026-03-28 11:18:36 +00:00
dave 847ebc292f storkit: create 424_story_rate_limit_traffic_light_status_and_hard_block_alerts 2026-03-28 11:11:32 +00:00
Timmy 065ca2bd8f Bump version to 0.6.1 2026-03-28 11:07:06 +00:00
dave 34988855bc storkit: create 428_refactor_split_pool_pipeline_rs_into_submodules 2026-03-28 11:00:26 +00:00
dave 7fc788baea docs: rewrite README for 0.6.1
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 10:56:36 +00:00
dave 40575924b5 chore: bump version to 0.6.1
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 10:50:59 +00:00
dave 4f56fa6cbe storkit: done 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 10:41:46 +00:00
dave 52513b55ff storkit: merge 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 10:41:43 +00:00
dave 1ae2fa9b9b storkit: create 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 10:28:23 +00:00
dave 6077f74dbd storkit: accept 09_story_remove_scroll_bars 2026-03-28 10:12:53 +00:00
dave 8ab2e19e98 fix(423): handle RateLimitHardBlock in ws.rs match
The new WatcherEvent::RateLimitHardBlock variant added in the feature
commit was not covered in the ws.rs From<WatcherEvent> match, causing
a compile error. Add the missing arm returning None (same as
RateLimitWarning — handled by chat notifications only, not WebSocket).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:11:44 +00:00
dave b44f3a33e3 feat(423): auto-schedule timer on rate limit to resume after reset
- pty.rs: detect rate_limit_event hard blocks, parse reset_at, emit
  WatcherEvent::RateLimitHardBlock with story_id, agent_name, reset_at
- watcher.rs: add RateLimitHardBlock variant to WatcherEvent enum
- timer.rs: add TimerStore::upsert (add-or-update-to-later) and
  spawn_rate_limit_auto_scheduler (listens for RateLimitHardBlock,
  upserts timer for the blocked story)
- notifications.rs: handle RateLimitHardBlock events with a debounced
  chat notification including the scheduled resume time;
  add format_rate_limit_hard_block_notification helper
- matrix/mod.rs: subscribe second watcher_rx for auto-scheduler,
  pass it to run_bot
- matrix/bot/run.rs: wire spawn_rate_limit_auto_scheduler into bot startup

Tests cover: AC1 (hard block detection in pty), AC2 (auto-scheduler
adds timer), AC3 (upsert deduplication), AC5 (chat notification sent),
AC6 (worktree preserved — timer fires start_agent on existing worktree)
2026-03-28 10:11:08 +00:00
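The "add-or-update-to-later" upsert mentioned above can be sketched with a plain map keyed by story id. This is a minimal illustration under assumed names (`TimerStore`, `upsert`) and an assumed representation of fire times as epoch seconds; the real storkit API may differ.

```rust
use std::collections::HashMap;

/// Sketch of a timer store whose upsert only ever pushes a timer later,
/// never earlier — so repeated rate-limit events for the same story
/// deduplicate into one timer at the latest reset time.
struct TimerStore {
    timers: HashMap<String, u64>, // story id -> fire time (epoch seconds)
}

impl TimerStore {
    fn new() -> Self {
        Self { timers: HashMap::new() }
    }

    /// Insert a timer, or update an existing one to a later fire time.
    /// Returns the effective fire time after the upsert.
    fn upsert(&mut self, story_id: &str, fire_at: u64) -> u64 {
        let entry = self.timers.entry(story_id.to_string()).or_insert(fire_at);
        if fire_at > *entry {
            *entry = fire_at;
        }
        *entry
    }
}

fn main() {
    let mut store = TimerStore::new();
    store.upsert("423", 1_000);
    // An earlier reset time does not pull the timer forward.
    println!("{}", store.upsert("423", 500));
}
```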
dave 57407aed51 storkit: create 426_bug_mergemaster_pipeline_marks_story_done_without_verifying_code_landed_on_master 2026-03-28 10:09:41 +00:00
dave a29677b3c7 storkit: create 426_bug_mergemaster_pipeline_marks_story_done_without_verifying_code_landed_on_master 2026-03-28 10:06:17 +00:00
dave 95df450fca storkit: create 426_bug_mergemaster_pipeline_marks_story_done_without_verifying_code_landed_on_master 2026-03-28 10:05:58 +00:00
dave 6c6bc35785 feat: add unblock command and MCP tool to reset blocked stories
- Add `unblock` bot command (chat + web UI slash command) that clears the
  `blocked` flag and resets `retry_count` to 0 in story front matter
- Works across all pipeline stages (1_backlog through 6_archived)
- Returns confirmation with story name and ID, or clear error if story
  is not found or not blocked
- Expose `unblock_story` MCP tool for programmatic use by agents
- Make `chat::commands::unblock` module pub(crate) so story_tools can
  call `unblock_by_number`
- Add 8 unit tests covering registration, validation, core logic, and
  edge cases (not-found, not-blocked, any stage, story ID in response)
- Update MCP tools list test: 49 → 50 tools
2026-03-28 10:05:51 +00:00
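The front-matter reset performed by `unblock` — clearing the `blocked` flag and zeroing `retry_count` — can be sketched as a line-based rewrite of the story file. This is an assumption-laden illustration: it presumes a `---`-delimited YAML front-matter block with those two keys, and a real implementation would use a YAML parser rather than string matching.

```rust
/// Sketch: rewrite a story file's front matter so `blocked` is false and
/// `retry_count` is 0, leaving everything outside the `---` block intact.
/// (Illustrative only; not the real storkit implementation.)
fn unblock_front_matter(contents: &str) -> String {
    let mut in_front_matter = false;
    contents
        .lines()
        .map(|line| {
            if line.trim() == "---" {
                in_front_matter = !in_front_matter;
                line.to_string()
            } else if in_front_matter && line.trim_start().starts_with("blocked:") {
                "blocked: false".to_string()
            } else if in_front_matter && line.trim_start().starts_with("retry_count:") {
                "retry_count: 0".to_string()
            } else {
                line.to_string()
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}

fn main() {
    let story = "---\nname: \"Example\"\nretry_count: 3\nblocked: true\n---\n# Story";
    println!("{}", unblock_front_matter(story));
}
```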
dave 7652bbba9c storkit: done 423_story_auto_schedule_timer_on_rate_limit_to_resume_after_reset 2026-03-28 09:50:07 +00:00
dave efd89a26ac config: increase max_retries from 2 to 3
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 09:41:25 +00:00
dave 71d4746009 storkit: done 425_story_chat_notification_when_a_story_blocks_with_reason 2026-03-28 09:38:50 +00:00
dave 98b5475160 storkit: merge 425_story_chat_notification_when_a_story_blocks_with_reason 2026-03-28 09:38:47 +00:00
dave 740f1b5e6e storkit: create 425_story_chat_notification_when_a_story_blocks_with_reason 2026-03-28 09:25:19 +00:00
dave c0bab1e671 storkit: create 425_story_chat_notification_when_a_story_blocks_with_reason 2026-03-28 09:23:45 +00:00
dave 306810e4d5 storkit: done 419_bug_matrix_bot_crashes_on_transient_network_error_instead_of_retrying 2026-03-28 09:11:29 +00:00
dave 1193b7ac9a storkit: merge 419_bug_matrix_bot_crashes_on_transient_network_error_instead_of_retrying 2026-03-28 09:11:26 +00:00
dave 05db012aaf storkit: done 422_story_unblock_command_to_reset_blocked_stories 2026-03-28 09:08:02 +00:00
dave bc3c852509 storkit: create 424_story_rate_limit_traffic_light_status_and_hard_block_alerts 2026-03-28 09:07:16 +00:00
dave 04051282da storkit: create 424_story_rate_limit_traffic_light_status_and_hard_block_alerts 2026-03-28 09:06:26 +00:00
dave 081b33a8a6 storkit: done 421_story_timer_command_for_deferred_agent_start 2026-03-28 09:02:23 +00:00
dave cf5424f9a6 storkit: merge 421_story_timer_command_for_deferred_agent_start 2026-03-28 09:02:20 +00:00
dave 1ec9aaab8a storkit: done 420_story_loc_for_a_specified_file_bot_command_and_web_ui_slash_command 2026-03-28 08:59:03 +00:00
dave d6f82393f5 storkit: merge 420_story_loc_for_a_specified_file_bot_command_and_web_ui_slash_command 2026-03-28 08:58:57 +00:00
dave f4ce0e017b storkit: create 423_story_auto_schedule_timer_on_rate_limit_to_resume_after_reset 2026-03-28 08:49:23 +00:00
dave c0ea5f0cb8 storkit: done 418_refactor_split_pool_auto_assign_rs_into_submodules 2026-03-28 08:48:05 +00:00
dave d375c4b1d3 storkit: merge 418_refactor_split_pool_auto_assign_rs_into_submodules 2026-03-28 08:48:02 +00:00
dave 4ea4be1462 storkit: create 422_story_unblock_command_to_reset_blocked_stories 2026-03-28 08:44:31 +00:00
dave bc1c1cd2c9 storkit: create 421_story_timer_command_for_deferred_agent_start 2026-03-28 08:35:41 +00:00
dave c1e4c40f31 storkit: create 421_story_timer_command_for_deferred_agent_start 2026-03-28 08:35:02 +00:00
dave 203e8f22be storkit: done 417_refactor_split_matrix_bot_rs_into_focused_modules 2026-03-28 08:30:04 +00:00
dave 665c036a56 storkit: merge 417_refactor_split_matrix_bot_rs_into_focused_modules 2026-03-28 08:30:01 +00:00
dave 73304f08ac storkit: create 420_story_loc_for_a_specified_file_bot_command_and_web_ui_slash_command 2026-03-28 08:17:32 +00:00
dave fe9fc69f96 storkit: create 420_story_loc_for_a_specified_file_bot_command_and_web_ui_slash_command 2026-03-28 08:17:05 +00:00
dave 3b0542cd41 storkit: create 419_bug_matrix_bot_crashes_on_transient_network_error_instead_of_retrying 2026-03-28 08:10:38 +00:00
dave 102919e0b3 storkit: accept 415_refactor_split_agents_pool_mod_rs_into_submodules 2026-03-27 19:53:40 +00:00
dave d63aa0a3c2 storkit: accept 416_refactor_split_io_fs_rs_into_submodules 2026-03-27 19:49:39 +00:00
dave 7f7db57933 storkit: accept 414_story_loc_command_filters_out_known_huge_files 2026-03-27 19:14:37 +00:00
dave 043791194f storkit: accept 413_refactor_split_slack_rs_into_focused_modules 2026-03-27 18:59:36 +00:00
dave 710f839c65 storkit: accept 409_refactor_split_whatsapp_rs_into_focused_modules 2026-03-27 18:38:35 +00:00
dave b0e21abb6e storkit: accept 410_story_loc_bot_command_top_files_by_line_count 2026-03-27 18:32:34 +00:00
dave 6b71c07f5b storkit: create 418_refactor_split_pool_auto_assign_rs_into_submodules 2026-03-27 16:53:50 +00:00
dave 9cff3c753d storkit: create 417_refactor_split_matrix_bot_rs_into_focused_modules 2026-03-27 16:52:05 +00:00
dave 6acd7f5249 storkit: done 416_refactor_split_io_fs_rs_into_submodules 2026-03-27 16:08:12 +00:00
dave 26f5b25f22 storkit: merge 416_refactor_split_io_fs_rs_into_submodules 2026-03-27 16:08:10 +00:00
dave 8bc0bd592e storkit: done 415_refactor_split_agents_pool_mod_rs_into_submodules 2026-03-27 15:56:03 +00:00
dave 7c25aca39b storkit: merge 415_refactor_split_agents_pool_mod_rs_into_submodules 2026-03-27 15:56:00 +00:00
dave 5173bf4aef storkit: create 416_refactor_split_io_fs_rs_into_submodules 2026-03-27 15:48:47 +00:00
dave 7f7f49d757 storkit: done 414_story_loc_command_filters_out_known_huge_files 2026-03-27 15:37:54 +00:00
dave e88b9bbc63 storkit: merge 414_story_loc_command_filters_out_known_huge_files 2026-03-27 15:37:51 +00:00
dave db22ab2229 storkit: create 415_refactor_split_agents_pool_mod_rs_into_submodules 2026-03-27 15:30:33 +00:00
dave c30ad79398 storkit: done 413_refactor_split_slack_rs_into_focused_modules 2026-03-27 15:29:26 +00:00
dave 16853328fa storkit: merge 413_refactor_split_slack_rs_into_focused_modules 2026-03-27 15:29:23 +00:00
dave 8ac8cdba88 storkit: create 414_story_loc_command_filters_out_known_huge_files 2026-03-27 15:14:08 +00:00
dave c046edebda storkit: accept 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-27 15:09:57 +00:00
dave eef9669c95 storkit: done 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-27 15:09:53 +00:00
dave a9cdd3a354 storkit: merge 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-27 15:09:50 +00:00
dave b4eeb499e9 storkit: create 413_refactor_split_slack_rs_into_focused_modules 2026-03-27 14:59:06 +00:00
dave fca46c3806 storkit: done 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-27 14:57:09 +00:00
dave 2510fe44bc storkit: done 409_refactor_split_whatsapp_rs_into_focused_modules 2026-03-27 14:40:18 +00:00
dave e152cf3cb8 storkit: create 412_story_recheck_bot_command_to_re_run_gates_without_restarting_agent 2026-03-27 14:37:05 +00:00
dave 7d3b256fff storkit: done 410_story_loc_bot_command_top_files_by_line_count 2026-03-27 14:36:11 +00:00
dave f6d632139e storkit: merge 410_story_loc_bot_command_top_files_by_line_count 2026-03-27 14:36:07 +00:00
dave 204a99c2e7 storkit: create 412_story_recheck_bot_command_to_re_run_gates_without_restarting_agent 2026-03-27 14:35:06 +00:00
Timmy f28a03e42e Added git variables to entry point 2026-03-27 14:26:49 +00:00
Timmy 26f4edadcc Ditching story kit port file 2026-03-27 14:11:55 +00:00
Timmy fd58631e65 Removed misleading comment on docker compose file 2026-03-27 14:11:35 +00:00
dave f70399a28f storkit: accept 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-27 13:41:29 +00:00
Timmy 02d08faaa2 Fixing build/test errors in master/ 2026-03-27 12:31:08 +00:00
dave 2dc77479ad storkit: done 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-27 11:20:32 +00:00
dave c5761ae968 storkit: create 410_story_loc_bot_command_top_files_by_line_count 2026-03-27 11:05:00 +00:00
dave 67754781ca storkit: create 409_refactor_split_whatsapp_rs_into_focused_modules 2026-03-27 10:58:13 +00:00
dave 3436507a21 storkit: create 410_story_loc_bot_command_top_files_by_line_count 2026-03-27 10:57:47 +00:00
dave 93bc08574b storkit: create 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-27 10:57:41 +00:00
Timmy 3571511349 restore: reset past source tree deletion, apply pending work 2026-03-27 10:49:39 +00:00
dave 04214ca155 storkit: create 408_spike_fly_io_machines_api_integration_for_multi_tenant_storkit_saas 2026-03-26 21:12:06 +00:00
dave 4d48df152c storkit: create 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-26 21:11:46 +00:00
dave e0a70a4c1c storkit: create 408_spike_fly_io_machines_api_integration_for_multi_tenant_storkit_saas 2026-03-26 21:11:31 +00:00
dave bae50fbc5b storkit: create 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-26 21:07:56 +00:00
dave 8998dac593 storkit: create 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-26 21:01:42 +00:00
dave e6b300e70e storkit: create 407_spike_fly_io_machines_for_multi_tenant_storkit_saas 2026-03-26 21:01:21 +00:00
dave b22e2b9274 storkit: done 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 20:51:54 +00:00
dave 24b1aa6e7f storkit: merge 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 20:51:52 +00:00
dave 85e37e03a8 storkit: done 399_story_cli_port_flag_with_project_toml_persistence 2026-03-26 20:46:59 +00:00
dave f22a2666b8 storkit: accept 396_story_whatsapp_bot_startup_announcement_after_restart 2026-03-26 20:23:33 +00:00
dave 507889627a storkit: done 396_story_whatsapp_bot_startup_announcement_after_restart 2026-03-26 20:22:29 +00:00
dave c4cee72938 storkit: merge 396_story_whatsapp_bot_startup_announcement_after_restart 2026-03-26 20:22:26 +00:00
dave 33cb363651 storkit: done 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 20:18:00 +00:00
dave cd3ded278d storkit: merge 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 20:17:57 +00:00
dave b5bf75aa5a storkit: delete 398_bug_whatsapp_and_slack_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-26 20:09:30 +00:00
dave f6b5b1b01a storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 20:07:54 +00:00
dave 26d34245f9 storkit: create 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 20:06:13 +00:00
dave de54265c35 storkit: create 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-26 20:05:40 +00:00
dave a52d1e098f storkit: create 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-26 20:05:33 +00:00
dave 015fa48c32 storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 20:05:27 +00:00
dave abc30c93d1 storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 20:04:41 +00:00
dave cf2faa9bff storkit: create 402_bug_whatsapp_and_slack_missing_rebuild_command_handler 2026-03-26 20:04:02 +00:00
dave 92aa1ebccf storkit: done 402_bug_whatsapp_and_slack_missing_rebuild_command_handler 2026-03-26 20:02:47 +00:00
Timmy 877f69c897 feat: browser-based OAuth login flow (story 406)
Add three HTTP endpoints for OAuth login without terminal access:
- GET /oauth/authorize — generates PKCE params, redirects to
  claude.com/cai/oauth/authorize with code=true and full scopes
- GET /callback — exchanges auth code for tokens via JSON POST to
  platform.claude.com/v1/oauth/token, writes ~/.claude/.credentials.json
- GET /oauth/status — returns current credential state as JSON

Uses SHA-256 (sha2 crate) for PKCE code challenge. The authorize URL
targets claude.com/cai/ (not platform.claude.com) which is required
for Max/Pro subscriptions to grant user:inference scope.

Users visit http://localhost:3001/oauth/authorize in their browser
to authenticate. Matrix/WhatsApp can send this link when auth fails.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 19:58:18 +00:00
Timmy 710b604b7c feat: auto-refresh expired OAuth token for Claude Code PTY (story 405)
Detect authentication_failed errors from the Claude Code PTY stream
and automatically refresh the OAuth access token using the stored
refresh token in ~/.claude/.credentials.json.

- New module server/src/llm/oauth.rs: reads credentials, calls
  platform.claude.com/v1/oauth/token with JSON body, writes back
- PTY provider detects "error":"authentication_failed" via AtomicBool
- chat_stream retries once after successful refresh
- Clear error message if refresh also fails

On success the retry is transparent. On failure the user sees:
"OAuth session expired. Please run claude login to re-authenticate."

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 19:58:04 +00:00
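The retry-once pattern this commit describes — a stream watcher flips an `AtomicBool` on an authentication failure, and the caller refreshes the token once before surfacing an error — can be sketched as follows. Function names and the closure-based shape are assumptions for illustration; only the error string is taken from the commit message.

```rust
use std::sync::atomic::{AtomicBool, Ordering};

/// Sketch of "retry once after refresh": if the first send fails and the
/// watcher flagged an auth failure, refresh and retry exactly once.
/// (Hypothetical signature; the real code streams from a PTY.)
fn chat_with_retry(
    auth_failed: &AtomicBool,
    mut send: impl FnMut() -> Result<String, String>,
    mut refresh: impl FnMut() -> Result<(), String>,
) -> Result<String, String> {
    match send() {
        Ok(out) => Ok(out),
        Err(err) => {
            // swap(false) both reads and clears the flag atomically.
            if auth_failed.swap(false, Ordering::SeqCst) {
                refresh().map_err(|_| {
                    "OAuth session expired. Please run claude login to re-authenticate."
                        .to_string()
                })?;
                send() // transparent retry after a successful refresh
            } else {
                Err(err)
            }
        }
    }
}

fn main() {
    let flag = AtomicBool::new(true);
    let mut calls = 0;
    let result = chat_with_retry(
        &flag,
        || {
            calls += 1;
            if calls == 1 {
                Err("authentication_failed".to_string())
            } else {
                Ok("hello".to_string())
            }
        },
        || Ok(()),
    );
    println!("{:?}", result);
}
```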
Timmy ab4ce2db92 chore: archive bug 397, create story 399, fix platform-specific npm deps
- Archive completed bug 397
- Create story 399 (CLI port flag)
- Remove @biomejs/cli-darwin-arm64 and @rollup/rollup-darwin-arm64
  from package.json (breaks Docker builds on Linux)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 19:57:50 +00:00
Timmy 61f6fd60a8 storkit: merge 402_bug_whatsapp_and_slack_missing_rebuild_command_handler 2026-03-26 11:20:55 +00:00
Timmy e66149e07c storkit: done 397_bug_selection_screen_directory_picker_unreadable_in_dark_mode 2026-03-26 11:16:24 +00:00
Timmy 108a697483 storkit: merge 397_bug_selection_screen_directory_picker_unreadable_in_dark_mode 2026-03-26 11:16:21 +00:00
Timmy 1a7f419ecf storkit: create 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-26 11:12:18 +00:00
Timmy 96b1ce373b storkit: create 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-26 11:12:16 +00:00
Timmy 58e41f7e0b storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 11:12:13 +00:00
Timmy c9a2fa58eb storkit: create 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 11:12:11 +00:00
Timmy 64c0f190cf storkit: create 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-26 11:09:49 +00:00
Timmy fc443ed987 storkit: create 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-26 11:09:48 +00:00
Timmy 7939a19816 storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 11:09:46 +00:00
Timmy 46b5087157 storkit: create 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 11:09:45 +00:00
Timmy a8d6524b56 storkit: create 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-26 11:09:28 +00:00
Timmy 61d63db84c storkit: create 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-26 11:09:27 +00:00
Timmy aa4ec8c779 storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 11:09:25 +00:00
Timmy 3777042ad3 storkit: create 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 11:09:24 +00:00
Timmy feb340beba storkit: create 404_bug_whatsapp_and_slack_missing_assign_command_handler 2026-03-26 11:07:33 +00:00
Timmy 23369c514d storkit: create 403_bug_whatsapp_and_slack_missing_rmtree_command_handler 2026-03-26 11:07:31 +00:00
Timmy 832da16b6f storkit: create 402_bug_whatsapp_and_slack_missing_rebuild_command_handler 2026-03-26 11:07:29 +00:00
Timmy 131964cbc3 storkit: create 401_bug_whatsapp_and_slack_missing_start_command_handler 2026-03-26 11:07:27 +00:00
Timmy 81db0504ed storkit: create 400_bug_whatsapp_and_slack_missing_reset_command_handler 2026-03-26 11:07:25 +00:00
Timmy 584a44a516 storkit: create 399_story_cli_port_flag_with_project_toml_persistence 2026-03-26 11:05:12 +00:00
Timmy c7c4a57533 storkit: create 399_story_cli_port_flag_with_project_toml_persistence 2026-03-26 11:03:43 +00:00
Timmy 0a67c28f8c storkit: create 398_bug_whatsapp_and_slack_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-26 11:00:01 +00:00
Timmy 6476492caa storkit: create 397_bug_selection_screen_directory_picker_unreadable_in_dark_mode 2026-03-26 10:52:06 +00:00
dave faf8734ea8 storkit: accept 393_story_pipeline_stage_notifications_for_whatsapp_and_slack_transports 2026-03-25 19:35:57 +00:00
dave 862f0704be storkit: accept 394_story_whatsapp_and_slack_permission_prompt_forwarding 2026-03-25 19:31:56 +00:00
dave d3df1586c6 storkit: accept 395_refactor_fix_npm_deprecated_module_warnings 2026-03-25 19:27:55 +00:00
dave 8a1996e0e4 storkit: accept 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 19:21:55 +00:00
dave 61f5a0c3be storkit: accept 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 17:59:45 +00:00
dave d7bc785de1 storkit: accept 391_bug_strip_prefix_ci_panics_on_multi_byte_utf_8_characters 2026-03-25 17:54:45 +00:00
dave eaac665a9f storkit: accept 389_story_whatsapp_phone_number_allowlist_authorization 2026-03-25 17:39:44 +00:00
dave d702aa59c4 storkit: accept 387_story_configurable_base_branch_name_in_project_toml 2026-03-25 17:30:43 +00:00
dave 9df9a1454a storkit: create 396_story_whatsapp_bot_startup_announcement_after_restart 2026-03-25 15:52:39 +00:00
dave 47163d235c storkit: done 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 15:44:38 +00:00
dave a7342fc9d3 storkit: done 393_story_pipeline_stage_notifications_for_whatsapp_and_slack_transports 2026-03-25 15:37:57 +00:00
dave 5dd8feb75c storkit: merge 393_story_pipeline_stage_notifications_for_whatsapp_and_slack_transports 2026-03-25 15:37:54 +00:00
dave f5024b2648 storkit: done 394_story_whatsapp_and_slack_permission_prompt_forwarding 2026-03-25 15:34:54 +00:00
dave 6521c83eec storkit: merge 394_story_whatsapp_and_slack_permission_prompt_forwarding 2026-03-25 15:34:50 +00:00
dave 65e3643655 storkit: done 395_refactor_fix_npm_deprecated_module_warnings 2026-03-25 15:31:30 +00:00
dave fc95b57a78 storkit: merge 395_refactor_fix_npm_deprecated_module_warnings 2026-03-25 15:31:20 +00:00
dave 7c1a970b13 storkit: create 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 15:21:39 +00:00
dave 64e2df20b7 storkit: create 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 15:21:32 +00:00
dave 90e3612fd3 storkit: create 395_refactor_fix_npm_deprecated_module_warnings 2026-03-25 15:17:02 +00:00
dave 962bfe37c6 storkit: create 394_story_whatsapp_and_slack_permission_prompt_forwarding 2026-03-25 15:02:00 +00:00
dave f05c6a42b0 storkit: done 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 14:46:06 +00:00
dave 077288e7b7 storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 14:46:02 +00:00
dave 580ab1ce68 storkit: create 393_story_pipeline_stage_notifications_for_whatsapp_and_slack_transports 2026-03-25 14:35:32 +00:00
Timmy 71a6c72614 Adding in a holding page for a website 2026-03-25 14:27:06 +00:00
dave fae7b3be20 storkit: create 388_story_whatsapp_webhook_hmac_signature_verification 2026-03-25 14:08:00 +00:00
dave 775b9ac7e3 storkit: create 388_story_whatsapp_webhook_hmac_signature_verification 2026-03-25 14:07:37 +00:00
dave 5a87d55dd4 storkit: done 391_bug_strip_prefix_ci_panics_on_multi_byte_utf_8_characters 2026-03-25 14:05:03 +00:00
dave 0457fbfecc storkit: merge 391_bug_strip_prefix_ci_panics_on_multi_byte_utf_8_characters 2026-03-25 14:04:59 +00:00
dave 13b16138b5 storkit: create 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 13:59:02 +00:00
dave 8249896449 storkit: create 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 13:58:06 +00:00
dave dbd932bf46 storkit: create 392_bug_strip_prefix_ci_panics_on_multi_byte_utf_8_characters 2026-03-25 13:55:49 +00:00
dave eef49678ce storkit: create 391_bug_strip_prefix_ci_panics_on_multi_byte_utf_8_characters 2026-03-25 13:54:13 +00:00
dave 58ee82c988 storkit: done 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 13:47:24 +00:00
dave 49ac23044a storkit: done 389_story_whatsapp_phone_number_allowlist_authorization 2026-03-25 13:42:35 +00:00
dave 84a775be77 storkit: merge 389_story_whatsapp_phone_number_allowlist_authorization 2026-03-25 13:42:31 +00:00
dave 60c0c95f38 storkit: done 387_story_configurable_base_branch_name_in_project_toml 2026-03-25 13:33:45 +00:00
dave a1a30bcc42 storkit: merge 387_story_configurable_base_branch_name_in_project_toml 2026-03-25 13:33:38 +00:00
dave 96ebd7ecb8 storkit: create 390_bug_whatsapp_missing_async_command_handlers_for_start_rebuild_reset_rmtree_assign 2026-03-25 13:22:16 +00:00
dave 25c8b1ec25 storkit: accept 386_story_unreleased_command_shows_list_of_stories_since_last_release 2026-03-25 02:13:11 +00:00
dave bcb7cfabee storkit: accept 385_story_slack_markdown_to_mrkdwn_formatting_conversion 2026-03-25 02:02:10 +00:00
dave d4dad1d556 storkit: accept 384_story_whatsapp_markdown_to_whatsapp_formatting_conversion 2026-03-25 01:34:08 +00:00
dave 195c7c51c4 storkit: create 389_story_whatsapp_phone_number_allowlist_authorization 2026-03-24 22:25:59 +00:00
dave 968d973cff storkit: create 388_story_whatsapp_webhook_hmac_signature_verification 2026-03-24 22:25:55 +00:00
dave 4394ab3fed storkit: done 386_story_unreleased_command_shows_list_of_stories_since_last_release 2026-03-24 22:23:05 +00:00
dave 11bbfca3da storkit: merge 386_story_unreleased_command_shows_list_of_stories_since_last_release 2026-03-24 22:23:01 +00:00
dave a9aa88b655 storkit: create 387_story_configurable_base_branch_name_in_project_toml 2026-03-24 22:20:49 +00:00
dave b62974dd88 storkit: create 387_story_configurable_base_branch_name_in_project_toml 2026-03-24 22:17:46 +00:00
dave ac52a8bb4e storkit: done 385_story_slack_markdown_to_mrkdwn_formatting_conversion 2026-03-24 22:12:39 +00:00
dave 18755aac96 storkit: merge 385_story_slack_markdown_to_mrkdwn_formatting_conversion 2026-03-24 22:12:31 +00:00
dave 5d37421f70 storkit: create 386_story_unreleased_command_shows_list_of_stories_since_last_release 2026-03-24 22:12:16 +00:00
dave 224d269971 storkit: create 385_story_slack_markdown_to_mrkdwn_formatting_conversion 2026-03-24 22:01:58 +00:00
dave 6146a173f1 storkit: done 384_story_whatsapp_markdown_to_whatsapp_formatting_conversion 2026-03-24 21:54:19 +00:00
dave 821345d266 storkit: accept 383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules 2026-03-24 21:41:42 +00:00
dave 0fa63e2de3 storkit: accept 382_story_whatsapp_transport_supports_twilio_api_as_alternative_to_meta_cloud_api 2026-03-24 21:33:45 +00:00
dave d8cbec8268 storkit: create 384_story_whatsapp_markdown_to_whatsapp_formatting_conversion 2026-03-24 21:33:44 +00:00
157 changed files with 20451 additions and 16485 deletions
+5
@@ -26,3 +26,8 @@ whatsapp_verify_token = "my-secret-verify-token"
# Maximum conversation turns to remember per user (default: 20).
# history_size = 20
# Optional: restrict which phone numbers can interact with the bot.
# When set, only listed numbers are processed; all others are silently ignored.
# When absent or empty, all numbers are allowed (open by default).
# whatsapp_allowed_phones = ["+15551234567", "+15559876543"]
@@ -22,3 +22,8 @@ twilio_whatsapp_number = "+14155238886"
# Maximum conversation turns to remember per user (default: 20).
# history_size = 20
# Optional: restrict which phone numbers can interact with the bot.
# When set, only listed numbers are processed; all others are silently ignored.
# When absent or empty, all numbers are allowed (open by default).
# whatsapp_allowed_phones = ["+15551234567", "+15559876543"]
+7 -2
@@ -11,12 +11,17 @@ max_coders = 3
# Maximum retries per story per pipeline stage before marking as blocked.
# Set to 0 to disable retry limits.
max_retries = 2
max_retries = 3
# Base branch name for this project. Worktree creation, merges, and agent prompts
# use this value for {{base_branch}}. When not set, falls back to auto-detection
# (reads current HEAD branch).
base_branch = "master"
[[component]]
name = "frontend"
path = "frontend"
setup = ["npm install", "npm run build"]
setup = ["npm ci", "npm run build"]
teardown = []
[[component]]
+43
@@ -0,0 +1,43 @@
# Example project.toml — copy to .storkit/project.toml and customise.
# This file is checked in; project.toml itself is gitignored (it may contain
# instance-specific settings).
# Project-wide default QA mode: "server", "agent", or "human".
# Per-story `qa` front matter overrides this setting.
default_qa = "server"
# Default model for coder agents. Only agents with this model are auto-assigned.
# Opus coders are reserved for explicit per-story `agent:` front matter requests.
default_coder_model = "sonnet"
# Maximum concurrent coder agents. Stories wait in 2_current/ when all slots are full.
max_coders = 3
# Maximum retries per story per pipeline stage before marking as blocked.
# Set to 0 to disable retry limits.
max_retries = 2
# Base branch name for this project. Worktree creation, merges, and agent prompts
# use this value for {{base_branch}}. When not set, falls back to auto-detection
# (reads current HEAD branch).
base_branch = "main"
[[component]]
name = "server"
path = "."
setup = ["cargo build"]
teardown = []
[[agent]]
name = "coder-1"
role = "Full-stack engineer"
stage = "coder"
model = "sonnet"
max_turns = 50
max_budget_usd = 5.00
prompt = """
You are working in a git worktree on story {{story_id}}.
Read CLAUDE.md first, then .storkit/README.md to understand the dev process.
Run: cd "{{worktree_path}}" && git difftool {{base_branch}}...HEAD
Commit all your work before your process exits.
"""
@@ -0,0 +1,24 @@
---
name: "WhatsApp webhook HMAC signature verification"
retry_count: 3
blocked: true
---
# Story 388: WhatsApp webhook HMAC signature verification
## User Story
As a bot operator, I want incoming WhatsApp webhook requests to be cryptographically verified, so that forged requests from unauthorized sources are rejected.
## Acceptance Criteria
- [ ] Meta webhooks: validate X-Hub-Signature-256 HMAC-SHA256 header using the app secret before processing
- [ ] Twilio webhooks: validate request signature using the auth token before processing
- [ ] Requests with missing or invalid signatures are rejected with 403 Forbidden
- [ ] Verification is fail-closed: if signature checking is configured, unsigned requests are rejected
- [ ] Existing bot.toml config is extended with any needed secrets (e.g. Meta app_secret for HMAC verification)
- [ ] MUST use audited crypto crates (hmac, sha2, sha1, base64) — no hand-rolled cryptographic primitives
## Out of Scope
- TBD
@@ -0,0 +1,40 @@
---
name: "Fly.io Machines API integration for multi-tenant storkit SaaS"
---
# Spike 408: Fly.io Machines API integration for multi-tenant storkit SaaS
## Question
Can we build a working Rust integration that creates and manages per-tenant Fly.io Machines, attaches volumes, injects Claude credentials, and proxies JWT-authenticated HTTP/WebSocket traffic to the right machine?
## Hypothesis
A thin Rust service using `reqwest` for the Machines API and `axum` for the reverse proxy is sufficient. No heavyweight orchestration framework needed.
## Prerequisites
- Fly.io account with API token (set `FLY_API_TOKEN` env var)
- Spike 407 findings reviewed
## Timebox
4 hours
## Investigation Plan
- [ ] Create a minimal Rust crate in `spikes/fly_machines/` — do not touch production code
- [ ] Implement machine lifecycle: create, start, stop, destroy via Fly Machines REST API using `reqwest`
- [ ] Test attaching a persistent volume to a machine and verify it persists across stop/start
- [ ] Test secret injection — pass a dummy `credentials.json` as a Fly secret and verify it's readable inside the machine
- [ ] Sketch the auth proxy: JWT validation → machine lookup → reverse proxy to machine's private IP; verify WebSocket proxying works
- [ ] Measure actual cold start time for a minimal storkit container image
- [ ] Document any API quirks, rate limits, or sharp edges discovered during testing
## Findings
- TBD
## Recommendation
- TBD
@@ -0,0 +1,22 @@
---
name: "Multi-account OAuth token rotation on rate limit"
---
# Story 411: Multi-account OAuth token rotation on rate limit
## User Story
As a storkit user with multiple Claude Max subscriptions, I want the system to automatically rotate to a different account when one gets rate limited, so that agents and chat don't stall out waiting for limits to reset.
## Acceptance Criteria
- [ ] OAuth login flow stores credentials per-account (keyed by email), not overwriting previous accounts
- [ ] GET /oauth/status returns all stored accounts and their status (active, rate-limited, expired)
- [ ] When the active account hits a rate limit, storkit automatically swaps to the next available account's refresh token, refreshes, and retries
- [ ] The bot sends a notification in Matrix/WhatsApp when it swaps accounts
- [ ] If all accounts are rate limited, the bot surfaces a clear message with the time until the earliest reset
- [ ] A new /oauth/authorize login adds to the account pool rather than replacing the current credentials
## Out of Scope
- TBD
@@ -0,0 +1,24 @@
---
name: "Recheck bot command to re-run gates without restarting agent"
---
# Story 412: Recheck bot command to re-run gates without restarting agent
## User Story
As a user, I want to send `recheck <number>` to the bot to re-run acceptance gates on an existing worktree without spawning a new agent, so that I can unblock stories that failed due to environment issues without wasting agent turns.
## Acceptance Criteria
- [ ] recheck command is registered in chat/commands/mod.rs and appears in help output
- [ ] `recheck <number>` runs run_acceptance_gates on the story's existing worktree
- [ ] If gates pass, the story advances through the pipeline (same as if a coder completed successfully)
- [ ] If gates fail, the error output is returned to the user (not silently retried)
- [ ] If no worktree exists for the story, returns a clear error
- [ ] Does not spawn a new agent or increment retry_count
- [ ] Works from all transports (Matrix, WhatsApp, Slack)
- [ ] Works from web UI slash commands
## Out of Scope
- TBD
@@ -0,0 +1,57 @@
---
name: "Mergemaster pipeline marks story done without verifying code landed on master"
retry_count: 1
---
# Bug 426: Mergemaster pipeline marks story done without verifying code landed on master
## Description
The mergemaster pipeline can mark a story as done even when the feature code never makes it to master. The cherry-pick step in merge.rs may fail or be skipped, but the pipeline still advances the story to done via the filesystem watcher. There is no post-merge verification that the code actually exists on master before marking done.
## How to Reproduce
Observed on stories 422 and 403. For 422: mergemaster created merge-queue branch, resolved 2 conflicts in chat/commands/mod.rs and http/mcp/mod.rs, passed quality gates, created merge-queue commit cb2ef6b (4 files, 333 insertions including unblock.rs). But the done commit on master (05db012) only moves the story file — zero code changes. There is no 'storkit: merge 422' commit on master at all. The feature branch (db3157f) still has the code but it was never cherry-picked onto master.
## Manual Merge Notes
When manually cherry-picking 422 onto master, two conflicts arose:
1. `server/src/chat/commands/mod.rs` — both 421 (timer) and 422 (unblock) added entries to the same BotCommand registry. Resolution: keep both.
2. `server/src/http/mcp/mod.rs` — 420 (loc_file) and 422 (unblock) both bumped the tool count assertion from 49→50. Resolution: keep loc_file assertion, bump count to 51.
Additionally, the cherry-pick could not proceed at all because the repository was checked out on the `merge-queue/424` branch with 3 unresolved files (notifications.rs, ws.rs, watcher.rs). A concurrent in-progress merge left the working tree dirty, which likely caused the original cherry-pick to fail silently. This suggests a race condition: the filesystem watcher commits (story file moves) can leave master in a state where the cherry-pick step in merge.rs fails.
## Full Audit of Done Stories (2026-03-28)
Audited all 10 stories in `5_done/` to check whether their code actually landed on master:
| Story | Merge Commit | Code on Master |
|-------|-------------|----------------|
| 417 — Split matrix/bot.rs | `665c036` (9 files, +1973/-1926) | YES |
| 418 — Split pool/auto_assign.rs | `d375c4b` (7 files, +1901/-1813) | YES |
| 419 — Matrix bot network error | `1193b7a` (1 file, +121/-3) | YES |
| 420 — loc file command | `d6f8239` (5 files, +112/-32) | YES |
| 421 — Timer command | `cf5424f` (7 files, +836) | YES |
| 422 — Unblock command | `6c6bc35` (4 files, +336) — manual cherry-pick | YES |
| 423 — Auto-schedule timer on rate limit | `b44f3a3` + `8ab2e19` (6 files, +375/-8) — manual cherry-pick | YES |
| **424 — Rate limit traffic light** | **None** | **NO — moved back to backlog for redo** |
| 425 — Chat notification on story block | `98b5475` (5 files, +184/-15) | YES |
| **427 — Text normalization for line breaks** | **None** | **NO — phantom done, code never landed** |
**4 out of 10 stories (422, 423, 424, 427) had broken merges.** 422 and 423 were fixed via manual cherry-pick. 424 was moved back to backlog for a fresh run. 427 also hit the same bug — marked done without code on master.
## Actual Result
Story moved to done with no code on master. The merge-queue commit exists on a detached branch but was never applied to master. No merge commit appears in git log on master.
## Expected Result
Pipeline should verify that the cherry-pick produced a merge commit on master before advancing to done. If cherry-pick fails or is missing, the story should remain in merge stage with a merge_failure flag.
## Acceptance Criteria
- [ ] Pipeline must not move a story to done unless a merge commit containing the feature code exists on master
- [ ] If cherry-pick fails or produces no code diff on master, the merge must be reported as failed
- [ ] Add a post-merge verification step that checks git log on master for the expected merge commit before advancing to done
- [ ] When verification fails, emit a merge_failure and leave the story in the merge stage for retry
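A sketch of what the verification step could check, written as a pure function over `git log --oneline` output so it is unit-testable. The `storkit: merge <id>` subject convention is taken from the reproduction notes above; the function name and signature are hypothetical, not the pipeline's real API:

```rust
/// Given the output of `git log --oneline <base>..HEAD` run on master after
/// the merge step, decide whether the story's merge commit actually landed.
/// A done-commit that only moves the story file will not match, so the
/// phantom-done cases (424, 427) would be caught here before advancing.
fn merge_landed_on_master(git_log_oneline: &str, story_id: u32) -> bool {
    let marker = format!("storkit: merge {story_id}");
    git_log_oneline.lines().any(|line| line.contains(&marker))
}
```

A stricter variant could additionally diff the merge commit and require at least one changed file outside the story directory, which would also catch a merge commit that somehow landed empty.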
@@ -0,0 +1,20 @@
---
name: "Server-side text normalization for chat message line breaks"
---
# Story 427: Server-side text normalization for chat message line breaks
## User Story
As a user reading bot messages in Matrix, I want single newlines between sentences to render correctly, so that messages don't show up with words joined together like "sentence one.Sentence two".
## Acceptance Criteria
- [ ] Add a text normalization step before markdown-to-HTML conversion in the Matrix transport that converts single newlines between non-empty prose lines into double newlines
- [ ] Preserve intentional single-newline formatting in bullet lists, headings, table rows, and code fences
- [ ] Apply the same normalization in WhatsApp and Slack transports
- [ ] Unit tests covering prose paragraphs, bullet lists, code blocks, and mixed content
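One way the normalization pass could look, assuming "prose line" means a non-empty line that is not a heading, bullet, table row, or fence marker. Numbered lists, indented code, and nested fences are left out of this sketch for brevity:

```rust
/// Insert a blank line between adjacent prose lines (outside code fences) so
/// single newlines render as paragraph breaks after markdown conversion.
fn normalize_line_breaks(text: &str) -> String {
    fn is_prose(line: &str) -> bool {
        let t = line.trim_start();
        !t.is_empty()
            && !t.starts_with('#')   // heading
            && !t.starts_with("- ")  // bullet
            && !t.starts_with("* ")
            && !t.starts_with('|')   // table row
            && !t.starts_with("```") // fence marker
    }
    let lines: Vec<&str> = text.lines().collect();
    let mut out: Vec<String> = Vec::new();
    let mut in_fence = false;
    for (i, line) in lines.iter().enumerate() {
        if line.trim_start().starts_with("```") {
            in_fence = !in_fence;
        }
        out.push(line.to_string());
        // Only separate two adjacent prose lines, and never inside a fence.
        if !in_fence && i + 1 < lines.len() && is_prose(line) && is_prose(lines[i + 1]) {
            out.push(String::new());
        }
    }
    out.join("\n")
}
```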
## Out of Scope
- TBD
@@ -0,0 +1,30 @@
---
name: "Split matrix/bot.rs into focused modules"
---
# Refactor 417: Split matrix/bot.rs into focused modules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/chat/transport/matrix/bot.rs (1926 lines) into focused submodules.
## Acceptance Criteria
- [ ] history.rs contains ConversationRole, ConversationEntry, RoomConversation, PersistedHistory, load_history, save_history and their unit tests
- [ ] context.rs contains BotContext struct
- [ ] run.rs contains run_bot main event loop
- [ ] messages.rs contains on_room_message, handle_message, format_user_prompt, is_permission_approval and their unit tests
- [ ] mentions.rs contains mentions_bot, contains_word, is_reply_to_bot and their unit tests
- [ ] verification.rs contains check_sender_verified, on_to_device_verification_request, handle_sas_verification and their unit tests
- [ ] format.rs contains markdown_to_html, format_startup_announcement and their unit tests
- [ ] mod.rs re-exports all public types
- [ ] Unit tests live in their respective module files
- [ ] No public API changes — all existing imports continue to work
## Out of Scope
- TBD
@@ -0,0 +1,28 @@
---
name: "Split pool/auto_assign.rs into submodules"
---
# Refactor 418: Split pool/auto_assign.rs into submodules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/agents/pool/auto_assign.rs (1813 lines) into focused submodules.
## Acceptance Criteria
- [ ] auto_assign.rs contains auto_assign_available_work and its unit tests
- [ ] reconcile.rs contains reconcile_on_startup and its unit tests
- [ ] watchdog.rs contains run_watchdog_once, spawn_watchdog, check_orphaned_agents and their unit tests
- [ ] scan.rs contains scan_stage_items, is_story_assigned_for_stage, count_active_agents_for_stage, find_free_agent_for_stage, is_agent_free and their unit tests
- [ ] story_checks.rs contains read_story_front_matter_agent, has_review_hold, is_story_blocked, has_merge_failure and their unit tests
- [ ] mod.rs wires the submodules and re-exports all public items
- [ ] Unit tests live in their respective module files
- [ ] No public API changes — all existing imports continue to work
## Out of Scope
- TBD
@@ -0,0 +1,29 @@
---
name: "Matrix bot crashes on transient network error instead of retrying"
---
# Bug 419: Matrix bot crashes on transient network error instead of retrying
## Description
The Matrix bot treats a transient sync error as fatal and stops entirely. A single failed HTTP request to the homeserver kills the bot, requiring a full server rebuild to recover.
## How to Reproduce
1. Run storkit with Matrix bot enabled
2. Homeserver becomes temporarily unreachable (network blip, DNS hiccup, server restart)
3. Bot hits sync error and crashes
## Actual Result
Bot logs "Fatal error: Matrix sync error: error sending request for url (...)" and stops responding. No retry, no recovery.
## Expected Result
Bot logs a warning, backs off with exponential delay, and retries the sync. Only crash on unrecoverable errors (invalid credentials, banned, etc.).
## Acceptance Criteria
- [ ] Transient network errors (connection refused, timeout, DNS failure) trigger a retry with exponential backoff
- [ ] Bot logs a warning on each failed retry attempt
- [ ] Bot resumes normal operation once the homeserver is reachable again
- [ ] Unrecoverable errors (401, 403) still cause a clean shutdown with a clear error message
- [ ] Bot sends a notification after recovering from a network outage
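The backoff schedule could be as simple as a capped doubling delay. The 2-second base and 5-minute cap below are illustrative assumptions, not values from the codebase:

```rust
use std::time::Duration;

/// Exponential backoff for failed Matrix syncs: 2s base, doubled per failed
/// attempt, capped at 5 minutes. `attempt` is the number of consecutive
/// failures so far (0 for the first retry).
fn sync_retry_delay(attempt: u32) -> Duration {
    const BASE_SECS: u64 = 2;
    const CAP_SECS: u64 = 300;
    // Clamp the shift so the multiplication cannot overflow for large counts.
    let secs = BASE_SECS.saturating_mul(1u64 << attempt.min(16));
    Duration::from_secs(secs.min(CAP_SECS))
}
```

The caller would reset the attempt counter to zero after the first successful sync, which is also the natural point to send the recovery notification.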
@@ -0,0 +1,23 @@
---
name: "loc for a specified file — bot command and web UI slash command"
---
# Story 420: loc for a specified file — bot command and web UI slash command
## User Story
As a developer, I want to send `loc <filepath>` to the bot or use it as a slash command in the web UI to see the line count for a specific file, so I can quickly check how large a file is without leaving my workflow.
## Acceptance Criteria
- [ ] loc <filepath> returns the line count for the specified file
- [ ] Relative paths are resolved against the project root
- [ ] If the file does not exist, returns a clear error
- [ ] Works from all transports (Matrix, WhatsApp, Slack)
- [ ] Works as a slash command in the web UI
- [ ] loc with no argument retains existing behavior (top files by line count)
- [ ] Exposed as an MCP tool so agents can query file line counts programmatically
## Out of Scope
- TBD
@@ -0,0 +1,24 @@
---
name: "Timer command for deferred agent start"
---
# Story 421: Timer command for deferred agent start
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] Bot command `timer <story_id> <HH:MM>` schedules a one-shot deferred start for the given story at the next occurrence of that time (server-local timezone)
- [ ] Bot command `timer list` shows all pending timers with story ID and scheduled time
- [ ] Bot command `timer cancel <story_id>` removes the pending timer for that story
- [ ] Timers are persisted to .storkit/timers.json so they survive server restarts
- [ ] A 30s tick loop (tokio task, same pattern as watchdog) checks for due timers and calls start_agent when triggered
- [ ] When a timer fires, the story must already be in current — timer does not move stories between stages
- [ ] Fired timers are removed after execution (one-shot, not recurring)
- [ ] Multiple timers for the same time are supported and respect agent slot contention via auto-assign
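The "next occurrence of HH:MM" rule reduces to modular arithmetic over seconds-of-day. A sketch under the assumption that the caller supplies the current server-local second-of-day; a value of 0 means the timer fires immediately:

```rust
/// Seconds until the next occurrence of HH:MM, given the current
/// second-of-day. If the target time has already passed today, the result
/// wraps to the same time tomorrow.
fn secs_until_next(now_sec_of_day: u32, hh: u32, mm: u32) -> u32 {
    const DAY: u32 = 24 * 60 * 60;
    let target = hh * 3600 + mm * 60;
    (target + DAY - now_sec_of_day) % DAY
}
```

The 30s tick loop would then fire any timer whose stored deadline is at or before the current wall-clock time, rather than re-deriving this delta on every tick.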
## Out of Scope
- TBD
@@ -0,0 +1,22 @@
---
name: "Unblock command to reset blocked stories"
---
# Story 422: Unblock command to reset blocked stories
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] Bot command `unblock <story_id>` clears blocked flag and resets retry_count to 0 on the story front matter
- [ ] Replies with confirmation including story ID and name
- [ ] Returns clear error if story is not found or not blocked
- [ ] Works from all transports (Matrix, WhatsApp, Slack)
- [ ] Exposed as an MCP tool so agents can unblock stories programmatically
- [ ] Works as a slash command in the web UI
## Out of Scope
- TBD
@@ -0,0 +1,22 @@
---
name: "Auto-schedule timer on rate limit to resume after reset"
---
# Story 423: Auto-schedule timer on rate limit to resume after reset
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] When a rate_limit_event with a hard block (not just allowed_warning) is received from the PTY stream, parse the reset time from rate_limit_info
- [ ] Automatically create a timer (via TimerStore from story 421) for the blocked story at the parsed reset time
- [ ] If a timer already exists for that story, update it to the later reset time rather than creating a duplicate
- [ ] Log the auto-scheduled timer with story ID, agent name, and scheduled resume time
- [ ] Notify chat transports that the story was rate-limited and will auto-resume at the scheduled time
- [ ] When the timer fires and restarts the agent, the existing worktree and committed work are preserved
## Out of Scope
- TBD
@@ -0,0 +1,23 @@
---
name: "Rate limit traffic light status and hard block alerts"
agent: coder-opus
---
# Story 424: Rate limit traffic light status and hard block alerts
## User Story
As a ..., I want ..., so that ...
## Acceptance Criteria
- [ ] Remove repetitive per-message throttle warnings (allowed_warning) from chat transports entirely
- [ ] Pipeline status messages show a coloured dot next to each work item: green for running normally, yellow for throttled, red for hard blocked, white/grey for idle/no agent
- [ ] Hard block events (429 / rate_limit_exceeded) still send an individual chat notification with a red icon, including the reset time
- [ ] Throttle and block state tracked per-agent so the status dot updates in real time
- [ ] Server-side logging of throttle warnings is preserved for debugging
- [ ] Traffic light dots in status report should be small/compact, not large emoji
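The per-agent state maps directly to a compact glyph. The enum below is a sketch (the real code derives the state from `AgentInfo.throttled` and the story's `blocked` front matter); the glyphs match the ○/●/◑/✗ set used in the status command:

```rust
/// Compact traffic-light dot for an agent's state in status output.
enum AgentState {
    Idle,
    Running,
    Throttled,
    Blocked,
}

fn traffic_light_dot(state: &AgentState) -> &'static str {
    match state {
        AgentState::Idle => "○",      // no agent / waiting
        AgentState::Running => "●",   // running normally
        AgentState::Throttled => "◑", // rate-limit warning received
        AgentState::Blocked => "✗",   // hard block / blocked story
    }
}
```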
## Out of Scope
- TBD
@@ -0,0 +1,20 @@
---
name: "Chat notification when a story blocks with reason"
---
# Story 425: Chat notification when a story blocks with reason
## User Story
As a project owner monitoring agent progress via chat, I want to receive a notification when a story gets blocked, including the reason, so that I can decide whether to unblock it or investigate the failure.
## Acceptance Criteria
- [ ] When a story transitions to blocked state, send a chat notification to all configured transports
- [ ] Notification includes the story ID, story name, and the reason for blocking (e.g. gate failure output, max retries exceeded, empty diff)
- [ ] Notification uses a red or warning icon to distinguish from normal status messages
- [ ] Works across Matrix, WhatsApp, and Slack transports
## Out of Scope
- TBD
@@ -0,0 +1,26 @@
---
name: "Split pool/pipeline.rs into submodules"
---
# Refactor 428: Split pool/pipeline.rs into submodules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/agents/pool/pipeline.rs (1789 lines) into focused submodules.
## Acceptance Criteria
- [ ] advance.rs contains run_pipeline_advance, spawn_pipeline_advance, should_block_story and their unit tests
- [ ] completion.rs contains run_server_owned_completion, report_completion and their unit tests
- [ ] merge.rs contains start_merge_agent_work, run_merge_pipeline, get_merge_status, set_merge_failure_reported and their unit tests
- [ ] mod.rs re-exports all public items and wires the submodules
- [ ] Unit tests live in their respective module files
- [ ] No public API changes — all existing imports continue to work
## Out of Scope
- TBD
@@ -1,6 +1,5 @@
---
name: "Work item titles render too large in expanded view"
merge_failure: "Merge pipeline infrastructure failure: squash merge committed successfully on merge-queue branch, but cherry-pick onto master failed with 'fatal: bad revision merge-queue/237_bug_work_item_titles_render_too_large_in_expanded_view'. The merge worktree setup also failed (ENOENT for .story_kit/merge_workspace — pnpm install, pnpm build, cargo check all skipped). The merge-queue branch appears to have been cleaned up before the cherry-pick step could reference it. Master is untouched."
---
# Bug 237: Work item titles render too large in expanded view
@@ -1,6 +1,5 @@
---
name: "Add refactor work item type"
merge_failure: "merge_agent_work tool returned empty output on two attempts. The merge-queue branch (merge-queue/254_story_add_refactor_work_item_type) was created with squash merge commit 27d24b2, and the merge workspace worktree exists at .story_kit/merge_workspace, but the pipeline never completed (no success/failure logged after MERGE-DEBUG calls). The stale merge workspace worktree may be blocking completion. Possibly related to bug 250 (merge pipeline cherry-pick fails with bad revision on merge-queue branch). Human intervention needed to: 1) clean up the merge-queue worktree and branch, 2) investigate why the merge pipeline hangs after creating the squash merge commit, 3) retry the merge."
---
# Story 254: Add refactor work item type
@@ -1,6 +1,5 @@
---
name: "Show agent logs in expanded story popup"
merge_failure: "merge_agent_work tool returned empty output. The merge pipeline created the merge-queue branch (merge-queue/255_story_show_agent_logs_in_expanded_story_popup) and merge workspace worktree at .story_kit/merge_workspace, but hung without completing. This is the same issue that affected story 254 — likely related to bug 250 (merge pipeline cherry-pick fails with bad revision on merge-queue branch). The stale merge workspace worktree on the merge-queue branch may be blocking completion. Human intervention needed to: 1) clean up the merge workspace worktree and merge-queue branch, 2) investigate the root cause in the merge pipeline (possibly the cherry-pick/fast-forward step after squash merge), 3) retry the merge."
---
# Story 255: Show agent logs in expanded story popup
@@ -0,0 +1,23 @@
---
name: "WhatsApp markdown-to-WhatsApp formatting conversion"
---
# Story 384: WhatsApp markdown-to-WhatsApp formatting conversion
## User Story
As a WhatsApp user, I want bot messages to use WhatsApp-native formatting instead of raw markdown, so that headers, bold text, and links render properly.
## Acceptance Criteria
- [ ] Headers (# ## ### etc.) are converted to bold text (*Header*) in WhatsApp messages
- [ ] Markdown bold (**text**) is converted to WhatsApp bold (*text*)
- [ ] Markdown strikethrough (~~text~~) is converted to WhatsApp strikethrough (~text~)
- [ ] Markdown links [text](url) are converted to readable format: text (url)
- [ ] Code blocks and inline code are preserved as-is (already compatible)
- [ ] Matrix bot formatting is completely unaffected (conversion only applied in WhatsApp send paths)
- [ ] Existing WhatsApp chunking (4096 char limit) still works correctly after conversion
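A minimal sketch of the conversion using plain string replacement: paired `**`/`~~` markers collapse to WhatsApp's single-character markers, and a leading-`#` line becomes `*Header*`. Code-fence protection, link rewriting, and edge cases (unpaired markers, a leading `#` that is not a heading) are omitted here:

```rust
/// Convert a subset of markdown to WhatsApp formatting: headers to *bold*,
/// **bold** to *bold*, ~~strike~~ to ~strike~. Line-oriented so headings can
/// be detected by their leading '#'.
fn markdown_to_whatsapp(md: &str) -> String {
    md.lines()
        .map(|line| {
            let converted = line.replace("**", "*").replace("~~", "~");
            let trimmed = converted.trim_start_matches('#');
            if trimmed.len() < converted.len() {
                // At least one leading '#': treat as a heading.
                format!("*{}*", trimmed.trim_start())
            } else {
                converted
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}
```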
## Out of Scope
- TBD
@@ -0,0 +1,23 @@
---
name: "Slack markdown-to-mrkdwn formatting conversion"
---
# Story 385: Slack markdown-to-mrkdwn formatting conversion
## User Story
As a Slack user, I want bot messages to use Slack-native mrkdwn formatting instead of raw markdown, so that headers, bold text, and links render properly.
## Acceptance Criteria
- [ ] Headers (# ## ### etc.) are converted to bold text (*Header*) in Slack messages
- [ ] Markdown bold (**text**) is converted to Slack bold (*text*)
- [ ] Markdown strikethrough (~~text~~) is converted to Slack strikethrough (~text~)
- [ ] Markdown links [text](url) are converted to Slack format: <url|text>
- [ ] Code blocks and inline code are preserved as-is (already compatible)
- [ ] WhatsApp and Matrix bot formatting are completely unaffected (conversion only applied in Slack send paths)
- [ ] Conversion is applied to all Slack send paths: command responses, LLM streaming, htop snapshots, delete responses, and slash command responses
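The link rewrite is the piece that differs most from the WhatsApp conversion. A sketch using a simple scan rather than regex; nested brackets and image syntax are not handled, and a production pass would also escape `&`, `<`, `>` per Slack's rules:

```rust
/// Rewrite markdown links `[text](url)` into Slack mrkdwn `<url|text>`.
fn rewrite_links_for_slack(text: &str) -> String {
    let mut out = String::new();
    let mut rest = text;
    while let Some(open) = rest.find('[') {
        // Look for the "](" and ")" that complete a markdown link.
        if let Some(mid) = rest[open..].find("](") {
            let mid = open + mid;
            if let Some(close) = rest[mid..].find(')') {
                let close = mid + close;
                let label = &rest[open + 1..mid];
                let url = &rest[mid + 2..close];
                out.push_str(&rest[..open]);
                out.push_str(&format!("<{url}|{label}>"));
                rest = &rest[close + 1..];
                continue;
            }
        }
        // No complete link: emit up to and including '[' and keep scanning.
        out.push_str(&rest[..=open]);
        rest = &rest[open + 1..];
    }
    out.push_str(rest);
    out
}
```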
## Out of Scope
- TBD
@@ -0,0 +1,22 @@
---
name: "Unreleased command shows list of stories since last release"
---
# Story 386: Unreleased command shows list of stories since last release
## User Story
As a user, I want a bot command and web UI slash command called "unreleased" that shows a list of stories completed since the last release, so that I can see what's ready to ship.
## Acceptance Criteria
- [ ] Bot command `unreleased` returns a list of stories merged to master since the last release tag
- [ ] Web UI slash command /unreleased returns the same list
- [ ] Each entry shows story number and name
- [ ] If there are no unreleased stories, a clear message is shown
- [ ] Command is registered in the help command output
- [ ] WhatsApp, Slack, and Matrix transports all support the command via the shared command dispatcher
## Out of Scope
- TBD
@@ -0,0 +1,23 @@
---
name: "Configurable base branch name in project.toml"
---
# Story 387: Configurable base branch name in project.toml
## User Story
As a project owner, I want to configure the main branch name in project.toml (e.g. "main", "master", "develop"), so that the system doesn't hardcode "master" and works with any branching convention.
## Acceptance Criteria
- [ ] New optional `base_branch` setting in project.toml (e.g. base_branch = "main")
- [ ] When set, all worktree creation, merge operations, and agent prompts use the configured branch name
- [ ] When not set, falls back to the existing auto-detection logic (detect_base_branch) which reads the current git branch
- [ ] The hardcoded "master" fallback in detect_base_branch is replaced by the project.toml setting when available
- [ ] Agent prompt template {{base_branch}} resolves to the configured value
- [ ] Existing projects without the setting continue to work unchanged (backwards compatible)
- [ ] project.toml.example uses base_branch = "main" as the example value; the actual project.toml uses base_branch = "master"
## Out of Scope
- TBD
@@ -0,0 +1,21 @@
---
name: "WhatsApp phone number allowlist authorization"
---
# Story 389: WhatsApp phone number allowlist authorization
## User Story
As a bot operator, I want to restrict which phone numbers can interact with the bot, so that only authorized users can send commands.
## Acceptance Criteria
- [ ] New optional allowed_phones list in bot.toml for WhatsApp (similar to Matrix allowed_users)
- [ ] When configured, only messages from listed phone numbers are processed; all others are silently ignored
- [ ] When not configured (empty or absent), all phone numbers are allowed (backwards compatible)
- [ ] Unauthorized senders are logged but receive no response
- [ ] The allowlist applies to all message types: commands, LLM conversations, and async commands (htop, delete)
## Out of Scope
- TBD
@@ -0,0 +1,31 @@
---
name: "WhatsApp missing async command handlers for start, rebuild, reset, rmtree, assign"
---
# Bug 390: WhatsApp missing async command handlers for start, rebuild, reset, rmtree, assign
## Description
Five bot commands listed in help don't work in WhatsApp. Matrix's on_room_message pre-dispatches these via extract_*_command() functions before calling try_handle_command(), but WhatsApp's handle_incoming_message only pre-dispatches htop and delete. The missing commands have fallback handlers that return None, so they silently fall through to the LLM instead of executing.
## How to Reproduce
1. Send "rebuild" (or "start 386", "reset", "rmtree 386", "assign 386 opus") to the WhatsApp bot
2. Observe the message is forwarded to the LLM instead of executing the command
## Actual Result
The 5 commands (start, rebuild, reset, rmtree, assign) fall through to the LLM and generate a conversational response instead of executing the bot command.
## Expected Result
All commands listed in help should work in WhatsApp, matching Matrix behavior. start should spawn an agent, rebuild should rebuild the server, reset should clear the session, rmtree should remove a worktree, assign should pre-assign a model.
## Acceptance Criteria
- [ ] start command works in WhatsApp (extract_start_command dispatch)
- [ ] rebuild command works in WhatsApp (extract_rebuild_command dispatch)
- [ ] reset command works in WhatsApp (extract_reset_command dispatch)
- [ ] rmtree command works in WhatsApp (extract_rmtree_command dispatch)
- [ ] assign command works in WhatsApp (extract_assign_command dispatch)
- [ ] Same 5 commands also work in Slack transport if similarly missing
- [ ] RETRY: Previous attempt was marked done without any code changes — the mergemaster moved the story to done but no async command handlers were actually added to whatsapp.rs. The fix must add extract_start_command, extract_rebuild_command, extract_reset_command, extract_rmtree_command, and extract_assign_command dispatch blocks to handle_incoming_message in whatsapp.rs, following the existing pattern used for htop and delete. Also check and fix Slack if similarly missing.
@@ -0,0 +1,27 @@
---
name: "strip_prefix_ci panics on multi-byte UTF-8 characters"
---
# Bug 391: strip_prefix_ci panics on multi-byte UTF-8 characters
## Description
strip_prefix_ci in commands/mod.rs slices text by byte offset using prefix.len(), which panics when the slice boundary falls inside a multi-byte UTF-8 character (e.g. right single quote U+2019, emojis). The function assumes ASCII-safe byte boundaries but real WhatsApp/Matrix messages contain Unicode.
## How to Reproduce
1. Send a message to the bot containing a smart quote or emoji within the first N bytes (where N = bot name length)
2. e.g. "For now let’s just deal with it" where the bot name prefix check slices at byte 12, inside the 3-byte ’ (U+2019) character
## Actual Result
Thread panics: "byte index 12 is not a char boundary; it is inside '’' (bytes 11..14)"
## Expected Result
The function should safely handle multi-byte UTF-8 without panicking. If the slice boundary isn't a char boundary, the prefix doesn't match — return None.
## Acceptance Criteria
- [ ] strip_prefix_ci does not panic on messages containing multi-byte UTF-8 characters (smart quotes, emojis, CJK, etc.)
- [ ] Use text.get(..prefix.len()) or text.is_char_boundary() instead of direct indexing
- [ ] Add test cases for messages with emojis and smart quotes
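The fix named in the criteria can be sketched directly: `str::get` returns `None` on a non-char-boundary index instead of panicking, which doubles as "the prefix doesn't match". This assumes the prefix itself (a bot name) is ASCII, so a byte-length slice is the right width for the case-insensitive compare:

```rust
/// Char-boundary-safe, case-insensitive (ASCII) prefix strip. Returns the
/// remainder after the prefix, or None when the prefix does not match —
/// including when `prefix.len()` would land inside a multi-byte character.
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    // `get` yields None on an invalid boundary instead of panicking.
    let head = text.get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix) {
        // The boundary was just validated, so this slice cannot panic.
        Some(&text[prefix.len()..])
    } else {
        None
    }
}
```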
@@ -0,0 +1,27 @@
---
name: "Extract shared transport utilities from matrix module into chat submodule"
agent: "coder-opus"
---
# Refactor 392: Extract shared transport utilities from matrix module into chat submodule
## Current State
- TBD
## Desired State
Several functions currently living in the matrix transport module are used by all transports (WhatsApp, Slack, Matrix). These should be pulled up into a shared location under the chat module. Candidates include: strip_prefix_ci, strip_bot_mention, try_handle_command, drain_complete_paragraphs, markdown_to_whatsapp (pattern could generalize), chunk_for_whatsapp, and the command dispatch infrastructure. A chat::util or chat::text submodule would be a natural home for string utilities like strip_prefix_ci. The command dispatch (try_handle_command, CommandDispatch, BotCommand registry) could live in chat::commands.
## Acceptance Criteria
- [ ] Shared string utilities (strip_prefix_ci, strip_bot_mention, drain_complete_paragraphs) moved to a chat::util or chat::text submodule
- [ ] Command dispatch infrastructure (try_handle_command, CommandDispatch, BotCommand, command registry) moved to chat::commands
- [ ] Per-transport formatting functions (markdown_to_whatsapp, markdown_to_slack) remain in their respective transport modules
- [ ] All transports import from the new shared location instead of reaching into matrix::
- [ ] No functional changes — purely structural refactor
- [ ] All existing tests pass and move with their code
## Out of Scope
- TBD
@@ -0,0 +1,23 @@
---
name: "Pipeline stage notifications for WhatsApp and Slack transports"
---
# Story 393: Pipeline stage notifications for WhatsApp and Slack transports
## User Story
As a WhatsApp or Slack user, I want to receive pipeline stage transition notifications (e.g. "story moved from Current to QA") just like Matrix users do, so I can track story progress from any transport.
## Acceptance Criteria
- [ ] WhatsApp transport spawns a notification listener at startup using the existing spawn_notification_listener infrastructure
- [ ] Slack transport spawns a notification listener at startup using the same infrastructure
- [ ] Notifications are sent to all active ambient senders/channels for the respective transport
- [ ] Stage transition notifications (story moved between pipeline stages) are delivered
- [ ] Error notifications (story failures) are delivered
- [ ] Rate limit warnings are delivered with debouncing
- [ ] Matrix notification behavior is completely unaffected
## Out of Scope
- TBD
@@ -0,0 +1,23 @@
---
name: "WhatsApp and Slack permission prompt forwarding"
---
# Story 394: WhatsApp and Slack permission prompt forwarding
## User Story
As a WhatsApp or Slack user, I want permission requests from Claude Code to be forwarded to my chat so I can approve or deny them, rather than having them silently fail.
## Acceptance Criteria
- [ ] Permission requests are sent as messages to the WhatsApp sender with tool name and input details
- [ ] User can reply yes/y/approve or no/n/deny to approve or deny the permission
- [ ] Permission requests time out and auto-deny (fail-closed) if not answered within the configured timeout
- [ ] Slack receives the same permission forwarding treatment
- [ ] Reuses the existing permission channel infrastructure (perm_rx, PermissionForward, PermissionDecision)
- [ ] Matrix permission handling is completely unaffected
- [ ] handle_llm_message uses a tokio::select! loop (like Matrix bot.rs) to listen for both LLM output and permission requests concurrently
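The fail-closed timeout rule can be sketched in isolation. This is a synchronous stdlib sketch (the real handler is async, racing `perm_rx` against LLM output in `tokio::select!`); `PermissionDecision` here is a minimal stand-in, not the server crate's actual type:

```rust
use std::sync::mpsc;
use std::time::Duration;

// Minimal stand-in for the real PermissionDecision type.
#[derive(Debug, PartialEq)]
enum PermissionDecision {
    Approve,
    Deny,
}

// Fail-closed rule: wait for a user reply up to `timeout`;
// no answer within the window means Deny.
fn await_decision(
    rx: &mpsc::Receiver<PermissionDecision>,
    timeout: Duration,
) -> PermissionDecision {
    rx.recv_timeout(timeout).unwrap_or(PermissionDecision::Deny)
}
```

The key property is that every code path yields a decision — a dropped channel or an expired timeout both collapse to `Deny`.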
## Out of Scope
- TBD
@@ -0,0 +1,24 @@
---
name: "Fix npm deprecated module warnings"
---
# Refactor 395: Fix npm deprecated module warnings
## Current State
- TBD
## Desired State
Address npm warnings about deprecated modules in the frontend dependencies. Update or replace deprecated packages to eliminate warnings during npm install.
## Acceptance Criteria
- [ ] npm install runs with zero deprecation warnings
- [ ] All existing frontend tests (npm test) still pass
- [ ] npm run build succeeds without errors
- [ ] No functional regressions in the frontend
## Out of Scope
- TBD
@@ -0,0 +1,21 @@
---
name: "WhatsApp bot startup announcement after restart"
---
# Story 396: WhatsApp bot startup announcement after restart
## User Story
As a WhatsApp user, I want the bot to announce its presence when it starts up or restarts, like it does in Matrix, so I know it's back online and ready.
## Acceptance Criteria
- [ ] Bot sends a startup message to all known WhatsApp senders (from conversation history or ambient rooms) when the server starts
- [ ] Startup message includes the bot name and indicates it is online/ready
- [ ] Slack transport gets the same startup announcement treatment
- [ ] Matrix startup announcement behavior is unaffected
- [ ] After a rebuild command, the new process sends the announcement on startup
## Out of Scope
- TBD
@@ -0,0 +1,30 @@
---
name: "Selection screen directory picker unreadable in dark mode"
---
# Bug 397: Selection screen directory picker unreadable in dark mode
## Description
The ProjectPathInput component in the selection screen uses hardcoded light-theme inline styles (white backgrounds, dark borders, dark text highlights) that don't adapt to dark mode. When the browser/OS uses dark mode, the global CSS sets text color to #f6f6f6 (white) but the dropdown keeps background: #fff — resulting in white text on a white background, making the directory picker completely unreadable.
## How to Reproduce
1. Run storkit under Docker (or locally) with a browser set to dark mode (prefers-color-scheme: dark).
2. Open http://localhost:3001 in the browser.
3. Click into the project path input and start typing a path to trigger the autocomplete dropdown.
## Actual Result
The suggestion dropdown has white background with white/light text inherited from the dark-mode global styles. Match highlights use color: #222 which is barely visible. The close button and header bar also use light-only colors. The entire directory picker is effectively unreadable.
## Expected Result
The directory picker dropdown should be readable in both light and dark mode. Colors for background, text, borders, and highlights should adapt to the active color scheme.
## Acceptance Criteria
- [ ] ProjectPathInput dropdown is readable in dark mode (prefers-color-scheme: dark)
- [ ] ProjectPathInput dropdown remains readable in light mode
- [ ] Suggestion highlight text is visible against the dropdown background in both themes
- [ ] No hardcoded light-only colors remain in ProjectPathInput inline styles
@@ -0,0 +1,31 @@
---
name: "CLI --port flag with project.toml persistence"
---
# Story 399: CLI --port flag with project.toml persistence
## User Story
As a developer, I want to set the server port via a --port CLI flag that persists to project.toml, so that I don't have to remember an environment variable on every run.
## Acceptance Criteria
- [ ] `storkit --help` shows a `--port` option
- [ ] `storkit --port 4000` starts the server on port 4000
- [ ] After first run with `--port`, the port is saved to `project.toml`
- [ ] On subsequent runs without `--port`, the port from `project.toml` is used
- [ ] CLI `--port` overrides the value in `project.toml`
- [ ] Default port is 3001 when neither `--port` nor `project.toml` port is set
- [ ] `STORKIT_PORT` env var is removed — no longer read or respected
- [ ] `.storkit_port` lock file mechanism is removed (`write_port_file` / `remove_port_file`)
## Out of Scope
- Docker compose changes (can update `STORKIT_PORT` references separately)
- Adding other CLI flags beyond `--port`
## Technical Notes
Port resolution priority: `--port` flag > `project.toml` `port` field > default 3001
The port should be written to `project.toml` on startup so subsequent runs remember it. Use the existing `config.rs` / `ProjectConfig` struct — add a `port` field.
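The resolution rule above can be sketched as a single function (parameter names are illustrative, not the actual clap/`ProjectConfig` field names):

```rust
// Port resolution for story 399: --port flag > project.toml port > 3001.
fn resolve_port(cli_port: Option<u16>, toml_port: Option<u16>) -> u16 {
    cli_port.or(toml_port).unwrap_or(3001)
}
```

Whatever this returns is then written back to `project.toml`, so a one-off `--port 4000` becomes sticky on subsequent runs.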
@@ -0,0 +1,45 @@
---
name: "WhatsApp and Slack missing reset command handler"
---
# Bug 400: WhatsApp and Slack missing reset command handler
## Description
The reset command has a fallback handler in chat/commands/mod.rs that returns None with a comment saying it's handled before try_handle_command. This is only true for Matrix. WhatsApp and Slack don't have pre-dispatch handling, so None causes fallthrough to LLM. This caused a real outage when stale session IDs couldn't be cleared via the bot after switching from Docker to bare-metal.
## Implementation Note
Follow the **rebuild pattern** established in story 402, with one complication: `handle_reset` in `server/src/chat/transport/matrix/reset.rs` takes a Matrix-specific `ConversationHistory` (`Arc<TokioMutex<HashMap<OwnedRoomId, RoomConversation>>>`), so it cannot be called directly from WhatsApp or Slack.
**WhatsApp session storage** (`server/src/chat/transport/whatsapp.rs`):
- Type: `WhatsAppConversationHistory = Arc<TokioMutex<HashMap<String, RoomConversation>>>` (key = sender phone number)
- Persisted to `.storkit/whatsapp_history.json` via `save_whatsapp_history`
**Slack session storage** (`server/src/chat/transport/slack.rs`):
- Type: `SlackConversationHistory = Arc<TokioMutex<HashMap<String, RoomConversation>>>` (key = channel ID)
- Persisted to `.storkit/slack_history.json` via `save_slack_history`
**Approach:**
- Use `extract_reset_command` from `server/src/chat/transport/matrix/reset.rs` to detect the command (it works transport-agnostically)
- Implement the reset inline in each transport's async message handler: clear `session_id` and `entries` for the sender/channel key, call the transport's own `save_*_history`, reply with confirmation
- Add async intercepts in `whatsapp.rs` (~line 1107, after the rebuild intercept) and `slack.rs` (~line 845, after the rebuild intercept)
- The fallback handler in `chat/commands/mod.rs` (`handle_reset_fallback`) stays as-is
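The inline reset itself is small. A stdlib sketch of the transport-agnostic core (this `RoomConversation` is a minimal stand-in with only the two fields the reset touches; the real struct lives in the server crate):

```rust
use std::collections::HashMap;

// Minimal stand-in for the real RoomConversation.
#[derive(Default)]
struct RoomConversation {
    session_id: Option<String>,
    entries: Vec<String>,
}

// Clear the session for one sender/channel key.
// Returns true if a conversation existed for that key.
fn reset_conversation(
    history: &mut HashMap<String, RoomConversation>,
    key: &str,
) -> bool {
    match history.get_mut(key) {
        Some(conv) => {
            conv.session_id = None;
            conv.entries.clear();
            true
        }
        None => false,
    }
}
```

Each transport then calls its own `save_*_history` after this and replies with the confirmation message.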
## How to Reproduce
1. Configure bot with transport = "whatsapp" or "slack"
2. Send "reset" to the bot
3. Check server logs
## Actual Result
Log shows "No command matched, forwarding to LLM" — reset is sent to the LLM as a conversational message instead of clearing the session.
## Expected Result
The bot clears the sender's session_id from conversation history and replies with confirmation like "Session cleared."
## Acceptance Criteria
- [ ] WhatsApp transport handles reset command: clears sender session_id and replies with confirmation
- [ ] Slack transport handles reset command: clears channel session_id and replies with confirmation
- [ ] Fallback handler in chat/commands/mod.rs no longer silently swallows the reset command
@@ -0,0 +1,35 @@
---
name: "WhatsApp and Slack missing start command handler"
---
# Bug 401: WhatsApp and Slack missing start command handler
## Description
The start command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
## Implementation Note
Follow the **rebuild pattern** established in story 402.
- `extract_start_command` and `handle_start` already exist in `server/src/chat/transport/matrix/start.rs`
- Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
- Call `crate::chat::transport::matrix::start::extract_start_command` to detect the command, then `crate::chat::transport::matrix::start::handle_start` to execute it
- The fallback handler in `chat/commands/mod.rs` (`handle_start_fallback`) stays as-is — it exists only so `help` lists the command
## How to Reproduce
1. Configure bot with transport = "whatsapp" or "slack"
2. Send "start <story_id>" to the bot
3. Check server logs
## Actual Result
Command falls through to LLM instead of starting an agent.
## Expected Result
The bot starts an agent for the specified story and replies with confirmation.
## Acceptance Criteria
- [ ] WhatsApp transport handles start command: starts agent and replies with confirmation
- [ ] Slack transport handles start command: starts agent and replies with confirmation
@@ -0,0 +1,26 @@
---
name: "WhatsApp and Slack missing rebuild command handler"
---
# Bug 402: WhatsApp and Slack missing rebuild command handler
## Description
The rebuild command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
## How to Reproduce
1. Configure bot with transport = "whatsapp" or "slack"
2. Send "rebuild" to the bot
3. Check server logs
## Actual Result
Command falls through to LLM instead of triggering a server rebuild.
## Expected Result
The bot triggers a server rebuild and replies with confirmation.
## Acceptance Criteria
- [ ] WhatsApp transport handles rebuild command: triggers rebuild and replies with confirmation
- [ ] Slack transport handles rebuild command: triggers rebuild and replies with confirmation
@@ -0,0 +1,37 @@
---
name: "WhatsApp and Slack missing rmtree command handler"
retry_count: 2
blocked: true
---
# Bug 403: WhatsApp and Slack missing rmtree command handler
## Description
The rmtree command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
## Implementation Note
Follow the **rebuild pattern** established in story 402.
- `extract_rmtree_command` and `handle_rmtree` already exist in `server/src/chat/transport/matrix/rmtree.rs`
- Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
- Call `crate::chat::transport::matrix::rmtree::extract_rmtree_command` to detect the command, then `crate::chat::transport::matrix::rmtree::handle_rmtree` to execute it
- The fallback handler in `chat/commands/mod.rs` (`handle_rmtree_fallback`) stays as-is — it exists only so `help` lists the command
## How to Reproduce
1. Configure bot with transport = "whatsapp" or "slack"
2. Send "rmtree <story_id>" to the bot
3. Check server logs
## Actual Result
Command falls through to LLM instead of removing the worktree.
## Expected Result
The bot removes the worktree for the specified story and replies with confirmation.
## Acceptance Criteria
- [ ] WhatsApp transport handles rmtree command: removes worktree and replies with confirmation
- [ ] Slack transport handles rmtree command: removes worktree and replies with confirmation
@@ -0,0 +1,36 @@
---
name: "WhatsApp and Slack missing assign command handler"
---
# Bug 404: WhatsApp and Slack missing assign command handler
## Description
The assign command has a fallback handler in chat/commands/mod.rs that returns None. Only Matrix has pre-dispatch handling for this command. On WhatsApp and Slack, the command falls through to the LLM path.
## Implementation Note
Follow the **rebuild pattern** established in story 402.
- `extract_assign_command` and `handle_assign` already exist in `server/src/chat/transport/matrix/assign.rs`
- Add an async intercept in `server/src/chat/transport/whatsapp.rs` (see rebuild intercept ~line 1107) and `server/src/chat/transport/slack.rs` (see rebuild intercept ~line 845)
- Call `crate::chat::transport::matrix::assign::extract_assign_command` to detect the command, then `crate::chat::transport::matrix::assign::handle_assign` to execute it
- The fallback handler in `chat/commands/mod.rs` (`handle_assign_fallback` — note: the registry entry for `assign` currently calls `assign::handle_assign` synchronously; verify this doesn't conflict) stays as-is for `help` listing
- The fallback in `chat/commands/assign.rs` may need to return `None` instead of a real response once the async path handles it
## How to Reproduce
1. Configure bot with transport = "whatsapp" or "slack"
2. Send "assign <story_id> <agent>" to the bot
3. Check server logs
## Actual Result
Command falls through to LLM instead of assigning the agent.
## Expected Result
The bot assigns the specified agent to the story and replies with confirmation.
## Acceptance Criteria
- [ ] WhatsApp transport handles assign command: assigns agent and replies with confirmation
- [ ] Slack transport handles assign command: assigns agent and replies with confirmation
@@ -0,0 +1,30 @@
---
name: "Auto-refresh expired OAuth token for Claude Code PTY"
---
# Story 405: Auto-refresh expired OAuth token for Claude Code PTY
## User Story
As a storkit user with a Claude Max subscription, I want the server to automatically refresh my expired OAuth token so that chat, Matrix, and WhatsApp integrations don't stop working when the token expires.
## Acceptance Criteria
### Detection
- [ ] When the Claude Code PTY returns an `authentication_failed` error, storkit detects it instead of passing the raw 401 JSON to the user
### Auto-refresh (credentials exist, refresh token valid)
- [ ] Storkit reads the OAuth refresh token from `~/.claude/.credentials.json`
- [ ] Storkit calls the Anthropic OAuth token refresh endpoint (`https://console.anthropic.com/v1/oauth/token` with `grant_type=refresh_token`) to obtain a new access token
- [ ] Storkit writes the refreshed access token (and new expiresAt) back to `~/.claude/.credentials.json`
- [ ] After a successful refresh, storkit automatically retries the original chat request
- [ ] The refresh+retry is transparent to the user — they see no error
### Full login required (no credentials, or refresh token also expired)
- [ ] If `.credentials.json` doesn't exist or the refresh call itself fails, storkit surfaces a clear error: "OAuth session expired. Please run `claude login` to re-authenticate."
- [ ] The error message is surfaced through the normal chat stream (not just server logs)
## Out of Scope
- Implementing the full interactive `claude login` browser OAuth flow inside storkit
- Proactive token refresh before expiry (refreshing on demand when the error occurs is sufficient)
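The two criteria groups above reduce to a small decision table. A sketch, assuming `expiresAt` is a millisecond timestamp in `~/.claude/.credentials.json` (the exact field name and units are assumptions to verify against the real file):

```rust
#[derive(Debug, PartialEq)]
enum AuthAction {
    UseAccessToken,
    RefreshToken,
    FullLogin,
}

// Decide what to do when a request needs auth.
fn next_auth_action(creds_exist: bool, expires_at_ms: Option<u64>, now_ms: u64) -> AuthAction {
    if !creds_exist {
        // No credentials file: surface the "run `claude login`" error.
        return AuthAction::FullLogin;
    }
    match expires_at_ms {
        Some(exp) if now_ms < exp => AuthAction::UseAccessToken,
        // Expired (or no expiry recorded): attempt a refresh. If the
        // refresh call itself fails, the caller falls back to FullLogin.
        _ => AuthAction::RefreshToken,
    }
}
```

In practice the trigger is the `authentication_failed` PTY error rather than a clock check, but the fallback ordering is the same.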
@@ -0,0 +1,21 @@
---
name: "Browser-based OAuth login flow from web UI and chat integrations"
---
# Story 406: Browser-based OAuth login flow from web UI and chat integrations
## User Story
As a new storkit user (or one whose refresh token has expired), I want to complete the full Claude OAuth login flow from the web UI, Matrix, or WhatsApp so that I don't need terminal access to run `claude login`.
## Acceptance Criteria
- [ ] From the web UI, the user can initiate OAuth login — storkit generates the Anthropic authorize URL and opens it in a new tab
- [ ] After the user authenticates in the browser, the OAuth callback writes accessToken, refreshToken, and expiresAt to ~/.claude/.credentials.json
- [ ] From Matrix or WhatsApp, storkit sends the user a clickable OAuth authorize link when credentials are missing or fully expired
- [ ] After successful login, the user can immediately start chatting without restarting storkit
- [ ] If the OAuth callback fails or the user cancels, a clear error is shown
## Out of Scope
- TBD
@@ -0,0 +1,195 @@
---
name: "Fly.io Machines for multi-tenant storkit SaaS — docs, security & pricing"
retry_count: 2
blocked: true
---
# Spike 407: Fly.io Machines for multi-tenant storkit SaaS — docs, security & pricing
## Question
What do Fly.io's published docs, security claims, and pricing say about using Machines as the isolation layer for a multi-tenant storkit SaaS? Is there anything that rules it out before we write code?
## Hypothesis
Fly.io Machines (Firecracker-based microVMs) are a viable isolation primitive for tenants running arbitrary shell commands, and the pricing model is workable at early SaaS scale.
## Timebox
2 hours
## Investigation Plan
- [x] Read Fly.io Machines API docs — what are the core primitives (machine lifecycle, networking, volumes, secrets)?
- [x] Research Fly.io's published isolation model — what security guarantees do they document for Firecracker microVMs? Summarise claims and explicitly flag what would require independent security review before production use.
- [x] Research cold start time — what do Fly.io docs and community benchmarks claim? Note that real numbers require a test account (covered in spike 408).
- [x] Research persistent volume support — can a volume be attached per-tenant? What are the size/count limits?
- [x] Research secret injection options — env vars, Fly Secrets API, volume mounts. What's the right approach for per-tenant `~/.claude/.credentials.json`?
- [x] Research machine count and org limits — any hard caps that would block SaaS growth?
- [x] Research pricing — always-on vs stop-on-idle machine costs at 10, 100, 1000 tenants. Include volume and egress costs.
- [x] Identify any documented showstoppers.
## Findings
### 1. Core API Primitives
Base URL: `https://api.machines.dev` (or `http://_api.internal:4280` from within 6PN).
Auth: `Authorization: Bearer <fly_api_token>`.
**Machine lifecycle** — full REST API:
- `POST /v1/apps/{app}/machines` — create (+ optionally start via `skip_launch: false`)
- `POST /v1/apps/{app}/machines/{id}/start` — start stopped machine (~10ms same-region)
- `POST /v1/apps/{app}/machines/{id}/stop` — stop (SIGINT/SIGKILL, retains disk)
- `POST /v1/apps/{app}/machines/{id}/suspend` — snapshot RAM to disk for fast resume
- `DELETE /v1/apps/{app}/machines/{id}` — destroy (irreversible)
- `GET /v1/apps/{app}/machines/{id}/wait?state=started` — synchronize on state transitions
Machine states: `created → started → stopped/suspended → destroyed`.
Leases (`POST .../lease`) provide exclusive mutation locks — useful for orchestration.
**Rate limits**: 1 req/s per action per machine/app ID (burst to 3). Matters for rapid tenant provisioning.
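The state line above can be encoded as a transition check for orchestration code. This models only the happy-path edges named in the docs summary (start/stop/suspend/resume/destroy); the real API has more nuance not captured here:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum MachineState {
    Created,
    Started,
    Stopped,
    Suspended,
    Destroyed,
}

// Documented happy-path transitions; destroy is irreversible.
fn can_transition(from: MachineState, to: MachineState) -> bool {
    use MachineState::*;
    matches!(
        (from, to),
        (Created, Started)
            | (Started, Stopped)
            | (Started, Suspended)
            | (Stopped, Started)    // ~10ms same-region restart
            | (Suspended, Started)  // resume from RAM snapshot
            | (Stopped, Destroyed)
            | (Suspended, Destroyed)
    )
}
```

A provisioning loop would pair this with the `wait?state=...` endpoint to synchronize on each edge.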
### 2. Isolation Model
Each Fly Machine is a **Firecracker microVM** — a separate Linux kernel, not a container. Defense in depth:
1. KVM hardware-enforced memory and CPU isolation
2. Minimal device model (5 virtual devices vs QEMU's hundreds)
3. Rust VMM implementation (no C memory-safety bugs in VMM)
4. `seccomp-bpf` limits Firecracker process to ~40 syscalls with argument filters
5. Jailer chroots + namespaces + drops privileges around the Firecracker process
From official docs: *"MicroVMs provide strong hardware-virtualization-based security and workload isolation, which allows us to safely run applications from different customers on shared hardware."* Full VM isolation prevents kernel sharing between apps.
Tenants have full root inside their VM by design — the kernel boundary contains blast radius.
**Claims requiring independent verification before production use:**
- Whether SMT/hyperthreading is disabled on hosts (directly relevant to Spectre/MDS side-channel attacks — Firecracker's own docs recommend disabling SMT for strict multi-tenancy, but Fly.io does not publicly document this)
- CPU dedication is explicitly described as "best-effort", not a hard guarantee
- Pentest scope/dates/findings for three named firms (Atredis Partners, Doyensec, Tetrel) are not published
- Whether the SOC 2 Type II report scope covers the Firecracker isolation layer specifically
**Compliance**: SOC 2 Type II certified (report available on request), ISO 27001 datacenters (Equinix), HIPAA BAA available, GDPR DPA available.
### 3. Network Isolation
Each machine gets a private IPv6 (6PN) address. Key isolation controls:
- Cross-organization: Fly.io platform blocks all cross-org traffic at the platform level — strong boundary
- Intra-organization: **open by default** — any machine in the same org can reach any other
For multi-tenant SaaS, this means tenant machines in the same Fly.io org are NOT network-isolated from each other unless you use **Custom Private Networks (6PNs)**:
- `POST /v1/apps` with a `network` field assigns that app to an isolated 6PN
- Apps on different 6PNs cannot reach each other via private networking (only via public IPs)
- **Assignment is permanent** — cannot be changed after app creation; plan upfront
Stable machine addressing: `<machine_id>.vm.<appname>.internal` (6PN addresses change on migration).
### 4. Cold Start Times
| Scenario | Documented Latency |
|---|---|
| Cold boot (create + start, same region) | ~300 ms |
| Start existing stopped machine (same region) | ~10 ms |
| Start stopped machine (cross-region) | ~150 ms |
| Resume from suspend (same region) | Sub-100ms (implied) |
Community-observed: 400–600ms end-to-end (including app init) for stopped machine cold starts.
FLAME workloads report 3–8s in some restart-race conditions.
Real latency numbers with our actual image size require a test account — covered by spike 408.
### 5. Persistent Volume Support
- Volumes are created via `POST /v1/apps/{app}/volumes` with `size_gb` (default 3 GB), region, encryption flag
- Attached to machine via `config.mounts[].volume` at create/update time
- **1:1 constraint**: one volume per machine, one machine per volume, same region required
- Volumes persist across machine stop/start/suspend/destroy — they are a separate resource
- Can extend volume online (`PUT .../volumes/{id}/extend`)
- Volume snapshots available (billed at $0.08/GB/month as of Jan 2026)
- No documented per-org volume count cap (separate from machine cap)
For per-tenant `~/.claude/` home directories, attach one volume per tenant machine — straightforward.
### 6. Secret Injection
Four methods, in order of recommendation for sensitive credentials:
1. **Fly Secrets** (`fly secrets set KEY=value`) — encrypted at rest, injected as env vars at boot to all machines in the app. **Secrets are per-app, not per-machine** — all machines in an app share the same secret set. For per-tenant isolated secrets, each tenant needs their own app (or use method 3).
2. **`config.files` with `secret_name`** — writes a named secret to a file path inside the machine at start time:
```json
{"guest_path": "/root/.claude/.credentials.json", "secret_name": "TENANT_CREDENTIALS"}
```
This is the right approach for per-tenant `~/.claude/.credentials.json` if tenants share an app — pair with `ignore_app_secrets: true` and per-process secret scoping.
3. **`config.env`** — plain env vars in machine config, not encrypted at rest. Non-sensitive config only.
4. **`config.processes[].secrets`** — inject named secrets only to specific process groups; `ignore_app_secrets: true` prevents inheritance of app-level secrets.
**Recommended architecture**: One app per tenant (isolated 6PN + isolated secrets) is the cleanest security model. Secrets stored per app via Fly Secrets, credentials file written via `config.files` at boot.
### 7. Machine Count and Org Limits
| Limit | Default | Hard Cap |
|---|---|---|
| Machines per org (all states) | 50 | No architectural cap |
- The 50-machine default is a **fail-safe**, not an architectural limit. Fly.io runs customers with 100,000+ machines.
- To raise: email `billing@fly.io` with requirements.
- **This limit will be hit immediately in any real multi-tenant deployment** — must budget for an early limit-raise request before launching.
- API rate limit of 1 req/s per action also needs consideration for bulk tenant provisioning scripts.
### 8. Pricing (as of March 2026)
**Compute (per second, billed only while running):**
| Preset | Per Month always-on |
|---|---|
| shared-cpu-1x (256 MB) | $2.05 |
| shared-cpu-2x (512 MB) | $4.10 |
| performance-1x (2 GB) | $32.64 |
**Storage**: $0.15/GB/month (provisioned, regardless of machine state)
**Egress**: $0.02/GB (North America/Europe), $0.04/GB (APAC/SA), $0.12/GB (Africa/India)
**Dedicated IPv4**: $2.00/month per app (shared IPv6 is free)
**No free tier** for new orgs (eliminated 2024). No minimum spend, no base fee.
**Monthly cost estimates** (1x shared-cpu-1x, 1 GB volume, 1 GB egress/tenant, US East):
| Scenario | Per Tenant | 10 Tenants | 100 Tenants | 1,000 Tenants |
|---|---|---|---|---|
| Always-on (730h/month) | $2.22 | $22 | $222 | $2,220 |
| Autostop, 8h/day active | $0.92 | $9 | $92 | $920 |
| Autostop, 2h/day active | $0.53 | $5 | $53 | $530 |
At scale, volume storage becomes the dominant cost when machines are idle. At 1,000 tenants autostopped, storage is ~$150/month vs compute of $170–$370/month.
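The always-on row of the table is simple arithmetic over the unit prices above (shared-cpu-1x compute + 1 GB volume + 1 GB North America egress); the autostop rows additionally scale compute by active hours and include rounding, so only the always-on case is reproduced here:

```rust
// Per-tenant always-on cost from the March 2026 unit prices above.
fn always_on_monthly_cost(tenants: u32) -> f64 {
    let compute = 2.05; // shared-cpu-1x (256 MB), $/month always-on
    let storage = 0.15; // 1 GB volume at $0.15/GB/month
    let egress = 0.02;  // 1 GB at $0.02/GB (North America/Europe)
    (compute + storage + egress) * tenants as f64
}
```

One tenant comes to $2.22/month, matching the per-tenant column.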
### 9. Showstoppers
**None identified** that rule it out. The following require action before launch:
| Risk | Severity | Mitigation |
|---|---|---|
| Default 50-machine org cap | High (blocks launch) | Email billing@fly.io early; no architectural cap |
| SMT/hyperthreading not documented | Medium (security) | Request confirmation from Fly.io support before production; mitigated by VM-level isolation |
| Intra-org network open by default | Medium (security) | Use one app per tenant with custom 6PNs |
| Secrets are per-app not per-machine | Low | Use one app per tenant or `config.files` with `secret_name` |
| Volume and machine must be same region | Low (ops) | Enforce region consistency in provisioning code |
| API rate limit 1 req/s per machine | Low | Throttle bulk provisioning loops |
## Recommendation
**Proceed.** Fly.io Machines are a viable isolation layer for multi-tenant storkit SaaS.
**Architecture to validate in spike 408:**
- One Fly.io app per tenant (provides 6PN network isolation + isolated secrets)
- One Firecracker microVM per tenant app (shared-cpu-1x 256 MB baseline; adjust per observed usage)
- One persistent volume per tenant (1 GB baseline for `~/.claude/`, repos, storkit state)
- Autostop/autoresume enabled — 70–92% compute cost reduction vs always-on for typical dev tool usage
- Tenant credentials injected via `config.files` + Fly Secrets at machine start
**Pricing verdict**: Workable at early SaaS scale. At 100 tenants with autostop (8h/day), costs ~$92/month; at 1,000 tenants ~$920/month. Margins are viable if per-tenant pricing is $5–$20/month.
**Before production**: Confirm with Fly.io support whether SMT is disabled on worker hosts. Request org machine limit raised to 200–500 during private beta.
**Spike 408 scope**: Validate cold start latency, autostop resume behavior, and volume persistence with a real test machine running the storkit container image.
@@ -0,0 +1,69 @@
---
name: "Split whatsapp.rs into focused modules"
retry_count: 2
blocked: true
---
# Refactor 409: Split whatsapp.rs into focused modules
## Current State
- TBD
## Desired State
whatsapp.rs is 2000+ lines, making it expensive for agents to navigate and edit. Split it into focused modules under chat/transport/whatsapp/.
## Acceptance Criteria
- [x] mod.rs contains webhook handlers, WebhookContext, and re-exports
- [x] meta.rs contains WhatsAppTransport, ChatTransport impl, and Graph API structs/calls
- [x] twilio.rs contains TwilioWhatsAppTransport, ChatTransport impl, and Twilio structs/calls
- [x] history.rs contains WhatsAppConversationHistory, load/save_whatsapp_history, and MessagingWindowTracker
- [x] commands.rs contains handle_incoming_message, handle_llm_message, and all async command dispatch
- [x] format.rs contains markdown_to_whatsapp and chunk_for_whatsapp
- [x] All existing tests pass
- [x] No behaviour changes — pure structural refactor
## Out of Scope
- TBD
## Test Results
<!-- storkit-test-results: {"unit":[{"name":"whatsapp::format::tests::chunk_short_message_returns_single_chunk","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_exactly_at_limit_returns_single_chunk","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_splits_on_paragraph_boundary","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_splits_on_line_boundary_when_no_paragraph_break","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_hard_splits_continuous_text","status":"pass","details":null},{"name":"whatsapp::format::tests::chunk_empty_string_returns_single_empty","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_headers_to_bold","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_bold","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_bold_italic","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_strikethrough","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_converts_links","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_removes_horizontal_rules","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_preserves_inline_code","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_preserves_code_blocks","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_mixed_message","status":"pass","details":null},{"name":"whatsapp::format::tests::md_to_wa_passthrough_plain_text","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_basics","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_expiry","status":"pass","details":null},{"name":"whatsapp::history::tests::messaging_window_tracker_reset","status":"pass","details":null},{"name":"whatsapp::history::tests::load_empty_history","status":"pass","details":null},{"name":"whatsapp::history::tests::save_and_load_history","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_valid","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_missing_body","status":"pass","details":null},{"name":"whatsapp::twilio::tests::parse_twilio_form_missing_from","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_help","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_status","status":"pass","details":null},{"name":"whatsapp::commands::tests::parse_command_unknown","status":"pass","details":null},{"name":"whatsapp::mod::tests::webhook_context_basics","status":"pass","details":null}],"integration":[]} -->
### Unit Tests (28 passed, 0 failed)
- ✅ whatsapp::format::tests::chunk_short_message_returns_single_chunk
- ✅ whatsapp::format::tests::chunk_exactly_at_limit_returns_single_chunk
- ✅ whatsapp::format::tests::chunk_splits_on_paragraph_boundary
- ✅ whatsapp::format::tests::chunk_splits_on_line_boundary_when_no_paragraph_break
- ✅ whatsapp::format::tests::chunk_hard_splits_continuous_text
- ✅ whatsapp::format::tests::chunk_empty_string_returns_single_empty
- ✅ whatsapp::format::tests::md_to_wa_converts_headers_to_bold
- ✅ whatsapp::format::tests::md_to_wa_converts_bold
- ✅ whatsapp::format::tests::md_to_wa_converts_bold_italic
- ✅ whatsapp::format::tests::md_to_wa_converts_strikethrough
- ✅ whatsapp::format::tests::md_to_wa_converts_links
- ✅ whatsapp::format::tests::md_to_wa_removes_horizontal_rules
- ✅ whatsapp::format::tests::md_to_wa_preserves_inline_code
- ✅ whatsapp::format::tests::md_to_wa_preserves_code_blocks
- ✅ whatsapp::format::tests::md_to_wa_mixed_message
- ✅ whatsapp::format::tests::md_to_wa_passthrough_plain_text
- ✅ whatsapp::history::tests::messaging_window_tracker_basics
- ✅ whatsapp::history::tests::messaging_window_tracker_expiry
- ✅ whatsapp::history::tests::messaging_window_tracker_reset
- ✅ whatsapp::history::tests::load_empty_history
- ✅ whatsapp::history::tests::save_and_load_history
- ✅ whatsapp::twilio::tests::parse_twilio_form_valid
- ✅ whatsapp::twilio::tests::parse_twilio_form_missing_body
- ✅ whatsapp::twilio::tests::parse_twilio_form_missing_from
- ✅ whatsapp::commands::tests::parse_command_help
- ✅ whatsapp::commands::tests::parse_command_status
- ✅ whatsapp::commands::tests::parse_command_unknown
- ✅ whatsapp::mod::tests::webhook_context_basics
### Integration Tests (0 passed, 0 failed)
*No integration tests recorded.*
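The `chunk_*` tests above exercise a boundary-preferring splitter: short messages pass through whole, long ones split first on a paragraph break, then on a line break, then hard-split. A minimal sketch of that strategy, assuming a hypothetical `chunk_message` name, a 1600-character limit, and ASCII input (all assumptions, not the shipped code):

```rust
// Hypothetical sketch; limit and name are assumptions, and byte indexing
// assumes ASCII input for brevity.
const CHUNK_LIMIT: usize = 1600;

fn chunk_message(text: &str) -> Vec<String> {
    if text.len() <= CHUNK_LIMIT {
        return vec![text.to_string()];
    }
    let mut chunks = Vec::new();
    let mut rest = text;
    while rest.len() > CHUNK_LIMIT {
        let window = &rest[..CHUNK_LIMIT];
        // Prefer a paragraph boundary, then a line boundary, then hard-split.
        let cut = window
            .rfind("\n\n")
            .filter(|&i| i > 0)
            .or_else(|| window.rfind('\n').filter(|&i| i > 0))
            .unwrap_or(CHUNK_LIMIT);
        let (head, tail) = rest.split_at(cut);
        chunks.push(head.trim_end().to_string());
        rest = tail.trim_start();
    }
    chunks.push(rest.to_string());
    chunks
}
```

This mirrors the test matrix: `chunk_short_message_returns_single_chunk` is the early return, and the paragraph/line/hard-split cases are the three arms of the `cut` selection.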
@@ -0,0 +1,22 @@
---
name: "loc bot command — top files by line count"
---
# Story 410: loc bot command — top files by line count
## User Story
As a developer, I want to send `loc` to the bot and see the top files by line count, so I can spot files that are getting too large before they become a problem for agents.
## Acceptance Criteria
- [ ] loc command is registered in chat/commands/mod.rs and appears in help output
- [ ] `loc` returns the top 10 source files by line count (excluding generated files, node_modules, target/, .storkit/worktrees/)
- [ ] `loc 5` returns the top 5 files
- [ ] `loc 20` returns the top 20 files
- [ ] Output includes file path, line count, and rank
- [ ] Command works from all transports (Matrix, WhatsApp, Slack)
## Out of Scope
- TBD
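The acceptance criteria above (walk the tree, skip generated directories, rank by line count, take the top N) can be sketched with the standard library alone. Names, the exclusion entries, and the recursive helper are illustrative assumptions, not the merged implementation:

```rust
use std::fs;
use std::path::Path;

// Assumed exclusion list, taken from the acceptance criteria above.
const EXCLUDED_DIRS: &[&str] = &["node_modules", "target", ".storkit/worktrees"];

// Recursively collect (path, line_count) pairs, skipping excluded trees.
fn collect_counts(dir: &Path, out: &mut Vec<(String, usize)>) {
    let Ok(entries) = fs::read_dir(dir) else { return };
    for entry in entries.flatten() {
        let path = entry.path();
        let display = path.display().to_string();
        if EXCLUDED_DIRS.iter().any(|d| display.contains(d)) {
            continue; // generated / vendored trees don't count
        }
        if path.is_dir() {
            collect_counts(&path, out);
        } else if let Ok(text) = fs::read_to_string(&path) {
            out.push((display, text.lines().count()));
        }
    }
}

fn top_files_by_loc(root: &Path, n: usize) -> Vec<(String, usize)> {
    let mut counts = Vec::new();
    collect_counts(root, &mut counts);
    counts.sort_by(|a, b| b.1.cmp(&a.1)); // most lines first
    counts.truncate(n);
    counts
}
```

`top_files_by_loc(root, 10)` would back the bare `loc` command, with the argument (`loc 5`, `loc 20`) swapped in for `n`.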
@@ -0,0 +1,29 @@
---
name: "Split slack.rs into focused modules"
---
# Refactor 413: Split slack.rs into focused modules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/chat/transport/slack.rs (1902 lines) into a slack/ directory with focused modules, mirroring the whatsapp/ module structure from story 409.
## Acceptance Criteria
- [ ] slack.rs is replaced by a slack/ directory with mod.rs re-exporting all public types
- [ ] meta.rs contains SlackTransport struct, ChatTransport trait impl, and Slack API request/response types
- [ ] commands.rs contains incoming message dispatch, permission logic, and slash command handling
- [ ] format.rs contains markdown_to_slack() conversion
- [ ] history.rs contains load_slack_history(), save_slack_history(), and SlackHistoryDump
- [ ] verify.rs contains verify_slack_signature(), sha256(), and constant_time_eq()
- [ ] mod.rs contains Slack event types, webhook handlers, and SlackWebhookContext
- [ ] All existing tests are preserved and pass in their respective modules
- [ ] No public API changes — all existing imports from other crates continue to work
## Out of Scope
- TBD
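Under the criteria above, the `slack/mod.rs` facade might take this shape. This is a layout sketch assembled from the acceptance criteria, not the merged code; only names the criteria themselves mention are used:

```rust
// slack/mod.rs: declare the focused submodules...
mod commands;
mod format;
mod history;
mod meta;
mod verify;

// ...and re-export the public surface so existing imports keep working
// (the "no public API changes" criterion).
pub use format::markdown_to_slack;
pub use history::{SlackHistoryDump, load_slack_history, save_slack_history};
pub use meta::SlackTransport;
pub use verify::verify_slack_signature;
```

The event types, webhook handlers, and `SlackWebhookContext` would stay in `mod.rs` itself per the criteria, so callers of those never see a path change at all.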
@@ -0,0 +1,19 @@
---
name: "loc command filters out known-huge files"
---
# Story 414: loc command filters out known-huge files
## User Story
As a developer, I want the `loc` command to exclude known-huge generated files from its results, so that the ranking surfaces real source files that need attention.
## Acceptance Criteria
- [ ] loc command excludes lockfiles and generated files (e.g. package-lock.json, Cargo.lock, frontend/package-lock.json) from results
- [ ] Exclusion list is defined as a constant, easy to extend
- [ ] Excluded files do not count toward line totals
## Out of Scope
- TBD
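The "exclusion list as an easy-to-extend constant" criterion might look like this small sketch; the constant name, entries, and helper function are illustrative assumptions:

```rust
// Assumed constant: add new lockfiles / generated files here.
const EXCLUDED_FILES: &[&str] = &["package-lock.json", "Cargo.lock"];

fn is_excluded(path: &str) -> bool {
    // Compare only the final path component, so the same lockfile is
    // caught at any depth (e.g. frontend/package-lock.json).
    path.rsplit('/')
        .next()
        .map(|name| EXCLUDED_FILES.contains(&name))
        .unwrap_or(false)
}
```

Filtering with this predicate before counting also satisfies the third criterion, since excluded files never reach the line-total accumulator.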
@@ -0,0 +1,29 @@
---
name: "Split agents/pool/mod.rs into submodules"
---
# Refactor 415: Split agents/pool/mod.rs into submodules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/agents/pool/mod.rs (2407 lines) into focused submodules within the pool/ directory.
## Acceptance Criteria
- [ ] types.rs contains StoryAgent, PendingGuard, AgentInfo, composite_key, and related helper structs
- [ ] lifecycle.rs contains start_agent, stop_agent, wait_for_agent and their unit tests
- [ ] worktree.rs contains create_worktree, get_project_root, find_active_story_stage and their unit tests
- [ ] query.rs contains list_agents, available_agents_for_stage, get_log_info, subscribe, drain_events and their unit tests
- [ ] process.rs contains kill_all_children, kill_child_for_key, ChildKiller registry methods and their unit tests
- [ ] test_helpers.rs contains inject_test_agent and its variants (4 methods)
- [ ] mod.rs contains AgentPool struct, new(), and re-exports all public types
- [ ] Unit tests live in their respective module files, not in a separate tests module
- [ ] No public API changes — all existing imports continue to work
## Out of Scope
- TBD
@@ -0,0 +1,28 @@
---
name: "Split io/fs.rs into submodules"
---
# Refactor 416: Split io/fs.rs into submodules
## Current State
- TBD
## Desired State
Refactor the monolithic server/src/io/fs.rs (2007 lines) into focused submodules within an fs/ directory.
## Acceptance Criteria
- [ ] scaffold.rs contains scaffold_story_kit, write_file_if_missing, write_script_if_missing, write_story_kit_gitignore, append_root_gitignore_entries, detect_components_toml, detect_script_test, generate_project_toml and their unit tests
- [ ] project.rs contains open_project, close_project, get_current_project, get_known_projects, forget_known_project, ensure_project_root_with_story_kit, validate_project_path and their unit tests
- [ ] files.rs contains read_file, write_file, list_directory, list_project_files, FileEntry, create_directory_absolute and their unit tests
- [ ] paths.rs contains resolve_cli_path, resolve_path, resolve_path_impl, find_story_kit_root, get_home_directory and their unit tests
- [ ] preferences.rs contains get_model_preference, set_model_preference and their unit tests
- [ ] mod.rs re-exports all public types and functions
- [ ] Unit tests live in their respective module files
- [ ] No public API changes — all existing imports continue to work
## Out of Scope
- TBD
-1
@@ -1 +0,0 @@
3001
Generated
+37 -37
@@ -209,9 +209,9 @@ dependencies = [
[[package]]
name = "aws-lc-sys"
version = "0.39.0"
version = "0.39.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a"
checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399"
dependencies = [
"cc",
"cmake",
@@ -349,9 +349,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.57"
version = "1.2.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -434,9 +434,9 @@ dependencies = [
[[package]]
name = "cmake"
version = "0.1.57"
version = "0.1.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
dependencies = [
"cc",
]
@@ -1862,10 +1862,12 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.91"
version = "0.3.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
dependencies = [
"cfg-if",
"futures-util",
"once_cell",
"wasm-bindgen",
]
@@ -2428,9 +2430,9 @@ dependencies = [
[[package]]
name = "mio"
version = "1.1.1"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
dependencies = [
"libc",
"log",
@@ -2559,9 +2561,9 @@ dependencies = [
[[package]]
name = "num-conv"
version = "0.2.0"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
[[package]]
name = "num-traits"
@@ -3556,9 +3558,9 @@ dependencies = [
[[package]]
name = "rustc-hash"
version = "2.1.1"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
[[package]]
name = "rustc_version"
@@ -3941,9 +3943,9 @@ dependencies = [
[[package]]
name = "simd-adler32"
version = "0.3.8"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "similar"
@@ -4017,7 +4019,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "storkit"
version = "0.6.0"
version = "0.7.0"
dependencies = [
"async-stream",
"async-trait",
@@ -4037,12 +4039,14 @@ dependencies = [
"poem-openapi",
"portable-pty",
"pulldown-cmark",
"regex",
"reqwest 0.13.2",
"rust-embed",
"serde",
"serde_json",
"serde_urlencoded",
"serde_yaml",
"sha2",
"strip-ansi-escapes",
"tempfile",
"tokio",
@@ -4661,9 +4665,9 @@ dependencies = [
[[package]]
name = "unicode-segmentation"
version = "1.13.1"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b"
checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
[[package]]
name = "unicode-xid"
@@ -4726,9 +4730,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.22.0"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
dependencies = [
"getrandom 0.4.2",
"js-sys",
@@ -4847,9 +4851,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
dependencies = [
"cfg-if",
"once_cell",
@@ -4860,23 +4864,19 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.64"
version = "0.4.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0"
dependencies = [
"cfg-if",
"futures-util",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -4884,9 +4884,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
dependencies = [
"bumpalo",
"proc-macro2",
@@ -4897,9 +4897,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
dependencies = [
"unicode-ident",
]
@@ -4984,9 +4984,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.91"
version = "0.3.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
checksum = "84cde8507f4d7cfcb1185b8cb5890c494ffea65edbe1ba82cfd63661c805ed94"
dependencies = [
"js-sys",
"wasm-bindgen",
+2
@@ -21,6 +21,7 @@ rust-embed = "8"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_urlencoded = "0.7"
sha2 = "0.10"
serde_yaml = "0.9"
strip-ansi-escapes = "0.2"
tempfile = "3"
@@ -38,3 +39,4 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [
pulldown-cmark = { version = "0.13.3", default-features = false, features = [
"html",
] }
regex = "1"
+25 -152
@@ -1,182 +1,55 @@
# Story Kit
# Storkit
This app runs as a single Rust web server binary that serves the Vite/React frontend and exposes APIs.
The frontend lives in the `frontend/` directory.
A story-driven development server that manages work items, spawns coding agents, and runs them through a pipeline from backlog to done. Ships as a single Rust binary with an embedded React frontend. Communicates via Matrix, WhatsApp, and Slack bot transports, and exposes MCP tools for programmatic access.
You can also run the frontend and backend separately in development (Vite dev server + Rust API).
## Prerequisites
## Running it in development
- Rust (2024 edition)
- Node.js and npm
- Docker (for Linux cross-compilation and container deployment)
- `cross` (`cargo install cross`) for Linux static builds
## Building for production
```bash
# Build the frontend
cd frontend
npm install
npm run dev
# In another terminal - run the server (serves embedded frontend/dist/)
cargo run
```
## Production
```bash
# Build the release binary (also builds the frontend via build.rs)
cargo build --release
# Run the server (serves embedded frontend/dist/)
./target/release/storkit
```
## Cross-Platform Distribution
The release binary embeds the frontend via `rust-embed`. Output: `target/release/storkit`.
Story Kit ships as a **single self-contained binary** with the React frontend embedded via
`rust-embed`. No Rust toolchain, Node.js, or extra libraries are required on the target machine.
### macOS
For a static Linux binary (musl, zero dynamic deps):
```bash
# Native build: no extra tools required beyond Rust + npm
make build-macos
# Output: target/release/storkit
# Verify only system frameworks are linked (Security.framework, libSystem.B.dylib, etc.)
otool -L target/release/storkit
cross build --release --target x86_64-unknown-linux-musl
```
### Linux (static x86_64, zero dynamic deps)
The Linux build uses the `x86_64-unknown-linux-musl` target to produce a fully static binary.
**Prerequisites:**
Docker:
```bash
# Install cross, a Rust cross-compilation tool backed by Docker
cargo install cross
# Ensure Docker Desktop (or Docker Engine) is running
docker compose -f docker/docker-compose.yml build
```
**Build:**
## Running in development
```bash
make build-linux
# Output: target/x86_64-unknown-linux-musl/release/storkit
# Run tests
script/test
# Verify the binary is statically linked
file target/x86_64-unknown-linux-musl/release/storkit
# Expected: ELF 64-bit LSB executable, x86-64, statically linked
# Run the server
cargo run -- --port 3000
ldd target/x86_64-unknown-linux-musl/release/storkit
# Expected: not a dynamic executable
# In another terminal, run the frontend dev server
cd frontend && npm install && npm run dev
```
**Running on any Linux x86_64 machine:**
```bash
# No Rust, Node, glibc, or any other library needed; just copy and run
./storkit
```
Configuration lives in `.storkit/project.toml`. See `.storkit/bot.toml.*.example` for transport setup.
## Releasing
Builds both macOS and Linux binaries locally, tags the repo, and publishes a Gitea release with a changelog.
**One-time setup:**
1. Create a Gitea API token at `https://code.crashlabs.io/user/settings/applications` (needs repository read/write)
2. Add it to `.env` (gitignored): `GITEA_TOKEN=your_token`
3. Ensure `cross` is installed (`cargo install cross`) and Docker is running
**To release:**
Requires a Gitea API token in `.env` (`GITEA_TOKEN=your_token`).
```bash
make release V=0.2.0
script/release 0.6.1
```
This will:
- Build macOS arm64 (native) and Linux amd64 (static musl via cross/Docker)
- Generate a changelog from commits since the last tag
- Tag the repo as `v0.2.0` and push the tag
- Create a Gitea release with both binaries and the changelog attached
## Testing
### Frontend Tests
The frontend uses **Vitest** for unit tests and **Playwright** for end-to-end tests.
```bash
cd frontend
# Run unit tests
npm test
# Run end-to-end tests
npm run test:e2e
```
### Backend Tests
This project uses **nextest** for running tests and **cargo-llvm-cov** for code coverage.
### Install Tools
```bash
cargo install cargo-nextest cargo-llvm-cov
```
### Run Tests
```bash
# Run all tests
cargo nextest run
# Run specific module
cargo nextest run search_files
# Run with verbose output
cargo nextest run --no-capture
```
### Generate Coverage
```bash
# HTML report (opens in browser)
cargo llvm-cov nextest --html --open
# Terminal output
cargo llvm-cov nextest
# LCOV format (for CI)
cargo llvm-cov nextest --lcov --output-path lcov.info
# Clean coverage data
cargo llvm-cov clean
```
### Configuration
- **Nextest config**: `.config/nextest.toml`
- **Coverage output**: `target/llvm-cov/html/index.html`
## Current Coverage (search_files module)
```
Module: commands/search.rs
├── Region Coverage: 75.36%
├── Function Coverage: 69.05%
└── Line Coverage: 72.55%
```
### Available Test Profiles
```bash
# Development (default)
cargo nextest run
# CI with retries
cargo nextest run --profile ci
# Coverage optimized
cargo nextest run --profile coverage
```
This bumps version in `Cargo.toml` and `package.json`, builds macOS arm64 and Linux amd64 binaries, tags the repo, and publishes a Gitea release with changelog and binaries attached.
-1
@@ -91,7 +91,6 @@ services:
- no-new-privileges:true
# Resource limits cap the whole system.
# Adjust based on your machine. These are conservative defaults.
deploy:
resources:
limits:
+7 -3
@@ -14,8 +14,12 @@ if [ -z "$GIT_USER_EMAIL" ]; then
exit 1
fi
# Use GIT_AUTHOR/COMMITTER env vars instead of git config --global,
# so the root filesystem can stay read-only (no ~/.gitconfig write).
# Set git identity globally so it persists for all shells (docker exec, etc.),
# not just the entrypoint process tree.
git config --global user.name "$GIT_USER_NAME"
git config --global user.email "$GIT_USER_EMAIL"
# Also set env vars for backwards compatibility.
export GIT_AUTHOR_NAME="$GIT_USER_NAME"
export GIT_COMMITTER_NAME="$GIT_USER_NAME"
export GIT_AUTHOR_EMAIL="$GIT_USER_EMAIL"
@@ -27,7 +31,7 @@ export GIT_COMMITTER_EMAIL="$GIT_USER_EMAIL"
# binaries on a Linux container). Reinstall to get the right ones.
if [ -d /workspace/frontend ] && [ -f /workspace/frontend/package.json ]; then
echo "Installing frontend dependencies for container platform..."
cd /workspace/frontend && npm install --prefer-offline 2>/dev/null || true
cd /workspace/frontend && npm ci --prefer-offline 2>/dev/null || true
cd /workspace
fi
+331 -3926
File diff suppressed because it is too large
+4 -3
@@ -1,7 +1,7 @@
{
"name": "living-spec-standalone",
"private": true,
"version": "0.6.0",
"version": "0.7.0",
"type": "module",
"scripts": {
"dev": "vite",
@@ -20,6 +20,9 @@
"react-markdown": "^10.1.0",
"react-syntax-highlighter": "^16.1.0"
},
"overrides": {
"glob": "^13.0.0"
},
"devDependencies": {
"@biomejs/biome": "^2.4.2",
"@playwright/test": "^1.47.2",
@@ -31,9 +34,7 @@
"@types/react-dom": "^19.1.6",
"@vitejs/plugin-react": "^4.6.0",
"@vitest/coverage-v8": "^2.1.9",
"jest": "^29.0.0",
"jsdom": "^28.1.0",
"ts-jest": "^29.0.0",
"typescript": "~5.8.3",
"vite": "^5.4.21",
"vitest": "^2.1.4"
+50 -4
@@ -127,13 +127,13 @@ details summary::-webkit-details-marker {
display: none;
}
details[open] summary span:first-child {
transform: rotate(90deg);
display: inline-block;
details summary span:first-child {
transition: transform 0.2s ease;
}
details summary span:first-child {
details[open] summary span:first-child {
transform: rotate(90deg);
display: inline-block;
transition: transform 0.2s ease;
}
@@ -236,3 +236,49 @@ body,
opacity: 0;
}
}
/* ProjectPathInput dropdown theming */
.path-dropdown {
border: 1px solid #ddd;
background: #fff;
color: #0f0f0f;
}
.path-dropdown-header {
border-bottom: 1px solid #eee;
background: #fafafa;
}
.path-dropdown-item {
background: transparent;
}
.path-dropdown-item--selected {
background: #f0f0f0;
}
.path-match-highlight {
font-weight: 600;
color: #222;
}
@media (prefers-color-scheme: dark) {
.path-dropdown {
border-color: #555;
background: #1e1e1e;
color: #f6f6f6;
}
.path-dropdown-header {
border-bottom-color: #444;
background: #2a2a2a;
}
.path-dropdown-item--selected {
background: #3a3a3a;
}
.path-match-highlight {
color: #f6f6f6;
}
}
+1
@@ -647,6 +647,7 @@ export function Chat({ projectPath, onCloseProject }: ChatProps) {
"git",
"overview",
"rebuild",
"loc",
]);
if (knownCommands.has(cmd)) {
@@ -32,7 +32,7 @@ function renderHighlightedMatch(text: string, query: string) {
return (
<span
key={`${char}-${count}`}
style={isMatch ? { fontWeight: 600, color: "#222" } : undefined}
className={isMatch ? "path-match-highlight" : undefined}
>
{char}
</span>
@@ -93,16 +93,15 @@ export function ProjectPathInput({
/>
{matchList.length > 0 && (
<div
className="path-dropdown"
style={{
position: "absolute",
top: "100%",
left: 0,
right: 0,
marginTop: "6px",
border: "1px solid #ddd",
borderRadius: "6px",
overflow: "hidden",
background: "#fff",
fontFamily: "monospace",
height: "160px",
overflowY: "auto",
@@ -111,13 +110,12 @@ export function ProjectPathInput({
}}
>
<div
className="path-dropdown-header"
style={{
display: "flex",
justifyContent: "flex-end",
alignItems: "center",
padding: "4px 6px",
borderBottom: "1px solid #eee",
background: "#fafafa",
}}
>
<button
@@ -128,8 +126,6 @@ export function ProjectPathInput({
width: "24px",
height: "24px",
borderRadius: "4px",
border: "1px solid #ddd",
background: "#fff",
cursor: "pointer",
lineHeight: 1,
}}
@@ -143,6 +139,7 @@ export function ProjectPathInput({
<button
key={match.path}
type="button"
className={`path-dropdown-item${isSelected ? " path-dropdown-item--selected" : ""}`}
onMouseEnter={() => onSelectMatch(index)}
onMouseDown={(event) => {
event.preventDefault();
@@ -154,7 +151,6 @@ export function ProjectPathInput({
textAlign: "left",
padding: "6px 8px",
border: "none",
background: isSelected ? "#f0f0f0" : "transparent",
cursor: "pointer",
fontFamily: "inherit",
}}
+2 -2
@@ -70,11 +70,11 @@ export type WsResponse =
// Re-export API client types for convenience
export type {
CommandOutput as ApiCommandOutput,
FileEntry as ApiFileEntry,
Message as ApiMessage,
ProviderConfig as ApiProviderConfig,
FileEntry as ApiFileEntry,
SearchResult as ApiSearchResult,
CommandOutput as ApiCommandOutput,
WsRequest as ApiWsRequest,
WsResponse as ApiWsResponse,
};
+3 -1
@@ -1,6 +1,6 @@
[package]
name = "storkit"
version = "0.6.0"
version = "0.7.0"
edition = "2024"
build = "build.rs"
@@ -23,6 +23,7 @@ rust-embed = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
serde_urlencoded = { workspace = true }
sha2 = { workspace = true }
serde_yaml = { workspace = true }
strip-ansi-escapes = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync", "process"] }
@@ -31,6 +32,7 @@ uuid = { workspace = true, features = ["v4", "serde"] }
walkdir = { workspace = true }
matrix-sdk = { workspace = true }
pulldown-cmark = { workspace = true }
regex = { workspace = true }
# Force bundled SQLite so static musl builds don't need a system libsqlite3
libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
+2
@@ -188,6 +188,8 @@ pub struct AgentInfo {
pub completion: Option<CompletionReport>,
/// UUID identifying the persistent log file for this session.
pub log_session_id: Option<String>,
/// True when a rate-limit throttle warning was received for this agent.
pub throttled: bool,
}
#[cfg(test)]
File diff suppressed because it is too large
@@ -0,0 +1,482 @@
//! Auto-assign: scan pipeline stages and dispatch free agents to unassigned stories.
use crate::config::ProjectConfig;
use crate::slog;
use crate::slog_error;
use crate::slog_warn;
use crate::worktree;
use std::path::Path;
use super::super::super::PipelineStage;
use super::super::AgentPool;
use super::scan::{
count_active_agents_for_stage, find_free_agent_for_stage, is_agent_free,
is_story_assigned_for_stage, scan_stage_items,
};
use super::story_checks::{
has_merge_failure, has_review_hold, is_story_blocked, read_story_front_matter_agent,
};
impl AgentPool {
pub async fn auto_assign_available_work(&self, project_root: &Path) {
let config = match ProjectConfig::load(project_root) {
Ok(c) => c,
Err(e) => {
slog_warn!("[auto-assign] Failed to load project config: {e}");
return;
}
};
// Process each active pipeline stage in order.
let stages: [(&str, PipelineStage); 3] = [
("2_current", PipelineStage::Coder),
("3_qa", PipelineStage::Qa),
("4_merge", PipelineStage::Mergemaster),
];
for (stage_dir, stage) in &stages {
let items = scan_stage_items(project_root, stage_dir);
if items.is_empty() {
continue;
}
for story_id in &items {
// Items marked with review_hold (e.g. spikes after QA passes) stay
// in their current stage for human review — don't auto-assign agents.
if has_review_hold(project_root, stage_dir, story_id) {
continue;
}
// Skip blocked stories (retry limit exceeded).
if is_story_blocked(project_root, stage_dir, story_id) {
continue;
}
// Skip stories in 4_merge/ that already have a reported merge failure.
// These need human intervention — auto-assigning a new mergemaster
// would just waste tokens on the same broken merge.
if *stage == PipelineStage::Mergemaster
&& has_merge_failure(project_root, stage_dir, story_id)
{
continue;
}
// AC6: Detect empty-diff stories in 4_merge/ before starting a
// mergemaster. If the worktree has no commits on the feature branch,
// write a merge_failure and block the story immediately.
if *stage == PipelineStage::Mergemaster
&& let Some(wt_path) = worktree::find_worktree_path(project_root, story_id)
&& !crate::agents::gates::worktree_has_committed_work(&wt_path)
{
slog_warn!(
"[auto-assign] Story '{story_id}' in 4_merge/ has no commits \
on feature branch. Writing merge_failure and blocking."
);
let story_path = project_root
.join(".storkit/work")
.join(stage_dir)
.join(format!("{story_id}.md"));
let empty_diff_reason = "Feature branch has no code changes — the coder agent \
did not produce any commits.";
let _ = crate::io::story_metadata::write_merge_failure(
&story_path,
empty_diff_reason,
);
let _ = crate::io::story_metadata::write_blocked(&story_path);
let _ = self.watcher_tx.send(crate::io::watcher::WatcherEvent::StoryBlocked {
story_id: story_id.to_string(),
reason: empty_diff_reason.to_string(),
});
continue;
}
// Re-acquire the lock on each iteration to see state changes
// from previous start_agent calls in the same pass.
let preferred_agent =
read_story_front_matter_agent(project_root, stage_dir, story_id);
// Check max_coders limit for the Coder stage before agent selection.
// If the pool is full, all remaining items in this stage wait.
if *stage == PipelineStage::Coder
&& let Some(max) = config.max_coders
{
let agents_lock = match self.agents.lock() {
Ok(a) => a,
Err(e) => {
slog_error!("[auto-assign] Failed to lock agents: {e}");
break;
}
};
let active = count_active_agents_for_stage(&config, &agents_lock, stage);
if active >= max {
slog!(
"[auto-assign] Coder pool full ({active}/{max}); remaining items in {stage_dir}/ will wait."
);
break;
}
}
// Outcome: (already_assigned, chosen_agent, preferred_busy, stage_mismatch)
// preferred_busy=true means the story has a specific agent requested but it is
// currently occupied — the story should wait rather than fall back.
// stage_mismatch=true means the preferred agent's stage doesn't match the
// pipeline stage, so we fell back to a generic stage agent.
let (already_assigned, free_agent, preferred_busy, stage_mismatch) = {
let agents = match self.agents.lock() {
Ok(a) => a,
Err(e) => {
slog_error!("[auto-assign] Failed to lock agents: {e}");
break;
}
};
let assigned = is_story_assigned_for_stage(&config, &agents, story_id, stage);
if assigned {
(true, None, false, false)
} else if let Some(ref pref) = preferred_agent {
// Story has a front-matter agent preference.
// Verify the preferred agent's stage matches the current
// pipeline stage — a coder shouldn't be assigned to QA.
let pref_stage_matches = config
.find_agent(pref)
.map(|cfg| super::super::super::agent_config_stage(cfg) == *stage)
.unwrap_or(false);
if !pref_stage_matches {
// Stage mismatch — fall back to any free agent for this stage.
let free = find_free_agent_for_stage(&config, &agents, stage)
.map(|s| s.to_string());
(false, free, false, true)
} else if is_agent_free(&agents, pref) {
(false, Some(pref.clone()), false, false)
} else {
(false, None, true, false)
}
} else {
let free = find_free_agent_for_stage(&config, &agents, stage)
.map(|s| s.to_string());
(false, free, false, false)
}
};
if already_assigned {
// Story already has an active agent — skip silently.
continue;
}
if preferred_busy {
// The story requests a specific agent that is currently busy.
// Do not fall back to a different agent; let this story wait.
slog!(
"[auto-assign] Preferred agent '{}' busy for '{story_id}'; story will wait.",
preferred_agent.as_deref().unwrap_or("?")
);
continue;
}
if stage_mismatch {
slog!(
"[auto-assign] Preferred agent '{}' stage mismatch for '{story_id}' in {stage_dir}/; falling back to stage-appropriate agent.",
preferred_agent.as_deref().unwrap_or("?")
);
}
match free_agent {
Some(agent_name) => {
slog!(
"[auto-assign] Assigning '{agent_name}' to '{story_id}' in {stage_dir}/"
);
if let Err(e) = self
.start_agent(project_root, story_id, Some(&agent_name), None)
.await
{
slog!(
"[auto-assign] Failed to start '{agent_name}' for '{story_id}': {e}"
);
}
}
None => {
// No free agents of this type — stop scanning this stage.
slog!(
"[auto-assign] All {:?} agents busy; remaining items in {stage_dir}/ will wait.",
stage
);
break;
}
}
}
}
}
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::super::super::AgentPool;
use crate::agents::AgentStatus;
use crate::io::watcher::WatcherEvent;
use tokio::sync::broadcast;
/// Story 203: auto_assign_available_work must detect a story in 2_current/
/// with no active agent and start an agent for it.
#[tokio::test]
async fn auto_assign_picks_up_story_queued_in_current() {
let tmp = tempfile::tempdir().unwrap();
let sk = tmp.path().join(".storkit");
let current = sk.join("work/2_current");
std::fs::create_dir_all(&current).unwrap();
std::fs::write(
sk.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
)
.unwrap();
// Place the story in 2_current/ (simulating the "queued" state).
std::fs::write(current.join("story-3.md"), "---\nname: Story 3\n---\n").unwrap();
let pool = AgentPool::new_test(3001);
// No agents are running — coder-1 is free.
// auto_assign will try to call start_agent, which will attempt to create
// a worktree (will fail without a git repo) — that is fine. We only need
// to verify the agent is registered as Pending before the background
// task eventually fails.
pool.auto_assign_available_work(tmp.path()).await;
let agents = pool.agents.lock().unwrap();
let has_pending = agents.values().any(|a| {
a.agent_name == "coder-1"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
has_pending,
"auto_assign should have started coder-1 for story-3, but pool is empty"
);
}
/// Story 265: auto_assign_available_work must skip spikes in 3_qa/ that
/// have review_hold: true set in their front matter.
#[tokio::test]
async fn auto_assign_skips_spikes_with_review_hold() {
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Create project.toml with a QA agent.
let sk = root.join(".storkit");
std::fs::create_dir_all(&sk).unwrap();
std::fs::write(
sk.join("project.toml"),
"[[agents]]\nname = \"qa\"\nrole = \"qa\"\nmodel = \"test\"\nprompt = \"test\"\n",
)
.unwrap();
// Put a spike in 3_qa/ with review_hold: true.
let qa_dir = root.join(".storkit/work/3_qa");
std::fs::create_dir_all(&qa_dir).unwrap();
std::fs::write(
qa_dir.join("20_spike_test.md"),
"---\nname: Test Spike\nreview_hold: true\n---\n# Spike\n",
)
.unwrap();
let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(4);
let pool = AgentPool::new(3001, watcher_tx);
pool.auto_assign_available_work(root).await;
// No agent should have been started for the spike.
let agents = pool.agents.lock().unwrap();
assert!(
agents.is_empty(),
"No agents should be assigned to a spike with review_hold"
);
}
// ── Story 279: auto-assign respects agent stage from front matter ──────────
/// When a story in 3_qa/ has `agent: coder-1` in its front matter but
/// coder-1 is a coder-stage agent, auto-assign must NOT assign coder-1.
/// Instead it should fall back to a free QA-stage agent.
#[tokio::test]
async fn auto_assign_ignores_coder_preference_when_story_is_in_qa_stage() {
let tmp = tempfile::tempdir().unwrap();
let sk = tmp.path().join(".storkit");
let qa_dir = sk.join("work/3_qa");
std::fs::create_dir_all(&qa_dir).unwrap();
std::fs::write(
sk.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
[[agent]]\nname = \"qa-1\"\nstage = \"qa\"\n",
)
.unwrap();
// Story in 3_qa/ with a preferred coder-stage agent.
std::fs::write(
qa_dir.join("story-qa1.md"),
"---\nname: QA Story\nagent: coder-1\n---\n",
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.auto_assign_available_work(tmp.path()).await;
let agents = pool.agents.lock().unwrap();
// coder-1 must NOT have been assigned (wrong stage for 3_qa/).
let coder_assigned = agents.values().any(|a| {
a.agent_name == "coder-1"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
!coder_assigned,
"coder-1 should not be assigned to a QA-stage story"
);
// qa-1 should have been assigned instead.
let qa_assigned = agents.values().any(|a| {
a.agent_name == "qa-1"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
qa_assigned,
"qa-1 should be assigned as fallback for the QA-stage story"
);
}
/// When a story in 2_current/ has `agent: coder-1` in its front matter and
/// coder-1 is a coder-stage agent, auto-assign must respect the preference
/// and assign coder-1 (not fall back to some other coder).
#[tokio::test]
async fn auto_assign_respects_coder_preference_when_story_is_in_current_stage() {
let tmp = tempfile::tempdir().unwrap();
let sk = tmp.path().join(".storkit");
let current_dir = sk.join("work/2_current");
std::fs::create_dir_all(&current_dir).unwrap();
std::fs::write(
sk.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
[[agent]]\nname = \"coder-2\"\nstage = \"coder\"\n",
)
.unwrap();
// Story in 2_current/ with a preferred coder-1 agent.
std::fs::write(
current_dir.join("story-pref.md"),
"---\nname: Coder Story\nagent: coder-1\n---\n",
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.auto_assign_available_work(tmp.path()).await;
let agents = pool.agents.lock().unwrap();
// coder-1 should have been picked (it matches the stage and is preferred).
let coder1_assigned = agents.values().any(|a| {
a.agent_name == "coder-1"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
coder1_assigned,
"coder-1 should be assigned when it matches the stage and is preferred"
);
// coder-2 must NOT be assigned (not preferred).
let coder2_assigned = agents.values().any(|a| {
a.agent_name == "coder-2"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
!coder2_assigned,
"coder-2 should not be assigned when coder-1 is explicitly preferred"
);
}
/// When the preferred agent's stage mismatches and no other agent of the
/// correct stage is available, auto-assign must not start any agent for that
/// story (no panic, no error).
#[tokio::test]
async fn auto_assign_stage_mismatch_with_no_fallback_starts_no_agent() {
let tmp = tempfile::tempdir().unwrap();
let sk = tmp.path().join(".storkit");
let qa_dir = sk.join("work/3_qa");
std::fs::create_dir_all(&qa_dir).unwrap();
// Only a coder agent is configured — no QA agent exists.
std::fs::write(
sk.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
)
.unwrap();
// Story in 3_qa/ requests coder-1 (wrong stage) and no QA agent exists.
std::fs::write(
qa_dir.join("story-noqa.md"),
"---\nname: QA Story No Agent\nagent: coder-1\n---\n",
)
.unwrap();
let pool = AgentPool::new_test(3001);
// Must not panic.
pool.auto_assign_available_work(tmp.path()).await;
let agents = pool.agents.lock().unwrap();
assert!(
agents.is_empty(),
"No agent should be started when no stage-appropriate agent is available"
);
}
/// Two concurrent auto_assign_available_work calls must not assign the same
/// agent to two stories simultaneously. After both complete, at most one
/// Pending/Running entry must exist per agent name.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn toctou_concurrent_auto_assign_no_duplicate_agent_assignments() {
use std::fs;
use std::sync::Arc;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path().to_path_buf();
let sk_dir = root.join(".storkit");
// Two stories waiting in 2_current, one coder agent.
fs::create_dir_all(sk_dir.join("work/2_current")).unwrap();
fs::write(
sk_dir.join("project.toml"),
"[[agent]]\nname = \"coder-1\"\n",
)
.unwrap();
fs::write(
sk_dir.join("work/2_current/86_story_foo.md"),
"---\nname: Foo\n---\n",
)
.unwrap();
fs::write(
sk_dir.join("work/2_current/130_story_bar.md"),
"---\nname: Bar\n---\n",
)
.unwrap();
let pool = Arc::new(AgentPool::new_test(3099));
// Run two concurrent auto_assign calls.
let pool1 = pool.clone();
let root1 = root.clone();
let t1 = tokio::spawn(async move { pool1.auto_assign_available_work(&root1).await });
let pool2 = pool.clone();
let root2 = root.clone();
let t2 = tokio::spawn(async move { pool2.auto_assign_available_work(&root2).await });
let _ = tokio::join!(t1, t2);
// At most one Pending/Running entry should exist for coder-1.
let agents = pool.agents.lock().unwrap();
let active_coder_count = agents
.values()
.filter(|a| {
a.agent_name == "coder-1"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
})
.count();
assert!(
active_coder_count <= 1,
"coder-1 must not be assigned to more than one story simultaneously; \
found {active_coder_count} active entries"
);
}
}
@@ -0,0 +1,12 @@
//! Auto-assign submodules: wires focused sub-files and re-exports public items.
#[allow(clippy::module_inception)]
mod auto_assign;
mod reconcile;
mod scan;
mod story_checks;
mod watchdog;
// Re-export items that were pub(super) in the original monolithic auto_assign.rs
// so that pool::lifecycle and pool::pipeline continue to access them unchanged.
pub(super) use scan::{find_free_agent_for_stage, is_agent_free};
@@ -0,0 +1,527 @@
//! Startup reconciliation: detect stories with committed work and advance the pipeline.
use std::path::Path;
use tokio::sync::broadcast;
use crate::worktree;
use super::super::super::ReconciliationEvent;
use super::super::{AgentPool, find_active_story_stage};
impl AgentPool {
/// Reconcile stories whose agent work was committed while the server was offline.
///
/// On server startup the in-memory agent pool is empty, so any story that an agent
/// completed during a previous session is stuck: the worktree has committed work but
/// the pipeline never advanced. This method detects those stories, re-runs the
/// acceptance gates, and advances the pipeline stage so that `auto_assign_available_work`
/// (called immediately after) picks up the right next-stage agents.
///
/// Algorithm:
/// 1. List all worktree directories under `{project_root}/.storkit/worktrees/`.
/// 2. For each worktree, check whether its feature branch has commits ahead of the
/// base branch (`master` / `main`).
/// 3. If committed work is found AND the story is in `2_current/` or `3_qa/`:
/// - Run acceptance gates (uncommitted-change check + clippy + tests).
/// - On pass + `2_current/`: move the story to `3_qa/`.
/// - On pass + `3_qa/`: run the coverage gate; if that also passes move to `4_merge/`.
/// - On failure: leave the story where it is so `auto_assign_available_work` can
/// start a fresh agent to retry.
/// 4. Stories in `4_merge/` are left for `auto_assign_available_work` to handle via a
/// fresh mergemaster (squash-merge must be re-executed by the mergemaster agent).
pub async fn reconcile_on_startup(
&self,
project_root: &Path,
progress_tx: &broadcast::Sender<ReconciliationEvent>,
) {
let worktrees = match worktree::list_worktrees(project_root) {
Ok(wt) => wt,
Err(e) => {
eprintln!("[startup:reconcile] Failed to list worktrees: {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: String::new(),
status: "done".to_string(),
message: format!("Reconciliation failed: {e}"),
});
return;
}
};
for wt_entry in &worktrees {
let story_id = &wt_entry.story_id;
let wt_path = wt_entry.path.clone();
// Determine which active stage the story is in.
let stage_dir = match find_active_story_stage(project_root, story_id) {
Some(s) => s,
None => continue, // Not in any active stage (backlog/archived or unknown).
};
// 4_merge/ is left for auto_assign to handle with a fresh mergemaster.
if stage_dir == "4_merge" {
continue;
}
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "checking".to_string(),
message: format!("Checking for committed work in {stage_dir}/"),
});
// Check whether the worktree has commits ahead of the base branch.
let wt_path_for_check = wt_path.clone();
let has_work = tokio::task::spawn_blocking(move || {
crate::agents::gates::worktree_has_committed_work(&wt_path_for_check)
})
.await
.unwrap_or(false);
if !has_work {
eprintln!(
"[startup:reconcile] No committed work for '{story_id}' in {stage_dir}/; skipping."
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "skipped".to_string(),
message: "No committed work found; skipping.".to_string(),
});
continue;
}
eprintln!(
"[startup:reconcile] Found committed work for '{story_id}' in {stage_dir}/. Running acceptance gates."
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "gates_running".to_string(),
message: "Running acceptance gates…".to_string(),
});
// Run acceptance gates on the worktree.
let wt_path_for_gates = wt_path.clone();
let gates_result = tokio::task::spawn_blocking(move || {
crate::agents::gates::check_uncommitted_changes(&wt_path_for_gates)?;
crate::agents::gates::run_acceptance_gates(&wt_path_for_gates)
})
.await;
let (gates_passed, gate_output) = match gates_result {
Ok(Ok(pair)) => pair,
Ok(Err(e)) => {
eprintln!("[startup:reconcile] Gate check error for '{story_id}': {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Gate error: {e}"),
});
continue;
}
Err(e) => {
eprintln!("[startup:reconcile] Gate check task panicked for '{story_id}': {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Gate task panicked: {e}"),
});
continue;
}
};
if !gates_passed {
eprintln!(
"[startup:reconcile] Gates failed for '{story_id}': {gate_output}\n\
Leaving in {stage_dir}/ for auto-assign to restart the agent."
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: "Gates failed; will be retried by auto-assign.".to_string(),
});
continue;
}
eprintln!("[startup:reconcile] Gates passed for '{story_id}' (stage: {stage_dir}/).");
if stage_dir == "2_current" {
// Coder stage — determine qa mode to decide next step.
let qa_mode = {
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
if item_type == "spike" {
crate::io::story_metadata::QaMode::Human
} else {
let default_qa = crate::config::ProjectConfig::load(project_root)
.unwrap_or_default()
.default_qa_mode();
let story_path = project_root
.join(".storkit/work/2_current")
.join(format!("{story_id}.md"));
crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa)
}
};
match qa_mode {
crate::io::story_metadata::QaMode::Server => {
if let Err(e) =
crate::agents::move_story_to_merge(project_root, story_id)
{
eprintln!("[startup:reconcile] Failed to move '{story_id}' to 4_merge/: {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Failed to advance to merge: {e}"),
});
} else {
eprintln!("[startup:reconcile] Moved '{story_id}' → 4_merge/ (qa: server).");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "advanced".to_string(),
message: "Gates passed — moved to merge (qa: server).".to_string(),
});
}
}
crate::io::story_metadata::QaMode::Agent => {
if let Err(e) =
crate::agents::move_story_to_qa(project_root, story_id)
{
eprintln!("[startup:reconcile] Failed to move '{story_id}' to 3_qa/: {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Failed to advance to QA: {e}"),
});
} else {
eprintln!("[startup:reconcile] Moved '{story_id}' → 3_qa/.");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "advanced".to_string(),
message: "Gates passed — moved to QA.".to_string(),
});
}
}
crate::io::story_metadata::QaMode::Human => {
if let Err(e) =
crate::agents::move_story_to_qa(project_root, story_id)
{
eprintln!("[startup:reconcile] Failed to move '{story_id}' to 3_qa/: {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Failed to advance to QA: {e}"),
});
} else {
let story_path = project_root
.join(".storkit/work/3_qa")
.join(format!("{story_id}.md"));
if let Err(e) =
crate::io::story_metadata::write_review_hold(&story_path)
{
eprintln!(
"[startup:reconcile] Failed to set review_hold on '{story_id}': {e}"
);
}
eprintln!("[startup:reconcile] Moved '{story_id}' → 3_qa/ (qa: human — holding for review).");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "review_hold".to_string(),
message: "Gates passed — holding for human review.".to_string(),
});
}
}
}
} else if stage_dir == "3_qa" {
// QA stage → run coverage gate before advancing to merge.
let wt_path_for_cov = wt_path.clone();
let coverage_result = tokio::task::spawn_blocking(move || {
crate::agents::gates::run_coverage_gate(&wt_path_for_cov)
})
.await;
let (coverage_passed, coverage_output) = match coverage_result {
Ok(Ok(pair)) => pair,
Ok(Err(e)) => {
eprintln!("[startup:reconcile] Coverage gate error for '{story_id}': {e}");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Coverage gate error: {e}"),
});
continue;
}
Err(e) => {
eprintln!(
"[startup:reconcile] Coverage gate panicked for '{story_id}': {e}"
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Coverage gate panicked: {e}"),
});
continue;
}
};
if coverage_passed {
// Check whether this item needs human review before merging.
let needs_human_review = {
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
if item_type == "spike" {
true
} else {
let story_path = project_root
.join(".storkit/work/3_qa")
.join(format!("{story_id}.md"));
let default_qa = crate::config::ProjectConfig::load(project_root)
.unwrap_or_default()
.default_qa_mode();
matches!(
crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa),
crate::io::story_metadata::QaMode::Human
)
}
};
if needs_human_review {
let story_path = project_root
.join(".storkit/work/3_qa")
.join(format!("{story_id}.md"));
if let Err(e) =
crate::io::story_metadata::write_review_hold(&story_path)
{
eprintln!(
"[startup:reconcile] Failed to set review_hold on '{story_id}': {e}"
);
}
eprintln!(
"[startup:reconcile] '{story_id}' passed QA — holding for human review."
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "review_hold".to_string(),
message: "Passed QA — waiting for human review.".to_string(),
});
} else if let Err(e) =
crate::agents::move_story_to_merge(project_root, story_id)
{
eprintln!(
"[startup:reconcile] Failed to move '{story_id}' to 4_merge/: {e}"
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: format!("Failed to advance to merge: {e}"),
});
} else {
eprintln!("[startup:reconcile] Moved '{story_id}' → 4_merge/.");
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "advanced".to_string(),
message: "Gates passed — moved to merge.".to_string(),
});
}
} else {
eprintln!(
"[startup:reconcile] Coverage gate failed for '{story_id}': {coverage_output}\n\
Leaving in 3_qa/ for auto-assign to restart the QA agent."
);
let _ = progress_tx.send(ReconciliationEvent {
story_id: story_id.clone(),
status: "failed".to_string(),
message: "Coverage gate failed; will be retried.".to_string(),
});
}
}
}
// Signal that reconciliation is complete.
let _ = progress_tx.send(ReconciliationEvent {
story_id: String::new(),
status: "done".to_string(),
message: "Startup reconciliation complete.".to_string(),
});
}
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use std::process::Command;
use tokio::sync::broadcast;
use super::super::super::AgentPool;
use crate::agents::ReconciliationEvent;
fn init_git_repo(repo: &std::path::Path) {
Command::new("git")
.args(["init"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.email", "test@test.com"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.name", "Test"])
.current_dir(repo)
.output()
.unwrap();
// Create initial commit so master branch exists.
std::fs::write(repo.join("README.md"), "# test\n").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "initial"])
.current_dir(repo)
.output()
.unwrap();
}
#[tokio::test]
async fn reconcile_on_startup_noop_when_no_worktrees() {
let tmp = tempfile::tempdir().unwrap();
let pool = AgentPool::new_test(3001);
let (tx, _rx) = broadcast::channel(16);
// Should not panic; no worktrees to reconcile.
pool.reconcile_on_startup(tmp.path(), &tx).await;
}
#[tokio::test]
async fn reconcile_on_startup_emits_done_event() {
let tmp = tempfile::tempdir().unwrap();
let pool = AgentPool::new_test(3001);
let (tx, mut rx) = broadcast::channel::<ReconciliationEvent>(16);
pool.reconcile_on_startup(tmp.path(), &tx).await;
// Collect all events; a "done" event must be present (it is emitted last).
let mut events: Vec<ReconciliationEvent> = Vec::new();
while let Ok(evt) = rx.try_recv() {
events.push(evt);
}
assert!(
events.iter().any(|e| e.status == "done"),
"reconcile_on_startup must emit a 'done' event; got: {:?}",
events.iter().map(|e| &e.status).collect::<Vec<_>>()
);
}
#[tokio::test]
async fn reconcile_on_startup_skips_story_without_committed_work() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up story in 2_current/.
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("60_story_test.md"), "test").unwrap();
// Create a worktree directory that is a fresh git repo with no commits
// ahead of its own base branch (simulates a worktree where no work was done).
let wt_dir = root.join(".storkit/worktrees/60_story_test");
fs::create_dir_all(&wt_dir).unwrap();
init_git_repo(&wt_dir);
let pool = AgentPool::new_test(3001);
let (tx, _rx) = broadcast::channel(16);
pool.reconcile_on_startup(root, &tx).await;
// Story should still be in 2_current/ — nothing was reconciled.
assert!(
current.join("60_story_test.md").exists(),
"story should stay in 2_current/ when worktree has no committed work"
);
}
#[tokio::test]
async fn reconcile_on_startup_runs_gates_on_worktree_with_committed_work() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up a git repo for the project root.
init_git_repo(root);
// Set up story in 2_current/ and commit it so the project root is clean.
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("61_story_test.md"), "test").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(root)
.output()
.unwrap();
Command::new("git")
.args([
"-c",
"user.email=test@test.com",
"-c",
"user.name=Test",
"commit",
"-m",
"add story",
])
.current_dir(root)
.output()
.unwrap();
// Create a real git worktree for the story.
let wt_dir = root.join(".storkit/worktrees/61_story_test");
fs::create_dir_all(wt_dir.parent().unwrap()).unwrap();
Command::new("git")
.args([
"worktree",
"add",
&wt_dir.to_string_lossy(),
"-b",
"feature/story-61_story_test",
])
.current_dir(root)
.output()
.unwrap();
// Add a commit to the feature branch (simulates coder completing work).
fs::write(wt_dir.join("implementation.txt"), "done").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(&wt_dir)
.output()
.unwrap();
Command::new("git")
.args([
"-c",
"user.email=test@test.com",
"-c",
"user.name=Test",
"commit",
"-m",
"implement story",
])
.current_dir(&wt_dir)
.output()
.unwrap();
assert!(
crate::agents::gates::worktree_has_committed_work(&wt_dir),
"test setup: worktree should have committed work"
);
let pool = AgentPool::new_test(3001);
let (tx, _rx) = broadcast::channel(16);
pool.reconcile_on_startup(root, &tx).await;
// In the test env, cargo clippy will fail (no Cargo.toml) so gates fail
// and the story stays in 2_current/. The important assertion is that
// reconcile ran without panicking and the story is in a consistent state.
let in_current = current.join("61_story_test.md").exists();
let in_qa = root.join(".storkit/work/3_qa/61_story_test.md").exists();
assert!(
in_current || in_qa,
"story should be in 2_current/ or 3_qa/ after reconciliation"
);
}
}
@@ -0,0 +1,553 @@
//! Scanning pipeline stages for work items and querying agent pool state.
use crate::config::ProjectConfig;
use std::collections::HashMap;
use std::path::Path;
use super::super::super::{AgentStatus, PipelineStage, agent_config_stage, pipeline_stage};
use super::super::StoryAgent;
/// Return `true` if `agent_name` has no active (pending/running) entry in the pool.
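///
/// ```ignore
/// // Sketch: with an entry for "story1:coder-1" in Running status,
/// is_agent_free(&agents, "coder-1"); // false (busy)
/// is_agent_free(&agents, "coder-2"); // true (no active entry)
/// ```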
pub(in crate::agents::pool) fn is_agent_free(
agents: &HashMap<String, StoryAgent>,
agent_name: &str,
) -> bool {
!agents.values().any(|a| {
a.agent_name == agent_name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
})
}
/// List work-item IDs (the `.md` file stems) in `.storkit/work/{stage_dir}/`, sorted.
pub(super) fn scan_stage_items(project_root: &Path, stage_dir: &str) -> Vec<String> {
let dir = project_root.join(".storkit").join("work").join(stage_dir);
if !dir.is_dir() {
return Vec::new();
}
let mut items = Vec::new();
if let Ok(entries) = std::fs::read_dir(&dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) == Some("md")
&& let Some(stem) = path.file_stem().and_then(|s| s.to_str())
{
items.push(stem.to_string());
}
}
}
items.sort();
items
}
/// Return `true` if `story_id` has any active (pending/running) agent matching `stage`.
///
/// Uses the explicit `stage` config field when the agent is found in `config`;
/// falls back to the legacy name-based heuristic for unlisted agents.
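///
/// Sketch (mirrors the tests below; an entry keyed `"42_story_foo:coder-1"`
/// in Running status is assumed):
/// ```ignore
/// is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Coder); // true
/// is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Qa);    // false
/// is_story_assigned_for_stage(&config, &agents, "99_story_other", &PipelineStage::Coder); // false
/// ```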
pub(super) fn is_story_assigned_for_stage(
config: &ProjectConfig,
agents: &HashMap<String, StoryAgent>,
story_id: &str,
stage: &PipelineStage,
) -> bool {
agents.iter().any(|(key, agent)| {
// Composite key format: "{story_id}:{agent_name}"
let key_story_id = key.rsplit_once(':').map(|(sid, _)| sid).unwrap_or(key);
let agent_stage = config
.find_agent(&agent.agent_name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(&agent.agent_name));
key_story_id == story_id
&& agent_stage == *stage
&& matches!(agent.status, AgentStatus::Running | AgentStatus::Pending)
})
}
/// Count active (pending/running) agents for a given pipeline stage.
pub(super) fn count_active_agents_for_stage(
config: &ProjectConfig,
agents: &HashMap<String, StoryAgent>,
stage: &PipelineStage,
) -> usize {
agents
.values()
.filter(|a| {
matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
&& config
.find_agent(&a.agent_name)
.map(|cfg| agent_config_stage(cfg) == *stage)
.unwrap_or_else(|| pipeline_stage(&a.agent_name) == *stage)
})
.count()
}
/// Find the first configured agent for `stage` that has no active (pending/running) assignment.
/// Returns `None` if all agents for that stage are busy, none are configured,
/// or the `max_coders` limit has been reached (for the Coder stage).
///
/// For the Coder stage, when `default_coder_model` is set, only considers agents whose
/// model matches the default. This ensures opus-class agents are reserved for explicit
/// front-matter requests.
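///
/// Selection rules sketched (mirrors the model-filtering tests below;
/// config contents are illustrative):
/// ```ignore
/// // default_coder_model = "sonnet"; coder-1 (sonnet) free, coder-opus (opus) free:
/// find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder); // Some("coder-1")
/// // With coder-1 busy, the opus agent is still skipped, not substituted:
/// find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder); // None
/// ```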
pub(in crate::agents::pool) fn find_free_agent_for_stage<'a>(
config: &'a ProjectConfig,
agents: &HashMap<String, StoryAgent>,
stage: &PipelineStage,
) -> Option<&'a str> {
// Enforce max_coders limit for the Coder stage.
if *stage == PipelineStage::Coder
&& let Some(max) = config.max_coders
{
let active = count_active_agents_for_stage(config, agents, stage);
if active >= max {
return None;
}
}
for agent_config in &config.agent {
if agent_config_stage(agent_config) != *stage {
continue;
}
// When default_coder_model is set, only auto-assign coder agents whose
// model matches. This keeps opus agents reserved for explicit requests.
if *stage == PipelineStage::Coder
&& let Some(ref default_model) = config.default_coder_model
{
let agent_model = agent_config.model.as_deref().unwrap_or("");
if agent_model != default_model {
continue;
}
}
let is_busy = agents.values().any(|a| {
a.agent_name == agent_config.name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
});
if !is_busy {
return Some(&agent_config.name);
}
}
None
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::config::ProjectConfig;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use super::super::super::AgentPool;
fn make_config(toml_str: &str) -> ProjectConfig {
ProjectConfig::parse(toml_str).unwrap()
}
fn make_test_story_agent(agent_name: &str, status: AgentStatus) -> StoryAgent {
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: None,
session_id: None,
tx: broadcast::channel(1).0,
task_handle: None,
event_log: Arc::new(Mutex::new(Vec::new())),
completion: None,
project_root: None,
log_session_id: None,
merge_failure_reported: false,
throttled: false,
}
}
#[test]
fn scan_stage_items_returns_empty_for_missing_dir() {
let tmp = tempfile::tempdir().unwrap();
let items = scan_stage_items(tmp.path(), "2_current");
assert!(items.is_empty());
}
#[test]
fn scan_stage_items_returns_sorted_story_ids() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let stage_dir = tmp.path().join(".storkit").join("work").join("2_current");
fs::create_dir_all(&stage_dir).unwrap();
fs::write(stage_dir.join("42_story_foo.md"), "---\nname: foo\n---").unwrap();
fs::write(stage_dir.join("10_story_bar.md"), "---\nname: bar\n---").unwrap();
fs::write(stage_dir.join("5_story_baz.md"), "---\nname: baz\n---").unwrap();
// non-md file should be ignored
fs::write(stage_dir.join("README.txt"), "ignore me").unwrap();
let items = scan_stage_items(tmp.path(), "2_current");
assert_eq!(items, vec!["10_story_bar", "42_story_foo", "5_story_baz"]);
}
#[test]
fn is_story_assigned_returns_true_for_running_coder() {
let config = ProjectConfig::default();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running);
let agents = pool.agents.lock().unwrap();
assert!(is_story_assigned_for_stage(
&config,
&agents,
"42_story_foo",
&PipelineStage::Coder
));
// Same story but wrong stage — should be false
assert!(!is_story_assigned_for_stage(
&config,
&agents,
"42_story_foo",
&PipelineStage::Qa
));
// Different story — should be false
assert!(!is_story_assigned_for_stage(
&config,
&agents,
"99_story_other",
&PipelineStage::Coder
));
}
#[test]
fn is_story_assigned_returns_false_for_completed_agent() {
let config = ProjectConfig::default();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Completed);
let agents = pool.agents.lock().unwrap();
// Completed agents don't count as assigned
assert!(!is_story_assigned_for_stage(
&config,
&agents,
"42_story_foo",
&PipelineStage::Coder
));
}
#[test]
fn is_story_assigned_uses_config_stage_field_for_nonstandard_names() {
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "qa-2"
stage = "qa"
"#,
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("42_story_foo", "qa-2", AgentStatus::Running);
let agents = pool.agents.lock().unwrap();
// qa-2 with stage=qa should be recognised as a QA agent
assert!(
is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Qa),
"qa-2 should be detected as assigned to QA stage"
);
// Should NOT appear as a coder
assert!(
!is_story_assigned_for_stage(&config, &agents, "42_story_foo", &PipelineStage::Coder),
"qa-2 should not be detected as a coder"
);
}
#[test]
fn find_free_agent_returns_none_when_all_busy() {
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "coder-1"
[[agent]]
name = "coder-2"
"#,
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("s1", "coder-1", AgentStatus::Running);
pool.inject_test_agent("s2", "coder-2", AgentStatus::Running);
let agents = pool.agents.lock().unwrap();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert!(free.is_none(), "no free coders should be available");
}
#[test]
fn find_free_agent_returns_first_free_coder() {
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "coder-1"
[[agent]]
name = "coder-2"
[[agent]]
name = "coder-3"
"#,
)
.unwrap();
let pool = AgentPool::new_test(3001);
// coder-1 is busy, coder-2 is free
pool.inject_test_agent("s1", "coder-1", AgentStatus::Running);
let agents = pool.agents.lock().unwrap();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(
free,
Some("coder-2"),
"coder-2 should be the first free coder"
);
}
#[test]
fn find_free_agent_ignores_completed_agents() {
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "coder-1"
"#,
)
.unwrap();
let pool = AgentPool::new_test(3001);
// coder-1 completed its previous story — it's free for a new one
pool.inject_test_agent("s1", "coder-1", AgentStatus::Completed);
let agents = pool.agents.lock().unwrap();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, Some("coder-1"), "completed coder-1 should be free");
}
#[test]
fn find_free_agent_returns_none_for_wrong_stage() {
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "qa"
"#,
)
.unwrap();
let agents: HashMap<String, StoryAgent> = HashMap::new();
// Looking for a Coder but only QA is configured
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert!(free.is_none());
// Looking for QA should find it
let free_qa = find_free_agent_for_stage(&config, &agents, &PipelineStage::Qa);
assert_eq!(free_qa, Some("qa"));
}
#[test]
fn find_free_agent_uses_config_stage_field_not_name() {
// Agents named "qa-2" and "coder-opus" don't match the legacy name heuristic
// but should be picked up via their explicit stage field.
let config = ProjectConfig::parse(
r#"
[[agent]]
name = "qa-2"
stage = "qa"
[[agent]]
name = "coder-opus"
stage = "coder"
"#,
)
.unwrap();
let agents: HashMap<String, StoryAgent> = HashMap::new();
// qa-2 should be found for PipelineStage::Qa via config stage field
let free_qa = find_free_agent_for_stage(&config, &agents, &PipelineStage::Qa);
assert_eq!(free_qa, Some("qa-2"), "qa-2 with stage=qa should be found");
// coder-opus should be found for PipelineStage::Coder via config stage field
let free_coder = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(
free_coder,
Some("coder-opus"),
"coder-opus with stage=coder should be found"
);
// Neither should match the other stage
let free_merge = find_free_agent_for_stage(&config, &agents, &PipelineStage::Mergemaster);
assert!(free_merge.is_none());
}
// ── find_free_agent_for_stage: default_coder_model filtering ─────────
#[test]
fn find_free_agent_skips_opus_when_default_coder_model_set() {
let config = make_config(
r#"
default_coder_model = "sonnet"
[[agent]]
name = "coder-1"
stage = "coder"
model = "sonnet"
[[agent]]
name = "coder-opus"
stage = "coder"
model = "opus"
"#,
);
let agents = HashMap::new();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, Some("coder-1"));
}
#[test]
fn find_free_agent_returns_opus_when_no_default_coder_model() {
let config = make_config(
r#"
[[agent]]
name = "coder-opus"
stage = "coder"
model = "opus"
"#,
);
let agents = HashMap::new();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, Some("coder-opus"));
}
#[test]
fn find_free_agent_returns_none_when_all_sonnet_coders_busy() {
let config = make_config(
r#"
default_coder_model = "sonnet"
[[agent]]
name = "coder-1"
stage = "coder"
model = "sonnet"
[[agent]]
name = "coder-opus"
stage = "coder"
model = "opus"
"#,
);
let mut agents = HashMap::new();
agents.insert(
"story1:coder-1".to_string(),
make_test_story_agent("coder-1", AgentStatus::Running),
);
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, None, "opus agent should not be auto-assigned");
}
// ── find_free_agent_for_stage: max_coders limit ─────────────────────
#[test]
fn find_free_agent_respects_max_coders() {
let config = make_config(
r#"
max_coders = 1
[[agent]]
name = "coder-1"
stage = "coder"
model = "sonnet"
[[agent]]
name = "coder-2"
stage = "coder"
model = "sonnet"
"#,
);
let mut agents = HashMap::new();
agents.insert(
"story1:coder-1".to_string(),
make_test_story_agent("coder-1", AgentStatus::Running),
);
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, None, "max_coders=1 should block second coder");
}
#[test]
fn find_free_agent_allows_within_max_coders() {
let config = make_config(
r#"
max_coders = 2
[[agent]]
name = "coder-1"
stage = "coder"
model = "sonnet"
[[agent]]
name = "coder-2"
stage = "coder"
model = "sonnet"
"#,
);
let mut agents = HashMap::new();
agents.insert(
"story1:coder-1".to_string(),
make_test_story_agent("coder-1", AgentStatus::Running),
);
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(free, Some("coder-2"));
}
#[test]
fn max_coders_does_not_affect_qa_stage() {
let config = make_config(
r#"
max_coders = 1
[[agent]]
name = "qa"
stage = "qa"
model = "sonnet"
"#,
);
let agents = HashMap::new();
let free = find_free_agent_for_stage(&config, &agents, &PipelineStage::Qa);
assert_eq!(free, Some("qa"));
}
// ── count_active_agents_for_stage ────────────────────────────────────
#[test]
fn count_active_agents_counts_running_and_pending() {
let config = make_config(
r#"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
"#,
);
let mut agents = HashMap::new();
agents.insert(
"s1:coder-1".to_string(),
make_test_story_agent("coder-1", AgentStatus::Running),
);
agents.insert(
"s2:coder-2".to_string(),
make_test_story_agent("coder-2", AgentStatus::Pending),
);
agents.insert(
"s3:coder-1".to_string(),
make_test_story_agent("coder-1", AgentStatus::Completed),
);
let count = count_active_agents_for_stage(&config, &agents, &PipelineStage::Coder);
assert_eq!(count, 2, "Running and Pending coders should be counted, not Completed");
}
}
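The selection behavior these tests pin down can be sketched in isolation. A minimal, self-contained approximation (the `Stage`, `AgentCfg`, and `find_free` names here are illustrative, not the crate's real API): agents are matched by their configured stage, "busy" means a Running or Pending pool entry, and a `max_coders` cap is enforced before any coder is handed out.

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Debug)]
enum Stage { Coder, Qa }

struct AgentCfg { name: &'static str, stage: Stage }

enum Status { Running, Pending, Completed }

// Pick the first configured agent for `stage` with no active pool entry,
// honoring the coder cap. Pool keys are "story:agent" composites.
fn find_free<'a>(
    cfg: &'a [AgentCfg],
    pool: &HashMap<String, Status>,
    stage: Stage,
    max_coders: usize,
) -> Option<&'a str> {
    let busy = |name: &str| {
        pool.iter().any(|(k, s)| {
            k.rsplit_once(':').map(|(_, a)| a) == Some(name)
                && matches!(s, Status::Running | Status::Pending)
        })
    };
    // Enforce the coder cap before handing out anyone.
    if stage == Stage::Coder {
        let active = cfg
            .iter()
            .filter(|a| a.stage == Stage::Coder && busy(a.name))
            .count();
        if active >= max_coders {
            return None;
        }
    }
    cfg.iter()
        .find(|a| a.stage == stage && !busy(a.name))
        .map(|a| a.name)
}
```

Note that a Completed entry does not count as busy, which is exactly what `find_free_agent_ignores_completed_agents` asserts above.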
@@ -0,0 +1,113 @@
//! Front-matter checks for story files: review holds, blocked state, and merge failures.
use std::path::Path;
/// Read the optional `agent:` field from the front matter of a story file.
///
/// Returns `Some(agent_name)` if the front matter specifies an agent, or `None`
/// if the field is absent or the file cannot be read or parsed.
pub(super) fn read_story_front_matter_agent(
project_root: &Path,
stage_dir: &str,
story_id: &str,
) -> Option<String> {
use crate::io::story_metadata::parse_front_matter;
let path = project_root
.join(".storkit")
.join("work")
.join(stage_dir)
.join(format!("{story_id}.md"));
let contents = std::fs::read_to_string(path).ok()?;
parse_front_matter(&contents).ok()?.agent
}
/// Return `true` if the story file in the given stage has `review_hold: true` in its front matter.
pub(super) fn has_review_hold(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
use crate::io::story_metadata::parse_front_matter;
let path = project_root
.join(".storkit")
.join("work")
.join(stage_dir)
.join(format!("{story_id}.md"));
let contents = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return false,
};
parse_front_matter(&contents)
.ok()
.and_then(|m| m.review_hold)
.unwrap_or(false)
}
/// Return `true` if the story file has `blocked: true` in its front matter.
pub(super) fn is_story_blocked(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
use crate::io::story_metadata::parse_front_matter;
let path = project_root
.join(".storkit")
.join("work")
.join(stage_dir)
.join(format!("{story_id}.md"));
let contents = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return false,
};
parse_front_matter(&contents)
.ok()
.and_then(|m| m.blocked)
.unwrap_or(false)
}
/// Return `true` if the story file has a `merge_failure` field in its front matter.
pub(super) fn has_merge_failure(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
use crate::io::story_metadata::parse_front_matter;
let path = project_root
.join(".storkit")
.join("work")
.join(stage_dir)
.join(format!("{story_id}.md"));
let contents = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return false,
};
parse_front_matter(&contents)
.ok()
.and_then(|m| m.merge_failure)
.is_some()
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn has_review_hold_returns_true_when_set() {
let tmp = tempfile::tempdir().unwrap();
let qa_dir = tmp.path().join(".storkit/work/3_qa");
std::fs::create_dir_all(&qa_dir).unwrap();
let spike_path = qa_dir.join("10_spike_research.md");
std::fs::write(
&spike_path,
"---\nname: Research spike\nreview_hold: true\n---\n# Spike\n",
)
.unwrap();
assert!(has_review_hold(tmp.path(), "3_qa", "10_spike_research"));
}
#[test]
fn has_review_hold_returns_false_when_not_set() {
let tmp = tempfile::tempdir().unwrap();
let qa_dir = tmp.path().join(".storkit/work/3_qa");
std::fs::create_dir_all(&qa_dir).unwrap();
let spike_path = qa_dir.join("10_spike_research.md");
std::fs::write(&spike_path, "---\nname: Research spike\n---\n# Spike\n").unwrap();
assert!(!has_review_hold(tmp.path(), "3_qa", "10_spike_research"));
}
#[test]
fn has_review_hold_returns_false_when_file_missing() {
let tmp = tempfile::tempdir().unwrap();
assert!(!has_review_hold(tmp.path(), "3_qa", "99_spike_missing"));
}
}
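The helpers above all delegate to `parse_front_matter`; as a rough illustration of the input they consume, here is a standalone sketch (not the crate's parser) that reads a boolean flag such as `review_hold: true` or `blocked: true` from a `---`-delimited front-matter block:

```rust
// Return true iff the front matter (the region between the opening and
// closing "---" lines) contains `key: true`. Anything after the closing
// "---" is story body and is ignored.
fn front_matter_flag(contents: &str, key: &str) -> bool {
    let mut lines = contents.lines();
    if lines.next() != Some("---") {
        return false; // no front matter at all
    }
    for line in lines {
        if line == "---" {
            break; // end of front matter
        }
        if let Some((k, v)) = line.split_once(':') {
            if k.trim() == key && v.trim() == "true" {
                return true;
            }
        }
    }
    false
}
```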
@@ -0,0 +1,220 @@
//! Watchdog task: detects orphaned agents and triggers auto-assign.
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use crate::slog;
use super::super::super::{AgentEvent, AgentStatus};
use super::super::{AgentPool, StoryAgent};
/// Scan the agent pool for Running entries whose backing tokio task has already
/// finished and mark them as Failed.
///
/// This handles the case where the PTY read loop or the spawned task exits
/// without updating the agent status: for example, the process is killed
/// externally and the PTY master fd returns EOF before our inactivity
/// timeout fires, yet an edge case prevents the normal cleanup path from
/// running.
pub(super) fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) -> usize {
let mut lock = match agents.lock() {
Ok(l) => l,
Err(_) => return 0,
};
// Collect orphaned entries: Running or Pending agents whose task handle is finished.
// Pending agents can be orphaned if worktree creation panics before setting status.
let orphaned: Vec<(String, String, broadcast::Sender<AgentEvent>, AgentStatus)> = lock
.iter()
.filter_map(|(key, agent)| {
if matches!(agent.status, AgentStatus::Running | AgentStatus::Pending)
&& let Some(handle) = &agent.task_handle
&& handle.is_finished()
{
let story_id = key
.rsplit_once(':')
.map(|(s, _)| s.to_string())
.unwrap_or_else(|| key.clone());
return Some((
key.clone(),
story_id,
agent.tx.clone(),
agent.status.clone(),
));
}
None
})
.collect();
let count = orphaned.len();
for (key, story_id, tx, prev_status) in orphaned {
if let Some(agent) = lock.get_mut(&key) {
agent.status = AgentStatus::Failed;
slog!(
"[watchdog] Orphaned agent '{key}': task finished but status was {prev_status}. \
Marking Failed."
);
let _ = tx.send(AgentEvent::Error {
story_id,
agent_name: agent.agent_name.clone(),
message: "Agent process terminated unexpectedly (watchdog detected orphan)"
.to_string(),
});
}
}
count
}
impl AgentPool {
/// Run a single watchdog pass synchronously (test helper).
#[cfg(test)]
pub fn run_watchdog_once(&self) {
check_orphaned_agents(&self.agents);
}
/// Spawn a background watchdog task that periodically checks for Running agents
/// whose underlying task has already finished (orphaned entries). Any such agent
/// is marked Failed and an Error event is emitted so that `wait_for_agent` unblocks.
///
/// The watchdog runs every 30 seconds. It is a safety net for edge cases where the
/// PTY read loop exits without updating the agent status (e.g. a panic in the
/// spawn_blocking task, or an external SIGKILL that closes the PTY fd immediately).
///
/// When orphaned agents are detected and a `project_root` is provided, auto-assign
/// is triggered so that free agents can pick up unassigned work.
pub fn spawn_watchdog(pool: Arc<AgentPool>, project_root: Option<PathBuf>) {
tokio::spawn(async move {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
loop {
interval.tick().await;
let found = check_orphaned_agents(&pool.agents);
if found > 0
&& let Some(ref root) = project_root
{
slog!("[watchdog] {found} orphaned agent(s) detected; triggering auto-assign.");
pool.auto_assign_available_work(root).await;
}
}
});
}
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use super::super::super::{AgentPool, composite_key};
// ── check_orphaned_agents return value tests (bug 161) ──────────────────
#[tokio::test]
async fn check_orphaned_agents_returns_count_of_orphaned_agents() {
let pool = AgentPool::new_test(3001);
// Spawn two tasks that finish immediately.
let h1 = tokio::spawn(async {});
let h2 = tokio::spawn(async {});
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
assert!(h1.is_finished());
assert!(h2.is_finished());
pool.inject_test_agent_with_handle("story_a", "coder", AgentStatus::Running, h1);
pool.inject_test_agent_with_handle("story_b", "coder", AgentStatus::Running, h2);
let found = check_orphaned_agents(&pool.agents);
assert_eq!(found, 2, "should detect both orphaned agents");
}
#[test]
fn check_orphaned_agents_returns_zero_when_no_orphans() {
let pool = AgentPool::new_test(3001);
// Inject agents in terminal states — not orphaned.
pool.inject_test_agent("story_a", "coder", AgentStatus::Completed);
pool.inject_test_agent("story_b", "qa", AgentStatus::Failed);
let found = check_orphaned_agents(&pool.agents);
assert_eq!(
found, 0,
"no orphans should be detected for terminal agents"
);
}
#[tokio::test]
async fn watchdog_detects_orphaned_running_agent() {
let pool = AgentPool::new_test(3001);
let handle = tokio::spawn(async {});
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
assert!(
handle.is_finished(),
"task should be finished before injection"
);
let tx = pool.inject_test_agent_with_handle(
"orphan_story",
"coder",
AgentStatus::Running,
handle,
);
let mut rx = tx.subscribe();
pool.run_watchdog_once();
{
let agents = pool.agents.lock().unwrap();
let key = composite_key("orphan_story", "coder");
let agent = agents.get(&key).unwrap();
assert_eq!(
agent.status,
AgentStatus::Failed,
"watchdog must mark an orphaned Running agent as Failed"
);
}
let event = rx.try_recv().expect("watchdog must emit an Error event");
assert!(
matches!(event, AgentEvent::Error { .. }),
"expected AgentEvent::Error, got: {event:?}"
);
}
#[tokio::test]
async fn watchdog_orphan_detection_returns_nonzero_enabling_auto_assign() {
// This test verifies the contract that `check_orphaned_agents` returns
// a non-zero count when orphans exist, which the watchdog uses to
// decide whether to trigger auto-assign (bug 161).
let pool = AgentPool::new_test(3001);
let handle = tokio::spawn(async {});
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
pool.inject_test_agent_with_handle("orphan_story", "coder", AgentStatus::Running, handle);
// Before watchdog: agent is Running.
{
let agents = pool.agents.lock().unwrap();
let key = composite_key("orphan_story", "coder");
assert_eq!(agents.get(&key).unwrap().status, AgentStatus::Running);
}
// Run watchdog pass — should return 1 (orphan found).
let found = check_orphaned_agents(&pool.agents);
assert_eq!(
found, 1,
"watchdog must return 1 for a single orphaned agent"
);
// After watchdog: agent is Failed.
{
let agents = pool.agents.lock().unwrap();
let key = composite_key("orphan_story", "coder");
assert_eq!(
agents.get(&key).unwrap().status,
AgentStatus::Failed,
"orphaned agent must be marked Failed"
);
}
}
}
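The orphan check can be demonstrated standalone with `std::thread` in place of tokio tasks (both `JoinHandle` types expose `is_finished`); the names below are illustrative, not the crate's API:

```rust
use std::collections::HashMap;
use std::thread;
use std::time::Duration;

#[derive(Debug, PartialEq)]
enum Status { Running, Failed }

// An entry that claims to be Running while its backing handle has already
// finished is an orphan: mark it Failed and report how many were found.
fn mark_orphans(pool: &mut HashMap<String, (Status, thread::JoinHandle<()>)>) -> usize {
    let mut count = 0;
    for (_key, (status, handle)) in pool.iter_mut() {
        if *status == Status::Running && handle.is_finished() {
            *status = Status::Failed; // task died without a status update
            count += 1;
        }
    }
    count
}
```

As in the real watchdog, a second pass over the same pool finds nothing: orphans are only counted on the transition to Failed.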
File diff suppressed because it is too large
@@ -0,0 +1,785 @@
use crate::config::ProjectConfig;
use crate::slog;
use crate::slog_error;
use crate::slog_warn;
use crate::io::watcher::WatcherEvent;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use super::super::super::{
CompletionReport, PipelineStage,
agent_config_stage, pipeline_stage,
};
use super::super::{AgentPool, StoryAgent};
impl AgentPool {
/// Pipeline advancement: after an agent completes, move the story to
/// the next pipeline stage and start the appropriate agent.
pub(super) async fn run_pipeline_advance(
&self,
story_id: &str,
agent_name: &str,
completion: CompletionReport,
project_root: Option<PathBuf>,
worktree_path: Option<PathBuf>,
merge_failure_reported: bool,
) {
let project_root = match project_root {
Some(p) => p,
None => {
slog_warn!("[pipeline] No project_root for '{story_id}:{agent_name}'");
return;
}
};
let config = ProjectConfig::load(&project_root).unwrap_or_default();
let stage = config
.find_agent(agent_name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(agent_name));
match stage {
PipelineStage::Other => {
// Supervisors and unknown agents do not advance the pipeline.
}
PipelineStage::Coder => {
if completion.gates_passed {
// Determine effective QA mode for this story.
let qa_mode = {
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
if item_type == "spike" {
crate::io::story_metadata::QaMode::Human
} else {
let default_qa = config.default_qa_mode();
// Story is in 2_current/ when a coder completes.
let story_path = project_root
.join(".storkit/work/2_current")
.join(format!("{story_id}.md"));
crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa)
}
};
match qa_mode {
crate::io::story_metadata::QaMode::Server => {
slog!(
"[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
qa: server; moving directly to merge."
);
if let Err(e) =
crate::agents::lifecycle::move_story_to_merge(&project_root, story_id)
{
slog_error!(
"[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
);
} else if let Err(e) = self
.start_agent(&project_root, story_id, Some("mergemaster"), None)
.await
{
slog_error!(
"[pipeline] Failed to start mergemaster for '{story_id}': {e}"
);
}
}
crate::io::story_metadata::QaMode::Agent => {
slog!(
"[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
qa: agent; moving to QA."
);
if let Err(e) = crate::agents::lifecycle::move_story_to_qa(&project_root, story_id) {
slog_error!("[pipeline] Failed to move '{story_id}' to 3_qa/: {e}");
} else if let Err(e) = self
.start_agent(&project_root, story_id, Some("qa"), None)
.await
{
slog_error!("[pipeline] Failed to start qa agent for '{story_id}': {e}");
}
}
crate::io::story_metadata::QaMode::Human => {
slog!(
"[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
qa: human; holding for human review."
);
if let Err(e) = crate::agents::lifecycle::move_story_to_qa(&project_root, story_id) {
slog_error!("[pipeline] Failed to move '{story_id}' to 3_qa/: {e}");
} else {
let qa_dir = project_root.join(".storkit/work/3_qa");
let story_path = qa_dir.join(format!("{story_id}.md"));
if let Err(e) =
crate::io::story_metadata::write_review_hold(&story_path)
{
slog_error!(
"[pipeline] Failed to set review_hold on '{story_id}': {e}"
);
}
}
}
}
} else {
// Increment retry count and check if blocked.
let story_path = project_root
.join(".storkit/work/2_current")
.join(format!("{story_id}.md"));
if let Some(reason) = should_block_story(&story_path, config.max_retries, story_id, "coder") {
// Story has exceeded retry limit — do not restart.
let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
story_id: story_id.to_string(),
reason,
});
} else {
slog!(
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}'. Restarting."
);
let context = format!(
"\n\n---\n## Previous Attempt Failed\n\
The acceptance gates failed with the following output:\n{}\n\n\
Please review the failures above, fix the issues, and try again.",
completion.gate_output
);
if let Err(e) = self
.start_agent(&project_root, story_id, Some(agent_name), Some(&context))
.await
{
slog_error!(
"[pipeline] Failed to restart coder '{agent_name}' for '{story_id}': {e}"
);
}
}
}
}
PipelineStage::Qa => {
if completion.gates_passed {
// Run coverage gate in the QA worktree before advancing to merge.
let coverage_path = worktree_path
.clone()
.unwrap_or_else(|| project_root.clone());
let cp = coverage_path.clone();
let coverage_result =
tokio::task::spawn_blocking(move || crate::agents::gates::run_coverage_gate(&cp))
.await
.unwrap_or_else(|e| {
slog_warn!("[pipeline] Coverage gate task panicked: {e}");
Ok((false, format!("Coverage gate task panicked: {e}")))
});
let (coverage_passed, coverage_output) = match coverage_result {
Ok(pair) => pair,
Err(e) => (false, e),
};
if coverage_passed {
// Check whether this item needs human review before merging.
let needs_human_review = {
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
if item_type == "spike" {
true // Spikes always need human review.
} else {
let qa_dir = project_root.join(".storkit/work/3_qa");
let story_path = qa_dir.join(format!("{story_id}.md"));
let default_qa = config.default_qa_mode();
matches!(
crate::io::story_metadata::resolve_qa_mode(&story_path, default_qa),
crate::io::story_metadata::QaMode::Human
)
}
};
if needs_human_review {
// Hold in 3_qa/ for human review.
let qa_dir = project_root.join(".storkit/work/3_qa");
let story_path = qa_dir.join(format!("{story_id}.md"));
if let Err(e) =
crate::io::story_metadata::write_review_hold(&story_path)
{
slog_error!(
"[pipeline] Failed to set review_hold on '{story_id}': {e}"
);
}
slog!(
"[pipeline] QA passed for '{story_id}'. \
Holding for human review. \
Worktree preserved at: {worktree_path:?}"
);
} else {
slog!(
"[pipeline] QA passed gates and coverage for '{story_id}'. \
Moving directly to merge."
);
if let Err(e) =
crate::agents::lifecycle::move_story_to_merge(&project_root, story_id)
{
slog_error!(
"[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
);
} else if let Err(e) = self
.start_agent(&project_root, story_id, Some("mergemaster"), None)
.await
{
slog_error!(
"[pipeline] Failed to start mergemaster for '{story_id}': {e}"
);
}
}
} else {
let story_path = project_root
.join(".storkit/work/3_qa")
.join(format!("{story_id}.md"));
if let Some(reason) = should_block_story(&story_path, config.max_retries, story_id, "qa-coverage") {
// Story has exceeded retry limit — do not restart.
let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
story_id: story_id.to_string(),
reason,
});
} else {
slog!(
"[pipeline] QA coverage gate failed for '{story_id}'. Restarting QA."
);
let context = format!(
"\n\n---\n## Coverage Gate Failed\n\
The coverage gate (script/test_coverage) failed with the following output:\n{}\n\n\
Please improve test coverage until the coverage gate passes.",
coverage_output
);
if let Err(e) = self
.start_agent(&project_root, story_id, Some("qa"), Some(&context))
.await
{
slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
}
}
}
} else {
let story_path = project_root
.join(".storkit/work/3_qa")
.join(format!("{story_id}.md"));
if let Some(reason) = should_block_story(&story_path, config.max_retries, story_id, "qa") {
// Story has exceeded retry limit — do not restart.
let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
story_id: story_id.to_string(),
reason,
});
} else {
slog!("[pipeline] QA failed gates for '{story_id}'. Restarting.");
let context = format!(
"\n\n---\n## Previous QA Attempt Failed\n\
The acceptance gates failed with the following output:\n{}\n\n\
Please re-run and fix the issues.",
completion.gate_output
);
if let Err(e) = self
.start_agent(&project_root, story_id, Some("qa"), Some(&context))
.await
{
slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
}
}
}
}
PipelineStage::Mergemaster => {
// Block advancement if the mergemaster explicitly reported a failure.
// The server-owned gate check runs in the feature-branch worktree (not
// master), so `gates_passed=true` is misleading when no code was merged.
if merge_failure_reported {
slog!(
"[pipeline] Pipeline advancement blocked for '{story_id}': \
mergemaster explicitly reported a merge failure. \
Story stays in 4_merge/ for human review."
);
} else {
// Run script/test on master (project_root) as the post-merge verification.
slog!(
"[pipeline] Mergemaster completed for '{story_id}'. Running post-merge tests on master."
);
let root = project_root.clone();
let test_result =
tokio::task::spawn_blocking(move || crate::agents::gates::run_project_tests(&root))
.await
.unwrap_or_else(|e| {
slog_warn!("[pipeline] Post-merge test task panicked: {e}");
Ok((false, format!("Test task panicked: {e}")))
});
let (passed, output) = match test_result {
Ok(pair) => pair,
Err(e) => (false, e),
};
if passed {
slog!(
"[pipeline] Post-merge tests passed for '{story_id}'. Moving to done."
);
if let Err(e) =
crate::agents::lifecycle::move_story_to_archived(&project_root, story_id)
{
slog_error!("[pipeline] Failed to move '{story_id}' to done: {e}");
}
self.remove_agents_for_story(story_id);
// TODO: Re-enable worktree cleanup once we have persistent agent logs.
// Removing worktrees destroys evidence needed to debug empty-commit agents.
// let config =
// crate::config::ProjectConfig::load(&project_root).unwrap_or_default();
// if let Err(e) =
// worktree::remove_worktree_by_story_id(&project_root, story_id, &config)
// .await
// {
// slog!(
// "[pipeline] Failed to remove worktree for '{story_id}': {e}"
// );
// }
slog!(
"[pipeline] Story '{story_id}' done. Worktree preserved for inspection."
);
} else {
let story_path = project_root
.join(".storkit/work/4_merge")
.join(format!("{story_id}.md"));
if let Some(reason) = should_block_story(&story_path, config.max_retries, story_id, "mergemaster") {
// Story has exceeded retry limit — do not restart.
let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
story_id: story_id.to_string(),
reason,
});
} else {
slog!(
"[pipeline] Post-merge tests failed for '{story_id}'. Restarting mergemaster."
);
let context = format!(
"\n\n---\n## Post-Merge Test Failed\n\
The tests on master failed with the following output:\n{}\n\n\
Please investigate and resolve the failures, then call merge_agent_work again.",
output
);
if let Err(e) = self
.start_agent(
&project_root,
story_id,
Some("mergemaster"),
Some(&context),
)
.await
{
slog_error!(
"[pipeline] Failed to restart mergemaster for '{story_id}': {e}"
);
}
}
}
}
}
}
// Always scan for unassigned work after any agent completes, regardless
// of the outcome (success, failure, restart). This ensures stories that
// failed agent assignment due to busy agents are retried when agents
// become available (bug 295).
self.auto_assign_available_work(&project_root).await;
}
}
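The coder-completion branch above boils down to a small routing decision. A simplified sketch (the real code also moves story files and starts agents; `QaMode` mirrors `crate::io::story_metadata::QaMode`, while `next_step` and its string labels are purely illustrative):

```rust
#[derive(PartialEq, Debug)]
enum QaMode { Server, Agent, Human }

// Decide what happens after a coder passes gates, given the item type and
// the effective QA mode. Spikes always require human review, regardless of
// any configured mode.
fn next_step(item_type: &str, qa_mode: QaMode) -> &'static str {
    if item_type == "spike" {
        return "hold_for_human_review";
    }
    match qa_mode {
        QaMode::Server => "move_to_merge",        // skip QA, mergemaster next
        QaMode::Agent => "move_to_qa",            // start the qa agent
        QaMode::Human => "hold_for_human_review", // set review_hold flag
    }
}
```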
/// Spawn pipeline advancement as a background task.
///
/// This is a **non-async** function so it does not participate in the opaque
/// type cycle between `start_agent` and `run_server_owned_completion`.
#[allow(clippy::too_many_arguments)]
pub(super) fn spawn_pipeline_advance(
agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
port: u16,
story_id: &str,
agent_name: &str,
completion: CompletionReport,
project_root: Option<PathBuf>,
worktree_path: Option<PathBuf>,
watcher_tx: broadcast::Sender<WatcherEvent>,
merge_failure_reported: bool,
) {
let sid = story_id.to_string();
let aname = agent_name.to_string();
tokio::spawn(async move {
let pool = AgentPool {
agents,
port,
child_killers: Arc::new(Mutex::new(HashMap::new())),
watcher_tx,
merge_jobs: Arc::new(Mutex::new(HashMap::new())),
};
pool.run_pipeline_advance(
&sid,
&aname,
completion,
project_root,
worktree_path,
merge_failure_reported,
)
.await;
});
}
/// Increment retry_count and block the story if it exceeds `max_retries`.
///
/// Returns `Some(reason)` if the story is now blocked (caller should NOT restart the agent).
/// Returns `None` if the story may be retried.
/// When `max_retries` is 0, retry limits are disabled.
fn should_block_story(story_path: &Path, max_retries: u32, story_id: &str, stage_label: &str) -> Option<String> {
use crate::io::story_metadata::{increment_retry_count, write_blocked};
if max_retries == 0 {
// Retry limits disabled.
return None;
}
match increment_retry_count(story_path) {
Ok(new_count) => {
if new_count >= max_retries {
slog_warn!(
"[pipeline] Story '{story_id}' reached retry limit ({new_count}/{max_retries}) \
at {stage_label} stage. Marking as blocked."
);
if let Err(e) = write_blocked(story_path) {
slog_error!("[pipeline] Failed to write blocked flag for '{story_id}': {e}");
}
Some(format!(
"Retry limit exceeded ({new_count}/{max_retries}) at {stage_label} stage"
))
} else {
slog!(
"[pipeline] Story '{story_id}' retry {new_count}/{max_retries} at {stage_label} stage."
);
None
}
}
Err(e) => {
slog_error!("[pipeline] Failed to increment retry_count for '{story_id}': {e}");
None // Don't block on error — allow retry.
}
}
}
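Separating the decision from the file I/O, the retry-limit rule in `should_block_story` reduces to a pure function; a minimal sketch under that assumption (`retry_verdict` is a hypothetical name, taking the already-incremented count):

```rust
// None: the story may be retried (or limits are disabled via max_retries=0).
// Some(reason): the story has hit the limit and should be blocked.
fn retry_verdict(new_count: u32, max_retries: u32, stage: &str) -> Option<String> {
    if max_retries == 0 || new_count < max_retries {
        return None;
    }
    Some(format!(
        "Retry limit exceeded ({new_count}/{max_retries}) at {stage} stage"
    ))
}
```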
#[cfg(test)]
mod tests {
use super::super::super::AgentPool;
use super::super::super::composite_key;
use crate::agents::{AgentStatus, CompletionReport};
use crate::io::watcher::WatcherEvent;
// ── pipeline advance tests ────────────────────────────────────────────────
#[tokio::test]
async fn pipeline_advance_coder_gates_pass_server_qa_moves_to_merge() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up story in 2_current/ (no qa frontmatter → uses project default "server")
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("50_story_test.md"), "test").unwrap();
let pool = AgentPool::new_test(3001);
pool.run_pipeline_advance(
"50_story_test",
"coder-1",
CompletionReport {
summary: "done".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// With default qa: server, story skips QA and goes straight to 4_merge/
assert!(
root.join(".storkit/work/4_merge/50_story_test.md")
.exists(),
"story should be in 4_merge/"
);
assert!(
!current.join("50_story_test.md").exists(),
"story should not still be in 2_current/"
);
}
#[tokio::test]
async fn pipeline_advance_coder_gates_pass_agent_qa_moves_to_qa() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up story in 2_current/ with qa: agent frontmatter
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(
current.join("50_story_test.md"),
"---\nname: Test\nqa: agent\n---\ntest",
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.run_pipeline_advance(
"50_story_test",
"coder-1",
CompletionReport {
summary: "done".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// With qa: agent, story should move to 3_qa/
assert!(
root.join(".storkit/work/3_qa/50_story_test.md").exists(),
"story should be in 3_qa/"
);
assert!(
!current.join("50_story_test.md").exists(),
"story should not still be in 2_current/"
);
}
#[tokio::test]
async fn pipeline_advance_qa_gates_pass_moves_story_to_merge() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up story in 3_qa/
let qa_dir = root.join(".storkit/work/3_qa");
fs::create_dir_all(&qa_dir).unwrap();
// qa: server so the story skips human review and goes straight to merge.
fs::write(
qa_dir.join("51_story_test.md"),
"---\nname: Test\nqa: server\n---\ntest",
)
.unwrap();
let pool = AgentPool::new_test(3001);
pool.run_pipeline_advance(
"51_story_test",
"qa",
CompletionReport {
summary: "QA done".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// Story should have moved to 4_merge/
assert!(
root.join(".storkit/work/4_merge/51_story_test.md")
.exists(),
"story should be in 4_merge/"
);
assert!(
!qa_dir.join("51_story_test.md").exists(),
"story should not still be in 3_qa/"
);
}
#[tokio::test]
async fn pipeline_advance_supervisor_does_not_advance() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("52_story_test.md"), "test").unwrap();
let pool = AgentPool::new_test(3001);
pool.run_pipeline_advance(
"52_story_test",
"supervisor",
CompletionReport {
summary: "supervised".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// Story should NOT have moved (supervisors don't advance pipeline)
assert!(
current.join("52_story_test.md").exists(),
"story should still be in 2_current/ for supervisor"
);
}
#[tokio::test]
async fn pipeline_advance_sends_agent_state_changed_to_watcher_tx() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Set up story in 2_current/
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("173_story_test.md"), "test").unwrap();
// Ensure 3_qa/ exists for the move target
fs::create_dir_all(root.join(".storkit/work/3_qa")).unwrap();
// Ensure 1_backlog/ exists (start_agent calls move_story_to_current)
fs::create_dir_all(root.join(".storkit/work/1_backlog")).unwrap();
// Write a project.toml with a qa agent so start_agent can resolve it.
fs::create_dir_all(root.join(".storkit")).unwrap();
fs::write(
root.join(".storkit/project.toml"),
r#"
default_qa = "agent"
[[agent]]
name = "coder-1"
role = "Coder"
command = "echo"
args = ["noop"]
prompt = "test"
stage = "coder"
[[agent]]
name = "qa"
role = "QA"
command = "echo"
args = ["noop"]
prompt = "test"
stage = "qa"
"#,
)
.unwrap();
let pool = AgentPool::new_test(3001);
// Subscribe to the watcher channel BEFORE the pipeline advance.
let mut rx = pool.watcher_tx.subscribe();
pool.run_pipeline_advance(
"173_story_test",
"coder-1",
CompletionReport {
summary: "done".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// The pipeline advance should have sent AgentStateChanged events via
// the pool's watcher_tx (not a dummy channel). Collect all events.
let mut got_agent_state_changed = false;
while let Ok(evt) = rx.try_recv() {
if matches!(evt, WatcherEvent::AgentStateChanged) {
got_agent_state_changed = true;
break;
}
}
assert!(
got_agent_state_changed,
"pipeline advance should send AgentStateChanged through the real watcher_tx \
(bug 173: lozenges must update when agents are assigned during pipeline advance)"
);
}
// ── bug 295: pipeline advance picks up waiting QA stories ──────────
#[tokio::test]
async fn pipeline_advance_picks_up_waiting_qa_stories_after_completion() {
use std::fs;
use super::super::super::auto_assign::is_agent_free;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let sk = root.join(".storkit");
let qa_dir = sk.join("work/3_qa");
fs::create_dir_all(&qa_dir).unwrap();
// Configure a single QA agent.
fs::write(
sk.join("project.toml"),
r#"
[[agent]]
name = "qa"
stage = "qa"
"#,
)
.unwrap();
// Story 292 is in QA with QA agent running (will "complete" via
// run_pipeline_advance below). Story 293 is in QA with NO agent —
// simulating the "stuck" state from bug 295.
fs::write(
qa_dir.join("292_story_first.md"),
"---\nname: First\nqa: human\n---\n",
)
.unwrap();
fs::write(
qa_dir.join("293_story_second.md"),
"---\nname: Second\nqa: human\n---\n",
)
.unwrap();
let pool = AgentPool::new_test(3001);
// QA is currently running on story 292.
pool.inject_test_agent("292_story_first", "qa", AgentStatus::Running);
// Verify that 293 cannot get a QA agent right now (QA is busy).
{
let agents = pool.agents.lock().unwrap();
assert!(
!is_agent_free(&agents, "qa"),
"qa should be busy on story 292"
);
}
// Simulate QA completing on story 292: remove the agent from the pool
// (as run_server_owned_completion does) then run pipeline advance.
{
let mut agents = pool.agents.lock().unwrap();
agents.remove(&composite_key("292_story_first", "qa"));
}
pool.run_pipeline_advance(
"292_story_first",
"qa",
CompletionReport {
summary: "QA done".to_string(),
gates_passed: true,
gate_output: String::new(),
},
Some(root.to_path_buf()),
None,
false,
)
.await;
// After pipeline advance, auto_assign should have started QA on story 293.
let agents = pool.agents.lock().unwrap();
let qa_on_293 = agents.values().any(|a| {
a.agent_name == "qa"
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
});
assert!(
qa_on_293,
"auto_assign should have started qa for story 293 after 292's QA completed, \
but no qa agent is pending/running. Pool: {:?}",
agents
.iter()
.map(|(k, a)| format!("{k}: {} ({})", a.agent_name, a.status))
.collect::<Vec<_>>()
);
}
}
@@ -0,0 +1,519 @@
use crate::slog;
use crate::io::watcher::WatcherEvent;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use super::super::super::{AgentEvent, AgentStatus, CompletionReport};
use super::super::{AgentPool, StoryAgent, composite_key};
use super::advance::spawn_pipeline_advance;
impl AgentPool {
/// Internal: report that an agent has finished work on a story.
///
/// **Note:** This is no longer exposed as an MCP tool. The server now
/// automatically runs completion gates when an agent process exits
/// (see `run_server_owned_completion`). This method is retained for
/// backwards compatibility and testing.
///
/// - Rejects with an error if the worktree has uncommitted changes.
/// - Runs acceptance gates (`cargo clippy`, then `cargo nextest run` or `cargo test`).
/// - Stores the `CompletionReport` on the agent record, then removes the
///   entry so completed agents never appear in `list_agents`.
/// - Emits a `Done` event so `wait_for_agent` unblocks.
#[allow(dead_code)]
pub async fn report_completion(
&self,
story_id: &str,
agent_name: &str,
summary: &str,
) -> Result<CompletionReport, String> {
let key = composite_key(story_id, agent_name);
// Verify agent exists, is Running, and grab its worktree path.
let worktree_path = {
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let agent = agents
.get(&key)
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
if agent.status != AgentStatus::Running {
return Err(format!(
"Agent '{agent_name}' for story '{story_id}' is not running (status: {}). \
report_completion can only be called by a running agent.",
agent.status
));
}
agent
.worktree_info
.as_ref()
.map(|wt| wt.path.clone())
.ok_or_else(|| {
format!(
"Agent '{agent_name}' for story '{story_id}' has no worktree. \
Cannot run acceptance gates."
)
})?
};
let path = worktree_path.clone();
// Run gate checks in a blocking thread to avoid stalling the async runtime.
let (gates_passed, gate_output) = tokio::task::spawn_blocking(move || {
// Step 1: Reject if worktree is dirty.
crate::agents::gates::check_uncommitted_changes(&path)?;
// Step 2: Run clippy + tests and return (passed, output).
crate::agents::gates::run_acceptance_gates(&path)
})
.await
.map_err(|e| format!("Gate check task panicked: {e}"))??;
let report = CompletionReport {
summary: summary.to_string(),
gates_passed,
gate_output,
};
// Extract data for pipeline advance, then remove the entry so
// completed agents never appear in list_agents.
let (
tx,
session_id,
project_root_for_advance,
wt_path_for_advance,
merge_failure_reported_for_advance,
) = {
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
let agent = agents.get_mut(&key).ok_or_else(|| {
format!("Agent '{agent_name}' for story '{story_id}' disappeared during gate check")
})?;
agent.completion = Some(report.clone());
let tx = agent.tx.clone();
let sid = agent.session_id.clone();
let pr = agent.project_root.clone();
let wt = agent.worktree_info.as_ref().map(|w| w.path.clone());
let mfr = agent.merge_failure_reported;
agents.remove(&key);
(tx, sid, pr, wt, mfr)
};
// Emit Done so wait_for_agent unblocks.
let _ = tx.send(AgentEvent::Done {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
session_id,
});
// Notify WebSocket clients that the agent is gone.
Self::notify_agent_state_changed(&self.watcher_tx);
// Advance the pipeline state machine in a background task.
let pool_clone = Self {
agents: Arc::clone(&self.agents),
port: self.port,
child_killers: Arc::clone(&self.child_killers),
watcher_tx: self.watcher_tx.clone(),
merge_jobs: Arc::clone(&self.merge_jobs),
};
let sid = story_id.to_string();
let aname = agent_name.to_string();
let report_for_advance = report.clone();
tokio::spawn(async move {
pool_clone
.run_pipeline_advance(
&sid,
&aname,
report_for_advance,
project_root_for_advance,
wt_path_for_advance,
merge_failure_reported_for_advance,
)
.await;
});
Ok(report)
}
}
/// Server-owned completion: runs acceptance gates when an agent process exits
/// normally, and advances the pipeline based on results.
///
/// This is a **free function** (not a method on `AgentPool`) to break the
/// opaque type cycle that would otherwise arise: `start_agent` → spawned task
/// → server-owned completion → pipeline advance → `start_agent`.
///
/// If the agent already has a completion report (e.g. from a legacy
/// `report_completion` call), this is a no-op to avoid double-running gates.
pub(in crate::agents::pool) async fn run_server_owned_completion(
agents: &Arc<Mutex<HashMap<String, StoryAgent>>>,
port: u16,
story_id: &str,
agent_name: &str,
session_id: Option<String>,
watcher_tx: broadcast::Sender<WatcherEvent>,
) {
let key = composite_key(story_id, agent_name);
// Guard: skip if completion was already recorded (legacy path).
{
let lock = match agents.lock() {
Ok(a) => a,
Err(_) => return,
};
match lock.get(&key) {
Some(agent) if agent.completion.is_some() => {
slog!(
"[agents] Completion already recorded for '{story_id}:{agent_name}'; \
skipping server-owned gates."
);
return;
}
Some(_) => {}
None => return,
}
}
// Get worktree path for running gates.
let worktree_path = {
let lock = match agents.lock() {
Ok(a) => a,
Err(_) => return,
};
lock.get(&key)
.and_then(|a| a.worktree_info.as_ref().map(|wt| wt.path.clone()))
};
// Run acceptance gates.
let (gates_passed, gate_output) = if let Some(wt_path) = worktree_path {
let path = wt_path;
match tokio::task::spawn_blocking(move || {
crate::agents::gates::check_uncommitted_changes(&path)?;
// AC5: Fail early if the coder finished with no commits on the feature branch.
// This prevents empty-diff stories from advancing through QA to merge.
if !crate::agents::gates::worktree_has_committed_work(&path) {
return Ok((
false,
"Agent exited with no commits on the feature branch. \
The agent did not produce any code changes."
.to_string(),
));
}
crate::agents::gates::run_acceptance_gates(&path)
})
.await
{
Ok(Ok(result)) => result,
Ok(Err(e)) => (false, e),
Err(e) => (false, format!("Gate check task panicked: {e}")),
}
} else {
(
false,
"No worktree path available to run acceptance gates".to_string(),
)
};
slog!(
"[agents] Server-owned completion for '{story_id}:{agent_name}': gates_passed={gates_passed}"
);
let report = CompletionReport {
summary: "Agent process exited normally".to_string(),
gates_passed,
gate_output,
};
// Store completion report, extract data for pipeline advance, then
// remove the entry so completed agents never appear in list_agents.
let (tx, project_root_for_advance, wt_path_for_advance, merge_failure_reported_for_advance) = {
let mut lock = match agents.lock() {
Ok(a) => a,
Err(_) => return,
};
let agent = match lock.get_mut(&key) {
Some(a) => a,
None => return,
};
agent.completion = Some(report.clone());
agent.session_id = session_id.clone();
let tx = agent.tx.clone();
let pr = agent.project_root.clone();
let wt = agent.worktree_info.as_ref().map(|w| w.path.clone());
let mfr = agent.merge_failure_reported;
lock.remove(&key);
(tx, pr, wt, mfr)
};
// Emit Done so wait_for_agent unblocks.
let _ = tx.send(AgentEvent::Done {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
session_id,
});
// Notify WebSocket clients that the agent is gone.
AgentPool::notify_agent_state_changed(&watcher_tx);
// Advance the pipeline state machine in a background task.
spawn_pipeline_advance(
Arc::clone(agents),
port,
story_id,
agent_name,
report,
project_root_for_advance,
wt_path_for_advance,
watcher_tx,
merge_failure_reported_for_advance,
);
}
#[cfg(test)]
mod tests {
use super::*;
use super::super::super::AgentPool;
use crate::agents::{AgentEvent, AgentStatus, CompletionReport};
use std::path::PathBuf;
use std::process::Command;
fn init_git_repo(repo: &std::path::Path) {
Command::new("git")
.args(["init"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.email", "test@test.com"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.name", "Test"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "--allow-empty", "-m", "init"])
.current_dir(repo)
.output()
.unwrap();
}
// ── report_completion tests ────────────────────────────────────
#[tokio::test]
async fn report_completion_rejects_nonexistent_agent() {
let pool = AgentPool::new_test(3001);
let result = pool.report_completion("no_story", "no_bot", "done").await;
assert!(result.is_err());
let msg = result.unwrap_err();
assert!(msg.contains("No agent"), "unexpected: {msg}");
}
#[tokio::test]
async fn report_completion_rejects_non_running_agent() {
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("s6", "bot", AgentStatus::Completed);
let result = pool.report_completion("s6", "bot", "done").await;
assert!(result.is_err());
let msg = result.unwrap_err();
assert!(
msg.contains("not running"),
"expected 'not running' in: {msg}"
);
}
#[tokio::test]
async fn report_completion_rejects_dirty_worktree() {
use std::fs;
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
// Init a real git repo and make an initial commit
init_git_repo(repo);
// Write an uncommitted file
fs::write(repo.join("dirty.txt"), "not committed").unwrap();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent_with_path("s7", "bot", AgentStatus::Running, repo.to_path_buf());
let result = pool.report_completion("s7", "bot", "done").await;
assert!(result.is_err());
let msg = result.unwrap_err();
assert!(
msg.contains("uncommitted"),
"expected 'uncommitted' in: {msg}"
);
}
// ── server-owned completion tests ───────────────────────────────────────────
#[tokio::test]
async fn server_owned_completion_skips_when_already_completed() {
let pool = AgentPool::new_test(3001);
let report = CompletionReport {
summary: "Already done".to_string(),
gates_passed: true,
gate_output: String::new(),
};
pool.inject_test_agent_with_completion(
"s10",
"coder-1",
AgentStatus::Completed,
PathBuf::from("/tmp/nonexistent"),
report,
);
// Subscribe before calling so we can check if Done event was emitted.
let mut rx = pool.subscribe("s10", "coder-1").unwrap();
run_server_owned_completion(
&pool.agents,
pool.port,
"s10",
"coder-1",
Some("sess-1".to_string()),
pool.watcher_tx.clone(),
)
.await;
// Status should remain Completed (unchanged) — no gate re-run.
let agents = pool.agents.lock().unwrap();
let key = composite_key("s10", "coder-1");
let agent = agents.get(&key).unwrap();
assert_eq!(agent.status, AgentStatus::Completed);
// Summary should still be the original, not overwritten.
assert_eq!(agent.completion.as_ref().unwrap().summary, "Already done");
drop(agents);
// No Done event should have been emitted.
assert!(
rx.try_recv().is_err(),
"should not emit Done when completion already exists"
);
}
#[tokio::test]
async fn server_owned_completion_runs_gates_on_clean_worktree() {
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
let pool = AgentPool::new_test(3001);
pool.inject_test_agent_with_path(
"s11",
"coder-1",
AgentStatus::Running,
repo.to_path_buf(),
);
let mut rx = pool.subscribe("s11", "coder-1").unwrap();
run_server_owned_completion(
&pool.agents,
pool.port,
"s11",
"coder-1",
Some("sess-2".to_string()),
pool.watcher_tx.clone(),
)
.await;
// Agent entry should be removed from the map after completion.
let agents = pool.agents.lock().unwrap();
let key = composite_key("s11", "coder-1");
assert!(
agents.get(&key).is_none(),
"agent should be removed from map after completion"
);
drop(agents);
// A Done event should have been emitted with the session_id.
let event = rx.try_recv().expect("should emit Done event");
match &event {
AgentEvent::Done { session_id, .. } => {
assert_eq!(*session_id, Some("sess-2".to_string()));
}
other => panic!("expected Done event, got: {other:?}"),
}
}
#[tokio::test]
async fn server_owned_completion_fails_on_dirty_worktree() {
use std::fs;
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
// Create an uncommitted file.
fs::write(repo.join("dirty.txt"), "not committed").unwrap();
let pool = AgentPool::new_test(3001);
pool.inject_test_agent_with_path(
"s12",
"coder-1",
AgentStatus::Running,
repo.to_path_buf(),
);
let mut rx = pool.subscribe("s12", "coder-1").unwrap();
run_server_owned_completion(
&pool.agents,
pool.port,
"s12",
"coder-1",
None,
pool.watcher_tx.clone(),
)
.await;
// Agent entry should be removed from the map after completion (even on failure).
let agents = pool.agents.lock().unwrap();
let key = composite_key("s12", "coder-1");
assert!(
agents.get(&key).is_none(),
"agent should be removed from map after failed completion"
);
drop(agents);
// A Done event should have been emitted.
let event = rx.try_recv().expect("should emit Done event");
assert!(
matches!(event, AgentEvent::Done { .. }),
"expected Done event, got: {event:?}"
);
}
#[tokio::test]
async fn server_owned_completion_nonexistent_agent_is_noop() {
let pool = AgentPool::new_test(3001);
// Should not panic or error — just silently return.
run_server_owned_completion(
&pool.agents,
pool.port,
"nonexistent",
"bot",
None,
pool.watcher_tx.clone(),
)
.await;
}
}
@@ -0,0 +1,544 @@
use crate::slog;
use crate::slog_error;
use crate::slog_warn;
use crate::worktree;
use std::path::Path;
use std::sync::Arc;
use super::super::super::PipelineStage;
use super::super::super::pipeline_stage;
use super::super::AgentPool;
impl AgentPool {
/// Start the merge pipeline as a background task.
///
/// Returns immediately so the MCP tool call doesn't time out (the full
/// pipeline — squash merge + quality gates — takes well over 60 seconds,
/// exceeding Claude Code's MCP tool-call timeout).
///
/// The mergemaster agent should poll [`get_merge_status`](Self::get_merge_status)
/// until the job reaches a terminal state.
pub fn start_merge_agent_work(
self: &Arc<Self>,
project_root: &Path,
story_id: &str,
) -> Result<(), String> {
// Guard against double-starts.
{
let jobs = self.merge_jobs.lock().map_err(|e| e.to_string())?;
if let Some(job) = jobs.get(story_id)
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
{
return Err(format!(
"Merge already in progress for '{story_id}'. \
Use get_merge_status to poll for completion."
));
}
}
// Insert Running job.
{
let mut jobs = self.merge_jobs.lock().map_err(|e| e.to_string())?;
jobs.insert(
story_id.to_string(),
crate::agents::merge::MergeJob {
story_id: story_id.to_string(),
status: crate::agents::merge::MergeJobStatus::Running,
},
);
}
let pool = Arc::clone(self);
let root = project_root.to_path_buf();
let sid = story_id.to_string();
tokio::spawn(async move {
let report = pool.run_merge_pipeline(&root, &sid).await;
let failed = report.is_err();
let status = match report {
Ok(r) => crate::agents::merge::MergeJobStatus::Completed(r),
Err(e) => crate::agents::merge::MergeJobStatus::Failed(e),
};
if let Ok(mut jobs) = pool.merge_jobs.lock()
&& let Some(job) = jobs.get_mut(&sid)
{
job.status = status;
}
if failed {
pool.auto_assign_available_work(&root).await;
}
});
Ok(())
}
/// The actual merge pipeline, run inside a background task.
async fn run_merge_pipeline(
self: &Arc<Self>,
project_root: &Path,
story_id: &str,
) -> Result<crate::agents::merge::MergeReport, String> {
let branch = format!("feature/story-{story_id}");
let wt_path = worktree::worktree_path(project_root, story_id);
let root = project_root.to_path_buf();
let sid = story_id.to_string();
let br = branch.clone();
let merge_result =
tokio::task::spawn_blocking(move || crate::agents::merge::run_squash_merge(&root, &br, &sid))
.await
.map_err(|e| format!("Merge task panicked: {e}"))??;
if !merge_result.success {
return Ok(crate::agents::merge::MergeReport {
story_id: story_id.to_string(),
success: false,
had_conflicts: merge_result.had_conflicts,
conflicts_resolved: merge_result.conflicts_resolved,
conflict_details: merge_result.conflict_details,
gates_passed: merge_result.gates_passed,
gate_output: merge_result.output,
worktree_cleaned_up: false,
story_archived: false,
});
}
let story_archived =
crate::agents::lifecycle::move_story_to_archived(project_root, story_id).is_ok();
if story_archived {
self.remove_agents_for_story(story_id);
}
let worktree_cleaned_up = if wt_path.exists() {
let config = crate::config::ProjectConfig::load(project_root).unwrap_or_default();
worktree::remove_worktree_by_story_id(project_root, story_id, &config)
.await
.is_ok()
} else {
false
};
self.auto_assign_available_work(project_root).await;
Ok(crate::agents::merge::MergeReport {
story_id: story_id.to_string(),
success: true,
had_conflicts: merge_result.had_conflicts,
conflicts_resolved: merge_result.conflicts_resolved,
conflict_details: merge_result.conflict_details,
gates_passed: true,
gate_output: merge_result.output,
worktree_cleaned_up,
story_archived,
})
}
/// Check the status of a background merge job.
pub fn get_merge_status(&self, story_id: &str) -> Option<crate::agents::merge::MergeJob> {
self.merge_jobs
.lock()
.ok()
.and_then(|jobs| jobs.get(story_id).cloned())
}
/// Record that the mergemaster agent for `story_id` explicitly reported a
/// merge failure via the `report_merge_failure` MCP tool.
///
/// Sets `merge_failure_reported = true` on the active mergemaster agent so
/// that `run_pipeline_advance` can block advancement to `5_done/` even when
/// the server-owned gate check returns `gates_passed=true` (those gates run
/// in the feature-branch worktree, not on master).
pub fn set_merge_failure_reported(&self, story_id: &str) {
match self.agents.lock() {
Ok(mut lock) => {
let found = lock.iter_mut().find(|(key, agent)| {
let key_story_id = key
.rsplit_once(':')
.map(|(sid, _)| sid)
.unwrap_or(key.as_str());
key_story_id == story_id
&& pipeline_stage(&agent.agent_name) == PipelineStage::Mergemaster
});
match found {
Some((_, agent)) => {
agent.merge_failure_reported = true;
slog!(
"[pipeline] Merge failure flag set for '{story_id}:{}'",
agent.agent_name
);
}
None => {
slog_warn!(
"[pipeline] set_merge_failure_reported: no running mergemaster found \
for story '{story_id}'; flag not set"
);
}
}
}
Err(e) => {
slog_error!("[pipeline] set_merge_failure_reported: could not lock agents: {e}");
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::super::super::AgentPool;
use crate::agents::merge::{MergeJob, MergeJobStatus};
use std::process::Command;
fn init_git_repo(repo: &std::path::Path) {
Command::new("git")
.args(["init"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.email", "test@test.com"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["config", "user.name", "Test"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "--allow-empty", "-m", "init"])
.current_dir(repo)
.output()
.unwrap();
}
// ── merge_agent_work tests ────────────────────────────────────────────────
/// Helper: start a merge and poll until terminal state.
async fn run_merge_to_completion(
pool: &Arc<AgentPool>,
repo: &std::path::Path,
story_id: &str,
) -> MergeJob {
pool.start_merge_agent_work(repo, story_id).unwrap();
loop {
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
if let Some(job) = pool.get_merge_status(story_id)
&& !matches!(job.status, MergeJobStatus::Running)
{
return job;
}
}
}
#[tokio::test]
async fn merge_agent_work_returns_error_when_branch_not_found() {
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
let pool = Arc::new(AgentPool::new_test(3001));
let job = run_merge_to_completion(&pool, repo, "99_nonexistent").await;
match &job.status {
MergeJobStatus::Completed(report) => {
assert!(!report.success, "should fail when branch missing");
}
MergeJobStatus::Failed(_) => {
// Also acceptable — the pipeline errored out
}
MergeJobStatus::Running => {
panic!("should not still be running");
}
}
}
#[tokio::test]
async fn merge_agent_work_succeeds_on_clean_branch() {
use std::fs;
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
// Create a feature branch with a commit
Command::new("git")
.args(["checkout", "-b", "feature/story-23_test"])
.current_dir(repo)
.output()
.unwrap();
fs::write(repo.join("feature.txt"), "feature content").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "add feature"])
.current_dir(repo)
.output()
.unwrap();
// Switch back to master (initial branch)
Command::new("git")
.args(["checkout", "master"])
.current_dir(repo)
.output()
.unwrap();
// Create the story file in 4_merge/ so we can test archival
let merge_dir = repo.join(".storkit/work/4_merge");
fs::create_dir_all(&merge_dir).unwrap();
let story_file = merge_dir.join("23_test.md");
fs::write(&story_file, "---\nname: Test\n---\n").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "add story in merge"])
.current_dir(repo)
.output()
.unwrap();
let pool = Arc::new(AgentPool::new_test(3001));
let job = run_merge_to_completion(&pool, repo, "23_test").await;
match &job.status {
MergeJobStatus::Completed(report) => {
assert!(!report.had_conflicts, "should have no conflicts");
assert!(
report.success
|| report.gate_output.contains("Failed to run")
|| !report.gates_passed,
"report should be coherent: {report:?}"
);
if report.story_archived {
let done = repo.join(".storkit/work/5_done/23_test.md");
assert!(done.exists(), "done file should exist");
}
}
MergeJobStatus::Failed(e) => {
// Gate failures are acceptable in test env
assert!(
e.contains("Failed") || e.contains("failed"),
"unexpected failure: {e}"
);
}
MergeJobStatus::Running => panic!("should not still be running"),
}
}
// ── quality gate ordering test ────────────────────────────────
/// Regression test for bug 142: quality gates must run BEFORE the fast-forward
/// to master so that broken code never lands on master.
#[cfg(unix)]
#[test]
fn quality_gates_run_before_fast_forward_to_master() {
use std::fs;
use std::os::unix::fs::PermissionsExt;
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
// Add a failing script/test so quality gates will fail.
let script_dir = repo.join("script");
fs::create_dir_all(&script_dir).unwrap();
let script_test = script_dir.join("test");
fs::write(&script_test, "#!/usr/bin/env bash\nexit 1\n").unwrap();
let mut perms = fs::metadata(&script_test).unwrap().permissions();
perms.set_mode(0o755);
fs::set_permissions(&script_test, perms).unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "add failing script/test"])
.current_dir(repo)
.output()
.unwrap();
// Create a feature branch with a commit.
Command::new("git")
.args(["checkout", "-b", "feature/story-142_test"])
.current_dir(repo)
.output()
.unwrap();
fs::write(repo.join("change.txt"), "feature change").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "feature work"])
.current_dir(repo)
.output()
.unwrap();
// Switch back to master and record its HEAD.
Command::new("git")
.args(["checkout", "master"])
.current_dir(repo)
.output()
.unwrap();
let head_before = String::from_utf8(
Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(repo)
.output()
.unwrap()
.stdout,
)
.unwrap()
.trim()
.to_string();
// Run the squash-merge. The failing script/test makes quality gates
// fail → fast-forward must NOT happen.
let result =
crate::agents::merge::run_squash_merge(repo, "feature/story-142_test", "142_test")
.unwrap();
let head_after = String::from_utf8(
Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(repo)
.output()
.unwrap()
.stdout,
)
.unwrap()
.trim()
.to_string();
// Gates must have failed (script/test exits 1) so master should be untouched.
assert!(
!result.success,
"run_squash_merge must report failure when gates fail"
);
assert_eq!(
head_before, head_after,
"master HEAD must not advance when quality gates fail (bug 142)"
);
}
#[tokio::test]
async fn merge_agent_work_conflict_does_not_break_master() {
use std::fs;
use tempfile::tempdir;
let tmp = tempdir().unwrap();
let repo = tmp.path();
init_git_repo(repo);
// Create a file on master.
fs::write(
repo.join("code.rs"),
"fn main() {\n println!(\"hello\");\n}\n",
)
.unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "initial code"])
.current_dir(repo)
.output()
.unwrap();
// Feature branch: modify the same line differently.
Command::new("git")
.args(["checkout", "-b", "feature/story-42_story_foo"])
.current_dir(repo)
.output()
.unwrap();
fs::write(
repo.join("code.rs"),
"fn main() {\n println!(\"hello\");\n feature_fn();\n}\n",
)
.unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "feature: add fn call"])
.current_dir(repo)
.output()
.unwrap();
// Master: add different line at same location.
Command::new("git")
.args(["checkout", "master"])
.current_dir(repo)
.output()
.unwrap();
fs::write(
repo.join("code.rs"),
"fn main() {\n println!(\"hello\");\n master_fn();\n}\n",
)
.unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "master: add fn call"])
.current_dir(repo)
.output()
.unwrap();
// Create story file in 4_merge.
let merge_dir = repo.join(".storkit/work/4_merge");
fs::create_dir_all(&merge_dir).unwrap();
fs::write(merge_dir.join("42_story_foo.md"), "---\nname: Test\n---\n").unwrap();
Command::new("git")
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.args(["commit", "-m", "add story"])
.current_dir(repo)
.output()
.unwrap();
let pool = Arc::new(AgentPool::new_test(3001));
let job = run_merge_to_completion(&pool, repo, "42_story_foo").await;
// Master should NEVER have conflict markers, regardless of merge outcome.
let master_code = fs::read_to_string(repo.join("code.rs")).unwrap();
assert!(
!master_code.contains("<<<<<<<"),
"master must never contain conflict markers:\n{master_code}"
);
assert!(
!master_code.contains(">>>>>>>"),
"master must never contain conflict markers:\n{master_code}"
);
// The report should accurately reflect what happened.
match &job.status {
MergeJobStatus::Completed(report) => {
assert!(report.had_conflicts, "should report conflicts");
}
MergeJobStatus::Failed(_) => {
// Acceptable — merge aborted due to conflicts
}
MergeJobStatus::Running => panic!("should not still be running"),
}
}
}
@@ -0,0 +1,5 @@
mod advance;
mod completion;
mod merge;
pub(super) use completion::run_server_owned_completion;
@@ -0,0 +1,141 @@
use crate::slog;
use super::AgentPool;
impl AgentPool {
/// Kill all active PTY child processes.
///
/// Called on server shutdown to prevent orphaned Claude Code processes from
/// continuing to run after the server exits. Each registered killer is called
/// once, then the registry is cleared.
pub fn kill_all_children(&self) {
if let Ok(mut killers) = self.child_killers.lock() {
for (key, killer) in killers.iter_mut() {
slog!("[agents] Killing child process for {key} on shutdown");
let _ = killer.kill();
}
killers.clear();
}
}
/// Kill and deregister the child process for a specific agent key.
///
/// Used by `stop_agent` to ensure the PTY child is terminated even though
/// aborting a `spawn_blocking` task handle does not interrupt the blocking thread.
pub(super) fn kill_child_for_key(&self, key: &str) {
if let Ok(mut killers) = self.child_killers.lock()
&& let Some(mut killer) = killers.remove(key)
{
slog!("[agents] Killing child process for {key} on stop");
let _ = killer.kill();
}
}
/// Test helper: inject a child killer into the registry.
#[cfg(test)]
pub fn inject_child_killer(&self, key: &str, killer: Box<dyn portable_pty::ChildKiller + Send + Sync>) {
let mut killers = self.child_killers.lock().unwrap();
killers.insert(key.to_string(), killer);
}
/// Test helper: return the number of registered child killers.
#[cfg(test)]
pub fn child_killer_count(&self) -> usize {
self.child_killers.lock().unwrap().len()
}
}
#[cfg(test)]
mod tests {
use super::super::AgentPool;
use portable_pty::{CommandBuilder, PtySize, native_pty_system};
use std::process::Command;
/// Returns true if a process with the given PID is currently running.
fn process_is_running(pid: u32) -> bool {
Command::new("ps")
.args(["-p", &pid.to_string()])
.output()
.map(|o| o.status.success())
.unwrap_or(false)
}
#[test]
fn kill_all_children_is_safe_on_empty_pool() {
let pool = AgentPool::new_test(3001);
pool.kill_all_children();
assert_eq!(pool.child_killer_count(), 0);
}
#[test]
fn kill_all_children_kills_real_process() {
let pool = AgentPool::new_test(3001);
let pty_system = native_pty_system();
let pair = pty_system
.openpty(PtySize {
rows: 24,
cols: 80,
pixel_width: 0,
pixel_height: 0,
})
.expect("failed to open pty");
let mut cmd = CommandBuilder::new("sleep");
cmd.arg("100");
let mut child = pair
.slave
.spawn_command(cmd)
.expect("failed to spawn sleep");
let pid = child.process_id().expect("no pid");
pool.inject_child_killer("story:agent", child.clone_killer());
assert!(
process_is_running(pid),
"process {pid} should be running before kill_all_children"
);
pool.kill_all_children();
let _ = child.wait();
assert!(
!process_is_running(pid),
"process {pid} should have been killed by kill_all_children"
);
}
#[test]
fn kill_all_children_clears_registry() {
let pool = AgentPool::new_test(3001);
let pty_system = native_pty_system();
let pair = pty_system
.openpty(PtySize {
rows: 24,
cols: 80,
pixel_width: 0,
pixel_height: 0,
})
.expect("failed to open pty");
let mut cmd = CommandBuilder::new("sleep");
cmd.arg("1");
let mut child = pair
.slave
.spawn_command(cmd)
.expect("failed to spawn sleep");
pool.inject_child_killer("story:agent", child.clone_killer());
assert_eq!(pool.child_killer_count(), 1);
pool.kill_all_children();
let _ = child.wait();
assert_eq!(
pool.child_killer_count(),
0,
"child_killers should be cleared after kill_all_children"
);
}
}
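As the doc comment on `kill_child_for_key` notes, aborting a `spawn_blocking` task handle does not interrupt the blocking thread, so the pool tracks kill handles in a separate registry. A minimal, std-only sketch of that pattern (using `std::process::Child` in place of `portable_pty::ChildKiller`, and assuming a Unix `sleep` binary; `Registry` is an illustrative name, not the real type):

```rust
use std::collections::HashMap;
use std::process::{Child, Command};
use std::sync::Mutex;

// Hedged sketch of the child-killer registry: handles are stored under a
// composite key and killed/cleared in one pass, independent of whatever
// task is still blocked on the PTY read loop.
struct Registry {
    killers: Mutex<HashMap<String, Child>>,
}

impl Registry {
    fn kill_all(&self) {
        let mut killers = self.killers.lock().unwrap();
        for (key, child) in killers.iter_mut() {
            println!("killing child for {key}");
            let _ = child.kill();
            let _ = child.wait(); // reap so the child does not linger as a zombie
        }
        killers.clear();
    }
}

fn main() {
    let reg = Registry { killers: Mutex::new(HashMap::new()) };
    let child = Command::new("sleep").arg("100").spawn().expect("spawn sleep");
    reg.killers.lock().unwrap().insert("story:agent".into(), child);
    reg.kill_all();
    assert!(reg.killers.lock().unwrap().is_empty());
    println!("registry empty after kill_all");
}
```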
@@ -0,0 +1,166 @@
use crate::config::ProjectConfig;
use std::path::PathBuf;
use tokio::sync::broadcast;
use super::super::{AgentEvent, AgentInfo, AgentStatus, PipelineStage, agent_config_stage};
use super::types::{agent_info_from_entry, composite_key};
use super::AgentPool;
impl AgentPool {
/// Return the names of configured agents for `stage` that are not currently
/// running or pending.
pub fn available_agents_for_stage(
&self,
config: &ProjectConfig,
stage: &PipelineStage,
) -> Result<Vec<String>, String> {
let agents = self.agents.lock().map_err(|e| e.to_string())?;
Ok(config
.agent
.iter()
.filter(|cfg| agent_config_stage(cfg) == *stage)
.filter(|cfg| {
!agents.values().any(|a| {
a.agent_name == cfg.name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
})
})
.map(|cfg| cfg.name.clone())
.collect())
}
/// List all agents with their status.
pub fn list_agents(&self) -> Result<Vec<AgentInfo>, String> {
let agents = self.agents.lock().map_err(|e| e.to_string())?;
Ok(agents
.iter()
.map(|(key, agent)| {
// Extract story_id from composite key "story_id:agent_name"
let story_id = key
.rsplit_once(':')
.map(|(sid, _)| sid.to_string())
.unwrap_or_else(|| key.clone());
agent_info_from_entry(&story_id, agent)
})
.collect())
}
/// Subscribe to events for a story agent.
pub fn subscribe(
&self,
story_id: &str,
agent_name: &str,
) -> Result<broadcast::Receiver<AgentEvent>, String> {
let key = composite_key(story_id, agent_name);
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let agent = agents
.get(&key)
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
Ok(agent.tx.subscribe())
}
/// Drain accumulated events for polling. Returns all events since the last drain.
pub fn drain_events(
&self,
story_id: &str,
agent_name: &str,
) -> Result<Vec<AgentEvent>, String> {
let key = composite_key(story_id, agent_name);
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let agent = agents
.get(&key)
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
let mut log = agent.event_log.lock().map_err(|e| e.to_string())?;
Ok(log.drain(..).collect())
}
/// Get the log session ID and project root for an agent, if available.
///
/// Used by MCP tools to find the persistent log file for a completed agent.
pub fn get_log_info(&self, story_id: &str, agent_name: &str) -> Option<(String, PathBuf)> {
let key = composite_key(story_id, agent_name);
let agents = self.agents.lock().ok()?;
let agent = agents.get(&key)?;
let session_id = agent.log_session_id.clone()?;
let project_root = agent.project_root.clone()?;
Some((session_id, project_root))
}
}
#[cfg(test)]
mod tests {
use super::super::AgentPool;
use crate::agents::{AgentStatus, PipelineStage};
use crate::config::ProjectConfig;
fn make_config(toml_str: &str) -> ProjectConfig {
ProjectConfig::parse(toml_str).unwrap()
}
#[test]
fn available_agents_for_stage_returns_idle_agents() {
let config = make_config(
r#"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
[[agent]]
name = "qa"
stage = "qa"
"#,
);
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
let available = pool
.available_agents_for_stage(&config, &PipelineStage::Coder)
.unwrap();
assert_eq!(available, vec!["coder-2"]);
let available_qa = pool
.available_agents_for_stage(&config, &PipelineStage::Qa)
.unwrap();
assert_eq!(available_qa, vec!["qa"]);
}
#[test]
fn available_agents_for_stage_returns_empty_when_all_busy() {
let config = make_config(
r#"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
);
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
let available = pool
.available_agents_for_stage(&config, &PipelineStage::Coder)
.unwrap();
assert!(available.is_empty());
}
#[test]
fn available_agents_for_stage_ignores_completed_agents() {
let config = make_config(
r#"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
);
let pool = AgentPool::new_test(3001);
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Completed);
let available = pool
.available_agents_for_stage(&config, &PipelineStage::Coder)
.unwrap();
assert_eq!(available, vec!["coder-1"]);
}
}
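`list_agents` recovers the story id by splitting the composite key at the last colon, so the only constraint is that agent names be colon-free; any colons earlier in the key survive. A self-contained sketch of the round trip (`story_id_from_key` is an illustrative helper name, not a function in the codebase):

```rust
// Build the composite pool key, mirroring composite_key in types.rs.
fn composite_key(story_id: &str, agent_name: &str) -> String {
    format!("{story_id}:{agent_name}")
}

// Recover the story id: rsplit_once splits at the LAST ':', so colons in
// the left-hand portion are preserved; only the agent name must avoid ':'.
fn story_id_from_key(key: &str) -> String {
    key.rsplit_once(':')
        .map(|(sid, _)| sid.to_string())
        .unwrap_or_else(|| key.to_string())
}

fn main() {
    let key = composite_key("428_refactor:pool", "coder-1");
    assert_eq!(key, "428_refactor:pool:coder-1");
    assert_eq!(story_id_from_key(&key), "428_refactor:pool");
    // A key with no colon falls back to the whole key.
    assert_eq!(story_id_from_key("orphan"), "orphan");
    println!("composite key round trip ok");
}
```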
@@ -0,0 +1,142 @@
use crate::worktree::WorktreeInfo;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use super::super::{AgentEvent, AgentStatus, CompletionReport};
use super::types::{StoryAgent, composite_key};
use super::AgentPool;
impl AgentPool {
/// Test helper: inject a pre-built agent entry so unit tests can exercise
/// wait/subscribe logic without spawning a real process.
pub fn inject_test_agent(
&self,
story_id: &str,
agent_name: &str,
status: AgentStatus,
) -> broadcast::Sender<AgentEvent> {
let (tx, _) = broadcast::channel::<AgentEvent>(64);
let key = composite_key(story_id, agent_name);
let mut agents = self.agents.lock().unwrap();
agents.insert(
key,
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: None,
session_id: None,
tx: tx.clone(),
task_handle: None,
event_log: Arc::new(Mutex::new(Vec::new())),
completion: None,
project_root: None,
log_session_id: None,
merge_failure_reported: false,
throttled: false,
},
);
tx
}
/// Test helper: inject an agent with a specific worktree path for testing
/// gate-related logic.
pub fn inject_test_agent_with_path(
&self,
story_id: &str,
agent_name: &str,
status: AgentStatus,
worktree_path: PathBuf,
) -> broadcast::Sender<AgentEvent> {
let (tx, _) = broadcast::channel::<AgentEvent>(64);
let key = composite_key(story_id, agent_name);
let mut agents = self.agents.lock().unwrap();
agents.insert(
key,
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: Some(WorktreeInfo {
path: worktree_path,
branch: format!("feature/story-{story_id}"),
base_branch: "master".to_string(),
}),
session_id: None,
tx: tx.clone(),
task_handle: None,
event_log: Arc::new(Mutex::new(Vec::new())),
completion: None,
project_root: None,
log_session_id: None,
merge_failure_reported: false,
throttled: false,
},
);
tx
}
/// Test helper: inject an agent with a completion report and project_root
/// for testing pipeline advance logic without spawning real agents.
pub fn inject_test_agent_with_completion(
&self,
story_id: &str,
agent_name: &str,
status: AgentStatus,
project_root: PathBuf,
completion: CompletionReport,
) -> broadcast::Sender<AgentEvent> {
let (tx, _) = broadcast::channel::<AgentEvent>(64);
let key = composite_key(story_id, agent_name);
let mut agents = self.agents.lock().unwrap();
agents.insert(
key,
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: None,
session_id: None,
tx: tx.clone(),
task_handle: None,
event_log: Arc::new(Mutex::new(Vec::new())),
completion: Some(completion),
project_root: Some(project_root),
log_session_id: None,
merge_failure_reported: false,
throttled: false,
},
);
tx
}
/// Inject a Running agent with a pre-built (possibly finished) task handle.
/// Used by watchdog tests to simulate an orphaned agent.
pub fn inject_test_agent_with_handle(
&self,
story_id: &str,
agent_name: &str,
status: AgentStatus,
task_handle: tokio::task::JoinHandle<()>,
) -> broadcast::Sender<AgentEvent> {
let (tx, _) = broadcast::channel::<AgentEvent>(64);
let key = composite_key(story_id, agent_name);
let mut agents = self.agents.lock().unwrap();
agents.insert(
key,
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: None,
session_id: None,
tx: tx.clone(),
task_handle: Some(task_handle),
event_log: Arc::new(Mutex::new(Vec::new())),
completion: None,
project_root: None,
log_session_id: None,
merge_failure_reported: false,
throttled: false,
},
);
tx
}
}
@@ -0,0 +1,107 @@
use crate::slog;
use crate::worktree::WorktreeInfo;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use super::super::{AgentEvent, AgentInfo, AgentStatus, CompletionReport};
/// Build the composite key used to track agents in the pool.
pub(super) fn composite_key(story_id: &str, agent_name: &str) -> String {
format!("{story_id}:{agent_name}")
}
/// RAII guard that removes a pending agent entry from the pool on drop.
///
/// Created after inserting a `Pending` entry into the agent HashMap.
/// If `start_agent` succeeds (the agent process is spawned and status
/// transitions to `Running`), call [`disarm`](Self::disarm) to prevent
/// cleanup. If any intermediate step fails and the guard is dropped
/// without being disarmed, the pending entry is removed so it cannot
/// block future auto-assign dispatches.
pub(super) struct PendingGuard {
pub(super) agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
pub(super) key: String,
pub(super) armed: bool,
}
impl PendingGuard {
pub(super) fn new(agents: Arc<Mutex<HashMap<String, StoryAgent>>>, key: String) -> Self {
Self {
agents,
key,
armed: true,
}
}
/// Prevent the guard from cleaning up the entry (call after
/// successful spawn).
pub(super) fn disarm(&mut self) {
self.armed = false;
}
}
impl Drop for PendingGuard {
fn drop(&mut self) {
if self.armed
&& let Ok(mut agents) = self.agents.lock()
&& agents
.get(&self.key)
.is_some_and(|a| a.status == AgentStatus::Pending)
{
agents.remove(&self.key);
slog!(
"[agents] Cleaned up leaked Pending entry for '{}'",
self.key
);
}
}
}
pub(super) struct StoryAgent {
pub(super) agent_name: String,
pub(super) status: AgentStatus,
pub(super) worktree_info: Option<WorktreeInfo>,
pub(super) session_id: Option<String>,
pub(super) tx: broadcast::Sender<AgentEvent>,
pub(super) task_handle: Option<tokio::task::JoinHandle<()>>,
/// Accumulated events for polling via get_agent_output.
pub(super) event_log: Arc<Mutex<Vec<AgentEvent>>>,
/// Set when the agent calls report_completion.
pub(super) completion: Option<CompletionReport>,
/// Project root, stored for pipeline advancement after completion.
pub(super) project_root: Option<PathBuf>,
/// UUID identifying the log file for this session.
pub(super) log_session_id: Option<String>,
/// Set to `true` when the agent calls `report_merge_failure`.
/// Prevents the pipeline from blindly advancing to `5_done/` after a
/// failed merge: the server-owned gate check runs in the feature-branch
/// worktree (which compiles fine) and returns `gates_passed=true` even
/// though the code was never squash-merged onto master.
pub(super) merge_failure_reported: bool,
/// Set to `true` when a rate-limit throttle warning was received for this agent.
pub(super) throttled: bool,
}
/// Build an `AgentInfo` snapshot from a `StoryAgent` map entry.
pub(super) fn agent_info_from_entry(story_id: &str, agent: &StoryAgent) -> AgentInfo {
AgentInfo {
story_id: story_id.to_string(),
agent_name: agent.agent_name.clone(),
status: agent.status.clone(),
session_id: agent.session_id.clone(),
worktree_path: agent
.worktree_info
.as_ref()
.map(|wt| wt.path.to_string_lossy().to_string()),
base_branch: agent
.worktree_info
.as_ref()
.map(|wt| wt.base_branch.clone()),
completion: agent.completion.clone(),
log_session_id: agent.log_session_id.clone(),
throttled: agent.throttled,
}
}
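The `PendingGuard` drop semantics can be exercised in isolation. Below is a minimal, self-contained sketch of the same RAII shape using only std types: the status is modeled as a plain string and `Guard` stands in for `PendingGuard` (both are illustrative stand-ins, not the real types):

```rust
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

// Stand-in for PendingGuard: removes `key` from the map on drop unless
// disarm() was called after a successful spawn.
struct Guard {
    entries: Arc<Mutex<HashMap<String, &'static str>>>,
    key: String,
    armed: bool,
}

impl Guard {
    fn disarm(&mut self) {
        self.armed = false;
    }
}

impl Drop for Guard {
    fn drop(&mut self) {
        if self.armed {
            let mut entries = self.entries.lock().unwrap();
            // Only clean up if the entry is still pending; a concurrent
            // transition to another state is left alone.
            if entries.get(&self.key) == Some(&"pending") {
                entries.remove(&self.key);
            }
        }
    }
}

fn main() {
    let entries = Arc::new(Mutex::new(HashMap::new()));
    entries.lock().unwrap().insert("story:agent".to_string(), "pending");

    // Failure path: guard dropped while armed cleans up the pending entry.
    {
        let _g = Guard { entries: Arc::clone(&entries), key: "story:agent".into(), armed: true };
    }
    assert!(entries.lock().unwrap().is_empty());

    // Success path: a disarmed guard leaves the (now running) entry alone.
    entries.lock().unwrap().insert("story:agent".to_string(), "running");
    {
        let mut g = Guard { entries: Arc::clone(&entries), key: "story:agent".into(), armed: true };
        g.disarm();
    }
    assert_eq!(entries.lock().unwrap().get("story:agent"), Some(&"running"));
    println!("guard semantics ok");
}
```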
@@ -0,0 +1,91 @@
use crate::config::ProjectConfig;
use std::path::{Path, PathBuf};
use super::AgentPool;
impl AgentPool {
/// Create a worktree for the given story using the server port (writes .mcp.json).
pub async fn create_worktree(
&self,
project_root: &Path,
story_id: &str,
) -> Result<crate::worktree::WorktreeInfo, String> {
let config = ProjectConfig::load(project_root)?;
crate::worktree::create_worktree(project_root, story_id, &config, self.port).await
}
/// Get project root helper.
pub fn get_project_root(&self, state: &crate::state::SessionState) -> Result<PathBuf, String> {
state.get_project_root()
}
}
/// Return the active pipeline stage directory name for `story_id`, or `None` if the
/// story is not in any active stage (`2_current/`, `3_qa/`, `4_merge/`).
pub(super) fn find_active_story_stage(project_root: &Path, story_id: &str) -> Option<&'static str> {
const STAGES: [&str; 3] = ["2_current", "3_qa", "4_merge"];
for stage in &STAGES {
let path = project_root
.join(".storkit")
.join("work")
.join(stage)
.join(format!("{story_id}.md"));
if path.exists() {
return Some(stage);
}
}
None
}
#[cfg(test)]
mod tests {
use super::find_active_story_stage;
#[test]
fn find_active_story_stage_detects_current() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let current = root.join(".storkit/work/2_current");
fs::create_dir_all(&current).unwrap();
fs::write(current.join("10_story_test.md"), "test").unwrap();
assert_eq!(
find_active_story_stage(root, "10_story_test"),
Some("2_current")
);
}
#[test]
fn find_active_story_stage_detects_qa() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let qa = root.join(".storkit/work/3_qa");
fs::create_dir_all(&qa).unwrap();
fs::write(qa.join("11_story_test.md"), "test").unwrap();
assert_eq!(find_active_story_stage(root, "11_story_test"), Some("3_qa"));
}
#[test]
fn find_active_story_stage_detects_merge() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let merge = root.join(".storkit/work/4_merge");
fs::create_dir_all(&merge).unwrap();
fs::write(merge.join("12_story_test.md"), "test").unwrap();
assert_eq!(
find_active_story_stage(root, "12_story_test"),
Some("4_merge")
);
}
#[test]
fn find_active_story_stage_returns_none_for_unknown_story() {
let tmp = tempfile::tempdir().unwrap();
assert_eq!(find_active_story_stage(tmp.path(), "99_nonexistent"), None);
}
}
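The rate-limit handling in pty.rs reduces to a small classification: any non-empty status other than `allowed_warning` is a hard block, and a hard block without a parseable `reset_at` is downgraded to a plain warning. A self-contained sketch of that decision (the `Classified` enum and `classify` function are illustrative names; `reset_at` is kept as a string here to avoid the chrono dependency):

```rust
// Hedged sketch of the rate_limit_event classification in pty.rs.
#[derive(Debug, PartialEq)]
enum Classified {
    Warning,
    HardBlock { reset_at: String },
}

fn classify(status: &str, reset_at: Option<&str>) -> Classified {
    // Mirrors: !status.is_empty() && status != "allowed_warning"
    let is_hard_block = !status.is_empty() && status != "allowed_warning";
    match (is_hard_block, reset_at) {
        (true, Some(ts)) => Classified::HardBlock { reset_at: ts.to_string() },
        // Hard block without reset_at, or any throttle, becomes a warning.
        _ => Classified::Warning,
    }
}

fn main() {
    assert_eq!(classify("allowed_warning", None), Classified::Warning);
    assert_eq!(classify("hard_block", None), Classified::Warning);
    assert_eq!(
        classify("hard_block", Some("2099-01-01T12:00:00Z")),
        Classified::HardBlock { reset_at: "2099-01-01T12:00:00Z".into() }
    );
    println!("classification ok");
}
```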
@@ -347,13 +347,49 @@ fn run_agent_pty_blocking(
// The raw JSON is still forwarded as AgentJson below.
"assistant" | "user" => {}
"rate_limit_event" => {
slog!(
"[agent:{story_id}:{agent_name}] API rate limit warning received"
);
let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
});
let rate_limit_info = json.get("rate_limit_info");
let status = rate_limit_info
.and_then(|i| i.get("status"))
.and_then(|s| s.as_str())
.unwrap_or("");
let is_hard_block = !status.is_empty() && status != "allowed_warning";
let reset_at = rate_limit_info
.and_then(|i| i.get("reset_at"))
.and_then(|r| r.as_str())
.and_then(|r| chrono::DateTime::parse_from_rfc3339(r).ok())
.map(|dt| dt.with_timezone(&chrono::Utc));
if is_hard_block {
if let Some(reset_at) = reset_at {
slog!(
"[agent:{story_id}:{agent_name}] API rate limit hard block \
(status={status}); resets at {reset_at}"
);
let _ = watcher_tx.send(WatcherEvent::RateLimitHardBlock {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
reset_at,
});
} else {
slog!(
"[agent:{story_id}:{agent_name}] API rate limit hard block \
(status={status}); no reset_at in rate_limit_info"
);
let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
});
}
} else {
slog!(
"[agent:{story_id}:{agent_name}] API rate limit warning received \
(status={status})"
);
let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
});
}
}
"result" => {
// Extract token usage from the result event.
@@ -468,6 +504,65 @@ mod tests {
}
}
/// AC1: hard block with `reset_at` emits `RateLimitHardBlock` with the
/// correct story_id, agent_name, and parsed reset_at timestamp.
#[tokio::test]
async fn rate_limit_hard_block_sends_watcher_hard_block_event() {
use std::os::unix::fs::PermissionsExt;
let tmp = tempfile::tempdir().unwrap();
let script = tmp.path().join("emit_hard_block.sh");
std::fs::write(
&script,
"#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\n",
)
.unwrap();
std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
let event_log = Arc::new(Mutex::new(Vec::new()));
let child_killers = Arc::new(Mutex::new(HashMap::new()));
let result = run_agent_pty_streaming(
"423_story_rate_limit",
"coder-1",
"sh",
&[script.to_string_lossy().to_string()],
"--",
"/tmp",
&tx,
&event_log,
None,
0,
child_killers,
watcher_tx,
)
.await;
assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());
let evt = watcher_rx
.try_recv()
.expect("Expected a RateLimitHardBlock to be sent on watcher_tx");
match evt {
WatcherEvent::RateLimitHardBlock {
story_id,
agent_name,
reset_at,
} => {
assert_eq!(story_id, "423_story_rate_limit");
assert_eq!(agent_name, "coder-1");
assert_eq!(
reset_at.to_rfc3339(),
"2099-01-01T12:00:00+00:00",
"reset_at should match the parsed timestamp"
);
}
other => panic!("Expected RateLimitHardBlock, got: {other:?}"),
}
}
#[test]
fn test_emit_event_writes_to_log_writer() {
let tmp = tempfile::tempdir().unwrap();
@@ -0,0 +1,438 @@
//! Handler for the `loc` command — top source files by line count.
use super::CommandContext;
use walkdir::WalkDir;
const DEFAULT_TOP_N: usize = 10;
/// Directories to skip during traversal.
const SKIP_DIRS: &[&str] = &[
"target",
"node_modules",
".git",
"dist",
"build",
".next",
"coverage",
"test-results",
];
/// Path components that indicate a worktree path that should be skipped.
const SKIP_PATH_COMPONENTS: &[&str] = &[".storkit/worktrees"];
/// Known-huge or machine-generated files that are excluded from the loc count
/// even when they have a recognised source extension (e.g. `.json`, `.yaml`).
/// Add entries here to extend the exclusion list.
const EXCLUDED_FILENAMES: &[&str] = &[
"package-lock.json",
"yarn.lock",
"pnpm-lock.yaml",
"bun.lockb",
"Cargo.lock",
"composer.lock",
"Gemfile.lock",
"poetry.lock",
"go.sum",
"go.work.sum",
"flake.lock",
];
pub(super) fn handle_loc(ctx: &CommandContext) -> Option<String> {
let args = ctx.args.trim();
if args.is_empty() {
return Some(loc_top_n(ctx.project_root, DEFAULT_TOP_N));
}
let first_token = args.split_whitespace().next().unwrap_or("");
Some(match first_token.parse::<usize>() {
Ok(0) => format!(
"Usage: `loc [N]` or `loc <filepath>` — show top N source files by line count (default {DEFAULT_TOP_N}), or line count for a specific file"
),
Ok(n) => loc_top_n(ctx.project_root, n),
Err(_) => loc_single_file(ctx.project_root, args),
})
}
/// Count lines in a single file resolved relative to `project_root`.
pub(crate) fn loc_single_file(project_root: &std::path::Path, file_arg: &str) -> String {
let path = if std::path::Path::new(file_arg).is_absolute() {
std::path::PathBuf::from(file_arg)
} else {
project_root.join(file_arg)
};
match std::fs::read_to_string(&path) {
Ok(content) => {
let lines = content.lines().count();
let display = path
.strip_prefix(project_root)
.unwrap_or(&path)
.to_string_lossy();
format!("`{display}` — {lines} lines")
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
format!("File not found: `{file_arg}`")
}
Err(e) => format!("Error reading `{file_arg}`: {e}"),
}
}
fn loc_top_n(project_root: &std::path::Path, top_n: usize) -> String {
let mut files: Vec<(usize, String)> = WalkDir::new(project_root)
.follow_links(false)
.into_iter()
.filter_entry(|e| {
if e.file_type().is_dir() {
let name = e.file_name().to_string_lossy();
if SKIP_DIRS.iter().any(|s| *s == name.as_ref()) {
return false;
}
// Skip .storkit/worktrees — use relative path so the check
// doesn't exclude the project root itself when running
// from inside a worktree (where the absolute path contains
// ".storkit/worktrees").
let rel = e
.path()
.strip_prefix(project_root)
.map(|p| p.to_string_lossy().into_owned())
.unwrap_or_default();
if SKIP_PATH_COMPONENTS.iter().any(|s| rel.contains(s)) {
return false;
}
}
true
})
.filter_map(|entry| {
let entry = entry.ok()?;
if !entry.file_type().is_file() {
return None;
}
let path = entry.path();
// Skip known-huge or machine-generated files (lockfiles, etc.).
let filename = path.file_name().and_then(|f| f.to_str()).unwrap_or("");
if EXCLUDED_FILENAMES.contains(&filename) {
return None;
}
// Skip binary/generated files without a recognisable text extension.
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !is_source_extension(ext) {
return None;
}
let content = std::fs::read_to_string(path).ok()?;
let line_count = content.lines().count();
if line_count == 0 {
return None;
}
// Make path relative to project_root for display.
let rel = path
.strip_prefix(project_root)
.unwrap_or(path)
.to_string_lossy()
.into_owned();
Some((line_count, rel))
})
.collect();
files.sort_by(|a, b| b.0.cmp(&a.0));
files.truncate(top_n);
if files.is_empty() {
return "No source files found.".to_string();
}
let mut out = format!("**Top {} files by line count**\n\n", files.len());
for (rank, (lines, path)) in files.iter().enumerate() {
out.push_str(&format!("{}. `{}` — {} lines\n", rank + 1, path, lines));
}
out
}
/// Returns true for file extensions considered source/text files.
fn is_source_extension(ext: &str) -> bool {
matches!(
ext,
"rs" | "ts" | "tsx" | "js" | "jsx" | "py" | "go" | "java" | "c" | "cpp" | "h"
| "hpp" | "cs" | "rb" | "swift" | "kt" | "scala" | "hs" | "ml" | "ex" | "exs"
| "clj" | "lua" | "sh" | "bash" | "zsh" | "fish" | "ps1" | "toml" | "yaml"
| "yml" | "json" | "md" | "html" | "css" | "scss" | "less" | "sql" | "graphql"
| "proto" | "tf" | "hcl" | "nix" | "r" | "jl" | "dart" | "vue" | "svelte"
)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::agents::AgentPool;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
fn make_ctx<'a>(
agents: &'a Arc<AgentPool>,
ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
project_root: &'a std::path::Path,
args: &'a str,
) -> super::super::CommandContext<'a> {
super::super::CommandContext {
bot_name: "Timmy",
args,
project_root,
agents,
ambient_rooms,
room_id: "!test:example.com",
}
}
#[test]
fn loc_command_is_registered() {
use super::super::commands;
let found = commands().iter().any(|c| c.name == "loc");
assert!(found, "loc command must be in the registry");
}
#[test]
fn loc_command_appears_in_help() {
let result = super::super::tests::try_cmd_addressed(
"Timmy",
"@timmy:homeserver.local",
"@timmy help",
);
let output = result.unwrap();
assert!(output.contains("loc"), "help should list loc command: {output}");
}
#[test]
fn loc_default_returns_top_10() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "");
let output = handle_loc(&ctx).unwrap();
assert!(
output.contains("Top"),
"output should contain 'Top': {output}"
);
// At most 10 entries (numbered lines "1." through "10.")
let count = output.lines().filter(|l| l.contains(". `")).count();
assert!(count <= 10, "default should return at most 10 files, got {count}");
}
#[test]
fn loc_with_arg_5_returns_at_most_5() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "5");
let output = handle_loc(&ctx).unwrap();
let count = output.lines().filter(|l| l.contains(". `")).count();
assert!(count <= 5, "loc 5 should return at most 5 files, got {count}");
}
#[test]
fn loc_with_arg_20_returns_at_most_20() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "20");
let output = handle_loc(&ctx).unwrap();
let count = output.lines().filter(|l| l.contains(". `")).count();
assert!(count <= 20, "loc 20 should return at most 20 files, got {count}");
}
#[test]
fn loc_output_contains_rank_and_line_count() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "");
let output = handle_loc(&ctx).unwrap();
// Each entry should have "N. `path` — N lines"
assert!(
output.contains("1. `"),
"first result should start with rank: {output}"
);
assert!(
output.contains("lines"),
"output should mention 'lines': {output}"
);
}
#[test]
fn loc_zero_arg_returns_usage() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "0");
let output = handle_loc(&ctx).unwrap();
assert!(
output.contains("Usage"),
"loc 0 should show usage: {output}"
);
}
#[test]
fn loc_filepath_returns_line_count() {
use std::io::Write as _;
let dir = tempfile::tempdir().expect("tempdir");
let src = dir.path().join("hello.rs");
{
let mut f = std::fs::File::create(&src).unwrap();
for i in 0..42 {
writeln!(f, "fn line_{i}() {{}}").unwrap();
}
}
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "hello.rs");
let output = handle_loc(&ctx).unwrap();
assert!(
output.contains("42"),
"should report 42 lines for hello.rs: {output}"
);
assert!(
output.contains("hello.rs"),
"output should mention the filename: {output}"
);
}
#[test]
fn loc_filepath_nonexistent_returns_error() {
let dir = tempfile::tempdir().expect("tempdir");
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "does_not_exist.rs");
let output = handle_loc(&ctx).unwrap();
assert!(
output.contains("not found") || output.contains("Error"),
"nonexistent file should return a clear error: {output}"
);
}
#[test]
fn loc_skips_worktrees_directory() {
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or(std::path::Path::new("."));
let ctx = make_ctx(&agents, &ambient_rooms, repo_root, "");
let output = handle_loc(&ctx).unwrap();
assert!(
!output.contains(".storkit/worktrees"),
"output must not include paths inside worktrees: {output}"
);
}
#[test]
fn loc_excludes_lockfiles_from_results() {
use std::io::Write as _;
let dir = tempfile::tempdir().expect("tempdir");
// Write a package-lock.json with many lines — it must NOT appear in output.
let lockfile = dir.path().join("package-lock.json");
{
let mut f = std::fs::File::create(&lockfile).unwrap();
for _ in 0..500 {
writeln!(f, " \"line\": true,").unwrap();
}
}
// Write a real source file so the output is non-empty.
let source = dir.path().join("main.rs");
{
let mut f = std::fs::File::create(&source).unwrap();
for i in 0..20 {
writeln!(f, "fn line_{i}() {{}}").unwrap();
}
}
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "50");
let output = handle_loc(&ctx).unwrap();
assert!(
!output.contains("package-lock.json"),
"package-lock.json must be excluded from loc output: {output}"
);
assert!(
output.contains("main.rs"),
"main.rs should appear in loc output: {output}"
);
}
#[test]
fn loc_excludes_cargo_lock_from_results() {
use std::io::Write as _;
let dir = tempfile::tempdir().expect("tempdir");
// Cargo.lock has no recognised source extension so it would be skipped
// anyway — but we still verify EXCLUDED_FILENAMES contains it.
assert!(
EXCLUDED_FILENAMES.contains(&"Cargo.lock"),
"EXCLUDED_FILENAMES must contain Cargo.lock"
);
// Write a Cargo.lock with many lines and verify it is excluded.
let lockfile = dir.path().join("Cargo.lock");
{
let mut f = std::fs::File::create(&lockfile).unwrap();
for _ in 0..500 {
writeln!(f, "name = \"foo\"").unwrap();
}
}
let source = dir.path().join("lib.rs");
{
let mut f = std::fs::File::create(&source).unwrap();
for i in 0..10 {
writeln!(f, "fn f{i}() {{}}").unwrap();
}
}
let agents = Arc::new(AgentPool::new_test(3000));
let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "50");
let output = handle_loc(&ctx).unwrap();
assert!(
!output.contains("Cargo.lock"),
"Cargo.lock must be excluded from loc output: {output}"
);
}
#[test]
fn excluded_filenames_constant_is_defined() {
// The constant must contain the examples from the story.
assert!(
EXCLUDED_FILENAMES.contains(&"package-lock.json"),
"EXCLUDED_FILENAMES must contain package-lock.json"
);
assert!(
EXCLUDED_FILENAMES.contains(&"Cargo.lock"),
"EXCLUDED_FILENAMES must contain Cargo.lock"
);
}
#[test]
fn loc_works_via_full_dispatch() {
// Verifies the command is reachable through the same dispatch path used
// by all transports (Matrix, WhatsApp, Slack).
let result = super::super::tests::try_cmd_addressed(
"Timmy",
"@timmy:homeserver.local",
"@timmy loc 1",
);
// /tmp has no source files, so we expect either "No source files found"
// or a ranked result — either way the command must respond (not None).
assert!(
result.is_some(),
"loc command must respond via dispatch (not fall through to LLM)"
);
}
}
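The core of `loc_top_n` is a walk, count, sort, truncate pipeline. A minimal, non-recursive, std-only sketch of the same shape (no `walkdir`, no extension filter; `loc_in_dir` is an illustrative helper, not the real function):

```rust
use std::fs;
use std::path::Path;

// Filenames excluded even though they read as text, mirroring the
// EXCLUDED_FILENAMES list in the loc handler.
const EXCLUDED: &[&str] = &["package-lock.json", "Cargo.lock"];

// Count lines per file in `dir` (non-recursive), largest first.
fn loc_in_dir(dir: &Path, top_n: usize) -> Vec<(usize, String)> {
    let mut files: Vec<(usize, String)> = fs::read_dir(dir)
        .into_iter()
        .flatten()
        .flatten()
        .filter_map(|entry| {
            let path = entry.path();
            if !path.is_file() {
                return None;
            }
            let name = path.file_name()?.to_str()?.to_string();
            if EXCLUDED.contains(&name.as_str()) {
                return None;
            }
            let lines = fs::read_to_string(&path).ok()?.lines().count();
            (lines > 0).then_some((lines, name))
        })
        .collect();
    files.sort_by(|a, b| b.0.cmp(&a.0));
    files.truncate(top_n);
    files
}

fn main() {
    let dir = std::env::temp_dir().join("loc_sketch");
    fs::create_dir_all(&dir).unwrap();
    fs::write(dir.join("big.rs"), "a\n".repeat(30)).unwrap();
    fs::write(dir.join("small.rs"), "a\n".repeat(5)).unwrap();
    fs::write(dir.join("Cargo.lock"), "x\n".repeat(500)).unwrap();
    let top = loc_in_dir(&dir, 10);
    assert_eq!(top[0], (30, "big.rs".to_string()));
    assert!(!top.iter().any(|(_, n)| n == "Cargo.lock"));
    println!("top: {top:?}");
}
```

The real implementation additionally recurses with `walkdir`, prunes `SKIP_DIRS` and worktree paths via `filter_entry`, and gates on `is_source_extension`; the pipeline shape is the same.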
@@ -1,4 +1,4 @@
//! Bot-level command registry for the Matrix bot.
//! Bot-level command registry shared by all chat transports.
//!
//! Commands registered here are handled directly by the bot without invoking
//! the LLM. The registry is the single source of truth — the `help` command
@@ -10,13 +10,18 @@ mod assign;
mod cost;
mod git;
mod help;
pub(crate) mod loc;
mod move_story;
mod overview;
mod show;
mod status;
mod timer;
mod triage;
pub(crate) mod unblock;
mod unreleased;
use crate::agents::AgentPool;
use crate::chat::util::strip_bot_mention;
use std::collections::HashSet;
use std::path::Path;
use std::sync::{Arc, Mutex};
@@ -112,6 +117,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "Show token spend: 24h total, top stories, breakdown by agent type, and all-time total",
handler: cost::handle_cost,
},
BotCommand {
name: "loc",
description: "Show top source files by line count: `loc` (top 10), `loc <N>`, or `loc <filepath>` for a specific file",
handler: loc::handle_loc,
},
BotCommand {
name: "move",
description: "Move a work item to a pipeline stage: `move <number> <stage>` (stages: backlog, current, qa, merge, done)",
@@ -152,13 +162,28 @@ pub fn commands() -> &'static [BotCommand] {
description: "Rebuild the server binary and restart",
handler: handle_rebuild_fallback,
},
BotCommand {
name: "timer",
description: "Schedule a deferred agent start: `timer <story_id> <HH:MM>`, `timer list`, `timer cancel <story_id>`",
handler: timer::handle_timer,
},
BotCommand {
name: "unblock",
description: "Reset a blocked story: `unblock <number>` (clears blocked flag and resets retry count)",
handler: unblock::handle_unblock,
},
BotCommand {
name: "unreleased",
description: "Show stories merged to master since the last release tag",
handler: unreleased::handle_unreleased,
},
]
}
/// Try to match a user message against a registered bot command.
///
/// The message is expected to be the raw body text from Matrix (e.g.,
/// `"@timmy help"`). The bot mention prefix is stripped before matching.
/// The message is expected to be the raw body text (e.g., `"@timmy help"`).
/// The bot mention prefix is stripped before matching.
///
/// Returns `Some(response)` if a command matched and was handled, `None`
/// otherwise (the caller should fall through to the LLM).
@@ -190,53 +215,6 @@ pub fn try_handle_command(dispatch: &CommandDispatch<'_>, message: &str) -> Opti
.and_then(|c| (c.handler)(&ctx))
}
/// Strip the bot mention prefix from a raw message body.
///
/// Handles these forms (case-insensitive where applicable):
/// - `@bot_localpart:server.com rest` → `rest`
/// - `@bot_localpart rest` → `rest`
/// - `DisplayName rest` → `rest`
fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
let trimmed = message.trim();
// Try full Matrix user ID (e.g. "@timmy:homeserver.local")
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
return rest;
}
// Try @localpart (e.g. "@timmy")
if let Some(localpart) = bot_user_id.split(':').next()
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
{
return rest;
}
// Try display name (e.g. "Timmy")
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
return rest;
}
trimmed
}
/// Case-insensitive prefix strip that also requires the match to end at a
/// word boundary (whitespace, punctuation, or end-of-string).
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
if text.len() < prefix.len() {
return None;
}
if !text[..prefix.len()].eq_ignore_ascii_case(prefix) {
return None;
}
let rest = &text[prefix.len()..];
// Must be at end or followed by non-alphanumeric
match rest.chars().next() {
None => Some(rest), // exact match, empty remainder
Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary
_ => Some(rest),
}
}
/// Fallback handler for the `htop` command when it is not intercepted by the
/// async handler in `on_room_message`. In practice this is never called —
/// htop is detected and handled before `try_handle_command` is invoked.
@@ -342,55 +320,6 @@ pub(crate) mod tests {
// Re-export commands() for submodule tests
pub use super::commands;
// -- strip_bot_mention --------------------------------------------------
#[test]
fn strip_mention_full_user_id() {
let rest = strip_bot_mention(
"@timmy:homeserver.local help",
"Timmy",
"@timmy:homeserver.local",
);
assert_eq!(rest.trim(), "help");
}
#[test]
fn strip_mention_localpart() {
let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest.trim(), "help me");
}
#[test]
fn strip_mention_display_name() {
let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest.trim(), "help");
}
#[test]
fn strip_mention_display_name_case_insensitive() {
let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest.trim(), "help");
}
#[test]
fn strip_mention_no_match_returns_original() {
let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest, "hello world");
}
#[test]
fn strip_mention_does_not_match_longer_name() {
// "@timmybot" should NOT match "@timmy"
let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest, "@timmybot help");
}
#[test]
fn strip_mention_comma_after_name() {
let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
}
// -- try_handle_command -------------------------------------------------
#[test]
@@ -423,28 +352,6 @@ pub(crate) mod tests {
);
}
// -- strip_prefix_ci ----------------------------------------------------
#[test]
fn strip_prefix_ci_basic() {
assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
}
#[test]
fn strip_prefix_ci_no_match() {
assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
}
#[test]
fn strip_prefix_ci_word_boundary_required() {
assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
}
#[test]
fn strip_prefix_ci_exact_match() {
assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
}
// -- commands registry --------------------------------------------------
#[test]
@@ -49,6 +49,44 @@ pub(super) fn story_short_label(stem: &str, name: Option<&str>) -> String {
}
}
/// Read the `blocked` flag from a story file's YAML front matter.
///
/// Returns `true` when the story has `blocked: true` set (retry limit reached).
fn read_story_blocked(project_root: &std::path::Path, stage_dir: &str, stem: &str) -> bool {
let path = project_root
.join(".storkit")
.join("work")
.join(stage_dir)
.join(format!("{stem}.md"));
std::fs::read_to_string(path)
.ok()
.and_then(|c| crate::io::story_metadata::parse_front_matter(&c).ok())
.and_then(|m| m.blocked)
.unwrap_or(false)
}
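The front-matter lookup above delegates to `crate::io::story_metadata::parse_front_matter`. As a rough sketch of what that extraction amounts to (hand-rolled here for illustration only — the real parser handles full YAML):

```rust
// Hedged sketch: pull a `blocked: true` flag out of `---`-delimited YAML
// front matter by line scanning. The real code uses a proper parser.
fn blocked_from_front_matter(contents: &str) -> bool {
    let mut in_front_matter = false;
    for line in contents.lines() {
        match line.trim() {
            "---" if !in_front_matter => in_front_matter = true,
            "---" => break, // closing delimiter: stop scanning
            l if in_front_matter => {
                if let Some(v) = l.strip_prefix("blocked:") {
                    return v.trim() == "true";
                }
            }
            _ => {}
        }
    }
    false // missing flag defaults to not blocked, matching read_story_blocked
}

fn main() {
    assert!(blocked_from_front_matter("---\nname: Foo\nblocked: true\n---\n"));
    assert!(!blocked_from_front_matter("---\nname: Foo\n---\n"));
    println!("ok");
}
```

Like `read_story_blocked`, an unreadable or flag-less story resolves to `false`, so only an explicit `blocked: true` produces the ✗ dot.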
/// Choose the traffic-light dot for a work item.
///
/// Priority: blocked > throttled > running > idle.
/// Uses compact Unicode characters (not large emoji) so the output stays
/// readable in plain-text chat clients.
///
/// - `●` running normally (active agent, no throttle)
/// - `◑` throttled (rate-limit warning received)
/// - `✗` hard-blocked (retry limit exceeded)
/// - `○` idle / no active agent
pub(super) fn traffic_light_dot(blocked: bool, throttled: bool, has_agent: bool) -> &'static str {
if blocked {
"\u{2717} " // ✗ — hard blocked
} else if throttled {
"\u{25D1} " // ◑ — throttled
} else if has_agent {
"\u{25CF} " // ● — running normally
} else {
"\u{25CB} " // ○ — idle / no agent
}
}
/// Read all story IDs and names from a pipeline stage directory.
fn read_stage_items(
project_root: &std::path::Path,
@@ -130,18 +168,22 @@ pub(super) fn build_pipeline_status(project_root: &std::path::Path, agents: &Age
.filter(|&&c| c > 0.0)
.map(|c| format!(" — ${c:.2}"))
.unwrap_or_default();
if let Some(agent) = active_map.get(story_id) {
let blocked = read_story_blocked(project_root, dir, story_id);
let agent = active_map.get(story_id);
let throttled = agent.map(|a| a.throttled).unwrap_or(false);
let dot = traffic_light_dot(blocked, throttled, agent.is_some());
if let Some(agent) = agent {
let model_str = config
.as_ref()
.and_then(|cfg| cfg.find_agent(&agent.agent_name))
.and_then(|ac| ac.model.as_deref())
.unwrap_or("?");
out.push_str(&format!(
" {display}{cost_suffix} — {} ({model_str})\n",
" {dot}{display}{cost_suffix} — {} ({model_str})\n",
agent.agent_name
));
} else {
out.push_str(&format!(" {display}{cost_suffix}\n"));
out.push_str(&format!(" {dot}{display}{cost_suffix}\n"));
}
}
}
@@ -399,4 +441,107 @@ mod tests {
"output must show aggregated cost: {output}"
);
}
// -- traffic_light_dot --------------------------------------------------
#[test]
fn dot_idle_when_no_agent() {
assert_eq!(traffic_light_dot(false, false, false), "\u{25CB} "); // ○
}
#[test]
fn dot_running_when_agent_not_throttled() {
assert_eq!(traffic_light_dot(false, false, true), "\u{25CF} "); // ●
}
#[test]
fn dot_throttled_when_agent_throttled() {
assert_eq!(traffic_light_dot(false, true, true), "\u{25D1} "); // ◑
}
#[test]
fn dot_blocked_takes_priority_over_throttled() {
assert_eq!(traffic_light_dot(true, true, true), "\u{2717} "); // ✗
}
#[test]
fn dot_blocked_when_no_agent_but_blocked_flag() {
assert_eq!(traffic_light_dot(true, false, false), "\u{2717} "); // ✗
}
// -- read_story_blocked --------------------------------------------------
#[test]
fn read_story_blocked_returns_true_when_blocked() {
use tempfile::TempDir;
let tmp = TempDir::new().unwrap();
let stage_dir = tmp.path().join(".storkit/work/2_current");
std::fs::create_dir_all(&stage_dir).unwrap();
std::fs::write(
stage_dir.join("42_story_foo.md"),
"---\nname: Foo\nblocked: true\n---\n",
)
.unwrap();
assert!(read_story_blocked(tmp.path(), "2_current", "42_story_foo"));
}
#[test]
fn read_story_blocked_returns_false_when_not_blocked() {
use tempfile::TempDir;
let tmp = TempDir::new().unwrap();
let stage_dir = tmp.path().join(".storkit/work/2_current");
std::fs::create_dir_all(&stage_dir).unwrap();
std::fs::write(
stage_dir.join("42_story_foo.md"),
"---\nname: Foo\n---\n",
)
.unwrap();
assert!(!read_story_blocked(tmp.path(), "2_current", "42_story_foo"));
}
// -- status output shows idle dot for items with no active agent --------
#[test]
fn status_shows_idle_dot_for_unassigned_story() {
use std::io::Write;
use tempfile::TempDir;
let tmp = TempDir::new().unwrap();
let stage_dir = tmp.path().join(".storkit/work/2_current");
std::fs::create_dir_all(&stage_dir).unwrap();
let story_path = stage_dir.join("42_story_idle.md");
let mut f = std::fs::File::create(&story_path).unwrap();
writeln!(f, "---\nname: Idle Story\n---\n").unwrap();
let agents = AgentPool::new_test(3000);
let output = build_pipeline_status(tmp.path(), &agents);
assert!(
output.contains("\u{25CB} "), // ○
"idle story should show empty-circle dot: {output}"
);
}
#[test]
fn status_shows_blocked_dot_for_blocked_story() {
use std::io::Write;
use tempfile::TempDir;
let tmp = TempDir::new().unwrap();
let stage_dir = tmp.path().join(".storkit/work/2_current");
std::fs::create_dir_all(&stage_dir).unwrap();
let story_path = stage_dir.join("42_story_blocked.md");
let mut f = std::fs::File::create(&story_path).unwrap();
writeln!(f, "---\nname: Blocked Story\nblocked: true\n---\n").unwrap();
let agents = AgentPool::new_test(3000);
let output = build_pipeline_status(tmp.path(), &agents);
assert!(
output.contains("\u{2717} "), // ✗
"blocked story should show X dot: {output}"
);
}
}
@@ -0,0 +1,54 @@
//! Handler stub for the `timer` command.
//!
//! The real implementation lives in `crate::chat::timer` (async). This
//! stub exists only so that `timer` appears in the help registry — the
//! handler always returns `None` so the bot's message loop falls through to
//! the async handler.
use super::CommandContext;
pub(super) fn handle_timer(_ctx: &CommandContext) -> Option<String> {
// Handled asynchronously in each transport's message dispatcher.
None
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
#[test]
fn timer_command_is_registered() {
use super::super::commands;
let found = commands().iter().any(|c| c.name == "timer");
assert!(found, "timer command must be in the registry");
}
#[test]
fn timer_command_appears_in_help() {
let result = super::super::tests::try_cmd_addressed(
"Timmy",
"@timmy:homeserver.local",
"@timmy help",
);
let output = result.unwrap();
assert!(
output.contains("timer"),
"help should list timer command: {output}"
);
}
#[test]
fn timer_command_falls_through_to_none_in_registry() {
let result = super::super::tests::try_cmd_addressed(
"Timmy",
"@timmy:homeserver.local",
"@timmy timer list",
);
assert!(
result.is_none(),
"timer should not produce a sync response (handled async): {result:?}"
);
}
}
