Bump version to 0.11.0

chore: refresh source-map.json before 0.11 release
Catches up master with entries added by stories that merged in a binary predating 1065 (merge-pipeline source-map regen): ErrorBoundary, WsConnectivity, transition_merge_failure_to_retry, and others. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 23:31:15 +01:00 · 2026-05-14 23:28:47 +01:00 · 2026-05-14 21:53:56 +00:00 · 2026-05-14 21:48:09 +00:00 · 2026-05-14 22:33:14 +01:00 · 2026-05-14 20:36:51 +00:00
567 changed files with 105932 additions and 57236 deletions
@@ -1,28 +1,14 @@
 {
  "permissions": {
    "allow": [
-      "Bash(cargo build:*)",
-      "Bash(cargo check:*)",
-      "Bash(git *)",
-      "Bash(ls *)",
-      "Bash(mkdir *)",
-      "Bash(mv *)",
-      "Bash(rm *)",
-      "Bash(touch *)",
-      "Bash(echo:*)",
-      "Bash(pwd *)",
-      "Bash(grep:*)",
-      "Bash(find *)",
-      "Bash(head *)",
-      "Bash(tail *)",
-      "Bash(wc *)",
-      "Bash(cat *)",
+      "Bash",
+      "Read",
      "Edit",
      "Write",
+      "Glob",
+      "Grep",
      "mcp__huskies__*"
    ]
  },
-  "enabledMcpjsonServers": [
-    "huskies"
-  ]
+  "enabledMcpjsonServers": ["huskies"]
 }
@@ -1,5 +1,6 @@
 # Claude Code
 .claude/settings.local.json
+.claude/scheduled_tasks.lock
 .mcp.json

 # Local environment (secrets)
@@ -15,6 +16,9 @@ _merge_parsed.json
 .huskies/bot.toml.bak
 .huskies/build_hash

+# Per-worktree planning file (written by coder agents, must never reach squash commits)
+PLAN.md
+
 # Coverage report (generated by script/test_coverage, not tracked in git)
 .coverage_report.json
 .coverage_baseline
@@ -58,3 +62,4 @@ server/target
 # Ignore old story files until we feel like deleting them
 .storkit
 .storkit_port
+/.huskies/node_identity.key
@@ -33,3 +33,4 @@ wishlist.md
 # Database
 pipeline.db
 pipeline.db.bak*
+session_store.json
@@ -1,5 +1,60 @@
 # Huskies project-local agent guidance

+## Session Start & Resume Protocol
+
+### PLAN.md — required for every coder session
+
+At the very start of each coder session, before doing any code exploration, check for `PLAN.md` in the worktree root:
+
+**If `PLAN.md` exists (resuming after a watchdog respawn):**
+1. Read `PLAN.md` first — it is your primary orientation document.
+2. Only after reading it, call `git_log` / `git_diff` to see commits made since the plan was last updated.
+3. Reconcile any divergence between the plan and the current git state, then update the plan.
+
+**If `PLAN.md` is absent (first session on this story):**
+1. Write `PLAN.md` before any grep, file read, or exploration tool call.
+2. Populate it with what you know from the story ACs alone; add specifics as you discover them.
+
+### What PLAN.md must contain
+
+`PLAN.md` is a living document. Update it after each completed AC or natural unit of work — not only at the start.
+
+**Required trigger:** Before every `wip(...)` commit AND the final commit, update PLAN.md's "Current state" section to reflect what's now done, and tick off completed items in "What's left". This is required, not optional — stale "Current state: No code changes yet" while files are being edited is a process failure. Stage the PLAN.md update in the same commit as the code change it describes.
+
+Required sections:
+
+```markdown
+# Plan: Story <id>
+
+## ACs → implementation locations
+- AC 1: <exact file path>:<line range> — <one-line description of what changes>
+- AC 2: <exact file path>:<line range> — …
+…
+
+## Decisions
+- <Decision made>: <rationale> — rejected alternative: <what was considered and why it lost>
+…
+
+## Current state
+<What has been done so far. Reference commit hashes or specific functions completed.>
+
+## What's left
+- [ ] <specific remaining task with file path and function name>
+…
+```
+
+### Non-conforming outputs
+
+A PLAN.md that contains only generic steps like "read the code", "write the code", "run the tests", or leaves file paths as `<TBD>` or unspecified is **non-conforming**. Every AC entry must name a real file path and describe the actual change. Every decision entry must name both the chosen approach and at least one rejected alternative with a reason. A stub plan is worse than no plan — rewrite it with specifics.
+
+## Doc comments — your merge will fail if you skip even one
+
+Every time you introduce a NEW public item — `pub mod X`, `pub fn`, `pub struct`, `pub enum`, `pub trait`, `pub const`, `pub static`, `pub type`, or a `mod X;` declaration that introduces a new module file — the line directly above it **MUST** be a doc comment starting with `///` (or `//!` at the top of a new module file).
+
+There are no exceptions. The merge gate runs `source-map-check` and rejects the merge for any single missing doc comment. Two stories today (961, 962) passed every test, every clippy check, and every other gate, then got bounced at the final step because of one missed `///` on a `pub mod` line. **Treat the `///` as part of writing the declaration, not as an afterthought.**
+
+Before committing, run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` and address every missing-docs direction it prints. If you added a new module file (e.g. `foo.rs` or `foo/mod.rs`), the FIRST line of that file MUST be a `//! What this module is for` doc comment.
+
 ## Documentation
 Docs live in `website/docs/*.html` (static HTML), **not** Markdown files. When a story asks you to document something, edit the relevant `.html` file in `website/docs/`.

@@ -20,5 +75,14 @@ The frontend is embedded into the Rust binary via `rust-embed`. Run `npm run bui

 Clippy is zero-tolerance: no warnings allowed. Fix every warning before committing.

+## Pre-commit hook
+
+Every agent worktree has a pre-commit hook installed at `.git-hooks/pre-commit` that runs `script/check` (fmt-check, clippy, cargo check, source-map-check) before every `git commit`. If the hook fails, fix the issues shown and re-run `script/check` to validate.
+
+`git commit --no-verify` bypasses the hook. Do **not** use it. The hook exists to prevent broken commits from reaching the merge gate; bypassing it defeats the purpose and wastes CI cycles.
+
+## File size
+Target a maximum of 800 lines per source file as a soft guide. If a file grows beyond 800 lines, decompose it by concern into smaller modules. Split at natural seams: group related types, functions, or handlers together and move each cohesive group to its own file. This keeps files readable and diffs focused.
+
 ## Runtime validation
 The `validate_agents` function in `server/src/config.rs` rejects unknown runtimes. Supported values: `"claude-code"` and `"gemini"`. Adding a new runtime requires updating that function.
@@ -3,37 +3,44 @@ name = "coder-1"
 stage = "coder"
 role = "Full-stack engineer. Implements features across all components."
 model = "sonnet"
-max_turns = 50
+max_turns = 200
+max_tool_turns = 80
 max_budget_usd = 5.00
-prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks until tests complete and returns the results.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
-system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Always run the run_tests MCP tool before committing — do not commit until tests pass. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Add //! module-level doc comments to any new modules and /// doc comments to any new public functions, structs, or enums. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes."
+disallowed_tools = ["ScheduleWakeup"]
+prompt ="You are working in a git worktree on story {{story_id}}. The story details are in your prompt above. See .huskies/specs/tech/STACK.md for the tech stack and source map when needed. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
+system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Step 0: Before anything else, call `git_status` and `git_log` + `git_diff` against `master..HEAD` to discover any prior-session work in this worktree — uncommitted changes AND commits already on the feature branch. If either shows progress, RESUME from there; do not re-explore the codebase from scratch. To read story content, ACs, or description, call the `get_story_todos` MCP tool — do NOT search for a story `.md` file on disk; story content is CRDT-only. Do NOT run run_tests at the start of a new session on a freshly-forked worktree — master is gated and assumed green. Only run run_tests after you have made changes, to validate your own diff. Always run run_tests before committing — do not commit until tests pass. run_tests blocks server-side and returns the full result; do not poll get_test_result. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Before committing, run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` to check doc coverage on your changed files and address every missing-docs direction it prints. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes. For refactors that delete code or change function signatures, delete first and let the compiler error list be your guide to call sites — do not pre-read files trying to predict what will break. Each compile error is one mechanical fix; resist the urge to explore. Run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` BEFORE you commit and address every direction it prints. For cross-stack stories (any story that touches more than 5 files OR more than 2 modules), commit progressively after each completed acceptance criterion or natural unit of work — do not save everything for a single end-of-story commit. Use `wip(story-{id}): {AC summary}` for intermediate commits and `{type}({id}): {summary}` for the final commit. This rule does NOT apply to small bug fixes or single-AC stories — for those, a single commit at the end is correct. For fast compile-error feedback while iterating, call `run_check` (runs `script/check`). Use `run_tests` only to validate the full pipeline before committing."

 [[agent]]
 name = "coder-2"
 stage = "coder"
 role = "Full-stack engineer. Implements features across all components."
 model = "sonnet"
-max_turns = 50
+max_turns = 200
+max_tool_turns = 80
 max_budget_usd = 5.00
-prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks until tests complete and returns the results.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
-system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Always run the run_tests MCP tool before committing — do not commit until tests pass. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Add //! module-level doc comments to any new modules and /// doc comments to any new public functions, structs, or enums. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes."
+disallowed_tools = ["ScheduleWakeup"]
+prompt ="You are working in a git worktree on story {{story_id}}. The story details are in your prompt above. See .huskies/specs/tech/STACK.md for the tech stack and source map when needed. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
+system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Step 0: Before anything else, call `git_status` and `git_log` + `git_diff` against `master..HEAD` to discover any prior-session work in this worktree — uncommitted changes AND commits already on the feature branch. If either shows progress, RESUME from there; do not re-explore the codebase from scratch. To read story content, ACs, or description, call the `get_story_todos` MCP tool — do NOT search for a story `.md` file on disk; story content is CRDT-only. Do NOT run run_tests at the start of a new session on a freshly-forked worktree — master is gated and assumed green. Only run run_tests after you have made changes, to validate your own diff. Always run run_tests before committing — do not commit until tests pass. run_tests blocks server-side and returns the full result; do not poll get_test_result. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Before committing, run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` to check doc coverage on your changed files and address every missing-docs direction it prints. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes. For refactors that delete code or change function signatures, delete first and let the compiler error list be your guide to call sites — do not pre-read files trying to predict what will break. Each compile error is one mechanical fix; resist the urge to explore. Run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` BEFORE you commit and address every direction it prints. For cross-stack stories (any story that touches more than 5 files OR more than 2 modules), commit progressively after each completed acceptance criterion or natural unit of work — do not save everything for a single end-of-story commit. Use `wip(story-{id}): {AC summary}` for intermediate commits and `{type}({id}): {summary}` for the final commit. This rule does NOT apply to small bug fixes or single-AC stories — for those, a single commit at the end is correct. For fast compile-error feedback while iterating, call `run_check` (runs `script/check`). Use `run_tests` only to validate the full pipeline before committing."

 [[agent]]
 name = "coder-3"
 stage = "coder"
 role = "Full-stack engineer. Implements features across all components."
 model = "sonnet"
-max_turns = 50
+max_turns = 200
+max_tool_turns = 80
 max_budget_usd = 5.00
-prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks until tests complete and returns the results.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
-system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Always run the run_tests MCP tool before committing — do not commit until tests pass. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Add //! module-level doc comments to any new modules and /// doc comments to any new public functions, structs, or enums. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes."
+disallowed_tools = ["ScheduleWakeup"]
+prompt ="You are working in a git worktree on story {{story_id}}. The story details are in your prompt above. See .huskies/specs/tech/STACK.md for the tech stack and source map when needed. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
+system_prompt = "You are a full-stack engineer working autonomously in a git worktree. Step 0: Before anything else, call `git_status` and `git_log` + `git_diff` against `master..HEAD` to discover any prior-session work in this worktree — uncommitted changes AND commits already on the feature branch. If either shows progress, RESUME from there; do not re-explore the codebase from scratch. To read story content, ACs, or description, call the `get_story_todos` MCP tool — do NOT search for a story `.md` file on disk; story content is CRDT-only. Do NOT run run_tests at the start of a new session on a freshly-forked worktree — master is gated and assumed green. Only run run_tests after you have made changes, to validate your own diff. Always run run_tests before committing — do not commit until tests pass. run_tests blocks server-side and returns the full result; do not poll get_test_result. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Before committing, run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` to check doc coverage on your changed files and address every missing-docs direction it prints. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes. For refactors that delete code or change function signatures, delete first and let the compiler error list be your guide to call sites — do not pre-read files trying to predict what will break. Each compile error is one mechanical fix; resist the urge to explore. Run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` BEFORE you commit and address every direction it prints. For cross-stack stories (any story that touches more than 5 files OR more than 2 modules), commit progressively after each completed acceptance criterion or natural unit of work — do not save everything for a single end-of-story commit. Use `wip(story-{id}): {AC summary}` for intermediate commits and `{type}({id}): {summary}` for the final commit. This rule does NOT apply to small bug fixes or single-AC stories — for those, a single commit at the end is correct. For fast compile-error feedback while iterating, call `run_check` (runs `script/check`). Use `run_tests` only to validate the full pipeline before committing."

 [[agent]]
 name = "qa-2"
 stage = "qa"
 role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
 model = "sonnet"
-max_turns = 40
+max_turns = 120
+max_tool_turns = 40
 max_budget_usd = 4.00
 prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.

@@ -48,7 +55,7 @@ Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/spec

 ### 1. Deterministic Gates (Prerequisites)
 Run these first — if any fail, reject immediately without proceeding to AC review:
- Call the `run_tests` MCP tool — it blocks until complete. All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable).
+- Call the `run_tests` MCP tool — it blocks until tests finish and returns the full result directly. All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable). All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable).

 ### 2. Code Change Review
 - Run `git diff master...HEAD --stat` to see what files changed
@@ -124,17 +131,20 @@ name = "coder-opus"
 stage = "coder"
 role = "Senior full-stack engineer for complex tasks. Implements features across all components."
 model = "opus"
-max_turns = 80
+max_turns = 200
+max_tool_turns = 80
 max_budget_usd = 20.00
-prompt = "You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks until tests complete and returns the results.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
-system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. You handle complex tasks requiring deep architectural understanding. Always run the run_tests MCP tool before committing — do not commit until tests pass. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Add //! module-level doc comments to any new modules and /// doc comments to any new public functions, structs, or enums. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes."
+disallowed_tools = ["ScheduleWakeup"]
+prompt ="You are working in a git worktree on story {{story_id}}. The story details are in your prompt above. See .huskies/specs/tech/STACK.md for the tech stack and source map when needed. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
+system_prompt = "You are a senior full-stack engineer working autonomously in a git worktree. Step 0: Before anything else, call `git_status` and `git_log` + `git_diff` against `master..HEAD` to discover any prior-session work in this worktree — uncommitted changes AND commits already on the feature branch. If either shows progress, RESUME from there; do not re-explore the codebase from scratch. To read story content, ACs, or description, call the `get_story_todos` MCP tool — do NOT search for a story `.md` file on disk; story content is CRDT-only. You handle complex tasks requiring deep architectural understanding. Do NOT run run_tests at the start of a new session on a freshly-forked worktree — master is gated and assumed green. Only run run_tests after you have made changes, to validate your own diff. Always run run_tests before committing — do not commit until tests pass. run_tests blocks server-side and returns the full result; do not poll get_test_result. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done. Before committing, run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` to check doc coverage on your changed files and address every missing-docs direction it prints. Do not accept stories, move them between stages, or merge to master — the server handles that. For bugs, trust the story description and make surgical fixes. For refactors that delete code or change function signatures, delete first and let the compiler error list be your guide to call sites — do not pre-read files trying to predict what will break. Each compile error is one mechanical fix; resist the urge to explore. Run `cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master` BEFORE you commit and address every direction it prints. For cross-stack stories (any story that touches more than 5 files OR more than 2 modules), commit progressively after each completed acceptance criterion or natural unit of work — do not save everything for a single end-of-story commit. Use `wip(story-{id}): {AC summary}` for intermediate commits and `{type}({id}): {summary}` for the final commit. This rule does NOT apply to small bug fixes or single-AC stories — for those, a single commit at the end is correct. For fast compile-error feedback while iterating, call `run_check` (runs `script/check`). Use `run_tests` only to validate the full pipeline before committing."

 [[agent]]
 name = "qa"
 stage = "qa"
 role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
 model = "sonnet"
-max_turns = 40
+max_turns = 120
+max_tool_turns = 40
 max_budget_usd = 4.00
 prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.

@@ -149,7 +159,7 @@ Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/spec

 ### 1. Deterministic Gates (Prerequisites)
 Run these first — if any fail, reject immediately without proceeding to AC review:
- Call the `run_tests` MCP tool — it blocks until complete. All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable).
+- Call the `run_tests` MCP tool — it blocks until tests finish and returns the full result directly. All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable). All gates must pass (0 lint errors/warnings, all tests green, frontend build clean if applicable).

 ### 2. Code Change Review
 - Run `git diff master...HEAD --stat` to see what files changed
@@ -225,18 +235,21 @@ name = "mergemaster"
 stage = "mergemaster"
 role = "Merges completed coder work into master, runs quality gates, archives stories, and cleans up worktrees."
 model = "opus"
-max_turns = 30
-max_budget_usd = 5.00
+max_turns = 250
+max_tool_turns = 100
+max_budget_usd = 25.00
+inactivity_timeout_secs = 900
 prompt = """You are the mergemaster agent for story {{story_id}}. Your job is to merge the completed coder work into master.

 Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map.

 ## Your Workflow
-1. Call merge_agent_work(story_id='{{story_id}}'). It blocks until the merge completes and returns the full result.
-2. If success and gates passed: you're done. Exit.
-3. If gates failed: read the gate_output carefully, fix the issues in the merge workspace at `.huskies/merge_workspace/`, run run_tests MCP tool to verify, recommit, and call merge_agent_work again.
-4. If merge failed for any other reason: call report_merge_failure(story_id='{{story_id}}', reason='<details>') and exit.
-5. After 3 failed fix attempts, call report_merge_failure and exit.
+1. Call merge_agent_work(story_id='{{story_id}}'). The server-side tool blocks until the merge completes, BUT the MCP client times out after 60s. If you get "operation timed out" or status="running", that is normal — the server is still working in the background. Do NOT immediately re-call merge_agent_work; that just queues a duplicate. Instead, follow Step 2.
+2. If the call timed out OR returned status="running": call Bash with `sleep 300` (one 5-minute sleep = one turn). Then call get_merge_status once. Repeat up to 3 times (15 minutes total). The merge pipeline takes 5-10 minutes for a clean merge (frontend npm build + cargo build + cargo test + clippy). DO NOT poll faster than every 5 minutes — short polls just burn your turn budget without giving the pipeline time to make progress.
+3. If get_merge_status eventually returns success: you're done. Exit.
+4. If gates failed: read the gate_output carefully, fix the issues in the merge workspace at `.huskies/merge_workspace/`, run run_tests MCP tool to verify, recommit, and call merge_agent_work again.
+5. If merge failed for any other reason: call report_merge_failure(story_id='{{story_id}}', reason='<details>') and exit.
+6. After 3 failed fix attempts, call report_merge_failure and exit.

 ## Fixing Gate Failures

@@ -257,4 +270,4 @@ To fix:
 - NEVER manually move story files between pipeline stages
 - NEVER call accept_story — merge_agent_work handles that
 - ALWAYS call report_merge_failure if you can't fix the merge"""
-system_prompt = "You are the mergemaster agent. Call merge_agent_work to merge. If gates fail, fix the issues in the merge workspace, verify with run_lint and run_tests MCP tools, recommit, and retrigger. After 3 failed attempts, call report_merge_failure and exit. Never move story files or call accept_story."
+system_prompt = "You are the mergemaster agent. Call merge_agent_work to merge. If gates fail, fix the issues in the merge workspace, verify with run_lint and run_tests MCP tools, recommit, and retrigger. After 3 failed attempts, call report_merge_failure and exit. Never move story files or call accept_story. CRITICAL: When fixing gate failures, commit the fix on feature/story-{id} (the feature branch), NOT in the merge_workspace — commits made in the merge_workspace are discarded when the next squash-merge re-runs from the feature branch. Example: cd /workspace/.huskies/worktrees/{id} && git add ... && git commit && retrigger merge. When resolving merge conflicts: before editing any conflicted file, use git blame and git log on the merge commit to identify the originating story IDs for each side of the conflict. Read those stories' spec files (.huskies/work/ or .huskies/specs/) to understand the intent of each change. Resolve conflicts in a way that satisfies both stories' intent, and explain the resolution in the merge commit message (cite the story IDs and why you chose the resolution you did)."
@@ -0,0 +1,37 @@
+# Backlog Triage — Post-929/934 (Story 935)
+
+Reviewed all active backlog/parked stories against the changes landed in:
+- **929**: deleted `db/yaml_legacy.rs` — CRDT is the sole source of truth
+- **934**: typed `Stage` enum replaces the directory-string state model
+
+## Summary
+
+| Tag | Count | Stories |
+|-----|-------|---------|
+| subsumed-by-929 | 1 | 938 |
+| subsumed-by-934 | 0 | — |
+| deleted-as-duplicate | 1 | 931 (dup of 930) |
+| needs-rewire-to-typed-model | 3 | 895, 919, 930 |
+| unaffected | 8 | 810, 811, 893, 897, 899, 928, 937, 939 |
+| anomaly (zombie, no CRDT file) | 1 | 912 |
+
+**Total reviewed: 14**
+
+## Per-Story Tags
+
+| ID | Name | Tag | Action |
+|----|------|-----|--------|
+| 810 | Upgrade libsqlite3-sys | unaffected | — |
+| 811 | Fly.io Machines API spike | unaffected | — |
+| 893 | MergeFailure→Coding legal transition | unaffected | ACs already reference typed CRDT Stage |
+| 895 | Show Blocked section in chat status | needs-rewire-to-typed-model | Rewired ACs 0, 4, 5 to reference `Stage::Coding`, `Stage::MergeFailure`, `ArchiveReason::Frozen` |
+| 897 | Gateway permission prompts | unaffected | — |
+| 899 | Gateway↔sled WS migration | unaffected | — |
+| 912 | Auto-spawn mergemaster on conflict | anomaly | Listed in upcoming but `get_story_todos` returns "Story file not found" — no CRDT entry; zombie entry to investigate |
+| 919 | unblock_story MergeFailure regresses to backlog | needs-rewire-to-typed-model | Rewired all 3 ACs: replaced `4_merge` dir with `Stage::Merge`, "failure flag" with `Stage::MergeFailure` |
+| 928 | update_story depends_on doesn't persist | unaffected | ACs already reference CRDT register |
+| 930 | merge_agent_work doesn't auto-transition to Done | needs-rewire-to-typed-model | Rewired ACs 0 and 2: replaced `5_done` dir with `Stage::Done` |
+| 931 | Duplicate of 930 (same bug, same name) | deleted-as-duplicate | Also referenced `4_merge_failure`/`5_done` directories and ad-hoc `blocked`/`merge_failure` flags |
+| 937 | start_agent spawns on tombstoned story | unaffected | ACs already reference CRDT `is_deleted` |
+| 938 | start_agent falls back to .md files | subsumed-by-929 | The .md-file fallback was eliminated by 929; also a duplicate of 937 |
+| 939 | Move frontend API to WS-RPC | unaffected | — |
@@ -0,0 +1,401 @@
+# Spike 679: Migrate Inter-Component HTTP to Signed CRDT WebSocket Bus
+
+## 1. Endpoint Inventory
+
+Every HTTP/WS endpoint currently exposed by the gateway and project servers, with caller, purpose, and requirements.
+
+### Standard-Mode Server Endpoints
+
+#### WebSocket
+
+| Path | Caller | Purpose | Latency | Freshness | Durability |
+|------|--------|---------|---------|-----------|------------|
+| `/ws` | Browser frontend | Chat messages, command output streaming | Real-time | N/A (stream) | Ephemeral |
+| `/crdt-sync` | Peer nodes, headless agents | CRDT op replication, snapshot exchange | Sub-second | Must converge | Durable (SQLite) |
+
+#### MCP
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET/POST | `/mcp` | Claude Code agent (stdio), gateway proxy | Agent tool calls (story create/update, git, shell, etc.) | <500 ms | Strong (mutations) | Durable via CRDT |
+
+#### Agents API
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| POST | `/api/agents/start` | Frontend, MCP | Start a coding agent for a story | <1 s | N/A | Durable (process started) |
+| POST | `/api/agents/stop` | Frontend, MCP | Stop a running agent | <1 s | N/A | Durable (process killed) |
+| GET | `/api/agents` | Frontend | List active agents and status | <100 ms | Near-real-time | None (in-memory) |
+| GET | `/api/agents/config` | Frontend | Read agent config from project.toml | <100 ms | Seconds OK | None |
+| POST | `/api/agents/config/reload` | Frontend | Reload config from disk | <500 ms | N/A | None |
+| POST | `/api/agents/worktrees` | MCP | Create worktree for a story | <1 s | N/A | Durable (git) |
+| GET | `/api/agents/worktrees` | Frontend, MCP | List worktrees | <100 ms | Seconds OK | None |
+| DELETE | `/api/agents/worktrees/:story_id` | MCP | Remove a worktree | <1 s | N/A | Durable (git) |
+| GET | `/api/agents/:story_id/:name/output` | Frontend, MCP | Read agent log file | <200 ms | Seconds OK | Durable (JSONL file) |
+| GET | `/api/work-items/:story_id` | MCP | Get story test results | <100 ms | Seconds OK | Durable (file) |
+| GET | `/api/work-items/:story_id/test-results` | MCP | Fetch cached test run output | <100 ms | Seconds OK | Durable (file) |
+| GET | `/api/work-items/:story_id/token-cost` | MCP | Get token usage for story | <100 ms | Seconds OK | Durable (file) |
+| GET | `/api/token-usage` | Frontend | Aggregate token usage | <100 ms | Minutes OK | Durable (file) |
+
+#### Project Management
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET | `/api/project` | Frontend | Get current project config | <100 ms | Seconds OK | Durable (file) |
+| POST | `/api/project` | Frontend | Update project config | <500 ms | N/A | Durable (file) |
+| DELETE | `/api/project` | Frontend | Reset project config | <500 ms | N/A | Durable (file) |
+| GET | `/api/projects` | Frontend | List all known projects | <100 ms | Seconds OK | Durable (file) |
+| POST | `/api/projects/forget` | Frontend | Remove project from registry | <500 ms | N/A | Durable (file) |
+
+#### Chat
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| POST | `/api/chat/cancel` | Frontend | Cancel an in-progress chat | <100 ms | N/A | None |
+
+#### Settings
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET/PUT | `/api/settings` | Frontend | Read/write general settings | <100 ms | Seconds OK | Durable (JSON store) |
+| GET/PUT | `/api/settings/editor` | Frontend | Read/write editor setting | <100 ms | Seconds OK | Durable (JSON store) |
+| POST | `/api/settings/open-file` | Frontend | Open file in editor | <500 ms | N/A | None |
+
+#### IO (Filesystem/Shell)
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| POST | `/api/io/fs/read` | Agent (MCP alt), Frontend | Read file contents | <200 ms | Real-time | N/A |
+| POST | `/api/io/fs/write` | Agent (MCP alt), Frontend | Write file contents | <500 ms | N/A | Durable (fs) |
+| POST | `/api/io/fs/list` | Frontend | List directory relative to project | <100 ms | Real-time | N/A |
+| POST | `/api/io/fs/list/absolute` | Frontend | List absolute path directory | <100 ms | Real-time | N/A |
+| POST | `/api/io/fs/create/absolute` | Frontend | Create file at absolute path | <500 ms | N/A | Durable (fs) |
+| GET | `/api/io/fs/home` | Frontend | Get home directory | <50 ms | Stable | N/A |
+| GET | `/api/io/fs/files` | Frontend | File tree of project | <500 ms | Seconds OK | N/A |
+| POST | `/api/io/search` | Frontend | Ripgrep search | <1 s | Real-time | N/A |
+| POST | `/api/io/shell/exec` | Frontend | Execute shell command | Variable | N/A | None |
+
+#### Model / LLM Config
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET/POST | `/api/model` | Frontend | Read/write active model selection | <100 ms | Seconds OK | Durable (JSON store) |
+| GET | `/api/ollama/models` | Frontend | List available Ollama models | <1 s | Minutes OK | None |
+| GET | `/api/anthropic/key/exists` | Frontend | Check if API key is set | <50 ms | Seconds OK | None |
+| POST | `/api/anthropic/key` | Frontend | Store Anthropic API key | <100 ms | N/A | Durable (store) |
+| GET | `/api/anthropic/models` | Frontend | List Claude models | <1 s | Minutes OK | None |
+
+#### Wizard
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET | `/api/wizard` | Frontend | Get wizard state | <100 ms | Real-time | Durable (store) |
+| PUT | `/api/wizard/step/:step/content` | Frontend | Update step content | <200 ms | N/A | Durable (store) |
+| POST | `/api/wizard/step/:step/confirm` | Frontend | Confirm a wizard step | <200 ms | N/A | Durable |
+| POST | `/api/wizard/step/:step/skip` | Frontend | Skip a wizard step | <100 ms | N/A | Durable |
+| POST | `/api/wizard/step/:step/generating` | Frontend | Mark step as generating | <100 ms | N/A | Durable |
+
+#### Bot / Transports
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| POST | `/api/bot/command` | Frontend | Send a bot command | <500 ms | N/A | None |
+| GET/PUT | `/api/bot/config` | Frontend | Read/write bot config | <100 ms | Seconds OK | Durable (file) |
+
+#### Auth / OAuth
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET | `/oauth/authorize` | Browser redirect | Start OAuth flow | <200 ms | N/A | None |
+| GET | `/callback` | OAuth provider redirect | Handle OAuth callback | <500 ms | N/A | Durable (token) |
+| GET | `/oauth/status` | Frontend | Check OAuth connection status | <100 ms | Seconds OK | None |
+
+#### Webhooks (External Inbound)
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET/POST | `/webhook/whatsapp` | WhatsApp platform | Receive WhatsApp messages | <200 ms | Real-time | None (forwarded) |
+| POST | `/webhook/slack` | Slack platform | Receive Slack events | <200 ms | Real-time | None (forwarded) |
+| POST | `/webhook/slack/command` | Slack platform | Receive Slack slash commands | <200 ms | Real-time | None (forwarded) |
+
+#### Debug / Health
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET | `/health` | Gateway, load balancer | Health check | <50 ms | Real-time | None |
+| GET | `/debug/crdt` | Developer/ops | Dump raw CRDT state | <500 ms | Real-time | None |
+| GET (SSE) | `/api/agents/:story_id/:name/stream` | Frontend | Stream live agent output | Real-time | N/A | None |
+| GET | `/api/events` | Gateway polling task | Poll project events | <200 ms | Seconds OK | None |
+
+#### Frontend Assets
+
+| Path | Purpose |
+|------|---------|
+| `/` | SPA entry point |
+| `/assets/*` | JS/CSS/fonts (rust-embed) |
+| `/*path` | SPA fallback |
+
+---
+
+### Gateway-Mode Server Endpoints
+
+| Method | Path | Caller | Purpose | Latency | Freshness | Durability |
+|--------|------|--------|---------|---------|-----------|------------|
+| GET | `/health` | Load balancer, project containers | Health check | <50 ms | Real-time | None |
+| GET | `/bot-config` | Browser | Serve bot config HTML page | <100 ms | N/A | N/A |
+| GET | `/api/gateway` | Frontend | Get gateway state (active project, project list) | <100 ms | Seconds OK | Durable (toml) |
+| POST | `/api/gateway/switch` | Frontend, MCP | Switch active project | <200 ms | N/A | Durable (in-memory + file) |
+| GET | `/api/gateway/pipeline` | Frontend | Aggregate pipeline status across all projects | <1 s | Seconds OK | None (aggregated) |
+| POST | `/api/gateway/projects` | Frontend, init_project MCP | Register a new project in projects.toml | <500 ms | N/A | Durable (file) |
+| DELETE | `/api/gateway/projects/:name` | Frontend | Remove a registered project | <500 ms | N/A | Durable (file) |
+| GET/PUT | `/api/gateway/bot-config` | Frontend | Read/write bot config file | <100 ms | Seconds OK | Durable (file) |
+| GET/POST | `/mcp` | Claude Code agent | MCP proxy to active project | <500 ms | Strong | Durable via upstream |
+| GET | `/gateway/mode` | Frontend | Check whether gateway mode is active | <50 ms | Stable | None |
+| POST | `/gateway/tokens` | Ops/admin | Generate a headless-agent join token | <100 ms | N/A | Durable (in-memory HashMap) |
+| POST | `/gateway/register` | Headless build agent at startup | Register agent with token, supply address | <200 ms | N/A | In-memory Vec |
+| GET | `/gateway/agents` | Frontend, ops | List all registered headless agents | <100 ms | Seconds OK | In-memory Vec |
+| DELETE | `/gateway/agents/:id` | Frontend, ops | Deregister an agent | <200 ms | N/A | In-memory Vec |
+| POST | `/gateway/agents/:id/assign` | Frontend, ops | Assign agent to a project | <200 ms | N/A | In-memory Vec |
+| POST | `/gateway/agents/:id/heartbeat` | Headless agent (periodic) | Signal agent is alive | <100 ms | Real-time | In-memory Vec |
+
+---
+
+## 2. Classification
+
+| Endpoint Group | Classification |
+|---------------|----------------|
+| `/webhook/whatsapp`, `/webhook/slack`, `/webhook/slack/command` | **external-webhook** |
+| `/`, `/assets/*`, `/*path`, `/bot-config` (HTML) | **frontend-asset** |
+| `POST /api/agents/start`, `POST /api/agents/stop`, `POST /api/agents/worktrees`, `DELETE /api/agents/worktrees/:id` | **write** |
+| `POST /api/project`, `DELETE /api/project`, `POST /api/projects/forget` | **write** |
+| `PUT /api/settings`, `PUT /api/settings/editor`, `POST /api/settings/open-file` | **write** |
+| `POST /api/model`, `POST /api/anthropic/key` | **write** |
+| `POST /api/wizard/step/*`, `PUT /api/wizard/step/*` | **write** |
+| `POST /api/bot/command`, `PUT /api/bot/config` | **write** |
+| `POST /api/io/fs/write`, `POST /api/io/fs/create/absolute`, `POST /api/io/shell/exec` | **write** |
+| `POST /api/gateway/switch`, `POST /api/gateway/projects`, `DELETE /api/gateway/projects/:name` | **write** |
+| `POST /gateway/tokens`, `POST /gateway/register`, `DELETE /gateway/agents/:id`, `POST /gateway/agents/:id/assign` | **write** |
+| `POST /gateway/agents/:id/heartbeat` | **write** |
+| `POST /mcp`, `GET /mcp` | **write** (mutations dominate; reads via CRDT subscription eventually) |
+| All remaining `GET` endpoints | **read** |
+| `POST /api/chat/cancel`, `POST /api/agents/config/reload` | **write** (side-effect only, stateless result) |
+
+---
+
+## 3. Write Endpoints → Target CRDT Collections
+
+| Endpoint | Current Storage | Target CRDT Collection | Notes |
+|----------|----------------|----------------------|-------|
+| `POST /gateway/tokens` | `GatewayState.pending_tokens: HashMap<String, PendingToken>` | `tokens` — LWW map keyed by token UUID | TTL field; garbage-collect expired entries |
+| `POST /gateway/register` | `GatewayState.joined_agents: Vec<JoinedAgent>` | `nodes` — existing CRDT node collection (extend with agent metadata) | Already partially exists for CRDT mesh peers |
+| `POST /gateway/agents/:id/assign` | `joined_agents` Vec mutation | `nodes` — LWW field `assigned_project` per node entry | |
+| `DELETE /gateway/agents/:id` | `joined_agents` Vec mutation | `nodes` — tombstone / remove entry | Add-wins or explicit remove flag |
+| `POST /gateway/agents/:id/heartbeat` | `joined_agents` Vec `last_seen` field | `nodes` — LWW `last_seen_ms` field per node | Low-cost: just a timestamp LWW |
+| `POST /api/agents/start` | `AgentPool.agents: HashMap` | No new CRDT; agent process is local. Side-effect only. Assign record if cross-node visibility needed → `active_agents` LWW map | |
+| `POST /api/agents/stop` | `AgentPool.agents` mutation | Same as above | |
+| `POST /api/agents/worktrees` | git filesystem | No CRDT needed; git worktrees are local | |
+| `POST /api/gateway/switch` | `GatewayState.active_project` in-memory | `gateway_config` — LWW field `active_project` | |
+| `POST /api/gateway/projects` | `projects.toml` file | `gateway_config.projects` — LWW map by project name | |
+| `DELETE /api/gateway/projects/:name` | `projects.toml` file | `gateway_config.projects` — tombstone entry | |
+| `PUT /api/settings`, `PUT /api/settings/editor` | `JsonFileStore` | `settings` — LWW map per key | Low priority; settings are single-node today |
+| `POST /api/model` | `JsonFileStore` | `settings` — same LWW map | |
+| `POST /api/anthropic/key` | Encrypted file/env | Stay out of CRDT (secrets) | |
+| `PUT /api/bot/config` | `.huskies/bot.toml` file | Stay out of CRDT (credentials) | |
+| `POST /mcp` | CRDT (already) | Already replicated via CRDT WebSocket bus | Story/pipeline mutations are CRDT-native |
+| Merge job tracking | `AgentPool.merge_jobs: HashMap<String, MergeJob>` | `merge_jobs` — LWW map by story_id, or append-only log | Needed for cross-node merge visibility |
+| Test job tracking | `AppContext.test_job_registry: HashMap<WorkPath, TestJob>` | `test_jobs` — LWW map by story_id | Needed so any node can query test status |
+
+---
+
+## 4. Read Endpoints → Proposed RPC Frame Shapes
+
+| Endpoint | Request Fields | Response Fields |
+|----------|---------------|-----------------|
+| `GET /health` | _(none)_ | `{status: "ok", version: string, node_id: string}` |
+| `GET /api/gateway` | _(none)_ | `{active_project: string, projects: {name, url, healthy}[]}` |
+| `GET /api/gateway/pipeline` | _(none)_ | `{projects: {name: string, pipeline: PipelineStages}[]}` |
+| `GET /gateway/agents` | _(none)_ | `{agents: {id, label, address, assigned_project, last_seen_ms, alive: bool}[]}` |
+| `GET /api/agents` | _(none)_ | `{agents: {story_id, agent_name, pid, status, started_at}[]}` |
+| `GET /api/agents/worktrees` | _(none)_ | `{worktrees: {story_id, path, branch}[]}` |
+| `GET /api/agents/:id/:name/output` | _(path params)_ | `{lines: AgentLogLine[]}` |
+| `GET /api/work-items/:story_id/test-results` | _(path param)_ | `{passed: bool, output: string, ran_at: timestamp}` |
+| `GET /api/work-items/:story_id/token-cost` | _(path param)_ | `{input_tokens: u64, output_tokens: u64, cost_usd: f64}` |
+| `GET /api/token-usage` | _(none)_ | `{total_input: u64, total_output: u64, per_agent: {...}[]}` |
+| `GET /api/settings` | _(none)_ | `{settings: Record<string, JsonValue>}` |
+| `GET /api/model` | _(none)_ | `{provider: string, model: string}` |
+| `GET /api/events` | `{since: unix_ms}` | `{events: {type, payload, ts}[], next_since: unix_ms}` |
+| `GET /debug/crdt` | _(none)_ | `{crdt_doc: json}` |
+| `GET /api/wizard` | _(none)_ | `{steps: WizardStep[], current_step: string}` |
+| `GET /api/anthropic/models` | _(none)_ | `{models: {id, name}[]}` |
+| `GET /api/ollama/models` | _(none)_ | `{models: {name, size}[]}` |
+
+---
+
+## 5. Draft: Unsigned Read-RPC Protocol
+
+### Rationale
+
+Write mutations already flow through the CRDT bus (signed ops). Read endpoints are the remaining HTTP surface that could be migrated to the same WebSocket channel. This section drafts the envelope format so read RPCs can share the bus without requiring Ed25519 auth (unsigned reads are fine; only writes need authenticity guarantees).
+
+### Frame Envelope (JSON over WebSocket)
+
+```json
+// Request (caller → peer)
+{
+  "version": 1,
+  "kind": "rpc_request",
+  "correlation_id": "uuid-v4",
+  "ttl_ms": 5000,
+  "method": "get_pipeline_status",
+  "params": {}
+}
+
+// Success response (peer → caller)
+{
+  "version": 1,
+  "kind": "rpc_response",
+  "correlation_id": "uuid-v4",
+  "ok": true,
+  "result": { ... }
+}
+
+// Error response
+{
+  "version": 1,
+  "kind": "rpc_response",
+  "correlation_id": "uuid-v4",
+  "ok": false,
+  "error": "human-readable message",
+  "code": "NOT_FOUND | TIMEOUT | PEER_OFFLINE | INTERNAL"
+}
+```
+
+### Correlation IDs
+
+Each request carries a UUID v4 `correlation_id`. The responder echoes it verbatim. Callers maintain a `HashMap<String, oneshot::Sender>` to route responses back to waiting futures. On TTL expiry the entry is removed and the caller receives `Err(Timeout)`.
+
+### TTL Semantics
+
+- Caller specifies `ttl_ms` (default 5000, max 30000).
+- If the responding peer does not answer within the TTL, the caller synthesises a `TIMEOUT` error response locally.
+- Responders do not need to track TTLs; they answer as fast as they can.
+- Callers may use stale cached results if `ttl_ms == 0` is supplied and a cache entry exists (opt-in freshness trade-off).
+
+### Error Codes
+
+| Code | Meaning |
+|------|---------|
+| `NOT_FOUND` | Resource does not exist |
+| `TIMEOUT` | Peer did not respond within TTL |
+| `PEER_OFFLINE` | No live peer with the requested capability is connected |
+| `UNAUTHORIZED` | Caller lacks permission (future, when auth lands) |
+| `INTERNAL` | Unexpected server-side error |
+
+### Peer-Offline Handling
+
+- Before sending a request the caller checks whether any peer that can serve the method is currently connected.
+- If no peer is online, the caller immediately returns `PEER_OFFLINE` without queuing (fail-fast).
+- For idempotent reads, callers may fall back to a local CRDT-materialized view if `PEER_OFFLINE` or `TIMEOUT` is received.
+- Non-idempotent reads (e.g., `exec_shell`) must not be retried automatically.
+
+### Method Naming Convention
+
+`<noun>.<verb>` — e.g. `pipeline.get`, `agents.list`, `health.check`, `events.poll`.
+
+---
+
+## 6. In-Memory State → CRDT Collection Migration
+
+| Location | Field | Current Type | Proposed CRDT Type | Rationale |
+|----------|-------|-------------|-------------------|-----------|
+| `gateway.rs::GatewayState` | `pending_tokens` | `HashMap<String, PendingToken>` | **LWW-map** keyed by token UUID, with `expires_at` TTL field | Tokens are short-lived; LWW is fine; GC by TTL |
+| `gateway.rs::GatewayState` | `joined_agents` | `Vec<JoinedAgent>` | Extend existing **`nodes` CRDT collection** with agent metadata fields (label, address, assigned_project, last_seen_ms) | Nodes collection already exists for CRDT mesh peers |
+| `agents/pool/mod.rs::AgentPool` | `merge_jobs` | `HashMap<String, MergeJob>` | **LWW-map** keyed by story_id; fields: node_id, status, started_at, error | Required for cross-node merge visibility |
+| `agents/pool/mod.rs::AgentPool` | `agents` (running agent handles) | `HashMap<String, StoryAgent>` | **LWW-map** `active_agents` keyed by story_id; fields: node_id, agent_name, pid(optional), started_at, status | Process handles stay local; only metadata replicated |
+| `http/context.rs::AppContext` | `test_job_registry` | `HashMap<WorkPath, TestJob>` (TestJobRegistry) | **LWW-map** `test_jobs` keyed by story_id; fields: node_id, status, started_at, finished_at | Needed so any node can query test run status |
+| `agents/pool/auto_assign` | agent throttle / last-seen timestamps | Local variables / in-memory | **LWW-map** `agent_throttle` keyed by agent_name; field: last_dispatched_at | Prevents double-dispatch on multi-node |
+| `gateway.rs::GatewayState` | `active_project` | `Arc<RwLock<String>>` | **LWW register** in `gateway_config` collection, field `active_project` | Single-value; LWW is correct |
+| `gateway.rs::GatewayState` | `projects` (BTreeMap) | `Arc<RwLock<BTreeMap<String, ProjectEntry>>>` | **LWW-map** in `gateway_config.projects` keyed by project name | Infrequently mutated; LWW correct |
+
+### Summary of Proposed New CRDT Collections
+
+| Collection | Type | Notes |
+|-----------|------|-------|
+| `tokens` | LWW-map | Join tokens with TTL; garbage-collect on expiry |
+| `nodes` | LWW-map (extend existing) | Already exists; add agent metadata fields |
+| `merge_jobs` | LWW-map | One entry per story; overwritten on each merge attempt |
+| `active_agents` | LWW-map | One entry per story; metadata only (not process handles) |
+| `test_jobs` | LWW-map | One entry per story; test run status |
+| `agent_throttle` | LWW-map | One entry per agent name; last-dispatched timestamp |
+| `gateway_config` | LWW-map (or flat LWW fields) | `active_project`, `projects` map |
+
+---
+
+## 7. Migration Order and Dependencies
+
+### Blocking Dependency
+
+**Story 665 (Ed25519 auth)** must land before any write operation is migrated to the CRDT bus. Unsigned writes on a shared bus would allow any connected peer to forge mutations. Read RPCs do not require auth.
+
+### Wave 0 — Foundation (no story 665 needed)
+
+These can land in parallel with or before story 665:
+
+1. **Extend `nodes` CRDT collection** with `label`, `address`, `assigned_project`, `last_seen_ms` fields. This is a pure schema addition.
+2. **Add `merge_jobs` and `active_agents` LWW-maps** to the CRDT document schema (additive; existing nodes ignore unknown fields via `serde(default)`).
+3. **Implement unsigned read-RPC multiplexer** on the existing `/crdt-sync` WebSocket channel (new `kind: "rpc_request"/"rpc_response"` frame types, ignored by old peers).
+
+### Wave 1 — Migrate Heartbeat + Agent Registration (after `nodes` schema extended)
+
+- Replace `POST /gateway/agents/:id/heartbeat` HTTP call with a CRDT LWW write to `nodes[id].last_seen_ms`.
+- Replace `POST /gateway/register` with a CRDT insert into `nodes` collection.
+- Replace `POST /gateway/tokens` / token validation with CRDT `tokens` map read/write.
+- **Blocks on story 665** for the write side; read queries (list agents, check token) can migrate via read-RPC first.
+
+### Wave 2 — Migrate Read Endpoints to Read-RPC (no auth required)
+
+Can land in parallel with Wave 1 write migration:
+
+- `GET /health` → `health.check` RPC (gateway reads from CRDT `nodes` liveness)
+- `GET /gateway/agents` → `agents.list` RPC reading from CRDT `nodes`
+- `GET /api/events` polling loop → subscribe to CRDT op stream directly (eliminate polling)
+- `GET /api/gateway/pipeline` → `pipeline.get` RPC or direct CRDT materialisation (already replicated)
+- `GET /api/agents` → `active_agents.list` RPC reading from CRDT `active_agents`
+
+### Wave 3 — Migrate Merge and Test Job Tracking (after waves 0–1)
+
+- Replace `merge_jobs` HashMap with CRDT `merge_jobs` map writes on merge start/completion.
+- Replace `test_job_registry` HashMap with CRDT `test_jobs` map writes on test start/completion.
+- Enables: any node can query merge or test status without HTTP call to the node that started the job.
+
+### Wave 4 — Migrate Gateway Config Writes (after story 665)
+
+- `POST /api/gateway/switch`, `POST /api/gateway/projects`, `DELETE /api/gateway/projects/:name` → CRDT `gateway_config` LWW writes.
+- Low urgency; these are infrequent admin operations. Can keep HTTP as a thin wrapper that writes to CRDT.
+
+### Endpoints That Stay HTTP
+
+| Endpoint | Reason |
+|----------|--------|
+| `/webhook/whatsapp`, `/webhook/slack` | External platform callbacks; must remain HTTP |
+| `/oauth/authorize`, `/callback` | OAuth redirect flow; must remain HTTP |
+| `/api/io/*`, `/api/io/shell/exec` | Local filesystem/shell; process-local, not cross-node |
+| `/api/io/fs/*` | Same — local I/O only |
+| `/mcp` | External MCP clients (Claude Code CLI) speak HTTP/SSE; gateway proxy stays HTTP |
+| `/assets/*`, `/`, `/*path` | Static frontend assets |
+| `/api/anthropic/key`, `PUT /api/bot/config` | Credentials — must stay local, never in CRDT |
+| `GET /debug/crdt` | Debug only; HTTP fine |
+
+### Dependency Graph Summary
+
+```
+story 665 (Ed25519 auth)
+    └── Wave 1 write migrations (heartbeat, register, assign, tokens)
+        └── Wave 4 gateway config writes
+
+Wave 0 (schema extensions + read-RPC multiplexer)  [can start now, parallel]
+    └── Wave 2 read endpoint migrations            [can start now, parallel]
+    └── Wave 3 merge/test job tracking             [after Wave 0 schema]
+```
+
+**Critical path:** Story 665 → Wave 1 → Wave 4. Everything else is parallel.
@@ -0,0 +1,280 @@
+# Spike 811: Fly.io Machines API Integration for Multi-Tenant Huskies SaaS
+
+## Goal
+
+Investigate how to operate huskies as a hosted multi-tenant SaaS on
+[Fly.io Machines](https://fly.io/docs/machines/). Each tenant owns one or
+more huskies *project* containers; a fronting gateway routes traffic by
+tenant and provisions/destroys backing machines on demand. This document
+captures the architecture, the API surface we need, and the operational
+concerns that need answers before we start writing production code.
+
+## Architecture at a Glance
+
+```
+┌──────────────────────┐        ┌───────────────────────────────────────────┐
+│ Browser / CLI / Bot  │───────▶│ huskies-gateway  (Fly app: huskies-gw)    │
+└──────────────────────┘  HTTPS │   * authenticates tenant                  │
+                                │   * picks active project for tenant       │
+                                │   * proxies /mcp /ws /api to machine      │
+                                │   * provisions machines via Machines API  │
+                                └──────────────────┬────────────────────────┘
+                                                   │ .flycast (Wireguard)
+                                                   ▼
+                          ┌────────────────────────────────────────────────┐
+                          │ huskies-project-{tenant}-{project}             │
+                          │   (Fly app: huskies-projects, machine per tier)│
+                          │   * runs `huskies --port 3001 /data/project`   │
+                          │   * persistent volume mounted at /data         │
+                          │   * .huskies/ + sled CRDT live on volume       │
+                          └────────────────────────────────────────────────┘
+```
+
+Two Fly apps:
+
+* `huskies-gw` — small, always-on, replicated across regions; runs the
+  existing `huskies --gateway` binary plus a thin **Fly orchestrator**
+  layer that calls the Machines API.
+* `huskies-projects` — single Fly app holding *one machine per tenant
+  project*. Using one app (rather than one app per tenant) keeps quota
+  management, IAM, and image distribution simple while still giving us
+  per-machine networking (`{machine_id}.vm.huskies-projects.internal`)
+  and per-tenant Fly volumes.
+
+## Listed Concerns
+
+The story brief flags the following concerns. Each is addressed below.
+
+1. Machine lifecycle & API surface
+2. Tenant isolation
+3. Persistence and volumes
+4. Networking & routing
+5. Secrets and tenant credentials
+6. Cost model and idle-shutdown
+7. Wake-on-request / cold-start latency
+8. Observability and logs
+9. Disaster recovery and backups
+10. Quotas and abuse limits
+
+---
+
+### 1. Machine Lifecycle & API Surface
+
+Fly Machines is a REST API at `https://api.machines.dev/v1`. Auth is a
+single bearer token per Fly organization (`FLY_API_TOKEN`).
+
+Endpoints we will call:
+
+| Verb | Path | Use |
+|------|------|-----|
+| `POST` | `/apps/{app}/machines` | Create a new project machine |
+| `GET`  | `/apps/{app}/machines/{id}` | Poll status |
+| `GET`  | `/apps/{app}/machines/{id}/wait?state=started&timeout=30` | Block until state |
+| `POST` | `/apps/{app}/machines/{id}/start` | Wake a stopped machine |
+| `POST` | `/apps/{app}/machines/{id}/stop` | Graceful stop (idle scale-to-zero) |
+| `POST` | `/apps/{app}/machines/{id}/suspend` | Suspend RAM-to-disk (fast wake) |
+| `DELETE` | `/apps/{app}/machines/{id}?force=true` | Destroy permanently |
+| `GET`  | `/apps/{app}/machines` | Enumerate during reconcile |
+| `POST` | `/apps/{app}/volumes` | Create persistent volume for tenant |
+| `DELETE` | `/apps/{app}/volumes/{id}` | Reclaim volume when tenant deletes project |
+
+States the orchestrator observes: `created → starting → started → stopping
+→ stopped → destroying → destroyed` (`replacing` and `suspending` are
+transient).
+
+A successful provisioning sequence is:
+
+1. `POST /volumes` (one-time per tenant project, 1 GiB default).
+2. `POST /machines` with `config = { image, env, mounts: [{volume, path:"/data"}], guest, services }`.
+3. `GET /machines/{id}/wait?state=started` (~10–20 s on cold start).
+4. Cache `{tenant, project} → machine_id` in the gateway CRDT
+   (`gateway_projects` LWW-map already exists — extend the value with
+   `machine_id`, `volume_id`, `last_used_at`).
+
+Destruction:
+
+1. `POST /machines/{id}/stop` (graceful, lets sled flush).
+2. `DELETE /machines/{id}?force=true`.
+3. Optionally `DELETE /volumes/{id}` (only when tenant explicitly deletes
+   the project; idle stop must **never** delete volumes).
+
+### 2. Tenant Isolation
+
+* **Filesystem:** each machine has its own ephemeral root and its own
+  Fly volume mounted at `/data`. Volumes are not shareable across
+  machines, so tenants cannot read each other's CRDT.
+* **Network:** machines on the same Fly app can reach each other via
+  6PN private networking. We must explicitly *not* expose the project
+  server externally; only the gateway holds a public IP. Project
+  machines bind to `[::]:3001` and rely on `.flycast` private routing.
+* **Credentials:** project machines never see the gateway's
+  `FLY_API_TOKEN`. Tenant-supplied secrets (Anthropic key, Matrix
+  password, etc.) are stored as Fly secrets *scoped to the machine* via
+  the `secrets` field at create time, encrypted at rest by Fly.
+* **CPU/RAM:** `guest = { cpu_kind: "shared", cpus: 2, memory_mb: 2048 }`
+  is a sensible default; larger tenants get `performance` cpus. Hard
+  caps prevent a runaway agent from eating a neighbour's quota.
+
+### 3. Persistence and Volumes
+
+* Fly volumes are zone-pinned. We pick the volume region from the
+  tenant's primary region (`PRIMARY_REGION` env on the gateway), with
+  fallback to `iad`.
+* The volume holds:
+  * `/data/project/.huskies/` — pipeline.db (sled), bot.toml, project.toml
+  * `/data/project/.git` — repository (initially cloned at first run)
+  * `/data/project/` — working tree
+* Sled needs a clean shutdown. The orchestrator must always `stop`
+  before `destroy`. We rely on Fly's `kill_signal = "SIGTERM"` + the
+  existing huskies shutdown path in `rebuild.rs`.
+* **Snapshots:** Fly snapshots volumes daily by default (5-day
+  retention). For paid tiers we extend retention via `snapshot_retention`
+  on the volume.
+
+### 4. Networking & Routing
+
+The gateway already proxies MCP/WS/REST by active project. For SaaS we
+add tenant resolution **before** the project lookup:
+
+```
+Host: alice.huskies.app   →  tenant = alice
+  ↓
+GET /tenants/alice/projects/foo → project_id, machine_id
+  ↓
+proxy to fdaa:0:abcd:a7b:e2:1::3:3001  (or {machine_id}.vm.huskies-projects.internal:3001)
+```
+
+* Tenant resolution lives in a new `tenants` CRDT LWW-map keyed by
+  subdomain → tenant_id; reuses the existing CRDT bus.
+* Internal DNS: `<machine_id>.vm.huskies-projects.internal` resolves on
+  the private network. `<app>.flycast` is the load-balanced anycast
+  name; we prefer the explicit machine address since each tenant has
+  exactly one project machine at a time.
+* TLS terminates at the Fly edge for `*.huskies.app`. The gateway
+  receives plain HTTP/2 inside 6PN.
+
+### 5. Secrets and Tenant Credentials
+
+* `FLY_API_TOKEN` lives only on the gateway (`fly secrets set
+  FLY_API_TOKEN=… -a huskies-gw`).
+* Per-tenant `ANTHROPIC_API_KEY`, `MATRIX_PASSWORD`, etc. are POSTed by
+  the tenant in the SaaS UI, encrypted with the gateway's KMS key, and
+  passed to the machine at create time via the Machines API
+  `config.env` (Fly stores env values encrypted).
+* Rotation: changing a tenant secret means `POST /machines/{id}/update`
+  with the new env, which triggers a rolling replace. The orchestrator
+  schedules this during the tenant's idle window when possible.
+
+### 6. Cost Model and Idle-Shutdown
+
+Indicative pricing (us-east, 2026):
+
+| Machine | Hourly | Notes |
+|---------|--------|-------|
+| `shared-cpu-2x@2048` always-on | ~$0.027 | $19/mo if 24×7 |
+| `shared-cpu-2x@2048` suspended | ~$0.0009 | $0.65/mo idle |
+| Volume 1 GiB | ~$0.0002 | $0.15/mo |
+
+Multi-tenant pricing requires **suspend on idle**:
+
+* Auto-stop: in the machine config, set `services[].auto_stop_machines
+  = "suspend"` and `services[].auto_start_machines = true`. Fly's
+  internal proxy stops the machine after the configured `min_machines`
+  count is zero and there is no incoming traffic for ~5 min.
+* On the next request, the proxy auto-wakes the machine. Suspend resume
+  is ~300 ms (RAM snapshot from disk); a full `stopped → started` is
+  10–20 s. We prefer `suspend` for SaaS.
+* For long-lived agents (a coder agent running on the machine), the
+  gateway sends keepalive pings so Fly does not idle-stop while work is
+  in progress. Implementation: gateway tracks `active_agents` count for
+  each machine in CRDT; if `>0`, hit `/api/agents` once per minute.
+
+### 7. Wake-on-Request / Cold-Start Latency
+
+Three latency tiers:
+
+| Tier | Wake | When |
+|------|------|------|
+| Suspended | ~300 ms | Default for active tenants |
+| Stopped | 10–20 s | Tenants idle > 7 days |
+| Destroyed | 60–90 s (clone + boot) | Free tier reaped after 30 d |
+
+The gateway returns a `202 Accepted` with a `Retry-After: 1` header
+while wake is in progress and surfaces a "warming up" splash. The
+existing `huskies-gw` MCP code path needs an explicit wake call for
+in-flight requests because Fly's automatic wake only triggers on TCP
+SYN to a registered service port.
+
+### 8. Observability and Logs
+
+* `fly logs -a huskies-projects -i <machine_id>` streams stdout/stderr.
+  We expose this through the gateway as `GET /api/admin/tenants/{id}/logs`.
+* Each machine ships logs to the gateway via a sidecar `vector`
+  process? Decision: **no** — Fly's built-in NATS log shipper is enough
+  for v1; revisit if log volume grows.
+* Metrics: Fly auto-exports per-machine CPU/RAM/network as Prometheus
+  series scrapeable from a `huskies-metrics` machine in the same 6PN.
+  We hook into Grafana Cloud's free tier for the dashboard.
+
+### 9. Disaster Recovery and Backups
+
+* Volume snapshots (daily) cover hardware failure.
+* The CRDT replicates to the gateway over the existing `/crdt-sync`
+  WebSocket. The gateway keeps a 30-day rolling backup of each tenant's
+  CRDT in S3 (`s3://huskies-backups/{tenant}/{date}.ops`). This lets us
+  reconstruct the project tree even if a Fly volume is unrecoverable.
+* Restore flow: provision a fresh machine + volume, replay the latest
+  snapshot, then replay incremental ops from S3. Documented in a
+  follow-up runbook story.
+
+### 10. Quotas and Abuse Limits
+
+* Per-tenant: max 2 concurrent agents, max 8 GiB volume, max 4 CPU,
+  max 200 OAuth-paid model dollars per month. Enforced in the gateway
+  before calling the Machines API. Over-quota → `429 Too Many Requests`
+  with a Stripe upsell page.
+* Per-Fly-app: Fly soft-limits 1000 machines per app. At scale we
+  shard tenants across `huskies-projects-{0..9}` apps using
+  `consistent_hash(tenant_id)`.
+* Abuse: every tenant signs up with a verified email + Stripe card.
+  Free tier capped at 1 project, suspended after 7 days idle, destroyed
+  after 30 days idle.
+
+---
+
+## Decisions
+
+| Decision | Choice | Rejected alternative |
+|----------|--------|----------------------|
+| Apps topology | **Single `huskies-projects` app, one machine per tenant** | One app per tenant: clean isolation, but blows out Fly app quotas and complicates IAM |
+| Idle strategy | **Suspend, not stop** | Stop: cheaper but 20 s cold start is poor UX for chat |
+| Secrets path | **Machine env via Machines API at create time** | Fly app-level secrets: shared across all tenant machines, leaks across tenants |
+| State storage | **Per-tenant Fly volume holding sled + git** | Object storage only: would require rewriting sled backend |
+| Tenant resolution | **Subdomain → CRDT `tenants` LWW-map** | Path prefix routing: harder to issue per-tenant TLS, breaks browser cookies |
+| Volume retention | **Never delete on idle stop; only on explicit project deletion** | Auto-delete after N days idle: too easy to lose user data |
+
+## Open Questions
+
+1. How do we hand off long-running coder agents during a Fly host
+   evacuation (machine replace event)? Suspend won't survive a host
+   reboot; we may need a "draining" hook that finishes the current AC
+   and commits before allowing replacement.
+2. Should the gateway also live as Fly machines (auto-scale) or stay
+   as Fly app v1 with replicas? Probably the former for global routing,
+   but that's a separate spike.
+3. Billing surfaces: do we pass through Fly's per-machine cost to the
+   tenant, or amortize it into a flat per-project price? Product call.
+4. Outbound network egress (model API calls, git pushes) is metered by
+   Fly. At Claude Opus rates, model API egress dwarfs everything else,
+   so this is a rounding error — confirm at 100-tenant scale.
+
+## Proof-of-Concept Script
+
+A working sketch lives at
+[`fly_multitenant_poc.sh`](./fly_multitenant_poc.sh). It demonstrates
+end-to-end: read `FLY_API_TOKEN`, create a volume, create a machine
+attached to it, wait until started, stop, and destroy. The script is
+runnable but is **not** what production code looks like — production
+will translate these calls into Rust against a typed `flyio_machines`
+client crate, called from a new `server::service::cloud::fly`
+module that the gateway invokes on tenant signup.
@@ -0,0 +1,241 @@
+# Spike 814: Chat-Driven Update Command for Multi-Project Gateway
+
+## 1. Problem Statement
+
+In a multi-project gateway deployment (Docker Compose or similar), each project runs as its own container.
+Today, updating a project container requires direct operator access to the host — `docker pull`, `docker compose up -d <project>`, or equivalent.
+There is no way to trigger an update from chat.
+
+This spike designs a `update` bot command that:
+- Can be typed in the Matrix/Slack/Discord chat room.
+- Pulls the latest image (or rebuilds from source) for one or all project containers managed by the gateway.
+- Reports progress and outcome back to the room.
+- Supports rollback when a container fails to start cleanly.
+
+---
+
+## 2. Command Surface
+
+### Basic syntax
+
+```
+update [<project>|all] [--rollback]
+```
+
+| Invocation | Effect |
+|-----------|--------|
+| `update huskies` | Update and restart the `huskies` container. |
+| `update all` | Update every registered project container, one at a time. |
+| `update` (no args) | Same as `update all`. |
+| `update huskies --rollback` | Roll back `huskies` to its previous image tag. |
+
+### Progress feedback
+
+The bot posts incremental updates to the room (editing the same message where the platform supports it):
+
+```
+[huskies] Pulling image…  ⏳
+[huskies] Image pulled (sha256:abc123). Stopping container…
+[huskies] Container stopped. Starting new container…
+[huskies] Health check passed ✅ (2 s)
+```
+
+On failure:
+```
+[huskies] Health check failed after 30 s ❌
+[huskies] Rolling back to previous image (sha256:def456)…
+[huskies] Rollback complete ✅
+```
+
+### Error cases
+
+| Condition | Response |
+|-----------|----------|
+| Unknown project name | `Unknown project 'foo'. Known projects: huskies, robot-studio` |
+| No Docker socket access | `Update not available: Docker socket not mounted` |
+| Rollback with no previous image | `No previous image recorded for 'huskies'; cannot roll back` |
+| Project container not managed by Docker | `'huskies' is not a container-managed project; rebuild it manually` |
+
+---
+
+## 3. Auth
+
+### 3.1 Threat model
+
+The update command triggers container replacement — a privileged operation equivalent to `docker compose up -d`.
+An unauthenticated attacker who can send a message to the bot room could force a rolling restart or roll back a working container.
+
+### 3.2 Proposed approach: room + role guard
+
+**Layer 1 — Room restriction.**
+The update command is only accepted in a designated *ops room*, configured in `bot.toml` (or `projects.toml`):
+
+```toml
+[gateway.ops_room]
+room_id = "!abc123:homeserver.example.com"
+```
+
+Messages from other rooms are rejected with: `The update command is only available in the ops room.`
+
+**Layer 2 — Sender role check (Matrix/Slack).**
+The bot checks the sender's power level (Matrix) or admin status (Slack/Discord).
+Only users with power level ≥ 50 (moderator) on Matrix, or workspace admin on Slack, may issue `update`.
+Unapproved senders receive: `You do not have permission to issue update commands.`
+
+**Layer 3 — Confirmation prompt for destructive operations.**
+`update all` affects every project.
+The bot responds with a confirmation challenge:
+
+```
+This will restart all 3 project containers. Reply `yes` within 60 s to confirm, or `no` to cancel.
+```
+
+Single-project updates (`update huskies`) do **not** require confirmation — they are already scoped.
+
+### 3.3 Future: Ed25519 operator token
+
+When story 665 (Ed25519 auth) lands, the gateway's node identity keypair can sign an operator token.
+The bot verifies the token against the node's public key before acting.
+This removes the room/role dependency and allows the command to be issued programmatically
+(e.g. from a CI pipeline via MCP).
+
+For now the room + role guard is sufficient.
+
+---
+
+## 4. Rollout Approach
+
+### 4.1 Docker-managed containers (primary path)
+
+The gateway process has access to the Docker socket (mounted as a volume at `/var/run/docker.sock`).
+The update sequence for a single project:
+
+1. **Record current image** — read the running container's image digest (store in gateway's `update_history` LWW-map in CRDT, keyed by project name).
+2. **Pull new image** — `docker pull <image>` (or the compose-file equivalent tag).
+3. **Drain connections** — gateway marks the project as `updating`; new proxy requests return 503 with a `Retry-After: 5` header; in-flight requests are allowed to complete (30 s grace window).
+4. **Stop old container** — `docker stop --time=30 <container_name>`.
+5. **Start new container** — `docker start <container_name>` (or `docker compose up -d <service>`).
+6. **Health check** — poll the project's `/health` endpoint until 200 OK or 30 s timeout.
+7. **Restore routing** — remove the `updating` flag; proxy resumes normal operation.
+
+Steps 1–7 are serialised per project. When `update all` is used, projects are updated **one at a time** (not in parallel) to limit blast radius.
+
+### 4.2 Source-rebuild path (non-Docker / dev mode)
+
+When Docker is not available (the gateway binary is running directly on the host, not in a container),
+the update command falls back to the existing `rebuild_and_restart` flow (`server/src/rebuild.rs`):
+`cargo build` → re-exec.
+This path cannot update individual projects independently — it rebuilds the gateway itself.
+
+### 4.3 Gateway state during update
+
+```
+normal → updating → (success) normal
+                  → (failure) rolling_back → normal
+```
+
+The CRDT `gateway_config` collection gains two new LWW fields per project:
+
+| Field | Type | Purpose |
+|-------|------|---------|
+| `update_state` | `"idle" \| "updating" \| "rolling_back"` | Current update lifecycle stage |
+| `update_started_at` | `u64` (unix ms) | When the update was triggered |
+| `previous_image` | `string` | Image digest before the most recent update |
+| `current_image` | `string` | Image digest currently running |
+
+These fields are replicated to all nodes so that other gateway instances and headless agents
+can observe update progress without polling HTTP.
+
+---
+
+## 5. Rollback Approach
+
+### 5.1 Automatic rollback
+
+If the health check in step 6 (§4.1) times out or returns a non-200 status, the gateway automatically:
+
+1. Logs the failure: `[update] health check failed for huskies after 30 s`.
+2. Posts to the ops room: `Health check failed. Rolling back…`.
+3. Runs `docker stop` on the new container.
+4. Pulls and starts the previous image digest (stored in `previous_image`).
+5. Re-runs the health check on the rolled-back container.
+6. Reports outcome to the room.
+
+If the rollback health check also fails, the bot reports:
+```
+Rollback failed. Manual intervention required. Previous image: sha256:def456
+```
+and sets `update_state = "error"` in the CRDT. The ops room is notified; no further automatic action is taken.
+
+### 5.2 Manual rollback
+
+An operator can issue `update huskies --rollback` at any time when the project is in `idle` state.
+The command replays steps 3–7 of §4.1 with `previous_image` substituted for the target image.
+`previous_image` is overwritten with the image that was displaced, so repeated rollbacks alternate between two images.
+
+### 5.3 Rollback unavailability
+
+Rollback is unavailable when:
+- No `previous_image` is recorded (first-ever update on this installation).
+- `update_state` is already `"updating"` or `"rolling_back"` (only one concurrent update per project).
+
+---
+
+## 6. Implementation Sketch
+
+### 6.1 New files
+
+| Path | Purpose |
+|------|---------|
+| `server/src/chat/commands/update.rs` | Synchronous `handle_update` stub (returns `None` — async, like `rebuild`) |
+| `server/src/service/gateway/update.rs` | Core update/rollback logic; calls Docker API or falls back to `rebuild.rs` |
+| `server/src/service/gateway/docker.rs` | Thin wrapper around Docker socket HTTP API (`/containers/:id/start` etc.) |
+
+### 6.2 New CRDT fields
+
+Extend the `gateway_config` CRDT document (already exists per Spike 679 §6) with:
+- `projects.<name>.update_state` (LWW string)
+- `projects.<name>.update_started_at` (LWW u64)
+- `projects.<name>.previous_image` (LWW string)
+- `projects.<name>.current_image` (LWW string)
+
+### 6.3 Gateway HTTP changes
+
+Add one endpoint for the Docker-fallback check:
+
+```
+GET /gateway/update/available
+→ {"available": true, "mode": "docker"} | {"available": true, "mode": "rebuild"} | {"available": false}
+```
+
+The frontend can use this to show/hide an "Update" button in the gateway project list.
+
+### 6.4 Async dispatch
+
+`update` is an async command (like `rebuild`, `htop`, `start`).
+The command keyword is detected in `on_room_message` before `try_handle_command` is invoked.
+The handler spawns a `tokio::spawn` task, posts incremental updates via the existing transport's `send_message` / `edit_message` API, and returns.
+
+---
+
+## 7. Open Questions
+
+| # | Question | Notes |
+|---|----------|-------|
+| 1 | Should the Docker socket be mounted in the gateway container by default? | Security trade-off: socket access = container escape risk. Alternative: `docker exec` via a sidecar. |
+| 2 | Should `update all` use a sequential or rolling strategy? | Sequential is safer; rolling is faster. Sequential chosen for v1. |
+| 3 | How do we handle projects not managed by Docker (e.g. running on bare metal)? | Fallback to `rebuild` covers the gateway itself; project-specific fallback is out of scope for v1. |
+| 4 | Should the confirmation challenge expire? | Yes — 60 s timeout, configurable in `bot.toml`. |
+| 5 | Should update history be persisted beyond CRDT (i.e. across full gateway restarts)? | CRDT persists to SQLite, so yes, as long as the CRDT DB survives the restart. |
+| 6 | Multi-gateway HA: which node triggers the actual Docker call? | The node that owns the Docker socket. CRDT `update_state` prevents double-triggering. |
+
+---
+
+## 8. Dependencies
+
+| Story / Spike | Dependency type |
+|--------------|----------------|
+| Spike 679 (HTTP → CRDT bus) | Soft — `gateway_config` LWW collection needed for update state; can stub without it |
+| Story 665 (Ed25519 auth) | Soft — operator token auth is a future hardening step; room+role guard suffices for v1 |
+| `server/src/rebuild.rs` | Direct — reuse `rebuild_and_restart` for the non-Docker path |
+| `server/src/gateway_relay.rs` | Indirect — update state changes should trigger relay events to connected frontends |
@@ -32,94 +32,141 @@ website/             — Static marketing/docs site

 ## Source Map

-### Core
+One row per directory or top-level file. Descriptions are pulled from the module's `//!` doc-comment where present. **Use this to know where to look — do not re-discover the codebase via grep.**

-| File | Description |
-|------|-------------|
-| `server/src/main.rs` | Entry point, CLI argument parsing, and server startup |
-| `server/src/config.rs` | Parses `project.toml` for agents, components, and server settings |
-| `server/src/state.rs` | Global mutable session state (project root, cancellation) |
-| `server/src/store.rs` | JSON-backed persistent key-value store for settings |
-| `server/src/gateway.rs` | Multi-project gateway mode (MCP proxy, project switching, agent registration) |
+### Top-level backend files (`server/src/`)

-### Agents
+| File | Purpose |
+|------|---------|
+| `server/src/agent_log.rs` | Agent log persistence — reads and writes JSONL agent event logs to disk. |
+| `server/src/agent_mode.rs` | Headless build-agent mode for distributed, rendezvous-based story processing. |
+| `server/src/cli.rs` | Command-line argument parsing for the huskies binary. |
+| `server/src/crdt_wire.rs` | CRDT wire codec — serialization format for `SignedOp` sync messages between nodes. |
+| `server/src/gateway.rs` | Multi-project gateway — entrypoint wiring and route tree.  When `huskies --gateway` is used, the server starts in gateway mode. B… |
+| `server/src/gateway_relay.rs` | Gateway relay task — pushes project status events to the gateway via WebSocket.  When `gateway_url` is configured in `project.tom… |
+| `server/src/log_buffer.rs` | Bounded in-memory ring buffer for server log output.  Use the [`slog!`] macro (INFO), [`slog_warn!`] (WARN), or [`slog_error!`] (… |
+| `server/src/main.rs` | Huskies server — entry point, CLI argument parsing, and server startup. |
+| `server/src/mesh.rs` | Peer mesh discovery — supplementary CRDT sync connections between build agents.  When mesh discovery is enabled, a build agent pe… |
+| `server/src/node_identity.rs` | Node identity — Ed25519 keypair foundation for distributed huskies.  Each huskies node has a stable identity derived from an Ed25… |
+| `server/src/rebuild.rs` | Server rebuild and restart logic shared between the MCP tool and Matrix bot command. |
+| `server/src/services.rs` | Shared services bundle — common state threaded through HTTP handlers and chat transports.  `Services` bundles the fields that eve… |
+| `server/src/state.rs` | Session state — global mutable state shared across the server (project root, cancellation). |
+| `server/src/store.rs` | Key-value store — JSON-backed persistent storage for user settings and preferences. |
+| `server/src/workflow.rs` | Workflow module: test result tracking and acceptance evaluation. |

-| File | Description |
-|------|-------------|
-| `server/src/agents/mod.rs` | Types, configuration, and orchestration for coding agents |
-| `server/src/agents/gates.rs` | Runs test suites and validation scripts in agent worktrees |
-| `server/src/agents/lifecycle.rs` | File creation, archival, and stage transitions for pipeline items |
-| `server/src/agents/merge.rs` | Rebases agent work onto master and runs post-merge validation |
-| `server/src/agents/pty.rs` | Spawns agent processes in pseudo-terminals and streams output |
-| `server/src/agents/token_usage.rs` | Persists per-agent token consumption records to disk |
-| `server/src/agent_log.rs` | Reads and writes JSONL agent event logs to disk |
-| `server/src/agent_mode.rs` | Headless build-agent mode for distributed story processing |
+### Backend modules (`server/src/`)

-### Agent Pool
+| Path | Purpose |
+|------|---------|
+| `server/src/` |  |
+| `server/src/agents/` | Agent subsystem — types, configuration, and orchestration for coding agents. |
+| `server/src/agents/merge/` | Merge operations — rebases agent work onto master and runs post-merge validation. |
+| `server/src/agents/merge/squash/` | Squash-merge orchestration: rebase agent work onto master and run post-merge gates. |
+| `server/src/agents/pool/` | Agent pool — manages the set of active agents across all pipeline stages. |
+| `server/src/agents/pool/auto_assign/` | Auto-assign submodules: wires focused sub-files and re-exports public items. |
+| `server/src/agents/pool/auto_assign/watchdog/` | Watchdog task: detects orphaned agents, enforces turn/budget limits, and triggers auto-assign. |
+| `server/src/agents/pool/auto_assign/watchdog/tests/` | Shared test helpers for the watchdog module. |
+| `server/src/agents/pool/pipeline/` | Pipeline operations — stage advancement, completion handling, and merge orchestration. |
+| `server/src/agents/pool/pipeline/advance/` | Pipeline advance — moves stories forward through pipeline stages after agent completion. |
+| `server/src/agents/pool/pipeline/completion/` | Agent completion handling — processes exit results and triggers pipeline advancement. |
+| `server/src/agents/pool/start/` | Agent start — spawns a new agent process in a worktree for a given story. |
+| `server/src/agents/runtime/` | Agent runtimes — pluggable backends (Claude Code, Gemini, OpenAI) for running agents. |
+| `server/src/chat/` | Transport abstraction for chat platforms.  The [`ChatTransport`] trait defines a platform-agnostic interface for sending and edit… |
+| `server/src/chat/commands/` | Bot-level command registry shared by all chat transports.  Commands registered here are handled directly by the bot without invok… |
+| `server/src/chat/transport/` | Chat transports — pluggable backends (Matrix, Slack, WhatsApp, Discord) for bot messaging. |
+| `server/src/chat/transport/discord/` | Discord Bot integration.  Provides: - [`DiscordTransport`] — a [`ChatTransport`] that sends messages via the Discord REST API (`/… |
+| `server/src/chat/transport/matrix/` | Matrix bot integration for Story Kit.  When a `.huskies/bot.toml` file is present with `enabled = true`, the server spawns a Matr… |
+| `server/src/chat/transport/matrix/bot/` | Matrix bot — sub-modules for the Matrix chat bot implementation. |
+| `server/src/chat/transport/matrix/bot/messages/` | Matrix message handler — processes incoming room messages and dispatches commands. |
+| `server/src/chat/transport/matrix/config/` | Matrix transport configuration — deserialization of `bot.toml` Matrix settings. |
+| `server/src/chat/transport/slack/` | Slack Bot API integration.  Provides: - [`SlackTransport`] — a [`ChatTransport`] that sends messages via the Slack Web API (`api.… |
+| `server/src/chat/transport/slack/commands/` | Slack incoming message dispatch and slash command handling. |
+| `server/src/chat/transport/whatsapp/` | WhatsApp Business API integration.  Provides: - [`WhatsAppTransport`] — a [`ChatTransport`] that sends messages via the Meta Grap… |
+| `server/src/chat/transport/whatsapp/commands/` | WhatsApp command handling — processes incoming WhatsApp messages as bot commands. |
+| `server/src/config/` | Project configuration — parses `project.toml` for agents, components, and server settings. |
+| `server/src/crdt_snapshot/` | CRDT snapshot compaction with cross-node coordination.  This module implements full CRDT state snapshots for compacting the op jo… |
+| `server/src/crdt_state/` | CRDT state layer — manages pipeline state as a conflict-free replicated document backed by SQLite.  The CRDT document is the prim… |
+| `server/src/crdt_sync/` | CRDT sync — WebSocket-based replication of pipeline state between huskies nodes. WebSocket-based CRDT sync layer for replicating… |
+| `server/src/crdt_sync/server/` | Server-side `/crdt-sync` WebSocket handler. |
+| `server/src/db/` | SQLite storage layer — content store, shadow writes, and CRDT op persistence. |
+| `server/src/http/` | HTTP server — module declarations for all REST, MCP, WebSocket, and SSE endpoints. |
+| `server/src/http/agents/` | HTTP agent endpoints — thin adapters over `service::agents`.  Each handler: extracts payload → calls `service::agents::X` → shape… |
+| `server/src/http/gateway/` | Gateway HTTP handlers — thin transport shells for the gateway service.  Each handler calls `service::gateway::*` for business log… |
+| `server/src/http/mcp/` | HTTP MCP server module. |
+| `server/src/http/mcp/agent_tools/` | MCP agent tools — start, stop, wait, list, and inspect agents via MCP. |
+| `server/src/http/mcp/diagnostics/` | MCP diagnostic tools — server logs, CRDT dump, version, line counting, story movement. |
+| `server/src/http/mcp/shell_tools/` | MCP shell tools — run commands, execute tests, and stream output via MCP.  This file is a thin adapter: it deserialises MCP paylo… |
+| `server/src/http/mcp/story_tools/` | MCP story tools — create, update, move, and manage stories, bugs, refactors, and spikes via MCP.  This module is a thin adapter:… |
+| `server/src/http/mcp/story_tools/story/` | Story creation, listing, update, and lifecycle MCP tools. |
+| `server/src/http/mcp/tools_list/` | `tools/list` MCP method — returns the static schema for every tool the server exposes. |
+| `server/src/http/workflow/` | Workflow helpers — shared story/bug file operations used by HTTP and MCP handlers. |
+| `server/src/http/workflow/story_ops/` | Story operations — creates, updates, and manages acceptance criteria in story files. |
+| `server/src/io/` | I/O subsystem — filesystem, shell, search, onboarding, and story metadata operations. |
+| `server/src/io/fs/` | Filesystem I/O — module declarations and re-exports for file operations. |
+| `server/src/io/fs/scaffold/` | Project scaffolding — creates the `.huskies/` directory structure and default files. |
+| `server/src/io/fs/scaffold/detect/` | Stack detection — inspect the project root for marker files and emit TOML `[[component]]` entries plus `script/build\|lint\|test`… |
+| `server/src/io/watcher/` | Filesystem watcher for `.huskies/project.toml` and `.huskies/agents.toml`.  Watches config files for changes and broadcasts a [`W… |
+| `server/src/llm/` | LLM subsystem — chat orchestration, prompts, OAuth, and provider integrations. |
+| `server/src/llm/chat/` | LLM chat — orchestrates multi-turn conversations with tool-calling LLM providers. |
+| `server/src/llm/providers/` | LLM providers — module declarations for Anthropic, Claude Code, and Ollama backends. |
+| `server/src/llm/providers/claude_code/` | Claude Code provider — runs Claude Code CLI in a PTY and parses structured output. |
+| `server/src/pipeline_state/` | Typed pipeline state machine (story 520).  Replaces the stringly-typed CRDT views with strict Rust enums so that impossible state… |
+| `server/src/service/` | Service layer — domain logic extracted from HTTP handlers.  Each sub-module follows the conventions documented in `docs/architect… |
+| `server/src/service/agents/` | Agent service — public API for the agent domain.  This module orchestrates calls to `io.rs` (side effects) and the pure topic mod… |
+| `server/src/service/anthropic/` | Anthropic service — public API for Anthropic API-key management and model listing.  Exposes functions to check, store, and use th… |
+| `server/src/service/bot_command/` | Bot command service — domain logic for dispatching slash commands.  Extracted from `http/bot_command.rs` so that argument parsing… |
+| `server/src/service/common/` | Shared pure helpers used by multiple service modules.  All sub-modules here are pure (no I/O, no side effects). Any helper that d… |
+| `server/src/service/diagnostics/` | Diagnostics service — server logs, CRDT dump, permission management, and story movement.  Extracted from `http/mcp/diagnostics.rs… |
+| `server/src/service/events/` | Events service — public API for the events domain.  This module re-exports the pure buffer types from `buffer.rs` and the side-ef… |
+| `server/src/service/file_io/` | File I/O service — public API for filesystem and shell operations.  Exposes functions for reading, writing, and listing files sco… |
+| `server/src/service/gateway/` | Gateway service — domain logic for the multi-project gateway.  Follows the conventions in `docs/architecture/service-modules.md`:… |
+| `server/src/service/git_ops/` | Git operations service — worktree path validation and git command execution.  Extracted from `http/mcp/git_tools.rs` following th… |
+| `server/src/service/merge/` | Merge service — domain logic for merging agent work to master.  Extracted from `http/mcp/merge_tools.rs` following the convention… |
+| `server/src/service/notifications/` | Notifications service — pipeline-event fan-out to chat transports.  Subscribes to [`WatcherEvent`] broadcasts and posts human-rea… |
+| `server/src/service/notifications/io/` | I/O side of the notifications service.  This is the **only** file inside `service/notifications/` that may perform side effects:… |
+| `server/src/service/oauth/` | OAuth service — domain logic for the Anthropic OAuth 2.0 PKCE flow.  Extracts business logic from `http/oauth.rs` following the c… |
+| `server/src/service/pipeline/` | Pipeline service — shared pipeline-domain logic.  Contains pure functions for parsing and aggregating pipeline status data. Used… |
+| `server/src/service/project/` | Project service — public API for the project domain.  Exposes functions to open, close, query, and manage known projects. HTTP ha… |
+| `server/src/service/qa/` | QA service — domain logic for requesting, approving, and rejecting QA reviews.  Extracted from `http/mcp/qa_tools.rs` following t… |
+| `server/src/service/settings/` | Settings service — domain logic for project settings and editor configuration.  Extracts business logic from `http/settings.rs` f… |
+| `server/src/service/shell/` | Shell service — command safety, path sandboxing, and output helpers.  Extracted from `http/mcp/shell_tools.rs` following the conv… |
+| `server/src/service/status/` | Status broadcaster — unified pipeline-event fan-out for all consumers.  [`StatusBroadcaster`] lives on the [`crate::services::Ser… |
+| `server/src/service/story/` | Story service — domain logic for creating, updating, and managing pipeline work items.  Extracted from `http/mcp/story_tools.rs`… |
+| `server/src/service/timer/` | Timer service — deferred agent start via one-shot timers.  Provides [`TimerStore`] for persisting timers to `.huskies/timers.json… |
+| `server/src/service/wizard/` | Wizard service — domain logic for the multi-step project setup wizard.  Follows the conventions from `docs/architecture/service-m… |
+| `server/src/service/ws/` | WebSocket service — domain logic for real-time pipeline updates, chat, and permission prompts.  This module extracts the business… |
+| `server/src/worktree/` | Git worktree management — creates, lists, and removes worktrees for agent isolation. |

-| File | Description |
-|------|-------------|
-| `server/src/agents/pool/mod.rs` | Manages the set of active agents across all pipeline stages |
-| `server/src/agents/pool/start.rs` | Spawns a new agent process in a worktree for a story |
-| `server/src/agents/pool/stop.rs` | Terminates a running agent while preserving its worktree |
-| `server/src/agents/pool/pipeline/advance.rs` | Moves stories forward through pipeline stages |
-| `server/src/agents/pool/pipeline/completion.rs` | Processes exit results and triggers pipeline advancement |
-| `server/src/agents/pool/pipeline/merge.rs` | Orchestrates the merge-to-master flow for completed stories |
-| `server/src/agents/pool/auto_assign/auto_assign.rs` | Scans pipeline stages and dispatches agents to unassigned stories |
+### Crates

-### CRDT & Database
+| Path | Purpose |
+|------|---------|
+| `crates/bft-json-crdt/benches/` |  |
+| `crates/bft-json-crdt/bft-crdt-derive/src/` |  |
+| `crates/bft-json-crdt/src/` |  |
+| `crates/bft-json-crdt/tests/` |  |

-| File | Description |
-|------|-------------|
-| `server/src/crdt_state.rs` | Pipeline state as a conflict-free replicated document backed by SQLite |
-| `server/src/crdt_sync.rs` | WebSocket-based replication of pipeline state between nodes |
-| `server/src/pipeline_state.rs` | Typed pipeline state machine |
-| `server/src/db/mod.rs` | Content store, shadow writes, and CRDT op persistence |
+### Frontend (`frontend/src/`)

-### HTTP — MCP Tools (the tools agents call)
+| Path | Purpose |
+|------|---------|
+| `frontend/src/` |  |
+| `frontend/src/api/` |  |
+| `frontend/src/components/` |  |
+| `frontend/src/components/selection/` |  |
+| `frontend/src/hooks/` |  |
+| `frontend/src/utils/` |  |

-| File | Description |
-|------|-------------|
-| `server/src/http/mcp/mod.rs` | MCP endpoint dispatching tool calls |
-| `server/src/http/mcp/agent_tools.rs` | Start, stop, wait, list, and inspect agents |
-| `server/src/http/mcp/git_tools.rs` | Status, diff, add, commit, and log on agent worktrees |
-| `server/src/http/mcp/merge_tools.rs` | Merge agent work to master and report failures |
-| `server/src/http/mcp/shell_tools.rs` | Run commands, execute tests, and stream output |
-| `server/src/http/mcp/story_tools.rs` | Create, update, move, and manage stories/bugs/refactors |
-| `server/src/http/mcp/diagnostics.rs` | Server logs, CRDT dump, version, and story movement helpers |
+### Canonical patterns (copy these when adding new things)
+- **New CRDT LWW-map collection:** see `server/src/crdt_state/lww_maps.rs`
+- **New read-RPC handler:** register in `server/src/crdt_sync/rpc.rs`; call from frontend via `rpcCall<T>("method.name")` from `frontend/src/api/rpc.ts`
+- **Migrate HTTP route → CRDT:** delete from `gateway.rs` / `http/*`, add op to `service/<area>/`, write through `crdt_state/`
+- **New front-matter field:** add to `StoryMetadata` and `FrontMatter` in `io/story_metadata.rs` plus a `write_<name>_in_content` helper
+- **New service module:** copy `service/agents/` structure (`mod.rs` + `io.rs` + `selection.rs`)
+- **New chat command:** add a file under `chat/commands/` and register in `chat/commands/mod.rs::dispatch_command`
+- **New auto-assigner predicate:** add to `agents/pool/auto_assign/story_checks.rs`, wire in `auto_assign/auto_assign.rs`
+- **CRDT-seeding test helper:** `crate::db::write_item_with_content(story_id, stage, content)` — do not `fs::write` to `.huskies/work/{stage}/`

-### Chat — Bot Commands
-
-| File | Description |
-|------|-------------|
-| `server/src/chat/commands/mod.rs` | Bot-level command registry shared by all transports |
-| `server/src/chat/commands/status.rs` | `status` command and pipeline status helpers |
-| `server/src/chat/commands/backlog.rs` | `backlog` command — shows only backlog-stage items |
-| `server/src/chat/commands/run_tests.rs` | `run_tests` command — run the project's test suite |
-
-### Chat — Transports
-
-| File | Description |
-|------|-------------|
-| `server/src/chat/transport/matrix/` | Matrix bot integration |
-| `server/src/chat/transport/slack/` | Slack bot integration |
-| `server/src/chat/transport/whatsapp/` | WhatsApp Business API integration |
-| `server/src/chat/transport/discord/` | Discord bot integration |
-
-### Frontend
-
-| Directory | Description |
-|-----------|-------------|
-| `frontend/src/components/` | React UI components |
-| `frontend/src/api/` | API client code (gateway, agents, etc.) |
-
-### Utilities
-
-| File | Description |
-|------|-------------|
-| `server/src/rebuild.rs` | Server rebuild and restart logic |
-| `server/src/worktree.rs` | Creates, lists, and removes git worktrees for agent isolation |
-| `server/src/io/watcher.rs` | Filesystem watcher for `.huskies/work/` and `project.toml` |

 ## Quality Gates
 All enforced by `script/test`:
@@ -0,0 +1,350 @@
+# Pipeline State Machine
+
+This document describes the huskies pipeline state machine in two halves:
+**(a)** the model that runs in production today, and **(b)** transitions, refinements,
+and corrections we have identified as needed but not yet implemented.
+
+The codebase is in a deliberate transitional state: a typed CRDT state machine
+exists at `server/src/pipeline_state.rs` (introduced by story 520) with strict Rust
+enums for every stage, archive reason, execution state, and event. It is fully
+defined and tested but **not yet called from non-test code** (`#![allow(dead_code)]`
+at the top of the module). Consumers will migrate incrementally.
+
+The model that is actually doing work is the older **filesystem-stage-string +
+front-matter-flag** model. Section (a) below documents both representations and
+the migration intent.
+
+---
+
+## (a) The current state machine
+
+### Stages (production: filesystem string; future: typed enum)
+
+| Filesystem (production) | Typed (future) | Meaning |
+|---|---|---|
+| `work/1_backlog/` | `Stage::Backlog` | Story exists, waiting for dependencies or auto-assign promotion |
+| `work/2_current/` | `Stage::Coding` | Coder agent is running (or about to) |
+| `work/3_qa/` | `Stage::Qa` | Coder finished; gates / human review running |
+| `work/4_merge/` | `Stage::Merge { feature_branch, commits_ahead: NonZeroU32 }` | Gates passed, mergemaster ready to squash |
+| `work/5_done/` | `Stage::Done { merged_at, merge_commit }` | Mergemaster squashed to master |
+| `work/6_archived/` | `Stage::Archived { archived_at, reason: ArchiveReason }` | Out of the active flow |
+
+`5_done` auto-sweeps to `6_archived` after four hours. The typed `Stage::Done`
+variant always carries the merge SHA and timestamp; `Stage::Merge`'s
+`commits_ahead: NonZeroU32` makes "Merge with nothing to merge" structurally
+impossible (eliminates bug 519).
+
+### Archive reasons (`pipeline_state.rs::ArchiveReason`)
+
+The typed model already enumerates the reasons a story can leave the active flow
+(subsumes the legacy `blocked`, `merge_failure`, and `review_hold` front-matter
+fields per story 436):
+
+- `Completed` — happy-path
+- `Abandoned` — user explicitly abandoned
+- `Superseded { by: StoryId }` — replaced by another story
+- `Blocked { reason: String }` — manually blocked, awaiting human resolution
+- `MergeFailed { reason: String }` — mergemaster gave up after retry budget
+- `ReviewHeld { reason: String }` — held for human review at user request
+
+### Per-node execution state (`pipeline_state.rs::ExecutionState`)
+
+Stage is shared/CRDT-replicated. Execution state is per-node and lives under
+each node's pubkey in the CRDT, so there are no inter-author merge conflicts:
+
+- `Idle`
+- `Pending { agent, since }` — worktree being created, agent about to start
+- `Running { agent, started_at, last_heartbeat }`
+- `RateLimited { agent, resume_at }`
+- `Completed { agent, exit_code, completed_at }`
+
+### Pipeline events (`pipeline_state.rs::PipelineEvent`)
+
+The typed model defines every event that drives a Stage transition. Each variant
+carries the data needed to construct the destination state, so a transition
+function can never accidentally land in an underspecified state:
+
+- `DepsMet` — dependencies met; promote from backlog
+- `GatesStarted` — coder starting gates
+- `GatesPassed { feature_branch, commits_ahead }`
+- `GatesFailed { reason }`
+- `QaSkipped { feature_branch, commits_ahead }` — qa-mode = "server"; skip QA, go to merge
+- `MergeSucceeded { merge_commit }`
+- `MergeFailedFinal { reason }`
+- `Accepted` — Done → Archived(Completed)
+
+### Transitions (current production = MCP verb shape)
+
+#### Backlog → Coding (a.k.a. backlog → 2_current)
+
+- **Auto path**: `AgentPool::auto_assign_available_work` calls
+  `promote_ready_backlog_stories`. A backlog story is promoted iff (a) it has
+  an explicit non-empty `depends_on` AND (b) every dep is in `5_done` or
+  `6_archived`. Stories with no `depends_on` are NOT auto-promoted — they wait
+  for human scheduling.
+  - Implemented in `server/src/agents/pool/auto_assign/auto_assign.rs::promote_ready_backlog_stories`.
+- **Manual path**: `mcp__huskies__move_story story_id=X target_stage=current`,
+  or `mcp__huskies__start_agent` (which moves the story to current as a
+  side-effect of starting an agent).
+- **Archived-dep warning**: if a dep was satisfied via `6_archived` rather than
+  `5_done` (e.g. abandoned/superseded), the auto-assigner logs a prominent
+  warning so the user can see the promotion was triggered by an archived dep.
+
+#### Coding → Qa (current → 3_qa)
+
+- Triggered when the coder agent finishes (gates start running).
+- `mcp__huskies__request_qa` is the manual verb.
+
+#### Qa → Coding (qa → current — rejection path)
+
+- `mcp__huskies__reject_qa story_id=X notes="..."` moves qa → current,
+  **clears `review_hold`**, and writes the rejection notes
+  (`agents/lifecycle.rs:210`).
+- Used when a qa agent fails or a human reviewer rejects the work.
+
+#### Qa → Merge (qa → 4_merge)
+
+- Triggered when QA gates pass. `mcp__huskies__move_story_to_merge` is the
+  dedicated verb.
+- For server-mode QA: typed-side `PipelineEvent::QaSkipped` allows going from
+  Coding → Merge directly without entering Qa.
+
+#### Merge → Done (merge → 5_done)
+
+- Mergemaster picks up a story in `4_merge/`, squashes the feature branch onto
+  master, then transitions to `5_done`.
+- `mcp__huskies__move_story_to_merge` queues; mergemaster does the actual work.
+
+#### Done → Archived(Completed) (5_done → 6_archived)
+
+- Auto-sweep after four hours, OR
+- `mcp__huskies__accept_story` (immediate manual archive).
+
+#### Any-stage → Archived(other reasons)
+
+- **Abandoned / Superseded**: today done by `mcp__huskies__move_story
+  target_stage=done` (no first-class verbs for these reasons; see (b) below).
+- **Blocked**: `blocked: true` flag in front matter is set on retry-limit
+  exceedance. `mcp__huskies__unblock_story` clears the flag and resets
+  retry_count.
+- **MergeFailed**: written to front matter when mergemaster fails; auto-assign
+  skips these stories (`has_merge_failure` check).
+- **ReviewHeld**: `review_hold: true` flag is set automatically on spike
+  completion; auto-assign skips these stories until the flag is cleared.
+
+#### Tombstone / purge
+
+- `mcp__huskies__delete_story` and `mcp__huskies__purge_story` permanently
+  remove. Purge writes a CRDT tombstone.
+
+### Auto-assign skip conditions (current production)
+
+`auto_assign_available_work` walks `2_current/`, `3_qa/`, `4_merge/` in order
+and attempts to dispatch a free agent to each unassigned story. It **skips**
+any story that:
+
+1. Has `review_hold: true` in front matter (spikes after QA, manual hold).
+2. Is `frozen` (`is_story_frozen` — pipeline advancement suspended for this story).
+3. Has `blocked: true` (retry limit exceeded; cleared via `unblock_story`).
+4. Has unmet `depends_on` dependencies.
+5. (Merge stage only) Has a recorded merge failure (`has_merge_failure`).
+6. (Merge stage only) Has an empty diff on the feature branch — auto-writes
+   `merge_failure` and blocks immediately rather than wasting a mergemaster turn.
+
+### Front-matter fields that gate transitions
+
+| Field | Type | Effect |
+|---|---|---|
+| `depends_on` | list of story IDs | Blocks backlog → current promotion until all deps are in 5_done or 6_archived |
+| `agent` | string (e.g. `coder-opus`) | Pins the preferred agent for next assignment |
+| `review_hold` | bool | Auto-assign skips this story; cleared by `reject_qa` or manual unblock |
+| `blocked` | bool | Auto-assign skips this story; cleared by `unblock_story` |
+| `frozen` | bool | Auto-assign skips this story; manual unfreeze required |
+| `merge_failure` | string | Auto-assign skips merge-stage agents on this story |
+| `retry_count` | int | Local-only (not in CRDT); incremented by orchestrator |
+
+### Spike-specific behavior
+
+Per the typical lifecycle, a spike runs through `current → qa` like any work
+item, then **stops** in qa awaiting human review (`spikes skip merge`). This
+is implemented via `review_hold: true` being written automatically when a
+spike's qa gates pass. The user accepts (move qa → done) or rejects (move
+qa → current). Spikes do NOT auto-promote to merge.
+
+### Mergemaster lifecycle
+
+The mergemaster agent only runs against stories in `4_merge/`. It:
+
+1. Verifies the feature branch has commits (or the story is auto-blocked).
+2. Squashes the feature branch onto master with a deterministic commit message.
+3. Transitions the story to `5_done` with `merged_at` and `merge_commit`.
+4. On failure beyond the retry budget, writes `merge_failure` and blocks the
+   story (auto-assign then skips it).
+
+### Agent terminated with committed work (bug 645 recovery path)
+
+When a coder agent terminates abnormally (e.g. the Claude Code CLI's
+`output.write(&bytes).is_ok()` PTY write assertion fires mid-session), the
+server-owned completion path detects the crash and checks for surviving work:
+
+1. If the worktree is dirty but has commits ahead of master, reset the
+   uncommitted files (`git checkout . && git clean -fd`) and run gates
+   against the committed code.
+2. If gates still fail but `git log master..HEAD` shows commits and
+   `cargo check` passes, **advance to QA** instead of entering the
+   retry/block path.  This is the "work survived" check, implemented in
+   `server/src/agents/pool/pipeline/advance.rs`.
+3. Agents that die WITHOUT committed work (no commits ahead of master)
+   still follow the existing retry → block path unchanged.
+
+This prevents false-positive blocking of stories where the agent completed
+meaningful work before crashing.
+
+### Watchdog (current production)
+
+The "watchdog" at `server/src/agents/pool/auto_assign/watchdog.rs` runs every
+30 ticks of the unified background loop. Today it does **one** thing: detect
+orphaned agents whose tokio task is `is_finished()` but whose status is still
+`Running` or `Pending`, and mark them `Failed` with an `AgentEvent::Error`
+emission. Bug 624 (now merged) extends it to also enforce `max_turns` and
+`max_budget_usd` limits — an agent over either limit is killed via the
+existing `kill_child_for_key` path and recorded with a typed termination
+reason.
+
+---
+
+## (b) Transitions and behaviors that don't yet exist (or are only partially wired)
+
+### Migration of consumers off legacy strings to typed `Stage` enum
+
+The biggest outstanding piece. `pipeline_state.rs` is `#![allow(dead_code)]`.
+Every consumer (auto-assign, mergemaster, MCP tools, chat commands) still
+works with stage strings (`"2_current"`, `"4_merge"`) and front-matter flags.
+The projection layer (`TryFrom<PipelineItemView> for PipelineItem` and
+friends) exists but isn't called outside tests. Migration is intentionally
+incremental.
+
+**Opportunity**: pick a leaf consumer (e.g. one MCP tool that reads the stage
+string) and migrate it to read `Stage` instead. Pattern repeats outward until
+all consumers go through the typed projection and the legacy stage-string
+code can be deleted.
+
+### First-class verbs for archive reasons
+
+`ArchiveReason` already has six variants but only `Completed` (via
+`accept_story`) and `Blocked` (via the `blocked: true` flag) have dedicated
+MCP verbs. Today, `Abandoned`, `Superseded`, `MergeFailed`, and `ReviewHeld`
+are reached either via `move_story target_stage=done` (which doesn't carry
+the reason) or via setting front-matter flags on the live story.
+
+**Missing transitions**:
+
+- `mcp__huskies__supersede_story story_id=X by=Y` — sets stage to
+  `Archived { reason: Superseded { by: Y } }`. Today we use
+  `move_story → done`, losing the `by` reference. (Came up 2026-04-25 with
+  spike 621 → refactor 623.)
+- `mcp__huskies__abandon_story story_id=X reason="..."` — sets
+  `Archived { reason: Abandoned }`. Today done via `move_story → done` or
+  `purge_story`.
+- `mcp__huskies__hold_for_review story_id=X reason="..."` — explicitly puts
+  a story in `Archived { reason: ReviewHeld }` rather than relying on the
+  auto-set `review_hold` flag.
+
+### Type-conversion transitions
+
+Spike → story conversion is a real workflow (we do it when a spike's scope
+grows into an implementation story). Today, converting type via `update_story
+front_matter={"type": "story"}` does not bootstrap the
+`## Acceptance Criteria` section, and `add_criterion` then permanently fails
+on that story (see **bug 625** filed 2026-04-25). The `type` field passed via
+front_matter is also silently dropped — same silent-drop bug class as
+`acceptance_criteria`. The state machine should treat type conversion as a
+transition with side effects — at minimum, ensuring the AC section exists
+when transitioning to a type that requires it, and the displayed type
+reflects the new value (today the display chip is parsed from the immutable
+story_id prefix; story 578 in backlog will fix this by switching to
+numeric-only IDs).
+
+### Limit-based agent termination (turn / budget)
+
+Pre-624 master: `max_turns` and `max_budget_usd` per-agent config were read
+by the metric tool (`tool_get_agent_remaining_turns_and_budget`) but **not
+enforced** anywhere. Observed `coder-1` running 282/50 turns and $10.05/$5.00
+USD on story 623 before a human stopped it (bug 624, now merged).
+
+The bug 624 fix adds enforcement to the watchdog. The state-machine impact:
+introduces a new agent-termination path distinct from "Failed (orphan)" —
+something like `Failed(LimitExceeded { kind: Turns | Budget })`. The
+`ExecutionState` enum may want a corresponding terminal variant so it can be
+distinguished from generic `Failed`.
+
+### Pinned-agent honoring under contention
+
+When a story has `agent: coder-opus` pinned but `coder-opus` is busy, today's
+auto-assign behavior is to leave the story unassigned — but if a human stops
+the running attempt and the story sits in `current/`, auto-assign **re-grabs
+it with the default coder** rather than waiting for the pinned agent.
+Observed multiple times on 2026-04-25 with story 623: pinning `coder-opus`
+did not prevent `coder-1` (sonnet) from being auto-assigned during opus's
+busy window.
+
+**Missing behavior**: auto-assign should treat a pinned agent as a hard
+filter ("only this agent can take this story"), not a preference. Today the
+workaround is to also set `depends_on` on a phantom story, or move the story
+back to backlog and let the dependency system gate it.
+
+### Honoring the `blocked` flag (bug 559)
+
+`559_bug_mergemaster_ignores_blocked_flag_and_keeps_respawning_on_blocked_stories`
+is in backlog. Even though `blocked: true` is documented as a skip condition
+in `auto_assign_available_work`, mergemaster's spawn path apparently checks
+something different (or earlier) and respawns on blocked merge-stage stories.
+The state machine should make `Stage::Archived { reason: Blocked }` a single
+authoritative source so no consumer can incidentally bypass it.
+
+### Formal "ghost story recovery" transition
+
+The `move_story` MCP tool description mentions "recovering a ghost story by
+moving it back to current" as a valid use. Ghost stories are CRDT entries
+with no corresponding filesystem stage directory (or the inverse). Today this
+is an `update_story + move_story` ad-hoc dance. A first-class
+`recover_ghost_story` verb that reconciles the CRDT and filesystem would
+formalize the recovery path.
+
+### Operator-level visibility / observability
+
+There is no UI, CLI, or doc that shows "the state machine as a diagram." The
+typed enums are the closest thing to a canonical specification, but they
+aren't rendered anywhere a human can see at a glance: which stages exist,
+which transitions are valid, which events trigger them. A generated state
+diagram (graphviz or mermaid, dumped into this doc on each release) would
+help both new contributors and operators triaging stuck pipelines.
+
+### Review-hold cleanup verb
+
+`review_hold: true` is set automatically on spike completion. Clearing it is
+done as a side effect of `reject_qa` (which also moves the story qa →
+current) or by manually editing front matter. There is no clean "I have
+reviewed this, release the hold" verb that doesn't also move the story.
+
+### Cross-node concurrency for execution state
+
+`ExecutionState` is per-node (keyed by pubkey) so two nodes can't fight over
+who's running an agent. But there is no formal transition that says "node A
+hands the story to node B" if node A goes offline. The state machine's
+distributed semantics for this case are not yet specified.
+
+---
+
+## How to update this document
+
+Whenever you discover a transition that doesn't yet exist, or a flag that
+behaves surprisingly, add it to **section (b)** with:
+
+- A short description of the desired behavior
+- Citation of the work item or incident that surfaced it
+- Pointer to the place in `pipeline_state.rs` where it should be modeled (or
+  note "needs a new variant" if it doesn't fit any existing enum yet)
+
+When a transition from (b) ships, move it to (a) with the relevant file:line
+citations.
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+# fly_multitenant_poc.sh — Proof of concept for Spike 811.
+#
+# Demonstrates the Fly.io Machines API calls that the huskies gateway
+# will eventually make to provision and tear down a per-tenant project
+# machine. Run against a real Fly org with FLY_API_TOKEN set, or read it
+# as a commented sketch — the calls are the contract.
+#
+# This is NOT production code. Production will issue these requests
+# from Rust (see server::service::cloud::fly) with retries, structured
+# errors, and CRDT writes to record machine_id/volume_id. The shell
+# script exists so the spec is verifiable end-to-end.
+#
+# Required env:
+#   FLY_API_TOKEN   - org-scoped Fly token
+#   FLY_APP         - name of the huskies-projects Fly app (must exist)
+#   TENANT_ID       - identifier used to tag and name the machine
+#   REGION          - Fly region code, e.g. "iad" (default: iad)
+
+set -euo pipefail
+
+: "${FLY_API_TOKEN:?FLY_API_TOKEN must be set}"
+: "${FLY_APP:?FLY_APP must be set}"
+: "${TENANT_ID:?TENANT_ID must be set}"
+REGION="${REGION:-iad}"
+IMAGE="registry.fly.io/huskies-projects:latest"
+
+API="https://api.machines.dev/v1"
+AUTH=(-H "Authorization: Bearer ${FLY_API_TOKEN}" -H "Content-Type: application/json")
+
+echo "==> 1. Create a 1 GiB persistent volume for tenant ${TENANT_ID}"
+VOLUME_JSON=$(curl -sS -X POST "${API}/apps/${FLY_APP}/volumes" "${AUTH[@]}" --data @- <<EOF
+{
+  "name":   "huskies_${TENANT_ID}",
+  "region": "${REGION}",
+  "size_gb": 1
+}
+EOF
+)
+VOLUME_ID=$(echo "${VOLUME_JSON}" | jq -r .id)
+echo "    volume_id = ${VOLUME_ID}"
+
+echo "==> 2. Create a machine attached to the volume, with auto-suspend"
+MACHINE_JSON=$(curl -sS -X POST "${API}/apps/${FLY_APP}/machines" "${AUTH[@]}" --data @- <<EOF
+{
+  "name":   "huskies-${TENANT_ID}",
+  "region": "${REGION}",
+  "config": {
+    "image": "${IMAGE}",
+    "env": {
+      "TENANT_ID":      "${TENANT_ID}",
+      "HUSKIES_PORT":   "3001",
+      "PRIMARY_REGION": "${REGION}"
+    },
+    "guest": { "cpu_kind": "shared", "cpus": 2, "memory_mb": 2048 },
+    "mounts": [ { "volume": "${VOLUME_ID}", "path": "/data" } ],
+    "services": [ {
+      "ports": [
+        { "port": 443, "handlers": ["tls","http"] },
+        { "port": 80,  "handlers": ["http"] }
+      ],
+      "protocol": "tcp",
+      "internal_port": 3001,
+      "auto_stop_machines":  "suspend",
+      "auto_start_machines": true,
+      "min_machines_running": 0
+    } ],
+    "metadata": { "tenant": "${TENANT_ID}", "managed_by": "huskies-gw" },
+    "restart": { "policy": "on-failure", "max_retries": 5 }
+  }
+}
+EOF
+)
+MACHINE_ID=$(echo "${MACHINE_JSON}" | jq -r .id)
+PRIVATE_IP=$(echo "${MACHINE_JSON}" | jq -r .private_ip)
+echo "    machine_id = ${MACHINE_ID}"
+echo "    private_ip = ${PRIVATE_IP}"
+
+echo "==> 3. Wait for the machine to reach 'started' (long-poll, 60s timeout)"
+curl -sS "${API}/apps/${FLY_APP}/machines/${MACHINE_ID}/wait?state=started&timeout=60" "${AUTH[@]}" \
+  | jq -r '"    state = " + .ok'
+
+echo "    machine reachable at ${MACHINE_ID}.vm.${FLY_APP}.internal:3001"
+
+# ----- At this point the gateway would record (tenant, machine_id, volume_id)
+# ----- into the CRDT and start proxying traffic. We pause here.
+sleep 2
+
+echo "==> 4. Graceful stop (lets sled flush; idle-suspend uses the same path)"
+curl -sS -X POST "${API}/apps/${FLY_APP}/machines/${MACHINE_ID}/stop" "${AUTH[@]}" \
+  --data '{"signal":"SIGTERM","timeout":"30s"}' > /dev/null
+
+echo "==> 5. Destroy the machine"
+curl -sS -X DELETE "${API}/apps/${FLY_APP}/machines/${MACHINE_ID}?force=true" "${AUTH[@]}" > /dev/null
+echo "    machine destroyed"
+
+echo "==> 6. Reclaim the volume (only when the tenant deletes the project)"
+curl -sS -X DELETE "${API}/apps/${FLY_APP}/volumes/${VOLUME_ID}" "${AUTH[@]}" > /dev/null
+echo "    volume reclaimed"
+
+echo "==> done."
@@ -1,5 +1,5 @@
 [workspace]
-members = ["server", "crates/bft-json-crdt"]
+members = ["server", "crates/bft-json-crdt", "crates/source-map-gen"]
 resolver = "3"

 [workspace.dependencies]
@@ -15,25 +15,30 @@ ignore = "0.4.25"
 mime_guess = "2"
 notify = "8.2.0"
 poem = { version = "3", features = ["websocket", "test"] }
-poem-openapi = { version = "5", features = ["swagger-ui"] }
 portable-pty = "0.9.0"
-reqwest = { version = "0.13.2", features = ["json", "stream"] }
+reqwest = { version = "0.13.3", features = ["json", "stream"] }
 rust-embed = "8"
+ed25519-dalek = { version = "2", default-features = false, features = ["rand_core"] }
+indexmap = { version = "2.14.0", features = ["serde"] }
+rand = "0.10"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 serde_urlencoded = "0.7"
+sha1 = "0.11"
 sha2 = "0.11.0"
+hmac = "0.13"
+subtle = "2"
+base64 = "0.22"
 serde_yaml = "0.9"
 strip-ansi-escapes = "0.2"
 tempfile = "3"
 tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
-toml = "1.1.0"
-uuid = { version = "1.22.0", features = ["v4", "serde"] }
+toml = "1.1.2"
+uuid = { version = "1.23.1", features = ["v4", "serde"] }
 tokio-tungstenite = { version = "0.29.0", features = ["connect", "rustls-tls-native-roots"] }
 walkdir = "2.5.0"
 filetime = "0.2"
-matrix-sdk = { version = "0.16.0", default-features = false, features = [
-    "rustls-tls",
+matrix-sdk = { version = "0.17", default-features = false, features = [
    "sqlite",
    "e2e-encryption",
 ] }
@@ -42,6 +47,9 @@ pulldown-cmark = { version = "0.13.3", default-features = false, features = [
 ] }
 regex = "1"
 libc = "0.2"
+nutype = { version = "0.7", features = ["serde"] }
+garde = { version = "0.22", features = ["derive"] }
+ammonia = "4.1"
 sqlx = { version = "=0.9.0-alpha.1", default-features = false, features = [
    "runtime-tokio",
    "sqlite",
@@ -4,7 +4,7 @@ A story-driven development server that manages work items, spawns coding agents,

 ## Getting started with Claude Code

-1. Download the huskies binary (or build from source — see below).
+1. Download the huskies binary (or build from source — see below). Add it to your $PATH. 

 2. From your project directory, scaffold and start the server:

@@ -33,7 +33,7 @@ Huskies can be controlled via bot commands in **Matrix**, **WhatsApp**, and **Sl

 ## Prerequisites for building

- Rust (2024 edition)
+- Rust 1.93 or newer (2024 edition; MSRV is 1.93, pulled in by matrix-sdk 0.17's use of `Duration::from_mins`)
 - Node.js and npm
 - Docker (for Linux cross-compilation and container deployment)
 - `cross` (`cargo install cross`) optional, for Linux static builds. Only needed if you are building for a different architecture, e.g. if you want to build a Linux binary from a Mac.
@@ -15,21 +15,20 @@ bft = []

 [dependencies]
 bft-crdt-derive = { path = "bft-crdt-derive" }
-colored = "2.0.0"
-fastcrypto = "0.1.8"
-indexmap = { version = "2.2.6", features = ["serde"] }
-rand = "0.8.5"
-random_color = "0.6.1"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
-serde_with = "3.8.1"
-sha2 = "0.10.6"
+colored = "3"
+ed25519-dalek = { workspace = true }
+indexmap = { workspace = true, features = ["serde"] }
+rand = { workspace = true }
+random_color = "1"
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true, features = ["preserve_order"] }
+serde_with = "3"
+sha2 = { workspace = true }

 [dev-dependencies]
-criterion = { version = "0.4", features = ["html_reports"] }
-time = "0.1"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
+criterion = { version = "0.8", features = ["html_reports"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true, features = ["preserve_order"] }

 [[bench]]
 name = "speed"
@@ -1,3 +1,4 @@
+//! Benchmarks for BFT JSON CRDT operation throughput.
 use bft_json_crdt::{
    json_crdt::JsonValue, keypair::make_author, list_crdt::ListCrdt, op::Op, op::ROOT_ID,
 };
@@ -32,7 +33,7 @@ fn bench_insert_many_agents_conflicts(c: &mut Criterion) {
    c.bench_function("bench insert many agents conflicts", |b| {
        b.iter(|| {
            const N: u8 = 10;
-            let mut rng = rand::thread_rng();
+            let mut rng = rand::rng();
            let mut crdts: Vec<ListCrdt<i64>> = Vec::with_capacity(N as usize);
            let mut logs: Vec<Op<JsonValue>> = Vec::new();
            for i in 0..N {
@@ -10,6 +10,6 @@ proc-macro = true
 [dependencies]
 indexmap = { version = "2.2.6", features = ["serde"] }
 proc-macro2 = "1.0.47"
-proc-macro-crate = "1.2.1"
+proc-macro-crate = "3"
 quote = "1.0.21"
-syn = { version = "1.0.103", features = ["full"] }
+syn = { version = "2", features = ["full"] }
@@ -1,3 +1,9 @@
+//! Procedural macros for the BFT JSON CRDT library.
+//!
+//! Provides `#[add_crdt_fields]` to inject `path` and `id` fields into a struct,
+//! and `#[derive(CrdtNode)]` to auto-implement the [`CrdtNode`] trait for structs
+//! whose fields are themselves [`CrdtNode`]s.
+
 use proc_macro::TokenStream as OgTokenStream;
 use proc_macro2::{Ident, Span, TokenStream};
 use proc_macro_crate::{crate_name, FoundCrate};
@@ -153,7 +159,7 @@ pub fn derive_json_crdt(input: OgTokenStream) -> OgTokenStream {
                        }

                        fn view(&self) -> #crate_name::json_crdt::JsonValue {
-                            let mut view_map = indexmap::IndexMap::new();
+                            let mut view_map = #crate_name::indexmap::IndexMap::new();
                            #(view_map.insert(#ident_strings.to_string(), self.#ident_literals.view().into());)*
                            #crate_name::json_crdt::JsonValue::Object(view_map)
                        }
@@ -1,3 +1,9 @@
+//! Debug helpers and the [`DebugView`] trait for rendering CRDT internals.
+//!
+//! Most items in this module are no-ops in release builds. They are activated by
+//! the `logging-base`, `logging-json`, and `logging-list` Cargo features so that
+//! debug output can be toggled without changing production code.
+
 use crate::{
    json_crdt::{BaseCrdt, CrdtNode, SignedOp},
    keypair::SignedDigest,
@@ -12,7 +18,7 @@ use {
        op::{print_hex, print_path, ROOT_ID},
    },
    colored::Colorize,
-    random_color::{Luminosity, RandomColor},
+    random_color::{options::Luminosity, RandomColor},
 };

 #[cfg(feature = "logging-list")]
@@ -37,6 +43,7 @@ fn display_op_id<T: CrdtNode>(op: &Op<T>) -> String {
    )
 }

+/// Log a type-mismatch warning when deserialising a JSON value into a CRDT node fails.
 pub fn debug_type_mismatch(_msg: String) {
    #[cfg(feature = "logging-base")]
    {
@@ -44,6 +51,7 @@ pub fn debug_type_mismatch(_msg: String) {
    }
 }

+/// Log a path-mismatch warning when an operation's path does not match the CRDT's path.
 pub fn debug_path_mismatch(_our_path: Vec<PathSegment>, _op_path: Vec<PathSegment>) {
    #[cfg(feature = "logging-base")]
    {
@@ -56,6 +64,7 @@ pub fn debug_path_mismatch(_our_path: Vec<PathSegment>, _op_path: Vec<PathSegmen
    }
 }

+/// Log a warning when an operation is applied to a primitive (terminal) CRDT node.
 pub fn debug_op_on_primitive(_op_path: Vec<PathSegment>) {
    #[cfg(feature = "logging-base")]
    {
@@ -79,16 +88,20 @@ fn display_author(author: AuthorId) -> String {
        .to_string()
 }

+/// Render CRDT state as an indented human-readable string for debugging.
 pub trait DebugView {
+    /// Return a multi-line debug string for this CRDT node, indented by `indent` spaces.
    fn debug_view(&self, indent: usize) -> String;
 }

 impl<T: CrdtNode + DebugView> BaseCrdt<T> {
+    /// Print the current document state as an indented debug tree (no-op in release builds).
    pub fn debug_view(&self) {
        #[cfg(feature = "logging-json")]
        println!("document is now:\n{}", self.doc.debug_view(0));
    }

+    /// Log an attempt to apply `op` before the result is known (no-op in release builds).
    pub fn log_try_apply(&self, _op: &SignedOp) {
        #[cfg(feature = "logging-json")]
        println!(
@@ -99,6 +112,7 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
        );
    }

+    /// Log a signature-digest verification failure for `op` (no-op in release builds).
    pub fn debug_digest_failure(&self, _op: SignedOp) {
        #[cfg(feature = "logging-json")]
        println!(
@@ -108,6 +122,7 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
        );
    }

+    /// Log that a causal dependency identified by `missing` has not yet been received.
    pub fn log_missing_causal_dep(&self, _missing: &SignedDigest) {
        #[cfg(feature = "logging-json")]
        println!(
@@ -117,6 +132,7 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
        );
    }

+    /// Log that `op` is about to be integrated into the document (no-op in release builds).
    pub fn log_actually_apply(&self, _op: &SignedOp) {
        #[cfg(feature = "logging-json")]
        {
@@ -133,6 +149,7 @@ impl<T> Op<T>
 where
    T: CrdtNode,
 {
+    /// Log an operation hash verification failure showing expected and computed IDs.
    pub fn debug_hash_failure(&self) {
        #[cfg(feature = "logging-base")]
        {
@@ -191,6 +208,7 @@ impl<T> ListCrdt<T>
 where
    T: CrdtNode,
 {
+    /// Print the full operation log as a tree, optionally highlighting one operation (no-op in release builds).
    pub fn log_ops(&self, _highlight: Option<OpId>) {
        #[cfg(feature = "logging-list")]
        {
@@ -289,6 +307,7 @@ where
        }
    }

+    /// Log the insert or delete being performed for `op` (no-op in release builds).
    pub fn log_apply(&self, _op: &Op<T>) {
        #[cfg(feature = "logging-list")]
        {
@@ -1,947 +0,0 @@
-use std::{
-    collections::{HashMap, HashSet},
-    fmt::Display,
-};
-
-use crate::{
-    debug::{debug_op_on_primitive, DebugView},
-    keypair::{sha256, sign, AuthorId, SignedDigest},
-    list_crdt::ListCrdt,
-    lww_crdt::LwwRegisterCrdt,
-    op::{print_hex, print_path, Hashable, Op, OpId, PathSegment},
-};
-pub use bft_crdt_derive::*;
-use fastcrypto::traits::VerifyingKey;
-use fastcrypto::{
-    ed25519::{Ed25519KeyPair, Ed25519PublicKey, Ed25519Signature},
-    traits::{KeyPair, ToFromBytes},
-    // Verifier,
-};
-// TODO: serde's json object serialization and deserialization (correctly) do not define anything
-// object field order in JSON objects. However, the hash check impl in bft-json-bft-crdt does take order
-// into account. This is going to cause problems later for non-Rust implementations, BFT hash checking
-// currently depends on JSON serialization/deserialization object order. This shouldn't be the case
-// but I've hacked in an IndexMap for the moment to get the PoC working. To see the problem, replace this with
-// a std HashMap, everything will screw up (annoyingly, only *most* of the time).
-use indexmap::IndexMap;
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, Bytes};
-
-/// Anything that can be nested in a JSON CRDT
-pub trait CrdtNode: CrdtNodeFromValue + Hashable + Clone {
-    /// Create a new CRDT of this type
-    fn new(id: AuthorId, path: Vec<PathSegment>) -> Self;
-    /// Apply an operation to this CRDT, forwarding if necessary
-    fn apply(&mut self, op: Op<JsonValue>) -> OpState;
-    /// Get a JSON representation of the value in this node
-    fn view(&self) -> JsonValue;
-}
-
-/// Enum representing possible outcomes of applying an operation to a CRDT
-#[derive(Debug, PartialEq)]
-pub enum OpState {
-    /// Operation applied successfully
-    Ok,
-    /// Tried to apply an operation to a non-CRDT primitive (i.e. f64, bool, etc.)
-    /// If you would like a mutable primitive, wrap it in a [`LWWRegisterCRDT`]
-    ErrApplyOnPrimitive,
-    /// Tried to apply an operation to a static struct CRDT
-    /// If you would like a mutable object, use a [`Value`]
-    ErrApplyOnStruct,
-    /// Tried to apply an operation that contains content of the wrong type.
-    /// In other words, the content cannot be coerced to the CRDT at the path specified.
-    ErrMismatchedType,
-    /// The signed digest of the message did not match the claimed author of the message.
-    /// This can happen if the message was tampered with during delivery
-    ErrDigestMismatch,
-    /// The hash of the message did not match the contents of the message.
-    /// This can happen if the author tried to perform an equivocation attack by creating an
-    /// operation and modifying it has already been created
-    ErrHashMismatch,
-    /// Tried to apply an operation to a non-existent path. The author may have forgotten to attach
-    /// a causal dependency
-    ErrPathMismatch,
-    /// Trying to modify/delete the sentinel (zero-th) node element that is used for book-keeping
-    ErrListApplyToEmpty,
-    /// We have not received all of the causal dependencies of this operation. It has been queued
-    /// up and will be executed when its causal dependencies have been delivered
-    MissingCausalDependencies,
-    /// This op has already been applied (identified by its `signed_digest`).
-    /// The CRDT state is unchanged — this is a no-op (idempotent self-loop guard).
-    AlreadySeen,
-}
-
-/// Maximum total number of ops that may sit in the causal-order hold queue at any
-/// one time, summed across all pending dependency buckets.
-///
-/// **Overflow policy: drop oldest.**
-/// When the limit is reached, the oldest pending op in the largest dependency bucket
-/// is silently evicted before the new op is queued.  Rationale: a misbehaving or
-/// heavily-partitioned peer can send ops whose causal ancestors never arrive, causing
-/// unbounded memory growth.  Dropping the oldest entry preserves the most recent
-/// information and caps memory use.  The peer can reconnect and receive a fresh bulk
-/// state dump to recover any dropped ops.
-pub const CAUSAL_QUEUE_MAX: usize = 256;
-
-/// The following types can be used as a 'terminal' type in CRDTs
-pub trait MarkPrimitive: Into<JsonValue> + Default {}
-impl MarkPrimitive for bool {}
-impl MarkPrimitive for i32 {}
-impl MarkPrimitive for i64 {}
-impl MarkPrimitive for f64 {}
-impl MarkPrimitive for char {}
-impl MarkPrimitive for String {}
-impl MarkPrimitive for JsonValue {}
-
-/// Implement CrdtNode for non-CRDTs
-/// This is a stub implementation so most functions don't do anything/log an error
-impl<T> CrdtNode for T
-where
-    T: CrdtNodeFromValue + MarkPrimitive + Hashable + Clone,
-{
-    fn apply(&mut self, _op: Op<JsonValue>) -> OpState {
-        OpState::ErrApplyOnPrimitive
-    }
-
-    fn view(&self) -> JsonValue {
-        self.to_owned().into()
-    }
-
-    fn new(_id: AuthorId, _path: Vec<PathSegment>) -> Self {
-        debug_op_on_primitive(_path);
-        Default::default()
-    }
-}
-
-/// The base struct for a JSON CRDT. Allows for declaring causal
-/// dependencies across fields. It only accepts messages of [`SignedOp`] for BFT.
-pub struct BaseCrdt<T: CrdtNode> {
-    /// Public key of this CRDT
-    pub id: AuthorId,
-
-    /// Internal base CRDT
-    pub doc: T,
-
-    /// In a real world scenario, this would be a proper hash graph that allows for
-    /// efficient reconciliation of missing dependencies. We naively keep a hash set
-    /// of messages we've seen (represented by their [`SignedDigest`]).
-    received: HashSet<SignedDigest>,
-    message_q: HashMap<SignedDigest, Vec<SignedOp>>,
-
-    /// Total count of ops currently held in [`message_q`] waiting for their causal
-    /// dependencies to be delivered.  Used to enforce [`CAUSAL_QUEUE_MAX`].
-    queue_len: usize,
-}
-
-/// An [`Op<Value>`] with a few bits of extra metadata
-#[serde_as]
-#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
-pub struct SignedOp {
-    // Note that this can be different from the author of the inner op as the inner op could have been created
-    // by a different person
-    author: AuthorId,
-    /// Signed hash using priv key of author. Effectively [`OpID`] Use this as the ID to figure out what has been delivered already
-    #[serde_as(as = "Bytes")]
-    pub signed_digest: SignedDigest,
-    pub inner: Op<JsonValue>,
-    /// List of causal dependencies
-    #[serde_as(as = "Vec<Bytes>")]
-    pub depends_on: Vec<SignedDigest>,
-}
-
-impl SignedOp {
-    pub fn id(&self) -> OpId {
-        self.inner.id
-    }
-
-    pub fn author(&self) -> AuthorId {
-        self.author
-    }
-
-    /// Creates a digest of the following fields. Any changes in the fields will change the signed digest
-    ///  - id (hash of the following)
-    ///    - origin
-    ///    - author
-    ///    - seq
-    ///    - is_deleted
-    ///  - path
-    ///  - dependencies
-    fn digest(&self) -> [u8; 32] {
-        let path_string = print_path(self.inner.path.clone());
-        let dependency_string = self
-            .depends_on
-            .iter()
-            .map(print_hex)
-            .collect::<Vec<_>>()
-            .join("");
-        let fmt_str = format!("{:?},{path_string},{dependency_string}", self.id());
-        sha256(fmt_str)
-    }
-
-    /// Sign this digest with the given keypair. Shouldn't need to be called manually,
-    /// just use [`SignedOp::from_op`] instead
-    fn sign_digest(&mut self, keypair: &Ed25519KeyPair) {
-        self.signed_digest = sign(keypair, &self.digest()).sig.to_bytes()
-    }
-
-    /// Ensure digest was actually signed by the author it claims to be signed by
-    pub fn is_valid_digest(&self) -> bool {
-        let digest = Ed25519Signature::from_bytes(&self.signed_digest);
-        let pubkey = Ed25519PublicKey::from_bytes(&self.author());
-        match (digest, pubkey) {
-            (Ok(digest), Ok(pubkey)) => pubkey.verify(&self.digest(), &digest).is_ok(),
-            (_, _) => false,
-        }
-    }
-
-    /// Sign a normal op and add all the needed metadata
-    pub fn from_op<T: CrdtNode>(
-        value: Op<T>,
-        keypair: &Ed25519KeyPair,
-        depends_on: Vec<SignedDigest>,
-    ) -> Self {
-        let author = keypair.public().0.to_bytes();
-        let mut new = Self {
-            inner: Op {
-                content: value.content.map(|c| c.view()),
-                origin: value.origin,
-                author: value.author,
-                seq: value.seq,
-                path: value.path,
-                is_deleted: value.is_deleted,
-                id: value.id,
-            },
-            author,
-            signed_digest: [0u8; 64],
-            depends_on,
-        };
-        new.sign_digest(keypair);
-        new
-    }
-}
-
-impl<T: CrdtNode + DebugView> BaseCrdt<T> {
-    /// Create a new BaseCRDT of the given type. Multiple BaseCRDTs
-    /// can be created from a single keypair but you are responsible for
-    /// routing messages to the right BaseCRDT. Usually you should just make a single
-    /// struct that contains all the state you need.
-    pub fn new(keypair: &Ed25519KeyPair) -> Self {
-        let id = keypair.public().0.to_bytes();
-        Self {
-            id,
-            doc: T::new(id, vec![]),
-            received: HashSet::new(),
-            message_q: HashMap::new(),
-            queue_len: 0,
-        }
-    }
-
-    /// Apply a signed operation to this BaseCRDT, verifying integrity and routing to the right
-    /// nested CRDT
-    pub fn apply(&mut self, op: SignedOp) -> OpState {
-        // self.log_try_apply(&op);
-
-        #[cfg(feature = "bft")]
-        if !op.is_valid_digest() {
-            self.debug_digest_failure(op);
-            return OpState::ErrDigestMismatch;
-        }
-
-        let op_id = op.signed_digest;
-
-        // Self-loop / dedup guard: if we have already processed this op (identified by
-        // its signed_digest), return immediately without re-applying it.  This prevents
-        // echo loops where an op we broadcast to a peer comes back to us.
-        if self.received.contains(&op_id) {
-            return OpState::AlreadySeen;
-        }
-
-        if !op.depends_on.is_empty() {
-            for origin in &op.depends_on {
-                if !self.received.contains(origin) {
-                    self.log_missing_causal_dep(origin);
-
-                    // Bounded queue overflow: evict the oldest op from the largest
-                    // pending bucket before adding the new one.  See CAUSAL_QUEUE_MAX.
-                    if self.queue_len >= CAUSAL_QUEUE_MAX {
-                        if let Some(bucket) = self.message_q.values_mut().max_by_key(|v| v.len()) {
-                            if !bucket.is_empty() {
-                                bucket.remove(0);
-                                self.queue_len = self.queue_len.saturating_sub(1);
-                            }
-                        }
-                    }
-
-                    self.message_q.entry(*origin).or_default().push(op);
-                    self.queue_len += 1;
-                    return OpState::MissingCausalDependencies;
-                }
-            }
-        }
-
-        // apply
-        // self.log_actually_apply(&op);
-        let status = self.doc.apply(op.inner);
-        // self.debug_view();
-
-        // Only record the op as seen when it applied successfully.  If the op
-        // was rejected (e.g. ErrHashMismatch from a tampered payload), we must
-        // NOT add its signed_digest to `received`: a legitimate op that shares
-        // the same signed_digest (e.g. the un-tampered original) would otherwise
-        // be silently dropped as AlreadySeen.
-        // Only mark as received and unblock dependents when the op was actually
-        // applied. If we insert on error (e.g. ErrHashMismatch), a subsequent
-        // apply of a *legitimate* op with the same signed_digest would be
-        // silently dropped as AlreadySeen, preventing equivocation detection
-        // from working correctly.
-        if status == OpState::Ok {
-            self.received.insert(op_id);
-
-            // apply all of its causal dependents if there are any
-            let dependent_queue = self.message_q.remove(&op_id);
-            if let Some(mut q) = dependent_queue {
-                self.queue_len = self.queue_len.saturating_sub(q.len());
-                for dependent in q.drain(..) {
-                    self.apply(dependent);
-                }
-            }
-        }
-        status
-    }
-
-    /// Number of ops currently held in the causal-order queue waiting for their
-    /// dependencies to be satisfied.
-    pub fn causal_queue_len(&self) -> usize {
-        self.queue_len
-    }
-}
-
-/// An enum representing a JSON value
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub enum JsonValue {
-    Null,
-    Bool(bool),
-    Number(f64),
-    String(String),
-    Array(Vec<JsonValue>),
-    Object(IndexMap<String, JsonValue>),
-}
-
-impl Display for JsonValue {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                JsonValue::Null => "null".to_string(),
-                JsonValue::Bool(b) => b.to_string(),
-                JsonValue::Number(n) => n.to_string(),
-                JsonValue::String(s) => format!("\"{s}\""),
-                JsonValue::Array(arr) => {
-                    if arr.len() > 1 {
-                        format!(
-                            "[\n{}\n]",
-                            arr.iter()
-                                .map(|x| format!("  {x}"))
-                                .collect::<Vec<_>>()
-                                .join(",\n")
-                        )
-                    } else {
-                        format!(
-                            "[ {} ]",
-                            arr.iter()
-                                .map(|x| x.to_string())
-                                .collect::<Vec<_>>()
-                                .join(", ")
-                        )
-                    }
-                }
-                JsonValue::Object(obj) => format!(
-                    "{{ {} }}",
-                    obj.iter()
-                        .map(|(k, v)| format!("  \"{k}\": {v}"))
-                        .collect::<Vec<_>>()
-                        .join(",\n")
-                ),
-            }
-        )
-    }
-}
-
-impl Default for JsonValue {
-    fn default() -> Self {
-        Self::Null
-    }
-}
-
-/// Allow easy conversion to and from serde's JSON format. This allows us to use the [`json!`]
-/// macro
-impl From<JsonValue> for serde_json::Value {
-    fn from(value: JsonValue) -> Self {
-        match value {
-            JsonValue::Null => serde_json::Value::Null,
-            JsonValue::Bool(x) => serde_json::Value::Bool(x),
-            JsonValue::Number(x) => {
-                serde_json::Value::Number(serde_json::Number::from_f64(x).unwrap())
-            }
-            JsonValue::String(x) => serde_json::Value::String(x),
-            JsonValue::Array(x) => {
-                serde_json::Value::Array(x.iter().map(|a| a.clone().into()).collect())
-            }
-            JsonValue::Object(x) => serde_json::Value::Object(
-                x.iter()
-                    .map(|(k, v)| (k.clone(), v.clone().into()))
-                    .collect(),
-            ),
-        }
-    }
-}
-
-impl From<serde_json::Value> for JsonValue {
-    fn from(value: serde_json::Value) -> Self {
-        match value {
-            serde_json::Value::Null => JsonValue::Null,
-            serde_json::Value::Bool(x) => JsonValue::Bool(x),
-            serde_json::Value::Number(x) => JsonValue::Number(x.as_f64().unwrap()),
-            serde_json::Value::String(x) => JsonValue::String(x),
-            serde_json::Value::Array(x) => {
-                JsonValue::Array(x.iter().map(|a| a.clone().into()).collect())
-            }
-            serde_json::Value::Object(x) => JsonValue::Object(
-                x.iter()
-                    .map(|(k, v)| (k.clone(), v.clone().into()))
-                    .collect(),
-            ),
-        }
-    }
-}
-
-impl JsonValue {
-    pub fn into_json(self) -> serde_json::Value {
-        self.into()
-    }
-}
-
-/// Conversions from primitive types to [`JsonValue`]
-impl From<bool> for JsonValue {
-    fn from(val: bool) -> Self {
-        JsonValue::Bool(val)
-    }
-}
-
-impl From<i64> for JsonValue {
-    fn from(val: i64) -> Self {
-        JsonValue::Number(val as f64)
-    }
-}
-
-impl From<i32> for JsonValue {
-    fn from(val: i32) -> Self {
-        JsonValue::Number(val as f64)
-    }
-}
-
-impl From<f64> for JsonValue {
-    fn from(val: f64) -> Self {
-        JsonValue::Number(val)
-    }
-}
-
-impl From<String> for JsonValue {
-    fn from(val: String) -> Self {
-        JsonValue::String(val)
-    }
-}
-
-impl From<char> for JsonValue {
-    fn from(val: char) -> Self {
-        JsonValue::String(val.into())
-    }
-}
-
-impl<T> From<Option<T>> for JsonValue
-where
-    T: CrdtNode,
-{
-    fn from(val: Option<T>) -> Self {
-        match val {
-            Some(x) => x.view(),
-            None => JsonValue::Null,
-        }
-    }
-}
-
-impl<T> From<Vec<T>> for JsonValue
-where
-    T: CrdtNode,
-{
-    fn from(value: Vec<T>) -> Self {
-        JsonValue::Array(value.iter().map(|x| x.view()).collect())
-    }
-}
-
-/// Fallibly create a CRDT Node from a JSON Value
-pub trait CrdtNodeFromValue: Sized {
-    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String>;
-}
-
-/// Fallibly cast a JSON Value into a CRDT Node
-pub trait IntoCrdtNode<T>: Sized {
-    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String>;
-}
-
-/// [`CrdtNodeFromValue`] implies [`IntoCrdtNode<T>`]
-impl<T> IntoCrdtNode<T> for JsonValue
-where
-    T: CrdtNodeFromValue,
-{
-    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String> {
-        T::node_from(self, id, path)
-    }
-}
-
-/// Trivial conversion from [`JsonValue`] to [`JsonValue`] as [`CrdtNodeFromValue`]
-impl CrdtNodeFromValue for JsonValue {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        Ok(value)
-    }
-}
-
-/// Conversions from bool to CRDT
-impl CrdtNodeFromValue for bool {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::Bool(x) = value {
-            Ok(x)
-        } else {
-            Err(format!("failed to convert {value:?} -> bool"))
-        }
-    }
-}
-
-/// Conversions from f64 to CRDT
-impl CrdtNodeFromValue for f64 {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::Number(x) = value {
-            Ok(x)
-        } else {
-            Err(format!("failed to convert {value:?} -> f64"))
-        }
-    }
-}
-
-/// Conversions from i64 to CRDT
-impl CrdtNodeFromValue for i64 {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::Number(x) = value {
-            Ok(x as i64)
-        } else {
-            Err(format!("failed to convert {value:?} -> f64"))
-        }
-    }
-}
-
-/// Conversions from String to CRDT
-impl CrdtNodeFromValue for String {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::String(x) = value {
-            Ok(x)
-        } else {
-            Err(format!("failed to convert {value:?} -> String"))
-        }
-    }
-}
-
-/// Conversions from char to CRDT
-impl CrdtNodeFromValue for char {
-    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::String(x) = value.clone() {
-            x.chars().next().ok_or(format!(
-                "failed to convert {value:?} -> char: found a zero-length string"
-            ))
-        } else {
-            Err(format!("failed to convert {value:?} -> char"))
-        }
-    }
-}
-
-impl<T> CrdtNodeFromValue for LwwRegisterCrdt<T>
-where
-    T: CrdtNode,
-{
-    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
-        let mut crdt = LwwRegisterCrdt::new(id, path);
-        crdt.set(value);
-        Ok(crdt)
-    }
-}
-
-impl<T> CrdtNodeFromValue for ListCrdt<T>
-where
-    T: CrdtNode,
-{
-    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
-        if let JsonValue::Array(arr) = value {
-            let mut crdt = ListCrdt::new(id, path);
-            let result: Result<(), String> =
-                arr.into_iter().enumerate().try_for_each(|(i, val)| {
-                    crdt.insert_idx(i, val);
-                    Ok(())
-                });
-            result?;
-            Ok(crdt)
-        } else {
-            Err(format!("failed to convert {value:?} -> ListCRDT<T>"))
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use serde_json::json;
-
-    use crate::{
-        json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, JsonValue, OpState},
-        keypair::make_keypair,
-        list_crdt::ListCrdt,
-        lww_crdt::LwwRegisterCrdt,
-        op::{print_path, ROOT_ID},
-    };
-
-    #[test]
-    fn test_derive_basic() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Player {
-            x: LwwRegisterCrdt<f64>,
-            y: LwwRegisterCrdt<f64>,
-        }
-
-        let keypair = make_keypair();
-        let crdt = BaseCrdt::<Player>::new(&keypair);
-        assert_eq!(print_path(crdt.doc.x.path), "x");
-        assert_eq!(print_path(crdt.doc.y.path), "y");
-    }
-
-    #[test]
-    fn test_derive_nested() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Position {
-            x: LwwRegisterCrdt<f64>,
-            y: LwwRegisterCrdt<f64>,
-        }
-
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Player {
-            pos: Position,
-            balance: LwwRegisterCrdt<f64>,
-            messages: ListCrdt<String>,
-        }
-
-        let keypair = make_keypair();
-        let crdt = BaseCrdt::<Player>::new(&keypair);
-        assert_eq!(print_path(crdt.doc.pos.x.path), "pos.x");
-        assert_eq!(print_path(crdt.doc.pos.y.path), "pos.y");
-        assert_eq!(print_path(crdt.doc.balance.path), "balance");
-        assert_eq!(print_path(crdt.doc.messages.path), "messages");
-    }
-
-    #[test]
-    fn test_lww_ops() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Test {
-            a: LwwRegisterCrdt<f64>,
-            b: LwwRegisterCrdt<bool>,
-            c: LwwRegisterCrdt<String>,
-        }
-
-        let kp1 = make_keypair();
-        let kp2 = make_keypair();
-        let mut base1 = BaseCrdt::<Test>::new(&kp1);
-        let mut base2 = BaseCrdt::<Test>::new(&kp2);
-
-        let _1_a_1 = base1.doc.a.set(3.0).sign(&kp1);
-        let _1_b_1 = base1.doc.b.set(true).sign(&kp1);
-        let _2_a_1 = base2.doc.a.set(1.5).sign(&kp2);
-        let _2_a_2 = base2.doc.a.set(2.13).sign(&kp2);
-        let _2_c_1 = base2.doc.c.set("abc".to_string()).sign(&kp2);
-
-        assert_eq!(base1.doc.a.view(), json!(3.0).into());
-        assert_eq!(base2.doc.a.view(), json!(2.13).into());
-        assert_eq!(base1.doc.b.view(), json!(true).into());
-        assert_eq!(base2.doc.c.view(), json!("abc").into());
-
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "a": 3.0,
-                "b": true,
-                "c": null,
-            })
-        );
-        assert_eq!(
-            base2.doc.view().into_json(),
-            json!({
-                "a": 2.13,
-                "b": null,
-                "c": "abc",
-            })
-        );
-
-        assert_eq!(base2.apply(_1_a_1), OpState::Ok);
-        assert_eq!(base2.apply(_1_b_1), OpState::Ok);
-        assert_eq!(base1.apply(_2_a_1), OpState::Ok);
-        assert_eq!(base1.apply(_2_a_2), OpState::Ok);
-        assert_eq!(base1.apply(_2_c_1), OpState::Ok);
-
-        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "a": 2.13,
-                "b": true,
-                "c": "abc"
-            })
-        )
-    }
-
-    #[test]
-    fn test_vec_and_map_ops() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Test {
-            a: ListCrdt<String>,
-        }
-
-        let kp1 = make_keypair();
-        let kp2 = make_keypair();
-        let mut base1 = BaseCrdt::<Test>::new(&kp1);
-        let mut base2 = BaseCrdt::<Test>::new(&kp2);
-
-        let _1a = base1.doc.a.insert(ROOT_ID, "a".to_string()).sign(&kp1);
-        let _1b = base1.doc.a.insert(_1a.id(), "b".to_string()).sign(&kp1);
-        let _2c = base2.doc.a.insert(ROOT_ID, "c".to_string()).sign(&kp2);
-        let _2d = base2.doc.a.insert(_1b.id(), "d".to_string()).sign(&kp2);
-
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "a": ["a", "b"],
-            })
-        );
-
-        // as _1b hasn't been delivered to base2 yet
-        assert_eq!(
-            base2.doc.view().into_json(),
-            json!({
-                "a": ["c"],
-            })
-        );
-
-        assert_eq!(base2.apply(_1b), OpState::MissingCausalDependencies);
-        assert_eq!(base2.apply(_1a), OpState::Ok);
-        assert_eq!(base1.apply(_2d), OpState::Ok);
-        assert_eq!(base1.apply(_2c), OpState::Ok);
-        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
-    }
-
-    #[test]
-    fn test_causal_field_dependency() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Item {
-            name: LwwRegisterCrdt<String>,
-            soulbound: LwwRegisterCrdt<bool>,
-        }
-
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Player {
-            inventory: ListCrdt<Item>,
-            balance: LwwRegisterCrdt<f64>,
-        }
-
-        let kp1 = make_keypair();
-        let kp2 = make_keypair();
-        let mut base1 = BaseCrdt::<Player>::new(&kp1);
-        let mut base2 = BaseCrdt::<Player>::new(&kp2);
-
-        // require balance update to happen before inventory update
-        let _add_money = base1.doc.balance.set(5000.0).sign(&kp1);
-        let _spend_money = base1
-            .doc
-            .balance
-            .set(3000.0)
-            .sign_with_dependencies(&kp1, vec![&_add_money]);
-
-        let sword: JsonValue = json!({
-            "name": "Sword",
-            "soulbound": true,
-        })
-        .into();
-        let _new_inventory_item = base1
-            .doc
-            .inventory
-            .insert_idx(0, sword)
-            .sign_with_dependencies(&kp1, vec![&_spend_money]);
-
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "balance": 3000.0,
-                "inventory": [
-                    {
-                        "name": "Sword",
-                        "soulbound": true
-                    }
-                ]
-            })
-        );
-
-        // do it completely out of order
-        assert_eq!(
-            base2.apply(_new_inventory_item),
-            OpState::MissingCausalDependencies
-        );
-        assert_eq!(
-            base2.apply(_spend_money),
-            OpState::MissingCausalDependencies
-        );
-        assert_eq!(base2.apply(_add_money), OpState::Ok);
-        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
-    }
-
-    #[test]
-    fn test_2d_grid() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Game {
-            grid: ListCrdt<ListCrdt<LwwRegisterCrdt<bool>>>,
-        }
-
-        let kp1 = make_keypair();
-        let kp2 = make_keypair();
-        let mut base1 = BaseCrdt::<Game>::new(&kp1);
-        let mut base2 = BaseCrdt::<Game>::new(&kp2);
-
-        // init a 2d grid
-        let row0: JsonValue = json!([true, false]).into();
-        let row1: JsonValue = json!([false, true]).into();
-        let construct1 = base1.doc.grid.insert_idx(0, row0).sign(&kp1);
-        let construct2 = base1.doc.grid.insert_idx(1, row1).sign(&kp1);
-
-        assert_eq!(base2.apply(construct1), OpState::Ok);
-        assert_eq!(base2.apply(construct2.clone()), OpState::Ok);
-
-        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "grid": [[true, false], [false, true]]
-            })
-        );
-
-        let set1 = base1.doc.grid[0][0].set(false).sign(&kp1);
-        let set2 = base2.doc.grid[1][1].set(false).sign(&kp2);
-        assert_eq!(base1.apply(set2), OpState::Ok);
-        assert_eq!(base2.apply(set1), OpState::Ok);
-
-        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "grid": [[false, false], [false, false]]
-            })
-        );
-
-        let topright = base1.doc.grid[0].id_at(1).unwrap();
-        base1.doc.grid[0].delete(topright);
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "grid": [[false], [false, false]]
-            })
-        );
-
-        base1.doc.grid.delete(construct2.id());
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "grid": [[false]]
-            })
-        );
-    }
-
-    #[test]
-    fn test_arb_json() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Test {
-            reg: LwwRegisterCrdt<JsonValue>,
-        }
-
-        let kp1 = make_keypair();
-        let mut base1 = BaseCrdt::<Test>::new(&kp1);
-
-        let base_val: JsonValue = json!({
-            "a": true,
-            "b": "asdf",
-            "c": {
-                "d": [],
-                "e": [ false ]
-            }
-        })
-        .into();
-        base1.doc.reg.set(base_val).sign(&kp1);
-        assert_eq!(
-            base1.doc.view().into_json(),
-            json!({
-                "reg": {
-                    "a": true,
-                    "b": "asdf",
-                    "c": {
-                        "d": [],
-                        "e": [ false ]
-                    }
-                }
-            })
-        );
-    }
-
-    #[test]
-    fn test_wrong_json_types() {
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Nested {
-            list: ListCrdt<f64>,
-        }
-
-        #[add_crdt_fields]
-        #[derive(Clone, CrdtNode, Debug)]
-        struct Test {
-            reg: LwwRegisterCrdt<bool>,
-            strct: ListCrdt<Nested>,
-        }
-
-        let key = make_keypair();
-        let mut crdt = BaseCrdt::<Test>::new(&key);
-
-        // wrong type should not go through
-        crdt.doc.reg.set(32);
-        assert_eq!(crdt.doc.reg.view(), json!(null).into());
-        crdt.doc.reg.set(true);
-        assert_eq!(crdt.doc.reg.view(), json!(true).into());
-
-        // set nested
-        let mut list_view: JsonValue = crdt.doc.strct.view().into();
-        assert_eq!(list_view, json!([]).into());
-
-        // only keeps actual numbers
-        let list: JsonValue = json!({"list": [0, 123, -0.45, "char", []]}).into();
-        crdt.doc.strct.insert_idx(0, list);
-        list_view = crdt.doc.strct.view().into();
-        assert_eq!(list_view, json!([{ "list": [0, 123, -0.45]}]).into());
-    }
-}
@@ -0,0 +1,126 @@
+//! [`BaseCrdt`] — the top-level causal-delivery wrapper around any [`CrdtNode`].
+
+use std::collections::{HashMap, HashSet};
+
+use crate::keypair::Ed25519KeyPair;
+
+use crate::debug::DebugView;
+use crate::keypair::SignedDigest;
+
+use super::{CrdtNode, OpState, SignedOp, CAUSAL_QUEUE_MAX};
+
+/// The base struct for a JSON CRDT. Allows for declaring causal
+/// dependencies across fields. It only accepts messages of [`SignedOp`] for BFT.
+pub struct BaseCrdt<T: CrdtNode> {
+    /// Public key of this CRDT
+    pub id: crate::keypair::AuthorId,
+
+    /// Internal base CRDT
+    pub doc: T,
+
+    /// In a real world scenario, this would be a proper hash graph that allows for
+    /// efficient reconciliation of missing dependencies. We naively keep a hash set
+    /// of messages we've seen (represented by their [`SignedDigest`]).
+    received: HashSet<SignedDigest>,
+    message_q: HashMap<SignedDigest, Vec<SignedOp>>,
+
+    /// Total count of ops currently held in [`message_q`] waiting for their causal
+    /// dependencies to be delivered.  Used to enforce [`CAUSAL_QUEUE_MAX`].
+    queue_len: usize,
+}
+
+impl<T: CrdtNode + DebugView> BaseCrdt<T> {
+    /// Create a new BaseCRDT of the given type. Multiple BaseCRDTs
+    /// can be created from a single keypair but you are responsible for
+    /// routing messages to the right BaseCRDT. Usually you should just make a single
+    /// struct that contains all the state you need.
+    pub fn new(keypair: &Ed25519KeyPair) -> Self {
+        let id = keypair.verifying_key().to_bytes();
+        Self {
+            id,
+            doc: T::new(id, vec![]),
+            received: HashSet::new(),
+            message_q: HashMap::new(),
+            queue_len: 0,
+        }
+    }
+
+    /// Apply a signed operation to this BaseCRDT, verifying integrity and routing to the right
+    /// nested CRDT
+    pub fn apply(&mut self, op: SignedOp) -> OpState {
+        // self.log_try_apply(&op);
+
+        #[cfg(feature = "bft")]
+        if !op.is_valid_digest() {
+            self.debug_digest_failure(op);
+            return OpState::ErrDigestMismatch;
+        }
+
+        let op_id = op.signed_digest;
+
+        // Self-loop / dedup guard: if we have already processed this op (identified by
+        // its signed_digest), return immediately without re-applying it.  This prevents
+        // echo loops where an op we broadcast to a peer comes back to us.
+        if self.received.contains(&op_id) {
+            return OpState::AlreadySeen;
+        }
+
+        if !op.depends_on.is_empty() {
+            for origin in &op.depends_on {
+                if !self.received.contains(origin) {
+                    self.log_missing_causal_dep(origin);
+
+                    // Bounded queue overflow: evict the oldest op from the largest
+                    // pending bucket before adding the new one.  See CAUSAL_QUEUE_MAX.
+                    if self.queue_len >= CAUSAL_QUEUE_MAX {
+                        if let Some(bucket) = self.message_q.values_mut().max_by_key(|v| v.len()) {
+                            if !bucket.is_empty() {
+                                bucket.remove(0);
+                                self.queue_len = self.queue_len.saturating_sub(1);
+                            }
+                        }
+                    }
+
+                    self.message_q.entry(*origin).or_default().push(op);
+                    self.queue_len += 1;
+                    return OpState::MissingCausalDependencies;
+                }
+            }
+        }
+
+        // apply
+        // self.log_actually_apply(&op);
+        let status = self.doc.apply(op.inner);
+        // self.debug_view();
+
+        // Only record the op as seen when it applied successfully.  If the op
+        // was rejected (e.g. ErrHashMismatch from a tampered payload), we must
+        // NOT add its signed_digest to `received`: a legitimate op that shares
+        // the same signed_digest (e.g. the un-tampered original) would otherwise
+        // be silently dropped as AlreadySeen.
+        // Only mark as received and unblock dependents when the op was actually
+        // applied. If we insert on error (e.g. ErrHashMismatch), a subsequent
+        // apply of a *legitimate* op with the same signed_digest would be
+        // silently dropped as AlreadySeen, preventing equivocation detection
+        // from working correctly.
+        if status == OpState::Ok {
+            self.received.insert(op_id);
+
+            // apply all of its causal dependents if there are any
+            let dependent_queue = self.message_q.remove(&op_id);
+            if let Some(mut q) = dependent_queue {
+                self.queue_len = self.queue_len.saturating_sub(q.len());
+                for dependent in q.drain(..) {
+                    self.apply(dependent);
+                }
+            }
+        }
+        status
+    }
+
+    /// Number of ops currently held in the causal-order queue waiting for their
+    /// dependencies to be satisfied.
+    pub fn causal_queue_len(&self) -> usize {
+        self.queue_len
+    }
+}
@@ -0,0 +1,439 @@
+//! JSON CRDT public interface: core traits, re-exports, and integration tests.
+// TODO: serde's json object serialization and deserialization (correctly) do not define anything
+// object field order in JSON objects. However, the hash check impl in bft-json-bft-crdt does take order
+// into account. This is going to cause problems later for non-Rust implementations, BFT hash checking
+// currently depends on JSON serialization/deserialization object order. This shouldn't be the case
+// but I've hacked in an IndexMap for the moment to get the PoC working. To see the problem, replace this with
+// a std HashMap, everything will screw up (annoyingly, only *most* of the time).
+
+use crate::debug::debug_op_on_primitive;
+use crate::keypair::AuthorId;
+use crate::op::{Hashable, Op, PathSegment};
+
+pub use bft_crdt_derive::*;
+
+mod base;
+mod signed_op;
+mod value;
+
+pub use base::BaseCrdt;
+pub use signed_op::{OpState, SignedOp, CAUSAL_QUEUE_MAX};
+pub use value::JsonValue;
+
+/// Anything that can be nested in a JSON CRDT
+pub trait CrdtNode: CrdtNodeFromValue + Hashable + Clone {
+    /// Create a new CRDT of this type
+    fn new(id: AuthorId, path: Vec<PathSegment>) -> Self;
+    /// Apply an operation to this CRDT, forwarding if necessary
+    fn apply(&mut self, op: Op<JsonValue>) -> OpState;
+    /// Get a JSON representation of the value in this node
+    fn view(&self) -> JsonValue;
+}
+
+/// The following types can be used as a 'terminal' type in CRDTs
+pub trait MarkPrimitive: Into<JsonValue> + Default {}
+impl MarkPrimitive for bool {}
+impl MarkPrimitive for i32 {}
+impl MarkPrimitive for i64 {}
+impl MarkPrimitive for f64 {}
+impl MarkPrimitive for char {}
+impl MarkPrimitive for String {}
+impl MarkPrimitive for JsonValue {}
+
+/// Implement CrdtNode for non-CRDTs
+/// This is a stub implementation so most functions don't do anything/log an error
+impl<T> CrdtNode for T
+where
+    T: CrdtNodeFromValue + MarkPrimitive + Hashable + Clone,
+{
+    fn apply(&mut self, _op: Op<JsonValue>) -> OpState {
+        OpState::ErrApplyOnPrimitive
+    }
+
+    fn view(&self) -> JsonValue {
+        self.to_owned().into()
+    }
+
+    fn new(_id: AuthorId, _path: Vec<PathSegment>) -> Self {
+        debug_op_on_primitive(_path);
+        Default::default()
+    }
+}
+
+/// Fallibly create a CRDT Node from a JSON Value
+pub trait CrdtNodeFromValue: Sized {
+    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String>;
+}
+
+/// Fallibly cast a JSON Value into a CRDT Node
+pub trait IntoCrdtNode<T>: Sized {
+    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String>;
+}
+
+/// [`CrdtNodeFromValue`] implies [`IntoCrdtNode<T>`]
+impl<T> IntoCrdtNode<T> for JsonValue
+where
+    T: CrdtNodeFromValue,
+{
+    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String> {
+        T::node_from(self, id, path)
+    }
+}
+
+/// Trivial conversion from [`JsonValue`] to [`JsonValue`] as [`CrdtNodeFromValue`]
+impl CrdtNodeFromValue for JsonValue {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        Ok(value)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use serde_json::json;
+
+    use crate::{
+        json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, JsonValue, OpState},
+        keypair::make_keypair,
+        list_crdt::ListCrdt,
+        lww_crdt::LwwRegisterCrdt,
+        op::{print_path, ROOT_ID},
+    };
+
+    #[test]
+    fn test_derive_basic() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Player {
+            x: LwwRegisterCrdt<f64>,
+            y: LwwRegisterCrdt<f64>,
+        }
+
+        let keypair = make_keypair();
+        let crdt = BaseCrdt::<Player>::new(&keypair);
+        assert_eq!(print_path(crdt.doc.x.path), "x");
+        assert_eq!(print_path(crdt.doc.y.path), "y");
+    }
+
+    #[test]
+    fn test_derive_nested() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Position {
+            x: LwwRegisterCrdt<f64>,
+            y: LwwRegisterCrdt<f64>,
+        }
+
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Player {
+            pos: Position,
+            balance: LwwRegisterCrdt<f64>,
+            messages: ListCrdt<String>,
+        }
+
+        let keypair = make_keypair();
+        let crdt = BaseCrdt::<Player>::new(&keypair);
+        assert_eq!(print_path(crdt.doc.pos.x.path), "pos.x");
+        assert_eq!(print_path(crdt.doc.pos.y.path), "pos.y");
+        assert_eq!(print_path(crdt.doc.balance.path), "balance");
+        assert_eq!(print_path(crdt.doc.messages.path), "messages");
+    }
+
+    #[test]
+    fn test_lww_ops() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Test {
+            a: LwwRegisterCrdt<f64>,
+            b: LwwRegisterCrdt<bool>,
+            c: LwwRegisterCrdt<String>,
+        }
+
+        let kp1 = make_keypair();
+        let kp2 = make_keypair();
+        let mut base1 = BaseCrdt::<Test>::new(&kp1);
+        let mut base2 = BaseCrdt::<Test>::new(&kp2);
+
+        let _1_a_1 = base1.doc.a.set(3.0).sign(&kp1);
+        let _1_b_1 = base1.doc.b.set(true).sign(&kp1);
+        let _2_a_1 = base2.doc.a.set(1.5).sign(&kp2);
+        let _2_a_2 = base2.doc.a.set(2.13).sign(&kp2);
+        let _2_c_1 = base2.doc.c.set("abc".to_string()).sign(&kp2);
+
+        assert_eq!(base1.doc.a.view(), json!(3.0).into());
+        assert_eq!(base2.doc.a.view(), json!(2.13).into());
+        assert_eq!(base1.doc.b.view(), json!(true).into());
+        assert_eq!(base2.doc.c.view(), json!("abc").into());
+
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "a": 3.0,
+                "b": true,
+                "c": null,
+            })
+        );
+        assert_eq!(
+            base2.doc.view().into_json(),
+            json!({
+                "a": 2.13,
+                "b": null,
+                "c": "abc",
+            })
+        );
+
+        assert_eq!(base2.apply(_1_a_1), OpState::Ok);
+        assert_eq!(base2.apply(_1_b_1), OpState::Ok);
+        assert_eq!(base1.apply(_2_a_1), OpState::Ok);
+        assert_eq!(base1.apply(_2_a_2), OpState::Ok);
+        assert_eq!(base1.apply(_2_c_1), OpState::Ok);
+
+        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "a": 2.13,
+                "b": true,
+                "c": "abc"
+            })
+        )
+    }
+
+    #[test]
+    fn test_vec_and_map_ops() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Test {
+            a: ListCrdt<String>,
+        }
+
+        let kp1 = make_keypair();
+        let kp2 = make_keypair();
+        let mut base1 = BaseCrdt::<Test>::new(&kp1);
+        let mut base2 = BaseCrdt::<Test>::new(&kp2);
+
+        let _1a = base1.doc.a.insert(ROOT_ID, "a".to_string()).sign(&kp1);
+        let _1b = base1.doc.a.insert(_1a.id(), "b".to_string()).sign(&kp1);
+        let _2c = base2.doc.a.insert(ROOT_ID, "c".to_string()).sign(&kp2);
+        let _2d = base2.doc.a.insert(_1b.id(), "d".to_string()).sign(&kp2);
+
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "a": ["a", "b"],
+            })
+        );
+
+        // as _1b hasn't been delivered to base2 yet
+        assert_eq!(
+            base2.doc.view().into_json(),
+            json!({
+                "a": ["c"],
+            })
+        );
+
+        assert_eq!(base2.apply(_1b), OpState::MissingCausalDependencies);
+        assert_eq!(base2.apply(_1a), OpState::Ok);
+        assert_eq!(base1.apply(_2d), OpState::Ok);
+        assert_eq!(base1.apply(_2c), OpState::Ok);
+        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
+    }
+
+    #[test]
+    fn test_causal_field_dependency() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Item {
+            name: LwwRegisterCrdt<String>,
+            soulbound: LwwRegisterCrdt<bool>,
+        }
+
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Player {
+            inventory: ListCrdt<Item>,
+            balance: LwwRegisterCrdt<f64>,
+        }
+
+        let kp1 = make_keypair();
+        let kp2 = make_keypair();
+        let mut base1 = BaseCrdt::<Player>::new(&kp1);
+        let mut base2 = BaseCrdt::<Player>::new(&kp2);
+
+        // require balance update to happen before inventory update
+        let _add_money = base1.doc.balance.set(5000.0).sign(&kp1);
+        let _spend_money = base1
+            .doc
+            .balance
+            .set(3000.0)
+            .sign_with_dependencies(&kp1, vec![&_add_money]);
+
+        let sword: JsonValue = json!({
+            "name": "Sword",
+            "soulbound": true,
+        })
+        .into();
+        let _new_inventory_item = base1
+            .doc
+            .inventory
+            .insert_idx(0, sword)
+            .sign_with_dependencies(&kp1, vec![&_spend_money]);
+
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "balance": 3000.0,
+                "inventory": [
+                    {
+                        "name": "Sword",
+                        "soulbound": true
+                    }
+                ]
+            })
+        );
+
+        // do it completely out of order
+        assert_eq!(
+            base2.apply(_new_inventory_item),
+            OpState::MissingCausalDependencies
+        );
+        assert_eq!(
+            base2.apply(_spend_money),
+            OpState::MissingCausalDependencies
+        );
+        assert_eq!(base2.apply(_add_money), OpState::Ok);
+        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
+    }
+
+    #[test]
+    fn test_2d_grid() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Game {
+            grid: ListCrdt<ListCrdt<LwwRegisterCrdt<bool>>>,
+        }
+
+        let kp1 = make_keypair();
+        let kp2 = make_keypair();
+        let mut base1 = BaseCrdt::<Game>::new(&kp1);
+        let mut base2 = BaseCrdt::<Game>::new(&kp2);
+
+        // init a 2d grid
+        let row0: JsonValue = json!([true, false]).into();
+        let row1: JsonValue = json!([false, true]).into();
+        let construct1 = base1.doc.grid.insert_idx(0, row0).sign(&kp1);
+        let construct2 = base1.doc.grid.insert_idx(1, row1).sign(&kp1);
+
+        assert_eq!(base2.apply(construct1), OpState::Ok);
+        assert_eq!(base2.apply(construct2.clone()), OpState::Ok);
+
+        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "grid": [[true, false], [false, true]]
+            })
+        );
+
+        let set1 = base1.doc.grid[0][0].set(false).sign(&kp1);
+        let set2 = base2.doc.grid[1][1].set(false).sign(&kp2);
+        assert_eq!(base1.apply(set2), OpState::Ok);
+        assert_eq!(base2.apply(set1), OpState::Ok);
+
+        assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "grid": [[false, false], [false, false]]
+            })
+        );
+
+        let topright = base1.doc.grid[0].id_at(1).unwrap();
+        base1.doc.grid[0].delete(topright);
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "grid": [[false], [false, false]]
+            })
+        );
+
+        base1.doc.grid.delete(construct2.id());
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "grid": [[false]]
+            })
+        );
+    }
+
+    #[test]
+    fn test_arb_json() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Test {
+            reg: LwwRegisterCrdt<JsonValue>,
+        }
+
+        let kp1 = make_keypair();
+        let mut base1 = BaseCrdt::<Test>::new(&kp1);
+
+        let base_val: JsonValue = json!({
+            "a": true,
+            "b": "asdf",
+            "c": {
+                "d": [],
+                "e": [ false ]
+            }
+        })
+        .into();
+        base1.doc.reg.set(base_val).sign(&kp1);
+        assert_eq!(
+            base1.doc.view().into_json(),
+            json!({
+                "reg": {
+                    "a": true,
+                    "b": "asdf",
+                    "c": {
+                        "d": [],
+                        "e": [ false ]
+                    }
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn test_wrong_json_types() {
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Nested {
+            list: ListCrdt<f64>,
+        }
+
+        #[add_crdt_fields]
+        #[derive(Clone, CrdtNode, Debug)]
+        struct Test {
+            reg: LwwRegisterCrdt<bool>,
+            strct: ListCrdt<Nested>,
+        }
+
+        let key = make_keypair();
+        let mut crdt = BaseCrdt::<Test>::new(&key);
+
+        // wrong type should not go through
+        crdt.doc.reg.set(32);
+        assert_eq!(crdt.doc.reg.view(), json!(null).into());
+        crdt.doc.reg.set(true);
+        assert_eq!(crdt.doc.reg.view(), json!(true).into());
+
+        // set nested
+        let mut list_view: JsonValue = crdt.doc.strct.view().into();
+        assert_eq!(list_view, json!([]).into());
+
+        // only keeps actual numbers
+        let list: JsonValue = json!({"list": [0, 123, -0.45, "char", []]}).into();
+        crdt.doc.strct.insert_idx(0, list);
+        list_view = crdt.doc.strct.view().into();
+        assert_eq!(list_view, json!([{ "list": [0, 123, -0.45]}]).into());
+    }
+}
@@ -0,0 +1,143 @@
+//! [`SignedOp`], [`OpState`], and the causal queue capacity constant.
+
+use crate::keypair::{Ed25519KeyPair, Ed25519PublicKey, Ed25519Signature};
+use ed25519_dalek::Verifier as _;
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, Bytes};
+
+use crate::keypair::{sha256, sign, AuthorId, SignedDigest};
+use crate::op::{print_hex, print_path, Op, OpId};
+
+use super::{CrdtNode, JsonValue};
+
+/// Enum representing possible outcomes of applying an operation to a CRDT
+#[derive(Debug, PartialEq)]
+pub enum OpState {
+    /// Operation applied successfully
+    Ok,
+    /// Tried to apply an operation to a non-CRDT primitive (i.e. f64, bool, etc.)
+    /// If you would like a mutable primitive, wrap it in a [`LWWRegisterCRDT`]
+    ErrApplyOnPrimitive,
+    /// Tried to apply an operation to a static struct CRDT
+    /// If you would like a mutable object, use a [`Value`]
+    ErrApplyOnStruct,
+    /// Tried to apply an operation that contains content of the wrong type.
+    /// In other words, the content cannot be coerced to the CRDT at the path specified.
+    ErrMismatchedType,
+    /// The signed digest of the message did not match the claimed author of the message.
+    /// This can happen if the message was tampered with during delivery
+    ErrDigestMismatch,
+    /// The hash of the message did not match the contents of the message.
+    /// This can happen if the author tried to perform an equivocation attack by creating an
+    /// operation and modifying it has already been created
+    ErrHashMismatch,
+    /// Tried to apply an operation to a non-existent path. The author may have forgotten to attach
+    /// a causal dependency
+    ErrPathMismatch,
+    /// Trying to modify/delete the sentinel (zero-th) node element that is used for book-keeping
+    ErrListApplyToEmpty,
+    /// We have not received all of the causal dependencies of this operation. It has been queued
+    /// up and will be executed when its causal dependencies have been delivered
+    MissingCausalDependencies,
+    /// This op has already been applied (identified by its `signed_digest`).
+    /// The CRDT state is unchanged — this is a no-op (idempotent self-loop guard).
+    AlreadySeen,
+}
+
+/// Maximum total number of ops that may sit in the causal-order hold queue at any
+/// one time, summed across all pending dependency buckets.
+///
+/// **Overflow policy: drop oldest.**
+/// When the limit is reached, the oldest pending op in the largest dependency bucket
+/// is silently evicted before the new op is queued.  Rationale: a misbehaving or
+/// heavily-partitioned peer can send ops whose causal ancestors never arrive, causing
+/// unbounded memory growth.  Dropping the oldest entry preserves the most recent
+/// information and caps memory use.  The peer can reconnect and receive a fresh bulk
+/// state dump to recover any dropped ops.
+pub const CAUSAL_QUEUE_MAX: usize = 256;
+
+/// An [`Op<Value>`] with a few bits of extra metadata
+#[serde_as]
+#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
+pub struct SignedOp {
+    // Note that this can be different from the author of the inner op as the inner op could have been created
+    // by a different person
+    author: AuthorId,
+    /// Signed hash using priv key of author. Effectively [`OpID`] Use this as the ID to figure out what has been delivered already
+    #[serde_as(as = "Bytes")]
+    pub signed_digest: SignedDigest,
+    pub inner: Op<JsonValue>,
+    /// List of causal dependencies
+    #[serde_as(as = "Vec<Bytes>")]
+    pub depends_on: Vec<SignedDigest>,
+}
+
+impl SignedOp {
+    pub fn id(&self) -> OpId {
+        self.inner.id
+    }
+
+    pub fn author(&self) -> AuthorId {
+        self.author
+    }
+
+    /// Creates a digest of the following fields. Any changes in the fields will change the signed digest
+    ///  - id (hash of the following)
+    ///    - origin
+    ///    - author
+    ///    - seq
+    ///    - is_deleted
+    ///  - path
+    ///  - dependencies
+    fn digest(&self) -> [u8; 32] {
+        let path_string = print_path(self.inner.path.clone());
+        let dependency_string = self
+            .depends_on
+            .iter()
+            .map(print_hex)
+            .collect::<Vec<_>>()
+            .join("");
+        let fmt_str = format!("{:?},{path_string},{dependency_string}", self.id());
+        sha256(fmt_str)
+    }
+
+    /// Sign this digest with the given keypair. Shouldn't need to be called manually,
+    /// just use [`SignedOp::from_op`] instead
+    fn sign_digest(&mut self, keypair: &Ed25519KeyPair) {
+        self.signed_digest = sign(keypair, &self.digest()).to_bytes()
+    }
+
+    /// Ensure digest was actually signed by the author it claims to be signed by
+    pub fn is_valid_digest(&self) -> bool {
+        let digest = Ed25519Signature::from_bytes(&self.signed_digest);
+        match Ed25519PublicKey::from_bytes(&self.author()) {
+            Ok(pubkey) => pubkey.verify(&self.digest(), &digest).is_ok(),
+            Err(_) => false,
+        }
+    }
+
+    /// Sign a normal op and add all the needed metadata
+    pub fn from_op<T: CrdtNode>(
+        value: Op<T>,
+        keypair: &Ed25519KeyPair,
+        depends_on: Vec<SignedDigest>,
+    ) -> Self {
+        let author = keypair.verifying_key().to_bytes();
+        let mut new = Self {
+            inner: Op {
+                content: value.content.map(|c| c.view()),
+                origin: value.origin,
+                author: value.author,
+                seq: value.seq,
+                path: value.path,
+                is_deleted: value.is_deleted,
+                id: value.id,
+            },
+            author,
+            signed_digest: [0u8; 64],
+            depends_on,
+        };
+        new.sign_digest(keypair);
+        new
+    }
+}
@@ -0,0 +1,257 @@
+//! The [`JsonValue`] enum and all its conversions to/from primitive and CRDT types.
+
+use std::fmt::Display;
+
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};
+
+use crate::{keypair::AuthorId, list_crdt::ListCrdt, lww_crdt::LwwRegisterCrdt, op::PathSegment};
+
+use super::{CrdtNode, CrdtNodeFromValue};
+
+/// An enum representing a JSON value
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub enum JsonValue {
+    #[default]
+    Null,
+    Bool(bool),
+    Number(f64),
+    String(String),
+    Array(Vec<JsonValue>),
+    Object(IndexMap<String, JsonValue>),
+}
+
+impl Display for JsonValue {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                JsonValue::Null => "null".to_string(),
+                JsonValue::Bool(b) => b.to_string(),
+                JsonValue::Number(n) => n.to_string(),
+                JsonValue::String(s) => format!("\"{s}\""),
+                JsonValue::Array(arr) => {
+                    if arr.len() > 1 {
+                        format!(
+                            "[\n{}\n]",
+                            arr.iter()
+                                .map(|x| format!("  {x}"))
+                                .collect::<Vec<_>>()
+                                .join(",\n")
+                        )
+                    } else {
+                        format!(
+                            "[ {} ]",
+                            arr.iter()
+                                .map(|x| x.to_string())
+                                .collect::<Vec<_>>()
+                                .join(", ")
+                        )
+                    }
+                }
+                JsonValue::Object(obj) => format!(
+                    "{{ {} }}",
+                    obj.iter()
+                        .map(|(k, v)| format!("  \"{k}\": {v}"))
+                        .collect::<Vec<_>>()
+                        .join(",\n")
+                ),
+            }
+        )
+    }
+}
+
+/// Allow easy conversion to and from serde's JSON format. This allows us to use the [`json!`]
+/// macro
+impl From<JsonValue> for serde_json::Value {
+    fn from(value: JsonValue) -> Self {
+        match value {
+            JsonValue::Null => serde_json::Value::Null,
+            JsonValue::Bool(x) => serde_json::Value::Bool(x),
+            JsonValue::Number(x) => {
+                serde_json::Value::Number(serde_json::Number::from_f64(x).unwrap())
+            }
+            JsonValue::String(x) => serde_json::Value::String(x),
+            JsonValue::Array(x) => {
+                serde_json::Value::Array(x.iter().map(|a| a.clone().into()).collect())
+            }
+            JsonValue::Object(x) => serde_json::Value::Object(
+                x.iter()
+                    .map(|(k, v)| (k.clone(), v.clone().into()))
+                    .collect(),
+            ),
+        }
+    }
+}
+
+impl From<serde_json::Value> for JsonValue {
+    fn from(value: serde_json::Value) -> Self {
+        match value {
+            serde_json::Value::Null => JsonValue::Null,
+            serde_json::Value::Bool(x) => JsonValue::Bool(x),
+            serde_json::Value::Number(x) => JsonValue::Number(x.as_f64().unwrap()),
+            serde_json::Value::String(x) => JsonValue::String(x),
+            serde_json::Value::Array(x) => {
+                JsonValue::Array(x.iter().map(|a| a.clone().into()).collect())
+            }
+            serde_json::Value::Object(x) => JsonValue::Object(
+                x.iter()
+                    .map(|(k, v)| (k.clone(), v.clone().into()))
+                    .collect(),
+            ),
+        }
+    }
+}
+
+impl JsonValue {
+    pub fn into_json(self) -> serde_json::Value {
+        self.into()
+    }
+}
+
+/// Conversions from primitive types to [`JsonValue`]
+impl From<bool> for JsonValue {
+    fn from(val: bool) -> Self {
+        JsonValue::Bool(val)
+    }
+}
+
+impl From<i64> for JsonValue {
+    fn from(val: i64) -> Self {
+        JsonValue::Number(val as f64)
+    }
+}
+
+impl From<i32> for JsonValue {
+    fn from(val: i32) -> Self {
+        JsonValue::Number(val as f64)
+    }
+}
+
+impl From<f64> for JsonValue {
+    fn from(val: f64) -> Self {
+        JsonValue::Number(val)
+    }
+}
+
+impl From<String> for JsonValue {
+    fn from(val: String) -> Self {
+        JsonValue::String(val)
+    }
+}
+
+impl From<char> for JsonValue {
+    fn from(val: char) -> Self {
+        JsonValue::String(val.into())
+    }
+}
+
+impl<T> From<Option<T>> for JsonValue
+where
+    T: CrdtNode,
+{
+    fn from(val: Option<T>) -> Self {
+        match val {
+            Some(x) => x.view(),
+            None => JsonValue::Null,
+        }
+    }
+}
+
+impl<T> From<Vec<T>> for JsonValue
+where
+    T: CrdtNode,
+{
+    fn from(value: Vec<T>) -> Self {
+        JsonValue::Array(value.iter().map(|x| x.view()).collect())
+    }
+}
+
+/// Conversions from bool to CRDT
+impl CrdtNodeFromValue for bool {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::Bool(x) = value {
+            Ok(x)
+        } else {
+            Err(format!("failed to convert {value:?} -> bool"))
+        }
+    }
+}
+
+/// Conversions from f64 to CRDT
+impl CrdtNodeFromValue for f64 {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::Number(x) = value {
+            Ok(x)
+        } else {
+            Err(format!("failed to convert {value:?} -> f64"))
+        }
+    }
+}
+
+/// Conversions from i64 to CRDT
+impl CrdtNodeFromValue for i64 {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::Number(x) = value {
+            Ok(x as i64)
+        } else {
+            Err(format!("failed to convert {value:?} -> f64"))
+        }
+    }
+}
+
+/// Conversions from String to CRDT
+impl CrdtNodeFromValue for String {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::String(x) = value {
+            Ok(x)
+        } else {
+            Err(format!("failed to convert {value:?} -> String"))
+        }
+    }
+}
+
+/// Conversions from char to CRDT
+impl CrdtNodeFromValue for char {
+    fn node_from(value: JsonValue, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::String(x) = value.clone() {
+            x.chars().next().ok_or(format!(
+                "failed to convert {value:?} -> char: found a zero-length string"
+            ))
+        } else {
+            Err(format!("failed to convert {value:?} -> char"))
+        }
+    }
+}
+
+impl<T> CrdtNodeFromValue for LwwRegisterCrdt<T>
+where
+    T: CrdtNode,
+{
+    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
+        let mut crdt = LwwRegisterCrdt::new(id, path);
+        crdt.set(value);
+        Ok(crdt)
+    }
+}
+
+impl<T> CrdtNodeFromValue for ListCrdt<T>
+where
+    T: CrdtNode,
+{
+    fn node_from(value: JsonValue, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
+        if let JsonValue::Array(arr) = value {
+            let mut crdt = ListCrdt::new(id, path);
+            let result: Result<(), String> =
+                arr.into_iter().enumerate().try_for_each(|(i, val)| {
+                    crdt.insert_idx(i, val);
+                    Ok(())
+                });
+            result?;
+            Ok(crdt)
+        } else {
+            Err(format!("failed to convert {value:?} -> ListCRDT<T>"))
+        }
+    }
+}
@@ -1,14 +1,25 @@
-use fastcrypto::traits::VerifyingKey;
-pub use fastcrypto::{
-    ed25519::{
-        Ed25519KeyPair, Ed25519PublicKey, Ed25519Signature, ED25519_PUBLIC_KEY_LENGTH,
-        ED25519_SIGNATURE_LENGTH,
-    },
-    traits::{KeyPair, Signer},
-    // Verifier,
-};
+//! Ed25519 keypair utilities and type aliases for node identity and signing.
+//!
+//! Provides the [`AuthorId`] and [`SignedDigest`] type aliases, a SHA-256 helper,
+//! and convenience wrappers around the `ed25519-dalek` Ed25519 primitives used
+//! throughout the CRDT codebase.
+
+use ed25519_dalek::Signer as _;
+use ed25519_dalek::Verifier as _;
 use sha2::{Digest, Sha256};

+/// Ed25519 signing key (private + public pair).
+pub type Ed25519KeyPair = ed25519_dalek::SigningKey;
+/// Ed25519 verifying (public) key.
+pub type Ed25519PublicKey = ed25519_dalek::VerifyingKey;
+/// Ed25519 signature.
+pub type Ed25519Signature = ed25519_dalek::Signature;
+
+/// Length of an Ed25519 public key in bytes.
+pub const ED25519_PUBLIC_KEY_LENGTH: usize = 32;
+/// Length of an Ed25519 signature in bytes.
+pub const ED25519_SIGNATURE_LENGTH: usize = 64;
+
 /// Represents the ID of a unique node. An Ed25519 public key
 pub type AuthorId = [u8; ED25519_PUBLIC_KEY_LENGTH];

@@ -42,8 +53,10 @@ pub fn sha256(input: String) -> [u8; 32] {

 /// Generate a random Ed25519 keypair from OS rng
 pub fn make_keypair() -> Ed25519KeyPair {
-    let mut csprng = rand::thread_rng();
-    Ed25519KeyPair::generate(&mut csprng)
+    use rand::Rng as _;
+    let mut seed = [0u8; 32];
+    rand::rng().fill_bytes(&mut seed);
+    Ed25519KeyPair::from_bytes(&seed)
 }

 /// Sign a byte array
@@ -1,8 +1,26 @@
+//! BFT JSON CRDT library — a Byzantine Fault-Tolerant replicated JSON document
+//! built on an RGA list CRDT, an LWW register CRDT, and a signed-op substrate.
+//!
+//! Each document is identified by an Ed25519 keypair. Operations are signed and
+//! carry causal dependencies so that every node converges to the same value
+//! regardless of message delivery order.
+
+/// Debug helpers and the [`DebugView`] trait for rendering CRDT internals.
 pub mod debug;
+/// JSON CRDT public interface: core traits, types, and signed-op substrate.
 pub mod json_crdt;
+/// Ed25519 keypair utilities and primitive type aliases used throughout the crate.
 pub mod keypair;
+/// RGA-style list CRDT that can store any [`CrdtNode`] as its element type.
 pub mod list_crdt;
+/// Last-writer-wins (LWW) register CRDT for single-value fields.
 pub mod lww_crdt;
+/// Core operation types: [`Op`], [`PathSegment`], and hashing helpers.
 pub mod op;

 extern crate self as bft_json_crdt;
+
+/// Re-exported so that code generated by `#[derive(CrdtNode)]` can resolve
+/// `indexmap` through this crate without requiring downstream crates to
+/// declare it as a direct dependency.
+pub use indexmap;
@@ -1,3 +1,9 @@
+//! RGA-style list CRDT that stores any [`CrdtNode`] as its element type.
+//!
+//! Implements the Replicated Growable Array (RGA) algorithm with causal ordering.
+//! Concurrent inserts at the same position are resolved by sequence number then
+//! by author public key so that all replicas converge to the same sequence.
+
 use crate::{
    debug::debug_path_mismatch,
    json_crdt::{CrdtNode, JsonValue, OpState},
@@ -47,6 +53,21 @@ where
        }
    }

+    /// Returns the current Lamport sequence number for this list.
+    pub fn our_seq(&self) -> SequenceNumber {
+        self.our_seq
+    }
+
+    /// Advance the internal sequence counter to at least `seq`.
+    ///
+    /// After `advance_seq(n)`, the next local op will carry `seq = max(our_seq, n) + 1`
+    /// instead of the default `1`.  Used on restart to resume the Lamport clock
+    /// from the document-wide floor so that newly-created registers don't
+    /// re-emit low sequence numbers.
+    pub fn advance_seq(&mut self, seq: SequenceNumber) {
+        self.our_seq = max(self.our_seq, seq);
+    }
+
    /// Locally insert some content causally after the given operation
    pub fn insert<U: Into<JsonValue>>(&mut self, after: OpId, content: U) -> Op<JsonValue> {
        let mut op = Op::new(
@@ -278,9 +299,12 @@ where
    fn index(&self, idx: usize) -> &Self::Output {
        let mut i = 0;
        for op in &self.ops {
-            if !op.is_deleted && op.content.is_some() {
+            if op.is_deleted {
+                continue;
+            }
+            if let Some(content) = op.content.as_ref() {
                if idx == i {
-                    return op.content.as_ref().unwrap();
+                    return content;
                }
                i += 1;
            }
@@ -297,9 +321,12 @@ where
    fn index_mut(&mut self, idx: usize) -> &mut Self::Output {
        let mut i = 0;
        for op in &mut self.ops {
-            if !op.is_deleted && op.content.is_some() {
+            if op.is_deleted {
+                continue;
+            }
+            if let Some(content) = op.content.as_mut() {
                if idx == i {
-                    return op.content.as_mut().unwrap();
+                    return content;
                }
                i += 1;
            }
@@ -365,6 +392,18 @@ mod test {
        assert_eq!(list.view(), vec![1, 4, 2, 3]);
    }

+    #[test]
+    fn test_advance_seq_resumes_from_floor() {
+        let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
+        list.advance_seq(100);
+        assert_eq!(list.our_seq(), 100);
+        let op = list.insert(ROOT_ID, 42);
+        assert_eq!(
+            op.seq, 101,
+            "first op after advance_seq(100) must have seq=101"
+        );
+    }
+
    #[test]
    fn test_list_idempotence() {
        let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
@@ -1,3 +1,9 @@
+//! Last-writer-wins (LWW) register CRDT.
+//!
+//! Implements a delete-wins LWW register for primitive values inside a nested
+//! JSON CRDT. Concurrent writes are resolved by sequence number; ties are broken
+//! by author public key so every node converges to the same value.
+
 use crate::debug::DebugView;
 use crate::json_crdt::{CrdtNode, JsonValue, OpState};
 use crate::op::{join_path, print_path, Op, PathSegment, SequenceNumber};
@@ -37,6 +43,21 @@ where
        }
    }

+    /// Returns the current Lamport sequence number for this register.
+    pub fn our_seq(&self) -> SequenceNumber {
+        self.our_seq
+    }
+
+    /// Advance the internal sequence counter to at least `seq`.
+    ///
+    /// After `advance_seq(n)`, the next local op will carry `seq = max(our_seq, n) + 1`
+    /// instead of the default `1`.  Used on restart to resume the Lamport clock
+    /// from the document-wide floor so that newly-created registers don't
+    /// re-emit low sequence numbers.
+    pub fn advance_seq(&mut self, seq: SequenceNumber) {
+        self.our_seq = max(self.our_seq, seq);
+    }
+
    /// Sets the current value of the register
    pub fn set<U: Into<JsonValue>>(&mut self, content: U) -> Op<JsonValue> {
        let mut op = Op::new(
@@ -174,6 +195,18 @@ mod test {
        assert_eq!(register.view(), Some(1));
    }

+    #[test]
+    fn test_advance_seq_resumes_from_floor() {
+        let mut register = LwwRegisterCrdt::<i64>::new(make_author(1), vec![]);
+        register.advance_seq(100);
+        assert_eq!(register.our_seq(), 100);
+        let op = register.set(42);
+        assert_eq!(
+            op.seq, 101,
+            "first op after advance_seq(100) must have seq=101"
+        );
+    }
+
    #[test]
    fn test_lww_consistent_tiebreak() {
        let mut register1 = LwwRegisterCrdt::new(make_author(1), vec![]);
@@ -1,7 +1,12 @@
+//! Core operation types for the BFT JSON CRDT.
+//!
+//! Defines [`Op`] (the fundamental unit of change), [`PathSegment`] (for
+//! addressing nested CRDTs), and [`SequenceNumber`] / [`OpId`] type aliases.
+//! Also provides hashing utilities used when computing operation identifiers.
+
 use crate::debug::{debug_path_mismatch, debug_type_mismatch};
 use crate::json_crdt::{CrdtNode, CrdtNodeFromValue, IntoCrdtNode, JsonValue, SignedOp};
-use crate::keypair::{sha256, AuthorId};
-use fastcrypto::ed25519::Ed25519KeyPair;
+use crate::keypair::{sha256, AuthorId, Ed25519KeyPair};
 use serde::{Deserialize, Serialize};
 use std::fmt::Debug;

@@ -113,6 +118,7 @@ where

 /// Conversion from Op<Value> -> Op<T> given that T is a CRDT that can be created from a JSON value
 impl Op<JsonValue> {
+    /// Convert this `Op<JsonValue>` into an `Op<T>` by deserialising the content via `T::node_from`.
    pub fn into<T: CrdtNodeFromValue + CrdtNode>(self) -> Op<T> {
        let content = if let Some(inner_content) = self.content {
            match inner_content.into_node(self.id, self.path.clone()) {
@@ -141,10 +147,12 @@ impl<T> Op<T>
 where
    T: CrdtNode,
 {
+    /// Sign this operation with `keypair`, producing a [`SignedOp`] with no causal dependencies.
    pub fn sign(self, keypair: &Ed25519KeyPair) -> SignedOp {
        SignedOp::from_op(self, keypair, vec![])
    }

+    /// Sign this operation and attach explicit causal `dependencies`.
    pub fn sign_with_dependencies(
        self,
        keypair: &Ed25519KeyPair,
@@ -160,14 +168,17 @@ where
        )
    }

+    /// Return the [`AuthorId`] (Ed25519 public key) of the node that created this operation.
    pub fn author(&self) -> AuthorId {
        self.author
    }

+    /// Return the Lamport sequence number carried by this operation.
    pub fn sequence_num(&self) -> SequenceNumber {
        self.seq
    }

+    /// Construct a new operation, computing its [`OpId`] hash from the supplied fields.
    pub fn new(
        origin: OpId,
        author: AuthorId,
@@ -1,3 +1,4 @@
+//! Integration tests verifying Byzantine fault tolerance of the CRDT.
 use bft_json_crdt::{
    json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, OpState},
    keypair::make_keypair,
@@ -1,12 +1,16 @@
+//! Integration tests verifying commutativity of CRDT operations.
 use bft_json_crdt::{
    json_crdt::{CrdtNode, JsonValue},
    keypair::make_author,
    list_crdt::ListCrdt,
    op::{Op, OpId, ROOT_ID},
 };
-use rand::{rngs::ThreadRng, seq::SliceRandom, Rng};
+use rand::{
+    seq::{IndexedRandom, SliceRandom},
+    Rng, RngExt,
+};

-fn random_op<T: CrdtNode>(arr: &[Op<T>], rng: &mut ThreadRng) -> OpId {
+fn random_op<T: CrdtNode>(arr: &[Op<T>], rng: &mut impl Rng) -> OpId {
    arr.choose(rng).map(|op| op.id).unwrap_or(ROOT_ID)
 }

@@ -14,7 +18,7 @@ const TEST_N: usize = 100;

 #[test]
 fn test_list_fuzz_commutative() {
-    let mut rng = rand::thread_rng();
+    let mut rng = rand::rng();
    let mut op_log = Vec::<Op<JsonValue>>::new();
    let mut op_log1 = Vec::<Op<JsonValue>>::new();
    let mut op_log2 = Vec::<Op<JsonValue>>::new();
@@ -22,14 +26,14 @@ fn test_list_fuzz_commutative() {
    let mut l2 = ListCrdt::<char>::new(make_author(2), vec![]);
    let mut chk = ListCrdt::<char>::new(make_author(3), vec![]);
    for _ in 0..TEST_N {
-        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
-        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
-        let op1 = if rng.gen_bool(4.0 / 5.0) {
+        let letter1: char = rng.random_range(b'a'..=b'z') as char;
+        let letter2: char = rng.random_range(b'a'..=b'z') as char;
+        let op1 = if rng.random_bool(4.0 / 5.0) {
            l1.insert(random_op(&op_log1, &mut rng), letter1)
        } else {
            l1.delete(random_op(&op_log1, &mut rng))
        };
-        let op2 = if rng.gen_bool(4.0 / 5.0) {
+        let op2 = if rng.random_bool(4.0 / 5.0) {
            l2.insert(random_op(&op_log2, &mut rng), letter2)
        } else {
            l2.delete(random_op(&op_log2, &mut rng))
@@ -66,8 +70,8 @@ fn test_list_fuzz_commutative() {
    let mut op_log1 = Vec::<Op<JsonValue>>::new();
    let mut op_log2 = Vec::<Op<JsonValue>>::new();
    for _ in 0..TEST_N {
-        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
-        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
+        let letter1: char = rng.random_range(b'a'..=b'z') as char;
+        let letter2: char = rng.random_range(b'a'..=b'z') as char;
        let op1 = l1.insert(random_op(&op_log, &mut rng), letter1);
        let op2 = l2.insert(random_op(&op_log, &mut rng), letter2);
        op_log1.push(op1);
@@ -1,8 +1,9 @@
+//! Integration tests that replay the Kleppmann editing trace to validate list-CRDT correctness and performance.
+
 use bft_json_crdt::keypair::make_author;
 use bft_json_crdt::list_crdt::ListCrdt;
 use bft_json_crdt::op::{OpId, ROOT_ID};
-use std::{fs::File, io::Read};
-use time::PreciseTime;
+use std::{fs::File, io::Read, time::Instant};

 use serde::Deserialize;

@@ -47,7 +48,7 @@ fn test_editing_trace() {
    let mut list = ListCrdt::<char>::new(make_author(1), vec![]);
    let mut ops: Vec<OpId> = Vec::new();
    ops.push(ROOT_ID);
-    let start = PreciseTime::now();
+    let start = Instant::now();
    let edits = t.edits;
    for (i, op) in edits.into_iter().enumerate() {
        let origin = ops[op.pos];
@@ -61,17 +62,13 @@ fn test_editing_trace() {

        match i {
            10_000 | 100_000 => {
-                let end = PreciseTime::now();
-                let runtime_sec = start.to(end);
-                println!("took {runtime_sec:?} to run {i} ops");
+                println!("took {:?} to run {i} ops", start.elapsed());
            }
            _ => {}
        };
    }

-    let end = PreciseTime::now();
-    let runtime_sec = start.to(end);
-    println!("took {runtime_sec:?} to finish");
+    println!("took {:?} to finish", start.elapsed());
    let result = list.iter().collect::<String>();
    let expected = t.final_text;
    assert_eq!(result.len(), expected.len());
@@ -0,0 +1,21 @@
+[package]
+name = "source-map-gen"
+version = "0.1.0"
+edition = "2024"
+
+[lib]
+crate-type = ["lib"]
+
+[[bin]]
+name = "source-map-check"
+path = "src/main.rs"
+
+[[bin]]
+name = "source-map-regen"
+path = "src/regen_main.rs"
+
+[dependencies]
+serde_json = { workspace = true }
+
+[dev-dependencies]
+tempfile = { workspace = true }
@@ -0,0 +1,111 @@
+# source-map-gen
+
+LLM-friendly source map generation and documentation coverage checking for the
+huskies pipeline.
+
+The crate exposes two artifacts:
+
+- A **library** that extracts public-item signatures from Rust and TypeScript
+  source files, writes them to a JSON map, and checks doc-comment coverage on a
+  changed-file set.
+- Two **CLI binaries** (`source-map-check`, `source-map-regen`) used by
+  `script/check` and by autonomous coder agents.
+
+## Why this exists
+
+The huskies orchestrator embeds `.huskies/source-map.json` directly into the
+orientation prompt of every autonomous coder it spawns (see
+`server/src/agents/local_prompt.rs`). The map is a compact, sorted index of
+every public item in the project — function and method signatures, struct
+fields, exported TS symbols — that lets a fresh agent answer "what's already
+here?" without scanning the tree itself.
+
+Two properties matter:
+
+1. **Determinism.** Running the regenerator twice on an unchanged tree must
+   produce a byte-identical file. Sorted keys, sorted arrays, stable formatting.
+2. **No stale entries.** The map cannot reference items that no longer exist,
+   or the orientation bundle lies to agents.
+
+## Binaries
+
+### `source-map-check`
+
+Doc-coverage validator. Used by the pre-commit gate and by coder agents before
+they commit.
+
+```
+cargo run -p source-map-gen --bin source-map-check -- \
+    --worktree . --base master
+```
+
+Collects every file that differs from `--base` in any git state (committed,
+staged, unstaged, untracked), runs the per-language adapter's check, and exits
+non-zero with one actionable line per undocumented public item:
+
+```
+server/src/foo.rs:42: add a doc comment to fn `bar`. Example: `/// Brief description.` above the declaration
+```
+
+Coverage is *ratcheted to added lines*: only items whose declaration falls
+inside a hunk added since `--base` are reported. Pre-existing undocumented
+items in untouched lines are ignored, so the gate cannot retroactively block
+work on an unrelated change.
+
+### `source-map-regen`
+
+Rebuilds `.huskies/source-map.json` from scratch.
+
+```
+cargo run -p source-map-gen --bin source-map-regen -- --project-root .
+```
+
+Enumerates every tracked file via `git ls-files`, extracts its public items via
+the language adapter, and writes a sorted JSON map. Wired into `script/check`
+so each pre-commit run captures a fresh snapshot. Cannot leave stale entries —
+unlike incremental update, this path always starts from the empty map.
+
+## Library
+
+```rust
+use source_map_gen::{check_files_ratcheted, regenerate_source_map, CheckResult};
+```
+
+Key entry points:
+
+- `regenerate_source_map(worktree, source_map_path)` — full rebuild from
+  `git ls-files`. Deterministic.
+- `check_files_ratcheted(files, worktree, base)` — doc-coverage check filtered
+  to lines added since `base`.
+- `check_files(files)` — non-ratcheted variant; reports every undocumented
+  public item.
+- `added_line_ranges(worktree, base, file)` — 1-based inclusive line ranges in
+  `file` added since `base`, covering all git states (committed, staged,
+  unstaged, untracked).
+- `update_source_map(passing_files, source_map_path, root)` — patches the map
+  in place for the given files. Used by the incremental path; production code
+  prefers `regenerate_source_map` to avoid stale entries.
+
+Languages plug in via the `LanguageAdapter` trait. The crate ships
+`RustAdapter` and `TypeScriptAdapter`.
+
+## Map format
+
+`.huskies/source-map.json` is a JSON object keyed by repo-relative file path,
+each value an array of public-item signatures from that file:
+
+```json
+{
+  "server/src/foo.rs": [
+    "pub fn parse_config(path: &Path) -> Result<Config, Error>",
+    "pub struct Config"
+  ],
+  "frontend/src/api.ts": [
+    "export function fetchStories(): Promise<Story[]>"
+  ]
+}
+```
+
+Keys are sorted alphabetically; each value array preserves the order returned
+by the adapter. The file is checked into git only as a generated artifact —
+treat it as build output, not as something to hand-edit.
@@ -0,0 +1,111 @@
+//! CLI for checking documentation coverage on files changed since a base branch.
+//!
+//! Usage: `source-map-check [--worktree <path>] [--base <branch>]`
+//!
+//! Exits with code 1 and prints LLM-friendly directions when public items are
+//! missing doc comments. Exits 0 (silently) when all changed files are fully
+//! documented or when there are no relevant changes to check.
+//!
+//! The file set is derived from all worktree states: committed changes since
+//! `base`, staged changes, unstaged changes, and untracked files. This ensures
+//! the result is independent of git index state.
+
+use source_map_gen::{CheckResult, check_files_ratcheted};
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let worktree = parse_arg(&args, "--worktree").unwrap_or_else(|| ".".to_string());
+    let base = parse_arg(&args, "--base").unwrap_or_else(|| "master".to_string());
+
+    let worktree_path = Path::new(&worktree);
+
+    let changed = collect_changed_files(worktree_path, &base);
+
+    if changed.is_empty() {
+        return;
+    }
+
+    let file_refs: Vec<&Path> = changed.iter().map(PathBuf::as_path).collect();
+
+    match check_files_ratcheted(&file_refs, worktree_path, &base) {
+        CheckResult::Ok => {}
+        CheckResult::Failures(failures) => {
+            eprintln!(
+                "Doc coverage check failed. Add doc comments to the following items before committing:\n"
+            );
+            for f in &failures {
+                eprintln!("  {}", f.to_direction());
+            }
+            eprintln!(
+                "\nRe-run: cargo run -p source-map-gen --bin source-map-check -- --worktree . --base master"
+            );
+            std::process::exit(1);
+        }
+    }
+}
+
+/// Collect all files that differ from `base` in any git state: committed, staged,
+/// unstaged, or untracked. Returns deduplicated absolute paths that exist on disk.
+fn collect_changed_files(worktree_path: &Path, base: &str) -> Vec<PathBuf> {
+    let mut names: HashSet<String> = HashSet::new();
+
+    // Committed changes since base (three-dot diff handles divergent histories).
+    run_git_name_list(
+        worktree_path,
+        &["diff", "--name-only", &format!("{base}...HEAD")],
+        &mut names,
+    );
+
+    // Staged changes not yet committed.
+    run_git_name_list(
+        worktree_path,
+        &["diff", "--name-only", "--cached"],
+        &mut names,
+    );
+
+    // Unstaged changes to tracked files.
+    run_git_name_list(worktree_path, &["diff", "--name-only"], &mut names);
+
+    // Untracked files (new files not yet added to the index).
+    run_git_name_list(
+        worktree_path,
+        &["ls-files", "--others", "--exclude-standard"],
+        &mut names,
+    );
+
+    names
+        .into_iter()
+        .map(|l| worktree_path.join(l))
+        .filter(|p| p.exists())
+        .collect()
+}
+
+/// Run a git command and collect each non-empty output line into `out`.
+///
+/// Silently ignores git errors so a missing base branch or a fresh repo without
+/// any commits does not abort the check.
+fn run_git_name_list(worktree_path: &Path, args: &[&str], out: &mut HashSet<String>) {
+    let Ok(output) = Command::new("git")
+        .args(args)
+        .current_dir(worktree_path)
+        .output()
+    else {
+        return;
+    };
+    if !output.status.success() {
+        return;
+    }
+    for line in String::from_utf8_lossy(&output.stdout).lines() {
+        if !line.is_empty() {
+            out.insert(line.to_string());
+        }
+    }
+}
+
+/// Parse a flag value from an argument list (e.g. `--flag value`).
+fn parse_arg(args: &[String], flag: &str) -> Option<String> {
+    args.windows(2).find(|w| w[0] == flag).map(|w| w[1].clone())
+}
@@ -0,0 +1,32 @@
+//! CLI binary for manual regeneration of `.huskies/source-map.json`.
+//!
+//! Usage: `source-map-regen [--project-root <path>]`
+//!
+//! Scans every tracked Rust and TypeScript file in the project via `git ls-files`,
+//! extracts public item signatures, and writes a fresh sorted JSON map. The output
+//! is byte-identical across runs on the same source tree (deterministic).
+//!
+//! The pre-commit gate (`script/check`) no longer calls this binary directly — map
+//! regeneration is now inlined into the coder spawn path (`local_prompt.rs`) so every
+//! agent session starts with a fresh snapshot. This binary is kept as an escape hatch
+//! for manual out-of-band regeneration (e.g. after bulk refactors outside the pipeline).
+
+use source_map_gen::regenerate_source_map;
+use std::path::Path;
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let root = parse_arg(&args, "--project-root").unwrap_or_else(|| ".".to_string());
+    let root_path = Path::new(&root);
+    let map_path = root_path.join(".huskies").join("source-map.json");
+
+    if let Err(e) = regenerate_source_map(root_path, &map_path) {
+        eprintln!("source-map-regen: {e}");
+        std::process::exit(1);
+    }
+}
+
+/// Parse a flag value from an argument list (e.g. `--flag value`).
+fn parse_arg(args: &[String], flag: &str) -> Option<String> {
+    args.windows(2).find(|w| w[0] == flag).map(|w| w[1].clone())
+}
@@ -0,0 +1,273 @@
+//! Rust documentation coverage adapter.
+//!
+//! Checks for:
+//! - A `//!` module-level doc comment somewhere in every `.rs` file.
+//! - A `///` doc comment immediately before every `pub` item (`fn`, `struct`,
+//!   `enum`, `trait`, `type`, `const`, `static`, `mod`).
+
+use std::fs;
+use std::path::Path;
+
+use crate::{CheckFailure, CheckResult, LanguageAdapter, relative_key};
+
+/// Rust documentation coverage adapter.
+pub struct RustAdapter;
+
+impl RustAdapter {
+    fn check_file(&self, path: &Path) -> Vec<CheckFailure> {
+        let content = match fs::read_to_string(path) {
+            Ok(c) => c,
+            Err(_) => return vec![],
+        };
+        let lines: Vec<&str> = content.lines().collect();
+        let mut failures = Vec::new();
+
+        // Module-level doc comment (//!)
+        if !lines.iter().any(|l| l.trim_start().starts_with("//!")) {
+            failures.push(CheckFailure {
+                file_path: path.to_path_buf(),
+                line: 1,
+                item_kind: "module".to_string(),
+                item_name: module_name(path),
+            });
+        }
+
+        // Public items missing /// doc comments
+        for (i, &line) in lines.iter().enumerate() {
+            if let Some((kind, name)) = parse_pub_item(line)
+                && !has_doc_before(&lines, i)
+            {
+                failures.push(CheckFailure {
+                    file_path: path.to_path_buf(),
+                    line: i + 1,
+                    item_kind: kind,
+                    item_name: name,
+                });
+            }
+        }
+
+        failures
+    }
+
+    /// Extract public item signatures from a Rust file as `"kind name"` strings.
+    pub(crate) fn extract_items(path: &Path) -> Vec<String> {
+        let content = match fs::read_to_string(path) {
+            Ok(c) => c,
+            Err(_) => return vec![],
+        };
+        content
+            .lines()
+            .filter_map(|line| {
+                let (kind, name) = parse_pub_item(line)?;
+                Some(format!("{kind} {name}"))
+            })
+            .collect()
+    }
+}
+
+impl LanguageAdapter for RustAdapter {
+    fn check(&self, files: &[&Path]) -> CheckResult {
+        let failures: Vec<CheckFailure> = files.iter().flat_map(|&f| self.check_file(f)).collect();
+        if failures.is_empty() {
+            CheckResult::Ok
+        } else {
+            CheckResult::Failures(failures)
+        }
+    }
+
+    fn update_source_map(
+        &self,
+        passing_files: &[&Path],
+        source_map_path: &Path,
+        root: Option<&Path>,
+    ) -> Result<(), String> {
+        let mut map = crate::read_map(source_map_path)?;
+        for &file in passing_files {
+            let key = relative_key(file, root);
+            let items: Vec<serde_json::Value> = Self::extract_items(file)
+                .into_iter()
+                .map(serde_json::Value::String)
+                .collect();
+            map.insert(key, serde_json::Value::Array(items));
+        }
+        crate::write_map(source_map_path, map)
+    }
+}
+
+fn module_name(path: &Path) -> String {
+    path.file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or("unknown")
+        .to_string()
+}
+
+/// Parse a line as a public Rust item declaration.
+///
+/// Returns `(kind, name)` if the line declares a public item, `None` otherwise.
+fn parse_pub_item(line: &str) -> Option<(String, String)> {
+    let trimmed = line.trim();
+
+    // Strip visibility: "pub(…)" or "pub "
+    let rest = if let Some(r) = trimmed.strip_prefix("pub(") {
+        let end = r.find(')')?;
+        r[end + 1..].trim_start()
+    } else if let Some(r) = trimmed.strip_prefix("pub ") {
+        r.trim_start()
+    } else {
+        return None;
+    };
+
+    // Handle "async fn"
+    let rest = if let Some(r) = rest.strip_prefix("async ") {
+        r.trim_start()
+    } else {
+        rest
+    };
+
+    // Match item keyword and extract name part
+    let (kind, name_part) = if let Some(r) = rest.strip_prefix("fn ") {
+        ("fn", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("struct ") {
+        ("struct", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("enum ") {
+        ("enum", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("trait ") {
+        ("trait", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("type ") {
+        ("type", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("const ") {
+        ("const", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("static ") {
+        ("static", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("mod ") {
+        ("mod", r.trim_start())
+    } else {
+        return None;
+    };
+
+    let name: String = name_part
+        .chars()
+        .take_while(|&c| c.is_alphanumeric() || c == '_')
+        .collect();
+
+    if name.is_empty() {
+        return None;
+    }
+
+    Some((kind.to_string(), name))
+}
+
+/// Return `true` if a `///` doc comment appears before the item at `item_idx`.
+///
+/// Scans backward from `item_idx`, skipping blank lines and `#[…]` attribute
+/// lines. Returns `true` if the first substantive line is a `///` comment.
+fn has_doc_before(lines: &[&str], item_idx: usize) -> bool {
+    let mut i = item_idx;
+    while i > 0 {
+        i -= 1;
+        let line = lines[i].trim();
+        if line.starts_with("///") {
+            return true;
+        }
+        if line.starts_with("#[") || line.starts_with("#![") || line.is_empty() {
+            continue;
+        }
+        break;
+    }
+    false
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    fn write_rs(dir: &Path, name: &str, content: &str) -> std::path::PathBuf {
+        let path = dir.join(name);
+        std::fs::write(&path, content).unwrap();
+        path
+    }
+
+    #[test]
+    fn check_fully_documented_file_returns_ok() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_rs(
+            tmp.path(),
+            "lib.rs",
+            "//! Module doc.\n\n/// A function.\npub fn hello() {}\n\n/// A struct.\npub struct Foo;\n",
+        );
+        let adapter = RustAdapter;
+        assert_eq!(adapter.check(&[&path]), CheckResult::Ok);
+    }
+
+    #[test]
+    fn check_detects_missing_module_doc() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_rs(tmp.path(), "lib.rs", "/// A function.\npub fn hello() {}\n");
+        let adapter = RustAdapter;
+        let result = adapter.check(&[&path]);
+        assert!(
+            matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "module")),
+            "expected module failure, got {result:?}"
+        );
+    }
+
+    #[test]
+    fn check_detects_missing_fn_doc_with_correct_fields() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_rs(tmp.path(), "bar.rs", "//! Module.\n\npub fn no_doc() {}\n");
+        let adapter = RustAdapter;
+        let result = adapter.check(&[&path]);
+        if let CheckResult::Failures(failures) = result {
+            let f = failures.iter().find(|f| f.item_kind == "fn").unwrap();
+            assert_eq!(f.item_name, "no_doc");
+            assert_eq!(f.line, 3);
+            assert_eq!(f.file_path, path);
+        } else {
+            panic!("expected failures");
+        }
+    }
+
+    #[test]
+    fn check_passes_item_with_attribute_before_doc() {
+        let tmp = TempDir::new().unwrap();
+        // Attribute between doc and item is fine; doc between attribute and item is fine too
+        let path = write_rs(
+            tmp.path(),
+            "lib.rs",
+            "//! Module.\n\n/// Doc.\n#[derive(Debug)]\npub struct Foo;\n",
+        );
+        let adapter = RustAdapter;
+        assert_eq!(adapter.check(&[&path]), CheckResult::Ok);
+    }
+
+    #[test]
+    fn parse_pub_item_recognises_various_kinds() {
+        assert_eq!(
+            parse_pub_item("pub fn foo()"),
+            Some(("fn".into(), "foo".into()))
+        );
+        assert_eq!(
+            parse_pub_item("    pub async fn bar()"),
+            Some(("fn".into(), "bar".into()))
+        );
+        assert_eq!(
+            parse_pub_item("pub struct Baz"),
+            Some(("struct".into(), "Baz".into()))
+        );
+        assert_eq!(
+            parse_pub_item("pub enum Qux"),
+            Some(("enum".into(), "Qux".into()))
+        );
+        assert_eq!(
+            parse_pub_item("pub trait MyTrait"),
+            Some(("trait".into(), "MyTrait".into()))
+        );
+        assert_eq!(
+            parse_pub_item("pub(crate) fn inner()"),
+            Some(("fn".into(), "inner".into()))
+        );
+        assert_eq!(parse_pub_item("fn private()"), None);
+        assert_eq!(parse_pub_item("let x = 1;"), None);
+    }
+}
@@ -0,0 +1,295 @@
+//! TypeScript documentation coverage adapter.
+//!
+//! Checks for:
+//! - A leading file-level JSDoc comment (`/** … */`) at the top of every
+//!   `.ts` / `.tsx` file.
+//! - A JSDoc comment before every exported declaration (`export function`,
+//!   `export class`, `export type`, `export interface`, `export const`, etc.).
+
+use std::fs;
+use std::path::Path;
+
+use crate::{CheckFailure, CheckResult, LanguageAdapter, relative_key};
+
+/// TypeScript documentation coverage adapter.
+pub struct TypeScriptAdapter;
+
+impl TypeScriptAdapter {
+    fn check_file(&self, path: &Path) -> Vec<CheckFailure> {
+        let content = match fs::read_to_string(path) {
+            Ok(c) => c,
+            Err(_) => return vec![],
+        };
+        let lines: Vec<&str> = content.lines().collect();
+        let mut failures = Vec::new();
+
+        // File-level JSDoc: first non-empty line must start with "/**"
+        if !has_file_level_jsdoc(&content) {
+            failures.push(CheckFailure {
+                file_path: path.to_path_buf(),
+                line: 1,
+                item_kind: "file".to_string(),
+                item_name: file_stem(path),
+            });
+        }
+
+        // Exported items missing JSDoc
+        for (i, &line) in lines.iter().enumerate() {
+            if let Some((kind, name)) = parse_exported_item(line)
+                && !has_jsdoc_before(&lines, i)
+            {
+                failures.push(CheckFailure {
+                    file_path: path.to_path_buf(),
+                    line: i + 1,
+                    item_kind: kind,
+                    item_name: name,
+                });
+            }
+        }
+
+        failures
+    }
+
+    /// Extract exported item signatures from a TypeScript file as `"kind name"` strings.
+    pub(crate) fn extract_items(path: &Path) -> Vec<String> {
+        let content = match fs::read_to_string(path) {
+            Ok(c) => c,
+            Err(_) => return vec![],
+        };
+        content
+            .lines()
+            .filter_map(|line| {
+                let (kind, name) = parse_exported_item(line)?;
+                Some(format!("{kind} {name}"))
+            })
+            .collect()
+    }
+}
+
+impl LanguageAdapter for TypeScriptAdapter {
+    fn check(&self, files: &[&Path]) -> CheckResult {
+        let failures: Vec<CheckFailure> = files.iter().flat_map(|&f| self.check_file(f)).collect();
+        if failures.is_empty() {
+            CheckResult::Ok
+        } else {
+            CheckResult::Failures(failures)
+        }
+    }
+
+    fn update_source_map(
+        &self,
+        passing_files: &[&Path],
+        source_map_path: &Path,
+        root: Option<&Path>,
+    ) -> Result<(), String> {
+        let mut map = crate::read_map(source_map_path)?;
+        for &file in passing_files {
+            let key = relative_key(file, root);
+            let items: Vec<serde_json::Value> = Self::extract_items(file)
+                .into_iter()
+                .map(serde_json::Value::String)
+                .collect();
+            map.insert(key, serde_json::Value::Array(items));
+        }
+        crate::write_map(source_map_path, map)
+    }
+}
+
+fn file_stem(path: &Path) -> String {
+    path.file_stem()
+        .and_then(|s| s.to_str())
+        .unwrap_or("unknown")
+        .to_string()
+}
+
+/// Return `true` if the file starts with a JSDoc block comment (`/**`).
+fn has_file_level_jsdoc(content: &str) -> bool {
+    for line in content.lines() {
+        let trimmed = line.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+        return trimmed.starts_with("/**");
+    }
+    false
+}
+
+/// Parse a line as an exported TypeScript declaration.
+///
+/// Returns `(kind, name)` for supported export forms, `None` otherwise.
+fn parse_exported_item(line: &str) -> Option<(String, String)> {
+    let trimmed = line.trim();
+
+    // Strip "export default" or "export"
+    let rest = if let Some(r) = trimmed.strip_prefix("export default ") {
+        r.trim_start()
+    } else if let Some(r) = trimmed.strip_prefix("export ") {
+        r.trim_start()
+    } else {
+        return None;
+    };
+
+    // Strip optional "async"
+    let rest = if let Some(r) = rest.strip_prefix("async ") {
+        r.trim_start()
+    } else {
+        rest
+    };
+
+    let (kind, name_part) = if let Some(r) = rest.strip_prefix("function ") {
+        ("function", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("class ") {
+        ("class", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("type ") {
+        ("type", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("interface ") {
+        ("interface", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("const ") {
+        ("const", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("let ") {
+        ("let", r.trim_start())
+    } else if let Some(r) = rest.strip_prefix("enum ") {
+        ("enum", r.trim_start())
+    } else {
+        return None;
+    };
+
+    let name: String = name_part
+        .chars()
+        .take_while(|&c| c.is_alphanumeric() || c == '_')
+        .collect();
+
+    if name.is_empty() {
+        // "export default function() {}" — anonymous default export
+        return Some((kind.to_string(), "default".to_string()));
+    }
+
+    Some((kind.to_string(), name))
+}
+
+/// Return `true` if a JSDoc comment appears before the item at `item_idx`.
+///
+/// Scans backward, skipping blank lines and decorator lines (`@…`). Returns
+/// `true` if the first substantive line ends with `*/` (closing a JSDoc block)
+/// or starts with `/**` (single-line JSDoc).
+fn has_jsdoc_before(lines: &[&str], item_idx: usize) -> bool {
+    let mut i = item_idx;
+    while i > 0 {
+        i -= 1;
+        let line = lines[i].trim();
+        if line.is_empty() {
+            // A blank line breaks the JSDoc–item adjacency: stop searching.
+            return false;
+        }
+        if line.starts_with('@') {
+            // Decorator — keep scanning upward
+            continue;
+        }
+        return line.ends_with("*/") || line.starts_with("/**");
+    }
+    false
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    fn write_ts(dir: &Path, name: &str, content: &str) -> std::path::PathBuf {
+        let path = dir.join(name);
+        std::fs::write(&path, content).unwrap();
+        path
+    }
+
+    #[test]
+    fn check_fully_documented_file_returns_ok() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_ts(
+            tmp.path(),
+            "app.ts",
+            "/**\n * File doc.\n */\n\n/** Does something. */\nexport function hello(): void {}\n",
+        );
+        let adapter = TypeScriptAdapter;
+        assert_eq!(adapter.check(&[&path]), CheckResult::Ok);
+    }
+
+    #[test]
+    fn check_detects_missing_file_jsdoc() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_ts(
+            tmp.path(),
+            "app.ts",
+            "/** Does something. */\nexport function hello(): void {}\n",
+        );
+        // First non-empty line IS "/**", so this file passes the file-level check.
+        // Use a file that starts with code instead.
+        let path2 = write_ts(
+            tmp.path(),
+            "app2.ts",
+            "import { foo } from './foo';\n/** A function. */\nexport function hello(): void {}\n",
+        );
+        let adapter = TypeScriptAdapter;
+        let result = adapter.check(&[&path2]);
+        assert!(
+            matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "file")),
+            "expected file failure, got {result:?}"
+        );
+        // The first file (starts with /**) should pass the file-level check
+        let result2 = adapter.check(&[&path]);
+        // It may still fail on the export if there's no separate export doc,
+        // but the file-level check itself should pass (first line is /**)
+        assert!(
+            !matches!(&result2, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "file")),
+            "file starting with /** should not have file-level failure"
+        );
+    }
+
+    #[test]
+    fn check_detects_missing_export_jsdoc_with_correct_fields() {
+        let tmp = TempDir::new().unwrap();
+        let path = write_ts(
+            tmp.path(),
+            "app.ts",
+            "/**\n * File doc.\n */\n\nexport function undocumented(): void {}\n",
+        );
+        let adapter = TypeScriptAdapter;
+        let result = adapter.check(&[&path]);
+        if let CheckResult::Failures(failures) = result {
+            let f = failures.iter().find(|f| f.item_kind == "function").unwrap();
+            assert_eq!(f.item_name, "undocumented");
+            assert_eq!(f.file_path, path);
+        } else {
+            panic!("expected failures");
+        }
+    }
+
+    #[test]
+    fn parse_exported_item_recognises_various_kinds() {
+        assert_eq!(
+            parse_exported_item("export function foo()"),
+            Some(("function".into(), "foo".into()))
+        );
+        assert_eq!(
+            parse_exported_item("export async function bar()"),
+            Some(("function".into(), "bar".into()))
+        );
+        assert_eq!(
+            parse_exported_item("export class Baz"),
+            Some(("class".into(), "Baz".into()))
+        );
+        assert_eq!(
+            parse_exported_item("export type Qux = string;"),
+            Some(("type".into(), "Qux".into()))
+        );
+        assert_eq!(
+            parse_exported_item("export interface IFoo"),
+            Some(("interface".into(), "IFoo".into()))
+        );
+        assert_eq!(
+            parse_exported_item("export const MY_CONST = 1;"),
+            Some(("const".into(), "MY_CONST".into()))
+        );
+        assert_eq!(parse_exported_item("function notExported()"), None);
+        assert_eq!(parse_exported_item("const x = 1;"), None);
+    }
+}
@@ -7,7 +7,7 @@
 #
 # Tested with: OrbStack (recommended on macOS), Docker Desktop (slower bind mounts)

-FROM rust:1.90-bookworm AS base
+FROM rust:1.93-bookworm AS base

 # Clippy and rustfmt are needed at runtime for acceptance gates
 RUN rustup component add clippy rustfmt
@@ -0,0 +1,196 @@
+# Architecture Roadmap: Transports, Services, State Machine, CRDT
+
+*Spike 613 — April 2026*
+
+This document captures the current architecture across four key layers and charts
+the recommended next steps for each.
+
+---
+
+## 1. Current State
+
+### 1.1 Service Layer
+
+Stories 604–619 established a clean service extraction pattern. The
+`server/src/service/` directory now has 21 sub-modules, each following the
+functional-core / imperative-shell convention documented in
+[service-modules.md](service-modules.md).
+
+**Extracted so far:**
+`agents`, `anthropic`, `bot_command`, `common`, `diagnostics`, `events`,
+`file_io`, `gateway`, `git_ops`, `health`, `merge`, `notifications`, `oauth`,
+`pipeline`, `project`, `qa`, `settings`, `shell`, `story`, `timer`, `wizard`,
+`ws`
+
+**Remaining in HTTP handlers** (see [future-extractions.md](future-extractions.md)):
+The list there was written before stories 615–619. After those stories landed,
+the remaining surface is smaller. The HTTP handlers still containing inline
+business logic are: `http/ws.rs` (WebSocket dispatch) and scattered ad-hoc
+helpers in `http/mcp/` that have not yet been migrated to typed service modules.
+
+### 1.2 Chat Transports
+
+Four transport backends implement `ChatTransport` (defined in `chat/mod.rs`):
+
+| Transport | Connection model | Rooms / channels |
+|-----------|-----------------|-----------------|
+| Matrix | Long-lived WebSocket to homeserver | Dynamic (per-room history) |
+| Slack | HTTP webhook (Events API) | Fixed at startup from bot.toml |
+| WhatsApp | HTTP webhook (Meta Graph API or Twilio) | Ambient (tracked active senders) |
+| Discord | Gateway WebSocket + REST | Fixed at startup from bot.toml |
+
+All four are instantiated manually in `main.rs` (~lines 567–690) and passed into
+`AppContext`. Stage-transition notifications are pushed through
+`service/notifications/`.
+
+**Known issue (Bug 501):** The Matrix bot spawns its own `TimerStore` instead of
+consuming the shared `AppContext.timer_store`. This means MCP-tool cancellations
+and the bot's tick loop see different in-memory state.
+
+### 1.3 Pipeline State Machine
+
+`server/src/pipeline_state.rs` provides a typed, compile-time-safe state machine
+that replaces the old stringly-typed CRDT views.
+
+**Synced stages (all nodes converge):**
+```
+Backlog → Coding → Qa → Merge { feature_branch, commits_ahead: NonZeroU32 }
+       → Done { merged_at, merge_commit }
+       → Archived { archived_at, reason }
+```
+
+`ArchiveReason` subsumes the old `blocked`, `merge_failure`, and `review_hold`
+flags: `Completed | Abandoned | Superseded | Blocked | MergeFailed | ReviewHeld`.
+
+`NonZeroU32` in `Merge` makes zero-commit merges structurally impossible.
+
+**Per-node execution state (local, not replicated):**
+`Idle → Pending → Running → RateLimited → Completed`
+
+**Status:** The typed state machine is defined and the projection layer
+(`PipelineItemView → PipelineItem via TryFrom`) is in place. Consumer
+migration — replacing ad-hoc string comparisons across the codebase — is the
+remaining work (tracked by Story 520).
+
+### 1.4 CRDT Layer
+
+`server/src/crdt_state.rs` + `crdt_sync.rs` form the distributed-state
+foundation:
+
+- **Document model:** `PipelineDoc { items: ListCrdt<PipelineItemCrdt>, nodes: ListCrdt<NodePresenceCrdt> }`
+- **Registers:** `LwwRegisterCrdt<T>` for all mutable fields
+- **Persistence:** Ops stored in SQLite (`pipeline.db`); `CrdtEvent` broadcast on every stage change
+- **Sync protocol:** WebSocket `/crdt-sync` — bulk dump on connect (text), individual `SignedOp`s in real-time (binary)
+- **Backpressure:** Slow peers are disconnected; they reconnect and get a fresh bulk dump
+
+**Filesystem shadows** (`huskies/work/`) are now a secondary output only — CRDT is
+the source of truth. Several clean-up stories (513, 517) remain backlogged to
+remove the remaining fallback paths.
+
+---
+
+## 2. Roadmap
+
+### Phase A — Finish the State Machine Migration (Story 520)
+
+**Goal:** Every pipeline query uses the typed `PipelineItem` enum instead of
+raw string comparisons on `stage`.
+
+Work:
+1. Replace `stage == "current"` / `"qa"` / `"merge"` patterns in `agents/`,
+   `http/mcp/`, `chat/commands/`, and `gateway.rs` with `matches!(item, PipelineItem::Coding)` etc.
+2. Remove the `PipelineItemView` → string projection paths once all consumers
+   use the typed enum.
+3. Add exhaustive match tests in `pipeline_state.rs` so new stages cause
+   compile-time failures, not silent mismatches.
+
+### Phase B — Transport Registry Abstraction
+
+**Goal:** Replace the manual transport wiring in `main.rs` with a pluggable
+registry, making it easy to add or remove transports without modifying the
+startup sequence.
+
+Work:
+1. Define a `TransportRegistry` that holds `Vec<Box<dyn ChatTransport>>` keyed
+   by `TransportKind` (Matrix, Slack, WhatsApp, Discord).
+2. Move the per-transport instantiation logic from `main.rs` into
+   `service/transport/` following the service module conventions.
+3. Unify webhook signature verification (currently duplicated between Slack and
+   WhatsApp) into a shared `service/transport/verify.rs`.
+4. Fix Bug 501: pass the shared `AppContext.timer_store` into the Matrix bot
+   instead of spawning a private instance.
+5. Unify message history persistence (each transport currently owns a separate
+   history file format) into a common `service/transport/history.rs`.
+
+### Phase C — CRDT Cleanup (Stories 513, 517, 518, 519, 521)
+
+**Goal:** Remove all legacy filesystem-first paths and complete the
+CRDT-as-source-of-truth migration.
+
+Priority order (based on risk/value):
+1. **519** — Mergemaster must detect zero-commits-ahead and fail loudly instead of
+   silently exiting. Structural fix: `Merge { commits_ahead: NonZeroU32 }` already
+   enforces this — just ensure mergemaster reads from the typed enum.
+2. **518** — `apply_and_persist` should log when the persist tx fails instead of
+   silently dropping ops.
+3. **513** — Startup reconciliation pass: detect drift between CRDT pipeline items
+   and filesystem shadows, heal or report.
+4. **517** — Remove filesystem shadow fallback paths from `lifecycle.rs`.
+5. **521** — MCP HTTP capability to write a CRDT tombstone-delete op, clearing a
+   story from in-memory state cleanly.
+6. **511** — Lamport clock inner seq resets to 1 on restart instead of resuming
+   from `max(own_author_seq) + 1`. Low risk to fix, high risk to leave.
+
+### Phase D — Distributed Node Authentication (Story 480)
+
+**Goal:** Cryptographic node identity for the distributed mesh.
+
+Nodes already carry an Ed25519 pubkey as their `node_id` in `NodePresenceCrdt`.
+Work:
+1. Sign each `SignedOp` with the node's Ed25519 key before broadcast.
+2. Verify signatures on receipt in `crdt_sync.rs` before applying ops.
+3. Expose the node's public key via `NodePresenceCrdt.address` so peers can
+   bootstrap trust.
+4. Add a key-rotation path for long-lived nodes.
+
+### Phase E — Build Agent Mode Polish (Story 479)
+
+**Goal:** Stable headless build-agent mode (`huskies --rendezvous`) for
+distributing story processing across multiple machines.
+
+Work:
+1. Resolve claim-timeout races: if a node claims a story and dies, the claim
+   should expire after a configurable TTL and be re-claimable.
+2. Stale merge-job lock (Bug 498) — a lock left by a dead node should be
+   detectable and clearable by the surviving cluster.
+3. CRDT Lamport clock fix (511) is a prerequisite — distributed agents need
+   monotonically increasing sequences to converge correctly.
+
+---
+
+## 3. Dependency Graph
+
+```
+Phase A (State Machine)
+    ↓
+Phase B (Transport Registry)     Phase C (CRDT Cleanup: 511, 518, 513, 517, 521, 519)
+                                      ↓
+                                 Phase D (Cryptographic Auth)
+                                      ↓
+                                 Phase E (Build Agent Polish)
+```
+
+Phase A and C can progress in parallel. Phase B is independent of C/D/E.
+Phase D requires Phase C (especially 511 and 518). Phase E requires Phase D.
+
+---
+
+## 4. What NOT to Do
+
+- **Don't split `crdt_state.rs` prematurely.** It's large but internally
+  cohesive. A split should wait until the cleanup stories (Phase C) are done.
+- **Don't add a transport abstraction layer before fixing Bug 501.** A registry
+  that instantiates a broken Matrix bot just propagates the bug.
+- **Don't extract `http/ws.rs` to a service module before Phase A is done.**
+  The WebSocket handler touches pipeline state in string form; migrating it
+  while the state machine migration is in progress will cause double-churn.
@@ -1,12 +1,12 @@
 {
 	"name": "huskies",
-	"version": "0.10.4",
+	"version": "0.11.0",
 	"lockfileVersion": 3,
 	"requires": true,
 	"packages": {
 		"": {
 			"name": "huskies",
-			"version": "0.10.4",
+			"version": "0.11.0",
 			"dependencies": {
 				"@types/react-syntax-highlighter": "^15.5.13",
 				"react": "^19.1.0",
@@ -3832,9 +3832,9 @@
 			}
 		},
 		"node_modules/postcss": {
-			"version": "8.5.8",
-			"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz",
-			"integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==",
+			"version": "8.5.12",
+			"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
+			"integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
 			"dev": true,
 			"funding": [
 				{
@@ -1,7 +1,7 @@
 {
 	"name": "huskies",
 	"private": true,
-	"version": "0.10.4",
+	"version": "0.11.0",
 	"type": "module",
 	"scripts": {
 		"dev": "vite",
@@ -209,6 +209,16 @@ body,
 	}
 }

+/* Spinner for in-progress deterministic merges */
+@keyframes spin {
+	from {
+		transform: rotate(0deg);
+	}
+	to {
+		transform: rotate(360deg);
+	}
+}
+
 /* Agent lozenge appearance animation (simulates arriving from agents panel) */
@keyframes agentAppear {
 	from {
@@ -31,6 +31,7 @@ function App() {
 	}, []);

 	React.useEffect(() => {
+		if (isGateway === null || isGateway) return;
 		let active = true;
 		function fetchOAuthStatus() {
 			api
@@ -46,9 +47,14 @@ function App() {
 			active = false;
 			window.clearInterval(intervalId);
 		};
-	}, []);
+	}, [isGateway]);

 	React.useEffect(() => {
+		if (isGateway === null) return;
+		if (isGateway) {
+			setIsCheckingProject(false);
+			return;
+		}
 		api
 			.getCurrentProject()
 			.then((path) => {
@@ -60,7 +66,7 @@ function App() {
 			.finally(() => {
 				setIsCheckingProject(false);
 			});
-	}, []);
+	}, [isGateway]);

 	React.useEffect(() => {
 		if (projectPath) {
@@ -74,13 +80,15 @@ function App() {
 	}, [projectPath]);

 	React.useEffect(() => {
+		if (isGateway === null || isGateway) return;
 		api
 			.getKnownProjects()
 			.then((projects) => setKnownProjects(projects))
 			.catch((error) => console.error(error));
-	}, []);
+	}, [isGateway]);

 	React.useEffect(() => {
+		if (isGateway === null || isGateway) return;
 		let active = true;
 		api
 			.getHomeDirectory()
@@ -102,7 +110,7 @@ function App() {
 		return () => {
 			active = false;
 		};
-	}, []);
+	}, [isGateway]);

 	const {
 		matchList,
@@ -0,0 +1,151 @@
+/**
+ * Test helpers for stubbing the WebSocket used by `rpcCall`.
+ *
+ * `rpcCall` opens a transient WebSocket, sends an `rpc_request` frame, and
+ * resolves once the matching `rpc_response` arrives.  `installRpcMock`
+ * installs a `WebSocket` global that records sent frames and replies with
+ * canned responses keyed by RPC method name.
+ */
+
+import { vi } from "vitest";
+
+interface MockSocket {
+	url: string;
+	sent: string[];
+	onopen: ((ev: Event) => void) | null;
+	onmessage: ((ev: { data: string }) => void) | null;
+	onerror: ((ev: Event) => void) | null;
+	onclose: ((ev: CloseEvent) => void) | null;
+	readyState: number;
+	send(data: string): void;
+	close(): void;
+}
+
+/**
+ * Test handle returned by `installMockRpcWebSocket`: records sockets and calls,
+ * lets the test register canned responses (or override responses for specific
+ * methods), and restores the real `WebSocket` constructor on cleanup.
+ */
+export interface MockRpcInstaller {
+	/** All sockets created during the test, in order. */
+	instances: MockSocket[];
+	/** All RPC method names that were called. */
+	calls: { method: string; params: Record<string, unknown> }[];
+	/**
+	 * Register a result to be returned for `method`.  If the value is a
+	 * function, it is invoked with the request params and its return value
+	 * (or the resolved promise) is used as the result.
+	 */
+	respond(method: string, result: unknown): void;
+	/** Make `method` reply with an `ok:false` response. */
+	respondError(method: string, error: string, code?: string): void;
+}
+
+/**
+ * Install a stub `WebSocket` global that synchronously resolves RPC calls
+ * with results registered via the returned [`MockRpcInstaller`].
+ */
+export function installRpcMock(): MockRpcInstaller {
+	const instances: MockSocket[] = [];
+	const calls: { method: string; params: Record<string, unknown> }[] = [];
+	const results = new Map<string, unknown>();
+	const errors = new Map<string, { error: string; code?: string }>();
+
+	class MockWebSocket implements MockSocket {
+		static readonly CONNECTING = 0;
+		static readonly OPEN = 1;
+		static readonly CLOSING = 2;
+		static readonly CLOSED = 3;
+
+		url: string;
+		sent: string[] = [];
+		onopen: ((ev: Event) => void) | null = null;
+		onmessage: ((ev: { data: string }) => void) | null = null;
+		onerror: ((ev: Event) => void) | null = null;
+		onclose: ((ev: CloseEvent) => void) | null = null;
+		readyState = 0;
+
+		constructor(url: string) {
+			this.url = url;
+			instances.push(this);
+			queueMicrotask(() => {
+				this.readyState = 1;
+				this.onopen?.(new Event("open"));
+			});
+		}
+
+		send(data: string) {
+			this.sent.push(data);
+			let frame: {
+				correlation_id?: string;
+				method?: string;
+				params?: Record<string, unknown>;
+			};
+			try {
+				frame = JSON.parse(data);
+			} catch {
+				return;
+			}
+			const { correlation_id, method, params } = frame;
+			if (!correlation_id || !method) return;
+			calls.push({ method, params: params ?? {} });
+			queueMicrotask(() => {
+				const err = errors.get(method);
+				if (err) {
+					this.onmessage?.({
+						data: JSON.stringify({
+							kind: "rpc_response",
+							version: 1,
+							correlation_id,
+							ok: false,
+							error: err.error,
+							code: err.code,
+						}),
+					});
+					return;
+				}
+				if (results.has(method)) {
+					this.onmessage?.({
+						data: JSON.stringify({
+							kind: "rpc_response",
+							version: 1,
+							correlation_id,
+							ok: true,
+							result: results.get(method),
+						}),
+					});
+					return;
+				}
+				// No registered response — synthesise NOT_FOUND so the test fails
+				// loudly instead of timing out.
+				this.onmessage?.({
+					data: JSON.stringify({
+						kind: "rpc_response",
+						version: 1,
+						correlation_id,
+						ok: false,
+						error: `no mock for ${method}`,
+						code: "NOT_FOUND",
+					}),
+				});
+			});
+		}
+
+		close() {
+			this.readyState = 3;
+		}
+	}
+
+	vi.stubGlobal("WebSocket", MockWebSocket);
+
+	return {
+		instances,
+		calls,
+		respond(method, result) {
+			results.set(method, result);
+		},
+		respondError(method, error, code) {
+			errors.set(method, { error, code });
+		},
+	};
+}
@@ -1,28 +1,16 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { AgentConfigInfo, AgentEvent, AgentInfo } from "./agents";
 import { agentsApi, subscribeAgentStream } from "./agents";
-
-const mockFetch = vi.fn();
+import { installRpcMock } from "./__test_utils__/mockRpcWebSocket";

 beforeEach(() => {
-	vi.stubGlobal("fetch", mockFetch);
+	vi.stubGlobal("fetch", vi.fn());
 });

 afterEach(() => {
 	vi.restoreAllMocks();
 });

-function okResponse(body: unknown) {
-	return new Response(JSON.stringify(body), {
-		status: 200,
-		headers: { "Content-Type": "application/json" },
-	});
-}
-
-function errorResponse(status: number, text: string) {
-	return new Response(text, { status });
-}
-
 const sampleAgent: AgentInfo = {
 	story_id: "42_story_test",
 	agent_name: "coder",
@@ -47,185 +35,97 @@ const sampleConfig: AgentConfigInfo = {

 describe("agentsApi", () => {
 	describe("startAgent", () => {
-		it("sends POST to /agents/start with story_id", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(sampleAgent));
+		it("dispatches agents.start RPC with story_id and returns AgentInfo", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("agents.start", sampleAgent);

 			const result = await agentsApi.startAgent("42_story_test");

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents/start",
-				expect.objectContaining({
-					method: "POST",
-					body: JSON.stringify({
-						story_id: "42_story_test",
-						agent_name: undefined,
-					}),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "agents.start",
+					params: { story_id: "42_story_test", agent_name: undefined },
+				},
+			]);
 			expect(result).toEqual(sampleAgent);
 		});

-		it("sends POST with optional agent_name", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(sampleAgent));
+		it("sends optional agent_name in params", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("agents.start", sampleAgent);

 			await agentsApi.startAgent("42_story_test", "coder");

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents/start",
-				expect.objectContaining({
-					body: JSON.stringify({
-						story_id: "42_story_test",
-						agent_name: "coder",
-					}),
-				}),
-			);
-		});
-
-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(sampleAgent));
-
-			await agentsApi.startAgent(
-				"42_story_test",
-				undefined,
-				"http://localhost:3002/api",
-			);
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:3002/api/agents/start",
-				expect.objectContaining({ method: "POST" }),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "agents.start",
+					params: { story_id: "42_story_test", agent_name: "coder" },
+				},
+			]);
 		});
 	});

 	describe("stopAgent", () => {
-		it("sends POST to /agents/stop with story_id and agent_name", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(true));
+		it("dispatches agents.stop RPC with story_id and agent_name", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("agents.stop", true);

 			const result = await agentsApi.stopAgent("42_story_test", "coder");

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents/stop",
-				expect.objectContaining({
-					method: "POST",
-					body: JSON.stringify({
-						story_id: "42_story_test",
-						agent_name: "coder",
-					}),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "agents.stop",
+					params: { story_id: "42_story_test", agent_name: "coder" },
+				},
+			]);
 			expect(result).toBe(true);
 		});
-
-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(false));
-
-			await agentsApi.stopAgent(
-				"42_story_test",
-				"coder",
-				"http://localhost:3002/api",
-			);
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:3002/api/agents/stop",
-				expect.objectContaining({ method: "POST" }),
-			);
-		});
-	});
-
-	describe("listAgents", () => {
-		it("sends GET to /agents and returns agent list", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([sampleAgent]));
-
-			const result = await agentsApi.listAgents();
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents",
-				expect.objectContaining({}),
-			);
-			expect(result).toEqual([sampleAgent]);
-		});
-
-		it("returns empty array when no agents running", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([]));
-
-			const result = await agentsApi.listAgents();
-			expect(result).toEqual([]);
-		});
-
-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([]));
-
-			await agentsApi.listAgents("http://localhost:3002/api");
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:3002/api/agents",
-				expect.objectContaining({}),
-			);
-		});
 	});

 	describe("getAgentConfig", () => {
-		it("sends GET to /agents/config and returns config list", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([sampleConfig]));
+		it("dispatches an agent_config.list RPC and returns the config list", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("agent_config.list", [sampleConfig]);

 			const result = await agentsApi.getAgentConfig();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents/config",
-				expect.objectContaining({}),
-			);
+			expect(rpc.calls).toEqual([
+				{ method: "agent_config.list", params: {} },
+			]);
 			expect(result).toEqual([sampleConfig]);
 		});

-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([sampleConfig]));
+		it("surfaces RPC errors visibly", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("agent_config.list", "config not found", "NOT_FOUND");

-			await agentsApi.getAgentConfig("http://localhost:3002/api");
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:3002/api/agents/config",
-				expect.objectContaining({}),
+			await expect(agentsApi.getAgentConfig()).rejects.toThrow(
+				"config not found",
 			);
 		});
 	});

 	describe("reloadConfig", () => {
-		it("sends POST to /agents/config/reload", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([sampleConfig]));
+		it("dispatches agent_config.list RPC and returns the config list", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("agent_config.list", [sampleConfig]);

 			const result = await agentsApi.reloadConfig();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/agents/config/reload",
-				expect.objectContaining({ method: "POST" }),
-			);
+			expect(rpc.calls).toEqual([
+				{ method: "agent_config.list", params: {} },
+			]);
 			expect(result).toEqual([sampleConfig]);
 		});
-
-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse([]));
-
-			await agentsApi.reloadConfig("http://localhost:3002/api");
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:3002/api/agents/config/reload",
-				expect.objectContaining({ method: "POST" }),
-			);
-		});
 	});

 	describe("error handling", () => {
-		it("throws on non-ok response with body text", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(404, "agent not found"));
+		it("surfaces RPC errors from startAgent", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("agents.start", "story not found", "NOT_FOUND");

-			await expect(agentsApi.listAgents()).rejects.toThrow("agent not found");
-		});
-
-		it("throws with status code when no body", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(500, ""));
-
-			await expect(agentsApi.listAgents()).rejects.toThrow(
-				"Request failed (500)",
+			await expect(agentsApi.startAgent("missing_story")).rejects.toThrow(
+				"story not found",
 			);
 		});
 	});
@@ -1,3 +1,5 @@
+import { rpcCall } from "./rpc";
+
 export type AgentStatusValue = "pending" | "running" | "completed" | "failed";

 export interface AgentInfo {
@@ -38,84 +40,38 @@ export interface AgentConfigInfo {
 	max_budget_usd: number | null;
 }

-const DEFAULT_API_BASE = "/api";
-
-function buildApiUrl(path: string, baseUrl = DEFAULT_API_BASE): string {
-	return `${baseUrl}${path}`;
-}
-
-async function requestJson<T>(
-	path: string,
-	options: RequestInit = {},
-	baseUrl = DEFAULT_API_BASE,
-): Promise<T> {
-	const res = await fetch(buildApiUrl(path, baseUrl), {
-		headers: {
-			"Content-Type": "application/json",
-			...(options.headers ?? {}),
-		},
-		...options,
-	});
-
-	if (!res.ok) {
-		const text = await res.text();
-		throw new Error(text || `Request failed (${res.status})`);
-	}
-
-	return res.json() as Promise<T>;
-}
-
 export const agentsApi = {
-	startAgent(storyId: string, agentName?: string, baseUrl?: string) {
-		return requestJson<AgentInfo>(
-			"/agents/start",
-			{
-				method: "POST",
-				body: JSON.stringify({
-					story_id: storyId,
-					agent_name: agentName,
-				}),
-			},
-			baseUrl,
-		);
+	startAgent(storyId: string, agentName?: string) {
+		return rpcCall<AgentInfo>("agents.start", {
+			story_id: storyId,
+			agent_name: agentName,
+		});
 	},

-	stopAgent(storyId: string, agentName: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/agents/stop",
-			{
-				method: "POST",
-				body: JSON.stringify({
-					story_id: storyId,
-					agent_name: agentName,
-				}),
-			},
-			baseUrl,
-		);
+	stopAgent(storyId: string, agentName: string) {
+		return rpcCall<boolean>("agents.stop", {
+			story_id: storyId,
+			agent_name: agentName,
+		});
 	},

-	listAgents(baseUrl?: string) {
-		return requestJson<AgentInfo[]>("/agents", {}, baseUrl);
+	listAgents(_baseUrl?: string) {
+		return rpcCall<AgentInfo[]>("active_agents.list");
 	},

-	getAgentConfig(baseUrl?: string) {
-		return requestJson<AgentConfigInfo[]>("/agents/config", {}, baseUrl);
+	getAgentConfig(_baseUrl?: string) {
+		return rpcCall<AgentConfigInfo[]>("agent_config.list");
 	},

-	reloadConfig(baseUrl?: string) {
-		return requestJson<AgentConfigInfo[]>(
-			"/agents/config/reload",
-			{ method: "POST" },
-			baseUrl,
-		);
+	reloadConfig() {
+		return rpcCall<AgentConfigInfo[]>("agent_config.list");
 	},

-	getAgentOutput(storyId: string, agentName: string, baseUrl?: string) {
-		return requestJson<{ output: string }>(
-			`/agents/${encodeURIComponent(storyId)}/${encodeURIComponent(agentName)}/output`,
-			{},
-			baseUrl,
-		);
+	getAgentOutput(storyId: string, agentName: string, _baseUrl?: string) {
+		return rpcCall<{ output: string }>("agents.get_output", {
+			story_id: storyId,
+			agent_name: agentName,
+		});
 	},
 };

@@ -1,43 +1,18 @@
-export interface BotConfig {
-	transport: string | null;
-	enabled: boolean | null;
-	homeserver: string | null;
-	username: string | null;
-	password: string | null;
-	room_ids: string[] | null;
-	slack_bot_token: string | null;
-	slack_signing_secret: string | null;
-	slack_channel_ids: string[] | null;
-}
+/**
+ * WS-RPC client for chat-bot transport config (Matrix / Slack / WhatsApp).
+ */
+import { rpcCall } from "./rpc";
+import type { BotConfigPayload } from "./rpcContract";

-const DEFAULT_API_BASE = "/api";
-
-async function requestJson<T>(
-	path: string,
-	options: RequestInit = {},
-	baseUrl = DEFAULT_API_BASE,
-): Promise<T> {
-	const res = await fetch(`${baseUrl}${path}`, {
-		headers: { "Content-Type": "application/json", ...(options.headers ?? {}) },
-		...options,
-	});
-	if (!res.ok) {
-		const text = await res.text();
-		throw new Error(text || `Request failed (${res.status})`);
-	}
-	return res.json() as Promise<T>;
-}
+/** Re-export of the wire-format `BotConfigPayload` as the client-facing `BotConfig` alias. */
+export type BotConfig = BotConfigPayload;

 export const botConfigApi = {
-	getConfig(baseUrl?: string): Promise<BotConfig> {
-		return requestJson<BotConfig>("/bot/config", {}, baseUrl);
+	getConfig(_baseUrl?: string): Promise<BotConfig> {
+		return rpcCall<BotConfig>("bot_config.get");
 	},

-	saveConfig(config: BotConfig, baseUrl?: string): Promise<BotConfig> {
-		return requestJson<BotConfig>(
-			"/bot/config",
-			{ method: "PUT", body: JSON.stringify(config) },
-			baseUrl,
-		);
+	saveConfig(config: BotConfig, _baseUrl?: string): Promise<BotConfig> {
+		return rpcCall<BotConfigPayload>("bot_config.save", config);
 	},
 };
@@ -1,5 +1,6 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { api, ChatWebSocket, resolveWsHost } from "./client";
+import { installRpcMock } from "./__test_utils__/mockRpcWebSocket";

 const mockFetch = vi.fn();

@@ -11,33 +12,21 @@ afterEach(() => {
 	vi.restoreAllMocks();
 });

-function okResponse(body: unknown) {
-	return new Response(JSON.stringify(body), {
-		status: 200,
-		headers: { "Content-Type": "application/json" },
-	});
-}
-
-function errorResponse(status: number, text: string) {
-	return new Response(text, { status });
-}
-
 describe("api client", () => {
 	describe("getCurrentProject", () => {
-		it("sends GET to /project", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse("/home/user/project"));
+		it("dispatches project.current RPC and returns the path", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("project.current", "/home/user/project");

 			const result = await api.getCurrentProject();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/project",
-				expect.objectContaining({}),
-			);
+			expect(rpc.calls).toEqual([{ method: "project.current", params: {} }]);
 			expect(result).toBe("/home/user/project");
 		});

 		it("returns null when no project open", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(null));
+			const rpc = installRpcMock();
+			rpc.respond("project.current", null);

 			const result = await api.getCurrentProject();
 			expect(result).toBeNull();
@@ -45,95 +34,119 @@ describe("api client", () => {
 	});

 	describe("openProject", () => {
-		it("sends POST with path", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse("/home/user/project"));
+		it("dispatches project.open RPC with path and returns the canonical path", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("project.open", { path: "/home/user/project" });

-			await api.openProject("/home/user/project");
+			const result = await api.openProject("/home/user/project");

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/project",
-				expect.objectContaining({
-					method: "POST",
-					body: JSON.stringify({ path: "/home/user/project" }),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "project.open",
+					params: { path: "/home/user/project" },
+				},
+			]);
+			expect(result).toBe("/home/user/project");
 		});
 	});

 	describe("closeProject", () => {
-		it("sends DELETE to /project", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(true));
+		it("dispatches project.close RPC and returns ok", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("project.close", { ok: true });

-			await api.closeProject();
+			const result = await api.closeProject();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/project",
-				expect.objectContaining({ method: "DELETE" }),
-			);
+			expect(rpc.calls).toEqual([{ method: "project.close", params: {} }]);
+			expect(result).toBe(true);
+		});
+	});
+
+	describe("forgetKnownProject", () => {
+		it("dispatches project.forget RPC with path", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("project.forget", { ok: true });
+
+			const result = await api.forgetKnownProject("/some/path");
+
+			expect(rpc.calls).toEqual([
+				{ method: "project.forget", params: { path: "/some/path" } },
+			]);
+			expect(result).toBe(true);
+		});
+	});
+
+	describe("setModelPreference", () => {
+		it("dispatches model.set_preference RPC", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("model.set_preference", { ok: true });
+
+			await api.setModelPreference("claude-sonnet-4-6");
+
+			expect(rpc.calls).toEqual([
+				{
+					method: "model.set_preference",
+					params: { model: "claude-sonnet-4-6" },
+				},
+			]);
+		});
+	});
+
+	describe("setAnthropicApiKey", () => {
+		it("dispatches anthropic.set_api_key RPC", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("anthropic.set_api_key", { ok: true });
+
+			await api.setAnthropicApiKey("sk-ant-xxx");
+
+			expect(rpc.calls).toEqual([
+				{
+					method: "anthropic.set_api_key",
+					params: { api_key: "sk-ant-xxx" },
+				},
+			]);
+		});
+	});
+
+	describe("cancelChat", () => {
+		it("dispatches chat.cancel RPC", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("chat.cancel", { ok: true });
+
+			await api.cancelChat();
+
+			expect(rpc.calls).toEqual([{ method: "chat.cancel", params: {} }]);
 		});
 	});

 	describe("getKnownProjects", () => {
-		it("returns array of project paths", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(["/a", "/b"]));
+		it("dispatches project.known RPC and returns the path list", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("project.known", ["/a", "/b"]);

 			const result = await api.getKnownProjects();
+			expect(rpc.calls).toEqual([{ method: "project.known", params: {} }]);
 			expect(result).toEqual(["/a", "/b"]);
 		});
 	});

 	describe("error handling", () => {
-		it("throws on non-ok response with body text", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(404, "Not found"));
+		it("surfaces RPC errors visibly", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("project.current", "store offline", "INTERNAL");

-			await expect(api.getCurrentProject()).rejects.toThrow("Not found");
+			await expect(api.getCurrentProject()).rejects.toThrow("store offline");
 		});

-		it("throws with status code when no body", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(500, ""));
+		it("surfaces RPC errors visibly for write methods", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("project.open", "No such directory", "INTERNAL");

-			await expect(api.getCurrentProject()).rejects.toThrow(
-				"Request failed (500)",
+			await expect(api.openProject("/some/path")).rejects.toThrow(
+				"No such directory",
 			);
 		});
-	});

-	describe("searchFiles", () => {
-		it("sends POST with query", async () => {
-			mockFetch.mockResolvedValueOnce(
-				okResponse([{ path: "src/main.rs", matches: 1 }]),
-			);
-
-			const result = await api.searchFiles("hello");
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/fs/search",
-				expect.objectContaining({
-					method: "POST",
-					body: JSON.stringify({ query: "hello" }),
-				}),
-			);
-			expect(result).toHaveLength(1);
-		});
-	});
-
-	describe("execShell", () => {
-		it("sends POST with command and args", async () => {
-			mockFetch.mockResolvedValueOnce(
-				okResponse({ stdout: "output", stderr: "", exit_code: 0 }),
-			);
-
-			const result = await api.execShell("ls", ["-la"]);
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/shell/exec",
-				expect.objectContaining({
-					method: "POST",
-					body: JSON.stringify({ command: "ls", args: ["-la"] }),
-				}),
-			);
-			expect(result.exit_code).toBe(0);
-		});
 	});

 	describe("resolveWsHost", () => {
@@ -267,6 +280,7 @@ describe("ChatWebSocket", () => {
 			qa: [],
 			merge: [],
 			done: [],
+			deterministic_merges_in_flight: [],
 		};
 		instances[1].simulateMessage({ type: "pipeline_state", ...freshState });

@@ -1,748 +0,0 @@
-export type WsRequest =
-	| {
-			type: "chat";
-			messages: Message[];
-			config: ProviderConfig;
-	  }
-	| {
-			type: "cancel";
-	  }
-	| {
-			type: "permission_response";
-			request_id: string;
-			approved: boolean;
-			always_allow: boolean;
-	  }
-	| { type: "ping" }
-	| {
-			type: "side_question";
-			question: string;
-			context_messages: Message[];
-			config: ProviderConfig;
-	  };
-
-export interface WizardStepInfo {
-	step: string;
-	label: string;
-	status: string;
-	content?: string;
-}
-
-export interface WizardStateData {
-	steps: WizardStepInfo[];
-	current_step_index: number;
-	completed: boolean;
-}
-
-export interface AgentAssignment {
-	agent_name: string;
-	model: string | null;
-	status: string;
-}
-
-export interface PipelineStageItem {
-	story_id: string;
-	name: string | null;
-	error: string | null;
-	merge_failure: string | null;
-	agent: AgentAssignment | null;
-	review_hold: boolean | null;
-	qa: string | null;
-	depends_on: number[] | null;
-}
-
-export interface PipelineState {
-	backlog: PipelineStageItem[];
-	current: PipelineStageItem[];
-	qa: PipelineStageItem[];
-	merge: PipelineStageItem[];
-	done: PipelineStageItem[];
-}
-
-export type WsResponse =
-	| { type: "token"; content: string }
-	| { type: "update"; messages: Message[] }
-	| { type: "session_id"; session_id: string }
-	| { type: "error"; message: string }
-	| {
-			type: "pipeline_state";
-			backlog: PipelineStageItem[];
-			current: PipelineStageItem[];
-			qa: PipelineStageItem[];
-			merge: PipelineStageItem[];
-			done: PipelineStageItem[];
-	  }
-	| {
-			type: "permission_request";
-			request_id: string;
-			tool_name: string;
-			tool_input: Record<string, unknown>;
-	  }
-	| { type: "tool_activity"; tool_name: string }
-	| {
-			type: "reconciliation_progress";
-			story_id: string;
-			status: string;
-			message: string;
-	  }
-	/** `.story_kit/project.toml` was modified; re-fetch the agent roster. */
-	| { type: "agent_config_changed" }
-	/** An agent started, stopped, or changed state; re-fetch agent list. */
-	| { type: "agent_state_changed" }
-	| { type: "tool_activity"; tool_name: string }
-	/** Heartbeat response confirming the connection is alive. */
-	| { type: "pong" }
-	/** Sent on connect when the project still needs onboarding (specs are placeholders). */
-	| { type: "onboarding_status"; needs_onboarding: boolean }
-	/** Sent on connect when a setup wizard is active. */
-	| {
-			type: "wizard_state";
-			steps: WizardStepInfo[];
-			current_step_index: number;
-			completed: boolean;
-	  }
-	/** Streaming thinking token from an extended-thinking block, separate from regular text. */
-	| { type: "thinking_token"; content: string }
-	/** Streaming token from a /btw side question response. */
-	| { type: "side_question_token"; content: string }
-	/** Final signal that the /btw side question has been fully answered. */
-	| { type: "side_question_done"; response: string }
-	/** A single server log entry (bulk on connect, then live). */
-	| { type: "log_entry"; timestamp: string; level: string; message: string };
-
-export interface ProviderConfig {
-	provider: string;
-	model: string;
-	base_url?: string;
-	enable_tools?: boolean;
-	session_id?: string;
-}
-
-export type Role = "system" | "user" | "assistant" | "tool";
-
-export interface ToolCall {
-	id?: string;
-	type: string;
-	function: {
-		name: string;
-		arguments: string;
-	};
-}
-
-export interface Message {
-	role: Role;
-	content: string;
-	tool_calls?: ToolCall[];
-	tool_call_id?: string;
-}
-
-export interface AnthropicModelInfo {
-	id: string;
-	context_window: number;
-}
-
-export interface WorkItemContent {
-	content: string;
-	stage: string;
-	name: string | null;
-	agent: string | null;
-}
-
-export interface TestCaseResult {
-	name: string;
-	status: "pass" | "fail";
-	details: string | null;
-}
-
-export interface TestResultsResponse {
-	unit: TestCaseResult[];
-	integration: TestCaseResult[];
-}
-
-export interface FileEntry {
-	name: string;
-	kind: "file" | "dir";
-}
-
-export interface SearchResult {
-	path: string;
-	matches: number;
-}
-
-export interface AgentCostEntry {
-	agent_name: string;
-	model: string | null;
-	input_tokens: number;
-	output_tokens: number;
-	cache_creation_input_tokens: number;
-	cache_read_input_tokens: number;
-	total_cost_usd: number;
-}
-
-export interface TokenCostResponse {
-	total_cost_usd: number;
-	agents: AgentCostEntry[];
-}
-
-export interface TokenUsageRecord {
-	story_id: string;
-	agent_name: string;
-	model: string | null;
-	timestamp: string;
-	input_tokens: number;
-	output_tokens: number;
-	cache_creation_input_tokens: number;
-	cache_read_input_tokens: number;
-	total_cost_usd: number;
-}
-
-export interface AllTokenUsageResponse {
-	records: TokenUsageRecord[];
-}
-
-export interface CommandOutput {
-	stdout: string;
-	stderr: string;
-	exit_code: number;
-}
-
-export interface OAuthStatus {
-	authenticated: boolean;
-	expired: boolean;
-	expires_at: number;
-	has_refresh_token: boolean;
-}
-
-declare const __HUSKIES_PORT__: string;
-
-const DEFAULT_API_BASE = "/api";
-const DEFAULT_WS_PATH = "/ws";
-
-export function resolveWsHost(
-	isDev: boolean,
-	envPort: string | undefined,
-	locationHost: string,
-): string {
-	return isDev ? `127.0.0.1:${envPort || "3001"}` : locationHost;
-}
-
-function buildApiUrl(path: string, baseUrl = DEFAULT_API_BASE): string {
-	return `${baseUrl}${path}`;
-}
-
-async function requestJson<T>(
-	path: string,
-	options: RequestInit = {},
-	baseUrl = DEFAULT_API_BASE,
-): Promise<T> {
-	const res = await fetch(buildApiUrl(path, baseUrl), {
-		headers: {
-			"Content-Type": "application/json",
-			...(options.headers ?? {}),
-		},
-		...options,
-	});
-
-	if (!res.ok) {
-		const text = await res.text();
-		throw new Error(text || `Request failed (${res.status})`);
-	}
-
-	return res.json() as Promise<T>;
-}
-
-export const api = {
-	getCurrentProject(baseUrl?: string) {
-		return requestJson<string | null>("/project", {}, baseUrl);
-	},
-	getKnownProjects(baseUrl?: string) {
-		return requestJson<string[]>("/projects", {}, baseUrl);
-	},
-	forgetKnownProject(path: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/projects/forget",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	openProject(path: string, baseUrl?: string) {
-		return requestJson<string>(
-			"/project",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	closeProject(baseUrl?: string) {
-		return requestJson<boolean>("/project", { method: "DELETE" }, baseUrl);
-	},
-	getModelPreference(baseUrl?: string) {
-		return requestJson<string | null>("/model", {}, baseUrl);
-	},
-	setModelPreference(model: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/model",
-			{ method: "POST", body: JSON.stringify({ model }) },
-			baseUrl,
-		);
-	},
-	getOllamaModels(baseUrlParam?: string, baseUrl?: string) {
-		const url = new URL(
-			buildApiUrl("/ollama/models", baseUrl),
-			window.location.origin,
-		);
-		if (baseUrlParam) {
-			url.searchParams.set("base_url", baseUrlParam);
-		}
-		return requestJson<string[]>(url.pathname + url.search, {}, "");
-	},
-	getAnthropicApiKeyExists(baseUrl?: string) {
-		return requestJson<boolean>("/anthropic/key/exists", {}, baseUrl);
-	},
-	getAnthropicModels(baseUrl?: string) {
-		return requestJson<AnthropicModelInfo[]>("/anthropic/models", {}, baseUrl);
-	},
-	setAnthropicApiKey(api_key: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/anthropic/key",
-			{ method: "POST", body: JSON.stringify({ api_key }) },
-			baseUrl,
-		);
-	},
-	readFile(path: string, baseUrl?: string) {
-		return requestJson<string>(
-			"/fs/read",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	writeFile(path: string, content: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/fs/write",
-			{ method: "POST", body: JSON.stringify({ path, content }) },
-			baseUrl,
-		);
-	},
-	listDirectory(path: string, baseUrl?: string) {
-		return requestJson<FileEntry[]>(
-			"/fs/list",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	listDirectoryAbsolute(path: string, baseUrl?: string) {
-		return requestJson<FileEntry[]>(
-			"/io/fs/list/absolute",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	createDirectoryAbsolute(path: string, baseUrl?: string) {
-		return requestJson<boolean>(
-			"/io/fs/create/absolute",
-			{ method: "POST", body: JSON.stringify({ path }) },
-			baseUrl,
-		);
-	},
-	getHomeDirectory(baseUrl?: string) {
-		return requestJson<string>("/io/fs/home", {}, baseUrl);
-	},
-	listProjectFiles(baseUrl?: string) {
-		return requestJson<string[]>("/io/fs/files", {}, baseUrl);
-	},
-	searchFiles(query: string, baseUrl?: string) {
-		return requestJson<SearchResult[]>(
-			"/fs/search",
-			{ method: "POST", body: JSON.stringify({ query }) },
-			baseUrl,
-		);
-	},
-	execShell(command: string, args: string[], baseUrl?: string) {
-		return requestJson<CommandOutput>(
-			"/shell/exec",
-			{ method: "POST", body: JSON.stringify({ command, args }) },
-			baseUrl,
-		);
-	},
-	cancelChat(baseUrl?: string) {
-		return requestJson<boolean>("/chat/cancel", { method: "POST" }, baseUrl);
-	},
-	getWorkItemContent(storyId: string, baseUrl?: string) {
-		return requestJson<WorkItemContent>(
-			`/work-items/${encodeURIComponent(storyId)}`,
-			{},
-			baseUrl,
-		);
-	},
-	getTestResults(storyId: string, baseUrl?: string) {
-		return requestJson<TestResultsResponse | null>(
-			`/work-items/${encodeURIComponent(storyId)}/test-results`,
-			{},
-			baseUrl,
-		);
-	},
-	getTokenCost(storyId: string, baseUrl?: string) {
-		return requestJson<TokenCostResponse>(
-			`/work-items/${encodeURIComponent(storyId)}/token-cost`,
-			{},
-			baseUrl,
-		);
-	},
-	getAllTokenUsage(baseUrl?: string) {
-		return requestJson<AllTokenUsageResponse>("/token-usage", {}, baseUrl);
-	},
-	/** Trigger a server rebuild and restart. */
-	rebuildAndRestart() {
-		return callMcpTool("rebuild_and_restart", {});
-	},
-	/** Approve a story in QA, moving it to merge. */
-	approveQa(storyId: string) {
-		return callMcpTool("approve_qa", { story_id: storyId });
-	},
-	/** Reject a story in QA, moving it back to current with notes. */
-	rejectQa(storyId: string, notes: string) {
-		return callMcpTool("reject_qa", { story_id: storyId, notes });
-	},
-	/** Launch the QA app for a story's worktree. */
-	launchQaApp(storyId: string) {
-		return callMcpTool("launch_qa_app", { story_id: storyId });
-	},
-	/** Delete a story from the pipeline, stopping any running agent and removing the worktree. */
-	deleteStory(storyId: string) {
-		return callMcpTool("delete_story", { story_id: storyId });
-	},
-	/** Fetch OAuth status from the server. */
-	getOAuthStatus() {
-		return requestJson<OAuthStatus>("/oauth/status", {}, "");
-	},
-	/** Execute a bot slash command without LLM invocation. Returns markdown response text. */
-	botCommand(command: string, args: string, baseUrl?: string) {
-		return requestJson<{ response: string }>(
-			"/bot/command",
-			{ method: "POST", body: JSON.stringify({ command, args }) },
-			baseUrl,
-		);
-	},
-};
-
-async function callMcpTool(
-	toolName: string,
-	args: Record<string, unknown>,
-): Promise<string> {
-	const res = await fetch("/mcp", {
-		method: "POST",
-		headers: { "Content-Type": "application/json" },
-		body: JSON.stringify({
-			jsonrpc: "2.0",
-			id: 1,
-			method: "tools/call",
-			params: { name: toolName, arguments: args },
-		}),
-	});
-	const json = await res.json();
-	if (json.error) {
-		throw new Error(json.error.message);
-	}
-	const text = json.result?.content?.[0]?.text ?? "";
-	return text;
-}
-
-export class ChatWebSocket {
-	private static sharedSocket: WebSocket | null = null;
-	private static refCount = 0;
-	private socket?: WebSocket;
-	private onToken?: (content: string) => void;
-	private onThinkingToken?: (content: string) => void;
-	private onUpdate?: (messages: Message[]) => void;
-	private onSessionId?: (sessionId: string) => void;
-	private onError?: (message: string) => void;
-	private onPipelineState?: (state: PipelineState) => void;
-	private onPermissionRequest?: (
-		requestId: string,
-		toolName: string,
-		toolInput: Record<string, unknown>,
-	) => void;
-	private onActivity?: (toolName: string) => void;
-	private onReconciliationProgress?: (
-		storyId: string,
-		status: string,
-		message: string,
-	) => void;
-	private onAgentConfigChanged?: () => void;
-	private onAgentStateChanged?: () => void;
-	private onOnboardingStatus?: (needsOnboarding: boolean) => void;
-	private onWizardState?: (state: WizardStateData) => void;
-	private onSideQuestionToken?: (content: string) => void;
-	private onSideQuestionDone?: (response: string) => void;
-	private onLogEntry?: (
-		timestamp: string,
-		level: string,
-		message: string,
-	) => void;
-	private onConnected?: () => void;
-	private connected = false;
-	private closeTimer?: number;
-	private wsPath = DEFAULT_WS_PATH;
-	private reconnectTimer?: number;
-	private reconnectDelay = 1000;
-	private shouldReconnect = false;
-	private heartbeatInterval?: number;
-	private heartbeatTimeout?: number;
-	private static readonly HEARTBEAT_INTERVAL = 30_000;
-	private static readonly HEARTBEAT_TIMEOUT = 5_000;
-
-	private _startHeartbeat(): void {
-		this._stopHeartbeat();
-		this.heartbeatInterval = window.setInterval(() => {
-			if (!this.socket || this.socket.readyState !== WebSocket.OPEN) return;
-			const ping: WsRequest = { type: "ping" };
-			this.socket.send(JSON.stringify(ping));
-			this.heartbeatTimeout = window.setTimeout(() => {
-				// No pong received within timeout; close socket to trigger reconnect.
-				this.socket?.close();
-			}, ChatWebSocket.HEARTBEAT_TIMEOUT);
-		}, ChatWebSocket.HEARTBEAT_INTERVAL);
-	}
-
-	private _stopHeartbeat(): void {
-		window.clearInterval(this.heartbeatInterval);
-		window.clearTimeout(this.heartbeatTimeout);
-		this.heartbeatInterval = undefined;
-		this.heartbeatTimeout = undefined;
-	}
-
-	private _buildWsUrl(): string {
-		const protocol = window.location.protocol === "https:" ? "wss" : "ws";
-		const wsHost = resolveWsHost(
-			import.meta.env.DEV,
-			typeof __HUSKIES_PORT__ !== "undefined" ? __HUSKIES_PORT__ : undefined,
-			window.location.host,
-		);
-		return `${protocol}://${wsHost}${this.wsPath}`;
-	}
-
-	private _attachHandlers(): void {
-		if (!this.socket) return;
-		this.socket.onopen = () => {
-			this.reconnectDelay = 1000;
-			this._startHeartbeat();
-			this.onConnected?.();
-		};
-		this.socket.onmessage = (event) => {
-			try {
-				const data = JSON.parse(event.data) as WsResponse;
-				if (data.type === "token") this.onToken?.(data.content);
-				if (data.type === "thinking_token")
-					this.onThinkingToken?.(data.content);
-				if (data.type === "update") this.onUpdate?.(data.messages);
-				if (data.type === "session_id") this.onSessionId?.(data.session_id);
-				if (data.type === "error") this.onError?.(data.message);
-				if (data.type === "pipeline_state")
-					this.onPipelineState?.({
-						backlog: data.backlog,
-						current: data.current,
-						qa: data.qa,
-						merge: data.merge,
-						done: data.done,
-					});
-				if (data.type === "permission_request")
-					this.onPermissionRequest?.(
-						data.request_id,
-						data.tool_name,
-						data.tool_input,
-					);
-				if (data.type === "tool_activity") this.onActivity?.(data.tool_name);
-				if (data.type === "reconciliation_progress")
-					this.onReconciliationProgress?.(
-						data.story_id,
-						data.status,
-						data.message,
-					);
-				if (data.type === "agent_config_changed") this.onAgentConfigChanged?.();
-				if (data.type === "agent_state_changed") this.onAgentStateChanged?.();
-				if (data.type === "onboarding_status")
-					this.onOnboardingStatus?.(data.needs_onboarding);
-				if (data.type === "wizard_state")
-					this.onWizardState?.({
-						steps: data.steps,
-						current_step_index: data.current_step_index,
-						completed: data.completed,
-					});
-				if (data.type === "side_question_token")
-					this.onSideQuestionToken?.(data.content);
-				if (data.type === "side_question_done")
-					this.onSideQuestionDone?.(data.response);
-				if (data.type === "log_entry")
-					this.onLogEntry?.(data.timestamp, data.level, data.message);
-				if (data.type === "pong") {
-					window.clearTimeout(this.heartbeatTimeout);
-					this.heartbeatTimeout = undefined;
-				}
-			} catch (err) {
-				this.onError?.(String(err));
-			}
-		};
-		this.socket.onerror = () => {
-			this.onError?.("WebSocket error");
-		};
-		this.socket.onclose = () => {
-			if (this.shouldReconnect && this.connected) {
-				this._scheduleReconnect();
-			}
-		};
-	}
-
-	private _scheduleReconnect(): void {
-		window.clearTimeout(this.reconnectTimer);
-		const delay = this.reconnectDelay;
-		this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000);
-		this.reconnectTimer = window.setTimeout(() => {
-			this.reconnectTimer = undefined;
-			const wsUrl = this._buildWsUrl();
-			ChatWebSocket.sharedSocket = new WebSocket(wsUrl);
-			this.socket = ChatWebSocket.sharedSocket;
-			this._attachHandlers();
-		}, delay);
-	}
-
-	connect(
-		handlers: {
-			onToken?: (content: string) => void;
-			onThinkingToken?: (content: string) => void;
-			onUpdate?: (messages: Message[]) => void;
-			onSessionId?: (sessionId: string) => void;
-			onError?: (message: string) => void;
-			onPipelineState?: (state: PipelineState) => void;
-			onPermissionRequest?: (
-				requestId: string,
-				toolName: string,
-				toolInput: Record<string, unknown>,
-			) => void;
-			onActivity?: (toolName: string) => void;
-			onReconciliationProgress?: (
-				storyId: string,
-				status: string,
-				message: string,
-			) => void;
-			onAgentConfigChanged?: () => void;
-			onAgentStateChanged?: () => void;
-			onOnboardingStatus?: (needsOnboarding: boolean) => void;
-			onWizardState?: (state: WizardStateData) => void;
-			onSideQuestionToken?: (content: string) => void;
-			onSideQuestionDone?: (response: string) => void;
-			onLogEntry?: (timestamp: string, level: string, message: string) => void;
-			onConnected?: () => void;
-		},
-		wsPath = DEFAULT_WS_PATH,
-	) {
-		this.onToken = handlers.onToken;
-		this.onThinkingToken = handlers.onThinkingToken;
-		this.onUpdate = handlers.onUpdate;
-		this.onSessionId = handlers.onSessionId;
-		this.onError = handlers.onError;
-		this.onPipelineState = handlers.onPipelineState;
-		this.onPermissionRequest = handlers.onPermissionRequest;
-		this.onActivity = handlers.onActivity;
-		this.onReconciliationProgress = handlers.onReconciliationProgress;
-		this.onAgentConfigChanged = handlers.onAgentConfigChanged;
-		this.onAgentStateChanged = handlers.onAgentStateChanged;
-		this.onOnboardingStatus = handlers.onOnboardingStatus;
-		this.onWizardState = handlers.onWizardState;
-		this.onSideQuestionToken = handlers.onSideQuestionToken;
-		this.onSideQuestionDone = handlers.onSideQuestionDone;
-		this.onLogEntry = handlers.onLogEntry;
-		this.onConnected = handlers.onConnected;
-		this.wsPath = wsPath;
-		this.shouldReconnect = true;
-
-		if (this.connected) {
-			return;
-		}
-		this.connected = true;
-		ChatWebSocket.refCount += 1;
-
-		if (
-			!ChatWebSocket.sharedSocket ||
-			ChatWebSocket.sharedSocket.readyState === WebSocket.CLOSED ||
-			ChatWebSocket.sharedSocket.readyState === WebSocket.CLOSING
-		) {
-			const wsUrl = this._buildWsUrl();
-			ChatWebSocket.sharedSocket = new WebSocket(wsUrl);
-		}
-		this.socket = ChatWebSocket.sharedSocket;
-		this._attachHandlers();
-	}
-
-	sendChat(messages: Message[], config: ProviderConfig) {
-		this.send({ type: "chat", messages, config });
-	}
-
-	sendSideQuestion(
-		question: string,
-		contextMessages: Message[],
-		config: ProviderConfig,
-	) {
-		this.send({
-			type: "side_question",
-			question,
-			context_messages: contextMessages,
-			config,
-		});
-	}
-
-	cancel() {
-		this.send({ type: "cancel" });
-	}
-
-	sendPermissionResponse(
-		requestId: string,
-		approved: boolean,
-		alwaysAllow = false,
-	) {
-		this.send({
-			type: "permission_response",
-			request_id: requestId,
-			approved,
-			always_allow: alwaysAllow,
-		});
-	}
-
-	close() {
-		this.shouldReconnect = false;
-		this._stopHeartbeat();
-		window.clearTimeout(this.reconnectTimer);
-		this.reconnectTimer = undefined;
-
-		if (!this.connected) return;
-		this.connected = false;
-		ChatWebSocket.refCount = Math.max(0, ChatWebSocket.refCount - 1);
-
-		if (import.meta.env.DEV) {
-			if (this.closeTimer) {
-				window.clearTimeout(this.closeTimer);
-			}
-			this.closeTimer = window.setTimeout(() => {
-				if (ChatWebSocket.refCount === 0) {
-					ChatWebSocket.sharedSocket?.close();
-					ChatWebSocket.sharedSocket = null;
-				}
-				this.socket = ChatWebSocket.sharedSocket ?? undefined;
-				this.closeTimer = undefined;
-			}, 250);
-			return;
-		}
-
-		if (ChatWebSocket.refCount === 0) {
-			ChatWebSocket.sharedSocket?.close();
-			ChatWebSocket.sharedSocket = null;
-		}
-		this.socket = ChatWebSocket.sharedSocket ?? undefined;
-	}
-
-	private send(payload: WsRequest) {
-		if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
-			this.onError?.("WebSocket is not connected");
-			return;
-		}
-		this.socket.send(JSON.stringify(payload));
-	}
-}
@@ -0,0 +1,169 @@
+/**
+ * HTTP transport layer for the Huskies API client.
+ * Provides the `callMcpTool` function for MCP JSON-RPC calls, the
+ * `resolveWsHost` utility, and the `api` object exposing all endpoints.
+ */
+
+import { rpcCall } from "../rpc";
+import type {
+	OkResult,
+	OpenProjectResult,
+	SetAnthropicApiKeyParams,
+	SetModelPreferenceParams,
+} from "../rpcContract";
+import type {
+	AllTokenUsageResponse,
+	AnthropicModelInfo,
+	FileEntry,
+	OAuthStatus,
+	TestResultsResponse,
+	TokenCostResponse,
+	WorkItemContent,
+} from "./types";
+
+/**
+ * Resolve the WebSocket host to connect to.
+ * In development, uses the injected port (or 3001); in production, uses the
+ * current page's host so the socket connects to the same origin.
+ */
+export function resolveWsHost(
+	isDev: boolean,
+	envPort: string | undefined,
+	locationHost: string,
+): string {
+	return isDev ? `127.0.0.1:${envPort || "3001"}` : locationHost;
+}
+
+/**
+ * Invoke an MCP tool via the server's JSON-RPC `/mcp` endpoint.
+ * Returns the first text content block from the tool result, or an empty
+ * string if the result has no content.
+ */
+export async function callMcpTool(
+	toolName: string,
+	args: Record<string, unknown>,
+): Promise<string> {
+	const res = await fetch("/mcp", {
+		method: "POST",
+		headers: { "Content-Type": "application/json" },
+		body: JSON.stringify({
+			jsonrpc: "2.0",
+			id: 1,
+			method: "tools/call",
+			params: { name: toolName, arguments: args },
+		}),
+	});
+	const json = await res.json();
+	if (json.error) {
+		throw new Error(json.error.message);
+	}
+	const text = json.result?.content?.[0]?.text ?? "";
+	return text;
+}
+
+/** Typed wrappers for all Huskies server endpoints. */
+export const api = {
+	getCurrentProject(_baseUrl?: string) {
+		return rpcCall<string | null>("project.current");
+	},
+	getKnownProjects(_baseUrl?: string) {
+		return rpcCall<string[]>("project.known");
+	},
+	async forgetKnownProject(path: string, _baseUrl?: string) {
+		const r = await rpcCall<OkResult>("project.forget", { path });
+		return r.ok;
+	},
+	async openProject(path: string, _baseUrl?: string) {
+		const r = await rpcCall<OpenProjectResult>("project.open", { path });
+		return r.path;
+	},
+	async closeProject(_baseUrl?: string) {
+		const r = await rpcCall<OkResult>("project.close");
+		return r.ok;
+	},
+	getModelPreference(_baseUrl?: string) {
+		return rpcCall<string | null>("model.get_preference");
+	},
+	async setModelPreference(model: string, _baseUrl?: string) {
+		const params: SetModelPreferenceParams = { model };
+		const r = await rpcCall<OkResult>("model.set_preference", params);
+		return r.ok;
+	},
+	getOllamaModels(baseUrlParam?: string, _baseUrl?: string) {
+		return rpcCall<string[]>(
+			"ollama.list_models",
+			baseUrlParam ? { base_url: baseUrlParam } : {},
+		);
+	},
+	getAnthropicApiKeyExists(_baseUrl?: string) {
+		return rpcCall<boolean>("anthropic.key_exists");
+	},
+	getAnthropicModels(_baseUrl?: string) {
+		return rpcCall<AnthropicModelInfo[]>("anthropic.list_models");
+	},
+	async setAnthropicApiKey(api_key: string, _baseUrl?: string) {
+		const params: SetAnthropicApiKeyParams = { api_key };
+		const r = await rpcCall<OkResult>("anthropic.set_api_key", params);
+		return r.ok;
+	},
+	readFile(path: string) {
+		return rpcCall<string>("io.read_file", { path });
+	},
+	listDirectoryAbsolute(path: string) {
+		return rpcCall<FileEntry[]>("io.list_directory_absolute", { path });
+	},
+	getHomeDirectory(_baseUrl?: string) {
+		return rpcCall<string>("io.home_directory");
+	},
+	listProjectFiles(_baseUrl?: string) {
+		return rpcCall<string[]>("io.list_project_files");
+	},
+	async cancelChat(_baseUrl?: string) {
+		const r = await rpcCall<OkResult>("chat.cancel");
+		return r.ok;
+	},
+	getWorkItemContent(storyId: string, _baseUrl?: string) {
+		return rpcCall<WorkItemContent>("work_items.get", { story_id: storyId });
+	},
+	getTestResults(storyId: string, _baseUrl?: string) {
+		return rpcCall<TestResultsResponse | null>("work_items.test_results", {
+			story_id: storyId,
+		});
+	},
+	getTokenCost(storyId: string, _baseUrl?: string) {
+		return rpcCall<TokenCostResponse>("work_items.token_cost", {
+			story_id: storyId,
+		});
+	},
+	getAllTokenUsage(_baseUrl?: string) {
+		return rpcCall<AllTokenUsageResponse>("token_usage.all");
+	},
+	/** Trigger a server rebuild and restart. */
+	rebuildAndRestart() {
+		return callMcpTool("rebuild_and_restart", {});
+	},
+	/** Approve a story in QA, moving it to merge. */
+	approveQa(storyId: string) {
+		return callMcpTool("approve_qa", { story_id: storyId });
+	},
+	/** Reject a story in QA, moving it back to current with notes. */
+	rejectQa(storyId: string, notes: string) {
+		return callMcpTool("reject_qa", { story_id: storyId, notes });
+	},
+	/** Launch the QA app for a story's worktree. */
+	launchQaApp(storyId: string) {
+		return callMcpTool("launch_qa_app", { story_id: storyId });
+	},
+	/** Delete a story from the pipeline, stopping any running agent and removing the worktree. */
+	deleteStory(storyId: string) {
+		return callMcpTool("delete_story", { story_id: storyId });
+	},
+	/** Fetch OAuth status from the server. */
+	getOAuthStatus() {
+		return rpcCall<OAuthStatus>("oauth.status");
+	},
+	/** Execute a bot slash command without LLM invocation. Returns markdown response text. */
+	botCommand(command: string, args: string) {
+		return rpcCall<{ response: string }>("bot.command", { command, args });
+	},
+};
@@ -0,0 +1,38 @@
+/**
+ * Public API surface for the Huskies client module.
+ * Re-exports all types, HTTP helpers, and the WebSocket client so that
+ * callers importing from `api/client` continue to work without changes
+ * after the module was decomposed into focused submodules.
+ */
+
+/** All domain types and interfaces from the client module. */
+export type {
+	AgentAssignment,
+	AgentCostEntry,
+	AllTokenUsageResponse,
+	AnthropicModelInfo,
+	CommandOutput,
+	FileEntry,
+	Message,
+	OAuthStatus,
+	PipelineState,
+	PipelineStageItem,
+	ProviderConfig,
+	Role,
+	SearchResult,
+	StatusEvent,
+	TestCaseResult,
+	TestResultsResponse,
+	TokenCostResponse,
+	TokenUsageRecord,
+	ToolCall,
+	WizardStateData,
+	WizardStepInfo,
+	WorkItemContent,
+	WsRequest,
+	WsResponse,
+} from "./types";
+
+export { api, callMcpTool, resolveWsHost } from "./http";
+
+export { ChatWebSocket } from "./websocket";
@@ -0,0 +1,292 @@
+/**
+ * Type and interface definitions for the Huskies API client.
+ * All shared domain types — WebSocket messages, pipeline state,
+ * provider configuration, and response shapes — live here.
+ */
+
+/** A message sent from the browser to the Huskies server over WebSocket. */
+export type WsRequest =
+	| {
+			type: "chat";
+			messages: Message[];
+			config: ProviderConfig;
+	  }
+	| {
+			type: "cancel";
+	  }
+	| {
+			type: "permission_response";
+			request_id: string;
+			approved: boolean;
+			always_allow: boolean;
+	  }
+	| { type: "ping" }
+	| {
+			type: "side_question";
+			question: string;
+			context_messages: Message[];
+			config: ProviderConfig;
+	  };
+
+/** Metadata for a single step in the setup wizard flow. */
+export interface WizardStepInfo {
+	step: string;
+	label: string;
+	status: string;
+	content?: string;
+}
+
+/** Full state snapshot of the setup wizard, including all steps and completion flag. */
+export interface WizardStateData {
+	steps: WizardStepInfo[];
+	current_step_index: number;
+	completed: boolean;
+}
+
+/** Describes the agent currently assigned to a pipeline work item. */
+export interface AgentAssignment {
+	agent_name: string;
+	model: string | null;
+	status: string;
+}
+
+/** A single item in any pipeline stage (backlog, current, QA, merge, or done). */
+export interface PipelineStageItem {
+	story_id: string;
+	name: string;
+	error: string | null;
+	merge_failure: string | null;
+	agent: AgentAssignment | null;
+	review_hold: boolean | null;
+	qa: string | null;
+	depends_on: number[] | null;
+	/** True when the item is in Stage::Blocked — awaiting human unblock. */
+	blocked?: boolean | null;
+	/** True when the item is in Stage::Frozen — paused at its current stage. */
+	frozen?: boolean | null;
+}
+
+/** Snapshot of all pipeline stages returned via WebSocket or REST. */
+export interface PipelineState {
+	backlog: PipelineStageItem[];
+	current: PipelineStageItem[];
+	qa: PipelineStageItem[];
+	merge: PipelineStageItem[];
+	done: PipelineStageItem[];
+	/** Story IDs that currently have a deterministic merge in progress. */
+	deterministic_merges_in_flight: string[];
+}
+
+/** A message received from the Huskies server over WebSocket. */
+export type WsResponse =
+	| { type: "token"; content: string }
+	| { type: "update"; messages: Message[] }
+	| { type: "session_id"; session_id: string }
+	| { type: "error"; message: string }
+	| {
+			type: "pipeline_state";
+			backlog: PipelineStageItem[];
+			current: PipelineStageItem[];
+			qa: PipelineStageItem[];
+			merge: PipelineStageItem[];
+			done: PipelineStageItem[];
+			deterministic_merges_in_flight: string[];
+	  }
+	| {
+			type: "permission_request";
+			request_id: string;
+			tool_name: string;
+			tool_input: Record<string, unknown>;
+	  }
+	| { type: "tool_activity"; tool_name: string }
+	| {
+			type: "reconciliation_progress";
+			story_id: string;
+			status: string;
+			message: string;
+	  }
+	/** `.story_kit/project.toml` was modified; re-fetch the agent roster. */
+	| { type: "agent_config_changed" }
+	/** An agent started, stopped, or changed state; re-fetch agent list. */
+	| { type: "agent_state_changed" }
+	/** Heartbeat response confirming the connection is alive. */
+	| { type: "pong" }
+	/** Sent on connect when the project still needs onboarding (specs are placeholders). */
+	| { type: "onboarding_status"; needs_onboarding: boolean }
+	/** Sent on connect when a setup wizard is active. */
+	| {
+			type: "wizard_state";
+			steps: WizardStepInfo[];
+			current_step_index: number;
+			completed: boolean;
+	  }
+	/** Streaming thinking token from an extended-thinking block, separate from regular text. */
+	| { type: "thinking_token"; content: string }
+	/** Streaming token from a /btw side question response. */
+	| { type: "side_question_token"; content: string }
+	/** Final signal that the /btw side question has been fully answered. */
+	| { type: "side_question_done"; response: string }
+	/** A single server log entry (bulk on connect, then live). */
+	| { type: "log_entry"; timestamp: string; level: string; message: string }
+	/** A structured pipeline status event from the status broadcaster. */
+	| { type: "status_update"; event: StatusEvent };
+
+/**
+ * A structured pipeline status event emitted by the status broadcaster.
+ *
+ * The discriminant `type` field enables per-event-type rendering without
+ * parsing strings. All fields from the original event are preserved so
+ * future UI stories can add dedicated icons, banners, or filters.
+ */
+export type StatusEvent =
+	| {
+			type: "stage_transition";
+			story_id: string;
+			story_name: string;
+			from_stage: string;
+			to_stage: string;
+	  }
+	| {
+			type: "merge_failure";
+			story_id: string;
+			story_name: string;
+			reason: string;
+	  }
+	| {
+			type: "story_blocked";
+			story_id: string;
+			story_name: string;
+			reason: string;
+	  }
+	| {
+			type: "rate_limit_warning";
+			story_id: string;
+			story_name: string;
+			agent_name: string;
+	  }
+	| {
+			type: "rate_limit_hard_block";
+			story_id: string;
+			story_name: string;
+			agent_name: string;
+			reset_at: string;
+	  };
+
+/** LLM provider configuration used when initiating a chat request. */
+export interface ProviderConfig {
+	provider: string;
+	model: string;
+	base_url?: string;
+	enable_tools?: boolean;
+	session_id?: string;
+}
+
+/** Valid role values for a chat message. */
+export type Role = "system" | "user" | "assistant" | "tool";
+
+/** An LLM tool call embedded in an assistant message. */
+export interface ToolCall {
+	id?: string;
+	type: string;
+	function: {
+		name: string;
+		arguments: string;
+	};
+}
+
+/** A single chat message exchanged with the LLM. */
+export interface Message {
+	role: Role;
+	content: string;
+	tool_calls?: ToolCall[];
+	tool_call_id?: string;
+}
+
+/** Anthropic model metadata returned by the models endpoint. */
+export interface AnthropicModelInfo {
+	id: string;
+	context_window: number;
+}
+
+/** Content and metadata for a pipeline work item fetched from the server. */
+export interface WorkItemContent {
+	content: string;
+	stage: string;
+	name: string;
+	agent: string | null;
+}
+
+/** Result for a single test case from the server's test runner. */
+export interface TestCaseResult {
+	name: string;
+	status: "pass" | "fail";
+	details: string | null;
+}
+
+/** Combined unit and integration test results for a work item. */
+export interface TestResultsResponse {
+	unit: TestCaseResult[];
+	integration: TestCaseResult[];
+}
+
+/** A file-system entry (file or directory) returned by listing endpoints. */
+export interface FileEntry {
+	name: string;
+	kind: "file" | "dir";
+}
+
+/** A single file-search match with path and match count. */
+export interface SearchResult {
+	path: string;
+	matches: number;
+}
+
+/** Per-agent token usage and cost breakdown within a story. */
+export interface AgentCostEntry {
+	agent_name: string;
+	model: string | null;
+	input_tokens: number;
+	output_tokens: number;
+	cache_creation_input_tokens: number;
+	cache_read_input_tokens: number;
+	total_cost_usd: number;
+}
+
+/** Total token cost for a work item, broken down by agent. */
+export interface TokenCostResponse {
+	total_cost_usd: number;
+	agents: AgentCostEntry[];
+}
+
+/** A single token-usage record from the server's usage log. */
+export interface TokenUsageRecord {
+	story_id: string;
+	agent_name: string;
+	model: string | null;
+	timestamp: string;
+	input_tokens: number;
+	output_tokens: number;
+	cache_creation_input_tokens: number;
+	cache_read_input_tokens: number;
+	total_cost_usd: number;
+}
+
+/** All token-usage records returned by the usage endpoint. */
+export interface AllTokenUsageResponse {
+	records: TokenUsageRecord[];
+}
+
+/** Output captured from a shell command executed on the server. */
+export interface CommandOutput {
+	stdout: string;
+	stderr: string;
+	exit_code: number;
+}
+
+/** OAuth authentication status returned by the server. */
+export interface OAuthStatus {
+	authenticated: boolean;
+	expired: boolean;
+	expires_at: number;
+	has_refresh_token: boolean;
+}
@@ -0,0 +1,337 @@
+/**
+ * WebSocket client for real-time communication with the Huskies server.
+ * Manages a shared socket with reference counting, automatic reconnection,
+ * and heartbeat keepalive. All inbound message types are dispatched to
+ * caller-supplied handler callbacks.
+ */
+
+import { resolveWsHost } from "./http";
+import type {
+	Message,
+	PipelineState,
+	ProviderConfig,
+	StatusEvent,
+	WizardStateData,
+	WsRequest,
+	WsResponse,
+} from "./types";
+
+declare const __HUSKIES_PORT__: string;
+
+const DEFAULT_WS_PATH = "/ws";
+
+/**
+ * Singleton-backed WebSocket client with automatic reconnection and heartbeat.
+ * Multiple callers share one underlying socket via reference counting; the
+ * socket is closed only when the last caller disconnects.
+ */
+export class ChatWebSocket {
+	private static sharedSocket: WebSocket | null = null;
+	private static refCount = 0;
+	private socket?: WebSocket;
+	private onToken?: (content: string) => void;
+	private onThinkingToken?: (content: string) => void;
+	private onUpdate?: (messages: Message[]) => void;
+	private onSessionId?: (sessionId: string) => void;
+	private onError?: (message: string) => void;
+	private onPipelineState?: (state: PipelineState) => void;
+	private onPermissionRequest?: (
+		requestId: string,
+		toolName: string,
+		toolInput: Record<string, unknown>,
+	) => void;
+	private onActivity?: (toolName: string) => void;
+	private onReconciliationProgress?: (
+		storyId: string,
+		status: string,
+		message: string,
+	) => void;
+	private onAgentConfigChanged?: () => void;
+	private onAgentStateChanged?: () => void;
+	private onOnboardingStatus?: (needsOnboarding: boolean) => void;
+	private onWizardState?: (state: WizardStateData) => void;
+	private onSideQuestionToken?: (content: string) => void;
+	private onSideQuestionDone?: (response: string) => void;
+	private onLogEntry?: (
+		timestamp: string,
+		level: string,
+		message: string,
+	) => void;
+	private onStatusUpdate?: (event: StatusEvent) => void;
+	private onConnected?: () => void;
+	private onDisconnected?: () => void;
+	private connected = false;
+	private closeTimer?: number;
+	private wsPath = DEFAULT_WS_PATH;
+	private reconnectTimer?: number;
+	private reconnectDelay = 1000;
+	private shouldReconnect = false;
+	private heartbeatInterval?: number;
+	private heartbeatTimeout?: number;
+	private static readonly HEARTBEAT_INTERVAL = 30_000;
+	private static readonly HEARTBEAT_TIMEOUT = 5_000;
+
+	private _startHeartbeat(): void {
+		this._stopHeartbeat();
+		this.heartbeatInterval = window.setInterval(() => {
+			if (!this.socket || this.socket.readyState !== WebSocket.OPEN) return;
+			const ping: WsRequest = { type: "ping" };
+			this.socket.send(JSON.stringify(ping));
+			this.heartbeatTimeout = window.setTimeout(() => {
+				// No pong received within timeout; close socket to trigger reconnect.
+				this.socket?.close();
+			}, ChatWebSocket.HEARTBEAT_TIMEOUT);
+		}, ChatWebSocket.HEARTBEAT_INTERVAL);
+	}
+
+	private _stopHeartbeat(): void {
+		window.clearInterval(this.heartbeatInterval);
+		window.clearTimeout(this.heartbeatTimeout);
+		this.heartbeatInterval = undefined;
+		this.heartbeatTimeout = undefined;
+	}
+
+	private _buildWsUrl(): string {
+		const protocol = window.location.protocol === "https:" ? "wss" : "ws";
+		const wsHost = resolveWsHost(
+			import.meta.env.DEV,
+			typeof __HUSKIES_PORT__ !== "undefined" ? __HUSKIES_PORT__ : undefined,
+			window.location.host,
+		);
+		return `${protocol}://${wsHost}${this.wsPath}`;
+	}
+
+	private _attachHandlers(): void {
+		if (!this.socket) return;
+		this.socket.onopen = () => {
+			this.reconnectDelay = 1000;
+			this._startHeartbeat();
+			this.onConnected?.();
+		};
+		this.socket.onmessage = (event) => {
+			try {
+				const data = JSON.parse(event.data) as WsResponse;
+				if (data.type === "token") this.onToken?.(data.content);
+				if (data.type === "thinking_token")
+					this.onThinkingToken?.(data.content);
+				if (data.type === "update") this.onUpdate?.(data.messages);
+				if (data.type === "session_id") this.onSessionId?.(data.session_id);
+				if (data.type === "error") this.onError?.(data.message);
+				if (data.type === "pipeline_state")
+					this.onPipelineState?.({
+						backlog: data.backlog,
+						current: data.current,
+						qa: data.qa,
+						merge: data.merge,
+						done: data.done,
+						deterministic_merges_in_flight:
+							data.deterministic_merges_in_flight ?? [],
+					});
+				if (data.type === "permission_request")
+					this.onPermissionRequest?.(
+						data.request_id,
+						data.tool_name,
+						data.tool_input,
+					);
+				if (data.type === "tool_activity") this.onActivity?.(data.tool_name);
+				if (data.type === "reconciliation_progress")
+					this.onReconciliationProgress?.(
+						data.story_id,
+						data.status,
+						data.message,
+					);
+				if (data.type === "agent_config_changed") this.onAgentConfigChanged?.();
+				if (data.type === "agent_state_changed") this.onAgentStateChanged?.();
+				if (data.type === "onboarding_status")
+					this.onOnboardingStatus?.(data.needs_onboarding);
+				if (data.type === "wizard_state")
+					this.onWizardState?.({
+						steps: data.steps,
+						current_step_index: data.current_step_index,
+						completed: data.completed,
+					});
+				if (data.type === "side_question_token")
+					this.onSideQuestionToken?.(data.content);
+				if (data.type === "side_question_done")
+					this.onSideQuestionDone?.(data.response);
+				if (data.type === "log_entry")
+					this.onLogEntry?.(data.timestamp, data.level, data.message);
+				if (data.type === "status_update") this.onStatusUpdate?.(data.event);
+				if (data.type === "pong") {
+					window.clearTimeout(this.heartbeatTimeout);
+					this.heartbeatTimeout = undefined;
+				}
+			} catch (err) {
+				this.onError?.(String(err));
+			}
+		};
+		this.socket.onerror = () => {
+			this.onError?.("WebSocket error");
+		};
+		this.socket.onclose = () => {
+			if (this.shouldReconnect && this.connected) {
+				this.onDisconnected?.();
+				this._scheduleReconnect();
+			}
+		};
+	}
+
+	private _scheduleReconnect(): void {
+		window.clearTimeout(this.reconnectTimer);
+		const delay = this.reconnectDelay;
+		this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000);
+		this.reconnectTimer = window.setTimeout(() => {
+			this.reconnectTimer = undefined;
+			const wsUrl = this._buildWsUrl();
+			ChatWebSocket.sharedSocket = new WebSocket(wsUrl);
+			this.socket = ChatWebSocket.sharedSocket;
+			this._attachHandlers();
+		}, delay);
+	}
+
+	connect(
+		handlers: {
+			onToken?: (content: string) => void;
+			onThinkingToken?: (content: string) => void;
+			onUpdate?: (messages: Message[]) => void;
+			onSessionId?: (sessionId: string) => void;
+			onError?: (message: string) => void;
+			onPipelineState?: (state: PipelineState) => void;
+			onPermissionRequest?: (
+				requestId: string,
+				toolName: string,
+				toolInput: Record<string, unknown>,
+			) => void;
+			onActivity?: (toolName: string) => void;
+			onReconciliationProgress?: (
+				storyId: string,
+				status: string,
+				message: string,
+			) => void;
+			onAgentConfigChanged?: () => void;
+			onAgentStateChanged?: () => void;
+			onOnboardingStatus?: (needsOnboarding: boolean) => void;
+			onWizardState?: (state: WizardStateData) => void;
+			onSideQuestionToken?: (content: string) => void;
+			onSideQuestionDone?: (response: string) => void;
+			onLogEntry?: (timestamp: string, level: string, message: string) => void;
+			onStatusUpdate?: (event: StatusEvent) => void;
+			onConnected?: () => void;
+			onDisconnected?: () => void;
+		},
+		wsPath = DEFAULT_WS_PATH,
+	) {
+		this.onToken = handlers.onToken;
+		this.onThinkingToken = handlers.onThinkingToken;
+		this.onUpdate = handlers.onUpdate;
+		this.onSessionId = handlers.onSessionId;
+		this.onError = handlers.onError;
+		this.onPipelineState = handlers.onPipelineState;
+		this.onPermissionRequest = handlers.onPermissionRequest;
+		this.onActivity = handlers.onActivity;
+		this.onReconciliationProgress = handlers.onReconciliationProgress;
+		this.onAgentConfigChanged = handlers.onAgentConfigChanged;
+		this.onAgentStateChanged = handlers.onAgentStateChanged;
+		this.onOnboardingStatus = handlers.onOnboardingStatus;
+		this.onWizardState = handlers.onWizardState;
+		this.onSideQuestionToken = handlers.onSideQuestionToken;
+		this.onSideQuestionDone = handlers.onSideQuestionDone;
+		this.onLogEntry = handlers.onLogEntry;
+		this.onStatusUpdate = handlers.onStatusUpdate;
+		this.onConnected = handlers.onConnected;
+		this.onDisconnected = handlers.onDisconnected;
+		this.wsPath = wsPath;
+		this.shouldReconnect = true;
+
+		if (this.connected) {
+			return;
+		}
+		this.connected = true;
+		ChatWebSocket.refCount += 1;
+
+		if (
+			!ChatWebSocket.sharedSocket ||
+			ChatWebSocket.sharedSocket.readyState === WebSocket.CLOSED ||
+			ChatWebSocket.sharedSocket.readyState === WebSocket.CLOSING
+		) {
+			const wsUrl = this._buildWsUrl();
+			ChatWebSocket.sharedSocket = new WebSocket(wsUrl);
+		}
+		this.socket = ChatWebSocket.sharedSocket;
+		this._attachHandlers();
+	}
+
+	sendChat(messages: Message[], config: ProviderConfig) {
+		this.send({ type: "chat", messages, config });
+	}
+
+	sendSideQuestion(
+		question: string,
+		contextMessages: Message[],
+		config: ProviderConfig,
+	) {
+		this.send({
+			type: "side_question",
+			question,
+			context_messages: contextMessages,
+			config,
+		});
+	}
+
+	cancel() {
+		this.send({ type: "cancel" });
+	}
+
+	sendPermissionResponse(
+		requestId: string,
+		approved: boolean,
+		alwaysAllow = false,
+	) {
+		this.send({
+			type: "permission_response",
+			request_id: requestId,
+			approved,
+			always_allow: alwaysAllow,
+		});
+	}
+
+	close() {
+		this.shouldReconnect = false;
+		this._stopHeartbeat();
+		window.clearTimeout(this.reconnectTimer);
+		this.reconnectTimer = undefined;
+
+		if (!this.connected) return;
+		this.connected = false;
+		ChatWebSocket.refCount = Math.max(0, ChatWebSocket.refCount - 1);
+
+		if (import.meta.env.DEV) {
+			if (this.closeTimer) {
+				window.clearTimeout(this.closeTimer);
+			}
+			this.closeTimer = window.setTimeout(() => {
+				if (ChatWebSocket.refCount === 0) {
+					ChatWebSocket.sharedSocket?.close();
+					ChatWebSocket.sharedSocket = null;
+				}
+				this.socket = ChatWebSocket.sharedSocket ?? undefined;
+				this.closeTimer = undefined;
+			}, 250);
+			return;
+		}
+
+		if (ChatWebSocket.refCount === 0) {
+			ChatWebSocket.sharedSocket?.close();
+			ChatWebSocket.sharedSocket = null;
+		}
+		this.socket = ChatWebSocket.sharedSocket ?? undefined;
+	}
+
+	private send(payload: WsRequest) {
+		if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
+			this.onError?.("WebSocket is not connected");
+			return;
+		}
+		this.socket.send(JSON.stringify(payload));
+	}
+}
@@ -38,6 +38,7 @@ export interface ProjectPipelineStatus {
 	active: PipelineItem[];
 	backlog: { story_id: string; name: string }[];
 	backlog_count: number;
+	archived?: PipelineItem[];
 	error?: string;
 }

@@ -54,6 +55,21 @@ export interface ServerMode {
 	mode: "gateway" | "standard";
 }

+/// Type guard: verify that an unknown value has the AllProjectsPipeline shape.
+/// Prevents silent "no active stories" when the backend response shape drifts.
+function isAllProjectsPipeline(value: unknown): value is AllProjectsPipeline {
+	if (typeof value !== "object" || value === null) return false;
+	const v = value as Record<string, unknown>;
+	if (typeof v.active !== "string") return false;
+	if (typeof v.projects !== "object" || v.projects === null) return false;
+	for (const proj of Object.values(v.projects as Record<string, unknown>)) {
+		if (typeof proj !== "object" || proj === null) return false;
+		const p = proj as Record<string, unknown>;
+		if (!Array.isArray(p.active) && typeof p.error !== "string") return false;
+	}
+	return true;
+}
+
 async function gatewayRequest<T>(
 	path: string,
 	options: RequestInit = {},
@@ -73,6 +89,39 @@ async function gatewayRequest<T>(
 	return res.json() as Promise<T>;
 }

+let _mcpRequestId = 1;
+
+/// Call a gateway MCP tool via JSON-RPC and return the result.
+async function gatewayMcpCall<T>(
+	toolName: string,
+	args: Record<string, unknown> = {},
+): Promise<T> {
+	const id = _mcpRequestId++;
+	const body = JSON.stringify({
+		jsonrpc: "2.0",
+		id,
+		method: "tools/call",
+		params: { name: toolName, arguments: args },
+	});
+	const res = await fetch("/mcp", {
+		method: "POST",
+		headers: { "Content-Type": "application/json" },
+		body,
+	});
+	if (!res.ok) {
+		const text = await res.text();
+		throw new Error(text || `MCP request failed (${res.status})`);
+	}
+	const json = (await res.json()) as {
+		result?: Record<string, unknown>;
+		error?: { message: string };
+	};
+	if (json.error) {
+		throw new Error(json.error.message);
+	}
+	return json.result as T;
+}
+
 export const gatewayApi = {
 	/// Returns `{ mode: "gateway" }` if this server is a gateway, otherwise rejects.
 	getServerMode(): Promise<ServerMode> {
@@ -88,7 +137,9 @@ export const gatewayApi = {

 	/// List all build agents that have registered with this gateway.
 	listAgents(): Promise<JoinedAgent[]> {
-		return gatewayRequest<JoinedAgent[]>("/gateway/agents");
+		return gatewayMcpCall<{ agents: JoinedAgent[] }>("agents.list").then(
+			(result) => result.agents ?? [],
+		);
 	},

 	/// Remove a registered build agent by its ID.
@@ -111,22 +162,6 @@ export const gatewayApi = {
 		return gatewayRequest<GatewayInfo>("/api/gateway");
 	},

-	/// Add a new project to the gateway config.
-	addProject(name: string, url: string): Promise<GatewayProject> {
-		return gatewayRequest<GatewayProject>("/api/gateway/projects", {
-			method: "POST",
-			body: JSON.stringify({ name, url }),
-		});
-	},
-
-	/// Remove a project from the gateway config.
-	removeProject(name: string): Promise<void> {
-		return gatewayRequest<void>(
-			`/api/gateway/projects/${encodeURIComponent(name)}`,
-			{ method: "DELETE" },
-		);
-	},
-
 	/// Send a heartbeat for an agent to update its last-seen timestamp.
 	heartbeat(id: string): Promise<void> {
 		return gatewayRequest<void>(`/gateway/agents/${id}/heartbeat`, {
@@ -134,16 +169,44 @@ export const gatewayApi = {
 		});
 	},

-	/// Fetch pipeline status from all registered projects.
-	getAllProjectsPipeline(): Promise<AllProjectsPipeline> {
-		return gatewayRequest<AllProjectsPipeline>("/api/gateway/pipeline");
+	/// Fetch pipeline status from all registered projects via the pipeline.get read-RPC.
+	async getAllProjectsPipeline(): Promise<AllProjectsPipeline> {
+		const res = await fetch("/mcp", {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify({ jsonrpc: "2.0", id: 1, method: "pipeline.get", params: {} }),
+		});
+		if (!res.ok) {
+			const text = await res.text();
+			throw new Error(text || `Request failed (${res.status})`);
+		}
+		const rpc = await res.json() as { result?: unknown; error?: { message: string } };
+		if (rpc.error) {
+			throw new Error(rpc.error.message);
+		}
+		const result = rpc.result;
+		if (!isAllProjectsPipeline(result)) {
+			throw new Error("pipeline.get returned unexpected shape");
+		}
+		return result;
 	},

-	/// Switch the active project.
-	switchProject(project: string): Promise<{ ok: boolean; error?: string }> {
-		return gatewayRequest<{ ok: boolean; error?: string }>(
-			"/api/gateway/switch",
-			{ method: "POST", body: JSON.stringify({ project }) },
-		);
+	/// Switch the active project via the MCP switch_project tool.
+	async switchProject(project: string): Promise<{ ok: boolean; error?: string }> {
+		const res = await fetch("/mcp", {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify({
+				jsonrpc: "2.0",
+				id: 1,
+				method: "tools/call",
+				params: { name: "switch_project", arguments: { project } },
+			}),
+		});
+		const data = await res.json();
+		if (data.error) {
+			return { ok: false, error: data.error.message ?? String(data.error) };
+		}
+		return { ok: true };
 	},
 };
@@ -0,0 +1,238 @@
+/**
+ * Lightweight read-RPC client over the `/ws` WebSocket.
+ *
+ * Each `rpcCall` opens a short-lived WebSocket, sends an `rpc_request` frame,
+ * waits for the matching `rpc_response`, then closes the connection.
+ *
+ * On a transient connection failure the call is retried once before rejecting,
+ * which lets a freshly-started backend race finish before the user sees an
+ * error.  Failures surface as `Error` instances whose `.message` is intended
+ * to be visible (toast / banner) — callers must not swallow them silently.
+ */
+
+let correlationCounter = 0;
+
+function nextCorrelationId(): string {
+	return `rpc-${Date.now()}-${++correlationCounter}`;
+}
+
+/**
+ * Build the WebSocket URL for the `/ws` endpoint, deriving the protocol
+ * (ws/wss) and host from the current page location.
+ */
+function buildWsUrl(): string {
+	const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
+	return `${proto}//${window.location.host}/ws`;
+}
+
+export interface RpcResponse<T = unknown> {
+	ok: boolean;
+	result?: T;
+	error?: string;
+	code?: string;
+}
+
+/** Error subclass for RPC failures so callers can recognise them. */
+export class RpcError extends Error {
+	constructor(
+		message: string,
+		public readonly code?: string,
+		public readonly method?: string,
+	) {
+		super(message);
+		this.name = "RpcError";
+	}
+}
+
+/** Maximum number of automatic retries on transient WebSocket failure. */
+const MAX_RETRIES = 1;
+
+/** Delay between retry attempts (ms). */
+const RETRY_DELAY_MS = 250;
+
+/**
+ * Internal: a single one-shot RPC attempt. Resolves with the result or
+ * rejects with an `RpcError`.
+ */
+function rpcAttempt<T>(
+	method: string,
+	params: object,
+	timeoutMs: number,
+): Promise<T> {
+	return new Promise<T>((resolve, reject) => {
+		const correlationId = nextCorrelationId();
+		let ws: WebSocket;
+		try {
+			ws = new WebSocket(buildWsUrl());
+		} catch (err) {
+			reject(
+				new RpcError(
+					`Failed to open WebSocket for ${method}: ${(err as Error).message}`,
+					"CONNECT_FAILED",
+					method,
+				),
+			);
+			return;
+		}
+		let settled = false;
+
+		const timer = setTimeout(() => {
+			if (!settled) {
+				settled = true;
+				try {
+					ws.close();
+				} catch {
+					/* ignore */
+				}
+				reject(new RpcError(`RPC timeout for ${method}`, "TIMEOUT", method));
+			}
+		}, timeoutMs);
+
+		ws.onopen = () => {
+			ws.send(
+				JSON.stringify({
+					kind: "rpc_request",
+					version: 1,
+					correlation_id: correlationId,
+					ttl_ms: timeoutMs,
+					method,
+					params,
+				}),
+			);
+		};
+
+		ws.onmessage = (event) => {
+			let data: unknown;
+			try {
+				data = JSON.parse(event.data);
+			} catch {
+				// Non-JSON frame is not ours — keep waiting.
+				return;
+			}
+			if (!data || typeof data !== "object") {
+				return;
+			}
+			const frame = data as {
+				kind?: unknown;
+				correlation_id?: unknown;
+				ok?: unknown;
+				result?: unknown;
+				error?: unknown;
+				code?: unknown;
+			};
+			if (frame.kind !== "rpc_response" || frame.correlation_id !== correlationId) {
+				// Not addressed to this call — ignore (pipeline_state, etc.).
+				return;
+			}
+			settled = true;
+			clearTimeout(timer);
+			try {
+				ws.close();
+			} catch {
+				/* ignore */
+			}
+			if (typeof frame.ok !== "boolean") {
+				reject(
+					new RpcError(
+						`Malformed RPC response for ${method}: missing or non-boolean 'ok' field`,
+						"MALFORMED",
+						method,
+					),
+				);
+				return;
+			}
+			if (frame.ok) {
+				if (!("result" in frame)) {
+					reject(
+						new RpcError(
+							`Malformed RPC response for ${method}: 'ok:true' frame missing 'result' field`,
+							"MALFORMED",
+							method,
+						),
+					);
+					return;
+				}
+				resolve(frame.result as T);
+			} else {
+				const errMsg =
+					typeof frame.error === "string" ? frame.error : undefined;
+				const errCode = typeof frame.code === "string" ? frame.code : undefined;
+				reject(
+					new RpcError(
+						errMsg || `RPC error: ${errCode || "UNKNOWN"}`,
+						errCode,
+						method,
+					),
+				);
+			}
+		};
+
+		ws.onerror = () => {
+			if (!settled) {
+				settled = true;
+				clearTimeout(timer);
+				reject(
+					new RpcError(
+						`WebSocket error during RPC call to ${method}`,
+						"CONNECT_FAILED",
+						method,
+					),
+				);
+			}
+		};
+
+		ws.onclose = () => {
+			if (!settled) {
+				settled = true;
+				clearTimeout(timer);
+				reject(
+					new RpcError(
+						`WebSocket closed before RPC response for ${method}`,
+						"CONNECT_FAILED",
+						method,
+					),
+				);
+			}
+		};
+	});
+}
+
+/** Return true if the error is one we should retry (connection-level). */
+function isRetryable(err: unknown): boolean {
+	return (
+		err instanceof RpcError &&
+		(err.code === "CONNECT_FAILED" || err.code === "TIMEOUT")
+	);
+}
+
+function sleep(ms: number): Promise<void> {
+	return new Promise((r) => setTimeout(r, ms));
+}
+
+/**
+ * Send a read-RPC request over a temporary WebSocket connection and return
+ * the result.  On transient connection failure the call is retried once
+ * before rejecting.  Rejects with [`RpcError`] on server-side errors,
+ * timeouts, or persistent connection failures.
+ */
+export async function rpcCall<T = unknown>(
+	method: string,
+	params: object = {},
+	timeoutMs = 5000,
+): Promise<T> {
+	let lastErr: unknown;
+	for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+		try {
+			return await rpcAttempt<T>(method, params, timeoutMs);
+		} catch (err) {
+			lastErr = err;
+			if (attempt < MAX_RETRIES && isRetryable(err)) {
+				await sleep(RETRY_DELAY_MS);
+				continue;
+			}
+			throw err;
+		}
+	}
+	// Unreachable but TypeScript can't prove it.
+	throw lastErr;
+}
@@ -0,0 +1,117 @@
+{
+  "model.set_preference": {
+    "params": {
+      "model": "claude-sonnet-4-6"
+    },
+    "result": {
+      "ok": true
+    }
+  },
+  "anthropic.set_api_key": {
+    "params": {
+      "api_key": "sk-ant-..."
+    },
+    "result": {
+      "ok": true
+    }
+  },
+  "settings.put_editor": {
+    "params": {
+      "editor_command": "zed"
+    },
+    "result": {
+      "editor_command": "zed"
+    }
+  },
+  "settings.open_file": {
+    "params": {
+      "path": "src/main.rs",
+      "line": 42
+    },
+    "result": {
+      "ok": true
+    }
+  },
+  "settings.put_project": {
+    "params": {
+      "default_qa": "server",
+      "default_coder_model": null,
+      "max_coders": null,
+      "max_retries": 2,
+      "base_branch": null,
+      "rate_limit_notifications": true,
+      "timezone": null,
+      "rendezvous": null,
+      "watcher_sweep_interval_secs": 60,
+      "watcher_done_retention_secs": 86400
+    },
+    "result": {
+      "default_qa": "server",
+      "default_coder_model": null,
+      "max_coders": null,
+      "max_retries": 2,
+      "base_branch": null,
+      "rate_limit_notifications": true,
+      "timezone": null,
+      "rendezvous": null,
+      "watcher_sweep_interval_secs": 60,
+      "watcher_done_retention_secs": 86400
+    }
+  },
+  "project.open": {
+    "params": {
+      "path": "/path/to/project"
+    },
+    "result": {
+      "path": "/path/to/project"
+    }
+  },
+  "project.close": {
+    "params": {},
+    "result": {
+      "ok": true
+    }
+  },
+  "project.forget": {
+    "params": {
+      "path": "/path/to/project"
+    },
+    "result": {
+      "ok": true
+    }
+  },
+  "bot_config.save": {
+    "params": {
+      "transport": "matrix",
+      "enabled": true,
+      "homeserver": "https://matrix.example",
+      "username": "bot",
+      "password": "secret",
+      "room_ids": [
+        "!room:example"
+      ],
+      "slack_bot_token": null,
+      "slack_signing_secret": null,
+      "slack_channel_ids": null
+    },
+    "result": {
+      "transport": "matrix",
+      "enabled": true,
+      "homeserver": "https://matrix.example",
+      "username": "bot",
+      "password": "secret",
+      "room_ids": [
+        "!room:example"
+      ],
+      "slack_bot_token": null,
+      "slack_signing_secret": null,
+      "slack_channel_ids": null
+    }
+  },
+  "chat.cancel": {
+    "params": {},
+    "result": {
+      "ok": true
+    }
+  }
+}
@@ -0,0 +1,29 @@
+/**
+ * Snapshot test: the frontend `CONTRACT_FIXTURES` table must match the
+ * Rust-side snapshot.  When the Rust contract changes, the snapshot file
+ * regenerates (via `UPDATE_RPC_CONTRACT_SNAPSHOT=1 cargo test`) and this
+ * test catches any TS shapes that have drifted.
+ */
+import { describe, expect, it } from "vitest";
+import { CONTRACT_FIXTURES } from "./rpcContract";
+import snapshot from "./rpcContract.snapshot.json";
+
+describe("rpcContract", () => {
+	it("CONTRACT_FIXTURES matches the Rust-generated snapshot", () => {
+		// Convert TS fixtures into the same shape the Rust snapshot serialises
+		// to: a method-keyed object of `{ params, result }`.
+		const fromTs = Object.fromEntries(
+			Object.entries(CONTRACT_FIXTURES).map(([method, payloads]) => [
+				method,
+				{ params: payloads.params, result: payloads.result },
+			]),
+		);
+		expect(fromTs).toEqual(snapshot);
+	});
+
+	it("declares the same method names as the snapshot", () => {
+		const tsMethods = Object.keys(CONTRACT_FIXTURES).sort();
+		const rustMethods = Object.keys(snapshot).sort();
+		expect(tsMethods).toEqual(rustMethods);
+	});
+});
@@ -0,0 +1,247 @@
+/**
+ * Frontend mirror of the Rust typed RPC contract in
+ * `server/src/crdt_sync/rpc_contract.rs`.
+ *
+ * Every typed write method declared on the backend has matching TypeScript
+ * params/result types here.  The `CONTRACT_FIXTURES` table also exposes the
+ * same canonical example payloads as the Rust `CONTRACT_METHODS` slice — the
+ * `rpcContract.test.ts` test compares them against the committed
+ * `rpcContract.snapshot.json` that the Rust test regenerates.  If the Rust
+ * shapes drift from the TS shapes, the snapshot drifts and one side fails in
+ * CI — surfacing the mismatch as a compile / test error instead of a runtime
+ * one.
+ *
+ * When adding a method on the backend:
+ *  1. Add the params + result type here.
+ *  2. Add the entry to `CONTRACT_FIXTURES` with a canonical example.
+ *  3. Re-run `UPDATE_RPC_CONTRACT_SNAPSHOT=1 cargo test` to refresh
+ *     `rpcContract.snapshot.json`.
+ */
+
+// ── Params types ────────────────────────────────────────────────────────────
+
+/** Params for `model.set_preference`. */
+export interface SetModelPreferenceParams {
+	model: string;
+}
+
+/** Params for `anthropic.set_api_key`. */
+export interface SetAnthropicApiKeyParams {
+	api_key: string;
+}
+
+/** Params for `settings.put_editor`. */
+export interface PutEditorParams {
+	editor_command: string | null;
+}
+
+/** Params for `settings.open_file`. */
+export interface OpenFileParams {
+	path: string;
+	line: number | null;
+}
+
+/** Params for `project.open`. */
+export interface OpenProjectParams {
+	path: string;
+}
+
+/** Params for `project.forget`. */
+export interface ForgetProjectParams {
+	path: string;
+}
+
+/** Payload for `bot_config.save` (and result of `bot_config.get`). */
+export interface BotConfigPayload {
+	transport: string | null;
+	enabled: boolean | null;
+	homeserver: string | null;
+	username: string | null;
+	password: string | null;
+	room_ids: string[] | null;
+	slack_bot_token: string | null;
+	slack_signing_secret: string | null;
+	slack_channel_ids: string[] | null;
+}
+
+/** Payload for `settings.put_project` (also returned by `settings.get_project`). */
+export interface ProjectSettingsPayload {
+	default_qa: string;
+	default_coder_model: string | null;
+	max_coders: number | null;
+	max_retries: number;
+	base_branch: string | null;
+	rate_limit_notifications: boolean;
+	timezone: string | null;
+	rendezvous: string | null;
+	watcher_sweep_interval_secs: number;
+	watcher_done_retention_secs: number;
+}
+
+// ── Result types ────────────────────────────────────────────────────────────
+
+/** Result envelope for write methods that simply succeed or fail. */
+export interface OkResult {
+	ok: boolean;
+}
+
+/** Result for `settings.put_editor`. */
+export interface EditorSettingsResult {
+	editor_command: string | null;
+}
+
+/** Result for `project.open`. */
+export interface OpenProjectResult {
+	path: string;
+}
+
+// ── Method → params/result mapping ──────────────────────────────────────────
+
+/**
+ * Compile-time mapping from typed RPC method name to its params + result
+ * shapes.  Used by `callTypedRpc` to enforce that callers pass the right
+ * params and receive the right return type for a method.
+ */
+export interface TypedRpcMethods {
+	"model.set_preference": {
+		params: SetModelPreferenceParams;
+		result: OkResult;
+	};
+	"anthropic.set_api_key": {
+		params: SetAnthropicApiKeyParams;
+		result: OkResult;
+	};
+	"settings.put_editor": {
+		params: PutEditorParams;
+		result: EditorSettingsResult;
+	};
+	"settings.open_file": {
+		params: OpenFileParams;
+		result: OkResult;
+	};
+	"settings.put_project": {
+		params: ProjectSettingsPayload;
+		result: ProjectSettingsPayload;
+	};
+	"project.open": {
+		params: OpenProjectParams;
+		result: OpenProjectResult;
+	};
+	"project.close": {
+		params: Record<string, never>;
+		result: OkResult;
+	};
+	"project.forget": {
+		params: ForgetProjectParams;
+		result: OkResult;
+	};
+	"bot_config.save": {
+		params: BotConfigPayload;
+		result: BotConfigPayload;
+	};
+	"chat.cancel": {
+		params: Record<string, never>;
+		result: OkResult;
+	};
+}
+
+/** Union of all typed RPC method names declared in the contract. */
+export type TypedRpcMethodName = keyof TypedRpcMethods;
+
+// ── Canonical fixtures (mirror of Rust `CONTRACT_METHODS`) ──────────────────
+
+/**
+ * One canonical example payload per typed RPC method.  The shape *must*
+ * match the corresponding Rust `CONTRACT_METHODS` entry.  Drift between this
+ * table and `rpcContract.snapshot.json` (regenerated by the Rust side) fails
+ * the `rpcContract.test.ts` snapshot check.
+ */
+export const CONTRACT_FIXTURES: {
+	[K in TypedRpcMethodName]: {
+		params: TypedRpcMethods[K]["params"];
+		result: TypedRpcMethods[K]["result"];
+	};
+} = {
+	"model.set_preference": {
+		params: { model: "claude-sonnet-4-6" },
+		result: { ok: true },
+	},
+	"anthropic.set_api_key": {
+		params: { api_key: "sk-ant-..." },
+		result: { ok: true },
+	},
+	"settings.put_editor": {
+		params: { editor_command: "zed" },
+		result: { editor_command: "zed" },
+	},
+	"settings.open_file": {
+		params: { path: "src/main.rs", line: 42 },
+		result: { ok: true },
+	},
+	"settings.put_project": {
+		params: {
+			default_qa: "server",
+			default_coder_model: null,
+			max_coders: null,
+			max_retries: 2,
+			base_branch: null,
+			rate_limit_notifications: true,
+			timezone: null,
+			rendezvous: null,
+			watcher_sweep_interval_secs: 60,
+			watcher_done_retention_secs: 86_400,
+		},
+		result: {
+			default_qa: "server",
+			default_coder_model: null,
+			max_coders: null,
+			max_retries: 2,
+			base_branch: null,
+			rate_limit_notifications: true,
+			timezone: null,
+			rendezvous: null,
+			watcher_sweep_interval_secs: 60,
+			watcher_done_retention_secs: 86_400,
+		},
+	},
+	"project.open": {
+		params: { path: "/path/to/project" },
+		result: { path: "/path/to/project" },
+	},
+	"project.close": {
+		params: {},
+		result: { ok: true },
+	},
+	"project.forget": {
+		params: { path: "/path/to/project" },
+		result: { ok: true },
+	},
+	"bot_config.save": {
+		params: {
+			transport: "matrix",
+			enabled: true,
+			homeserver: "https://matrix.example",
+			username: "bot",
+			password: "secret",
+			room_ids: ["!room:example"],
+			slack_bot_token: null,
+			slack_signing_secret: null,
+			slack_channel_ids: null,
+		},
+		result: {
+			transport: "matrix",
+			enabled: true,
+			homeserver: "https://matrix.example",
+			username: "bot",
+			password: "secret",
+			room_ids: ["!room:example"],
+			slack_bot_token: null,
+			slack_signing_secret: null,
+			slack_channel_ids: null,
+		},
+	},
+	"chat.cancel": {
+		params: {},
+		result: { ok: true },
+	},
+};
@@ -1,28 +1,13 @@
-import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+/** Tests for the `settings` WS-RPC client (project settings read/write). */
+import { afterEach, describe, expect, it, vi } from "vitest";
 import type { ProjectSettings } from "./settings";
 import { settingsApi } from "./settings";
-
-const mockFetch = vi.fn();
-
-beforeEach(() => {
-	vi.stubGlobal("fetch", mockFetch);
-});
+import { installRpcMock } from "./__test_utils__/mockRpcWebSocket";

 afterEach(() => {
 	vi.restoreAllMocks();
 });

-function okResponse(body: unknown) {
-	return new Response(JSON.stringify(body), {
-		status: 200,
-		headers: { "Content-Type": "application/json" },
-	});
-}
-
-function errorResponse(status: number, text: string) {
-	return new Response(text, { status });
-}
-
 const defaultProjectSettings: ProjectSettings = {
 	default_qa: "server",
 	default_coder_model: null,
@@ -38,52 +23,48 @@ const defaultProjectSettings: ProjectSettings = {

 describe("settingsApi", () => {
 	describe("getProjectSettings", () => {
-		it("sends GET to /settings and returns project settings", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(defaultProjectSettings));
+		it("dispatches settings.get_project RPC and returns project settings", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("settings.get_project", defaultProjectSettings);

 			const result = await settingsApi.getProjectSettings();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/settings",
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"Content-Type": "application/json",
-					}),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{ method: "settings.get_project", params: {} },
+			]);
 			expect(result).toEqual(defaultProjectSettings);
 		});

-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse(defaultProjectSettings));
-			await settingsApi.getProjectSettings("http://localhost:4000/api");
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:4000/api/settings",
-				expect.anything(),
+		it("surfaces RPC errors visibly", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("settings.get_project", "no project open", "INTERNAL");
+
+			await expect(settingsApi.getProjectSettings()).rejects.toThrow(
+				"no project open",
 			);
 		});
 	});

 	describe("putProjectSettings", () => {
-		it("sends PUT to /settings with settings body", async () => {
+		it("dispatches settings.put_project RPC with settings", async () => {
 			const updated = { ...defaultProjectSettings, default_qa: "agent" };
-			mockFetch.mockResolvedValueOnce(okResponse(updated));
+			const rpc = installRpcMock();
+			rpc.respond("settings.put_project", updated);

 			const result = await settingsApi.putProjectSettings(updated);

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/settings",
-				expect.objectContaining({
-					method: "PUT",
-					body: JSON.stringify(updated),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{ method: "settings.put_project", params: updated },
+			]);
 			expect(result.default_qa).toBe("agent");
 		});

-		it("throws on validation error", async () => {
-			mockFetch.mockResolvedValueOnce(
-				errorResponse(400, "Invalid default_qa value"),
+		it("throws on validation error from RPC", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError(
+				"settings.put_project",
+				"Invalid default_qa value",
+				"INVALID",
 			);
 			await expect(
 				settingsApi.putProjectSettings({
@@ -95,107 +76,104 @@ describe("settingsApi", () => {
 	});

 	describe("getEditorCommand", () => {
-		it("sends GET to /settings/editor and returns editor settings", async () => {
+		it("dispatches settings.get_editor RPC and returns editor settings", async () => {
+			const rpc = installRpcMock();
 			const expected = { editor_command: "zed" };
-			mockFetch.mockResolvedValueOnce(okResponse(expected));
+			rpc.respond("settings.get_editor", expected);

 			const result = await settingsApi.getEditorCommand();

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/settings/editor",
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"Content-Type": "application/json",
-					}),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{ method: "settings.get_editor", params: {} },
+			]);
 			expect(result).toEqual(expected);
 		});

 		it("returns null editor_command when not configured", async () => {
-			const expected = { editor_command: null };
-			mockFetch.mockResolvedValueOnce(okResponse(expected));
+			const rpc = installRpcMock();
+			rpc.respond("settings.get_editor", { editor_command: null });

 			const result = await settingsApi.getEditorCommand();
 			expect(result.editor_command).toBeNull();
 		});
-
-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse({ editor_command: "code" }));
-
-			await settingsApi.getEditorCommand("http://localhost:4000/api");
-
-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:4000/api/settings/editor",
-				expect.anything(),
-			);
-		});
 	});

 	describe("setEditorCommand", () => {
-		it("sends PUT to /settings/editor with command body", async () => {
-			const expected = { editor_command: "zed" };
-			mockFetch.mockResolvedValueOnce(okResponse(expected));
+		it("dispatches settings.put_editor RPC with command", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("settings.put_editor", { editor_command: "zed" });

 			const result = await settingsApi.setEditorCommand("zed");

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/settings/editor",
-				expect.objectContaining({
-					method: "PUT",
-					body: JSON.stringify({ editor_command: "zed" }),
-				}),
-			);
-			expect(result).toEqual(expected);
+			expect(rpc.calls).toEqual([
+				{
+					method: "settings.put_editor",
+					params: { editor_command: "zed" },
+				},
+			]);
+			expect(result).toEqual({ editor_command: "zed" });
 		});

-		it("sends PUT with null to clear the editor command", async () => {
-			const expected = { editor_command: null };
-			mockFetch.mockResolvedValueOnce(okResponse(expected));
+		it("dispatches settings.put_editor with null to clear", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("settings.put_editor", { editor_command: null });

 			const result = await settingsApi.setEditorCommand(null);

-			expect(mockFetch).toHaveBeenCalledWith(
-				"/api/settings/editor",
-				expect.objectContaining({
-					method: "PUT",
-					body: JSON.stringify({ editor_command: null }),
-				}),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "settings.put_editor",
+					params: { editor_command: null },
+				},
+			]);
 			expect(result.editor_command).toBeNull();
 		});
+	});

-		it("uses custom baseUrl when provided", async () => {
-			mockFetch.mockResolvedValueOnce(okResponse({ editor_command: "vim" }));
+	describe("openFile", () => {
+		it("dispatches settings.open_file RPC with path and line", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("settings.open_file", { ok: true });

-			await settingsApi.setEditorCommand("vim", "http://localhost:4000/api");
+			const result = await settingsApi.openFile("src/main.rs", 42);

-			expect(mockFetch).toHaveBeenCalledWith(
-				"http://localhost:4000/api/settings/editor",
-				expect.objectContaining({ method: "PUT" }),
-			);
+			expect(rpc.calls).toEqual([
+				{
+					method: "settings.open_file",
+					params: { path: "src/main.rs", line: 42 },
+				},
+			]);
+			expect(result).toEqual({ success: true });
+		});
+
+		it("dispatches settings.open_file with null line when omitted", async () => {
+			const rpc = installRpcMock();
+			rpc.respond("settings.open_file", { ok: true });
+
+			await settingsApi.openFile("src/main.rs");
+
+			expect(rpc.calls).toEqual([
+				{
+					method: "settings.open_file",
+					params: { path: "src/main.rs", line: null },
+				},
+			]);
 		});
 	});

 	describe("error handling", () => {
-		it("throws with response body text on non-ok response", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(400, "Bad Request"));
+		it("surfaces RPC errors for getEditorCommand", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("settings.get_editor", "store unavailable", "INTERNAL");

 			await expect(settingsApi.getEditorCommand()).rejects.toThrow(
-				"Bad Request",
+				"store unavailable",
 			);
 		});

-		it("throws with status code message when response body is empty", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(500, ""));
-
-			await expect(settingsApi.getEditorCommand()).rejects.toThrow(
-				"Request failed (500)",
-			);
-		});
-
-		it("throws on setEditorCommand error", async () => {
-			mockFetch.mockResolvedValueOnce(errorResponse(403, "Forbidden"));
+		it("surfaces RPC errors for setEditorCommand", async () => {
+			const rpc = installRpcMock();
+			rpc.respondError("settings.put_editor", "Forbidden", "FORBIDDEN");

 			await expect(settingsApi.setEditorCommand("code")).rejects.toThrow(
 				"Forbidden",
@@ -1,3 +1,15 @@
+/**
+ * WS-RPC client for editor and project settings.
+ */
+import { rpcCall } from "./rpc";
+import type {
+	EditorSettingsResult,
+	OkResult,
+	OpenFileParams,
+	ProjectSettingsPayload,
+	PutEditorParams,
+} from "./rpcContract";
+
 export interface EditorSettings {
 	editor_command: string | null;
 }
@@ -19,80 +31,39 @@ export interface OpenFileResult {
 	success: boolean;
 }

-const DEFAULT_API_BASE = "/api";
-
-function buildApiUrl(path: string, baseUrl = DEFAULT_API_BASE): string {
-	return `${baseUrl}${path}`;
-}
-
-async function requestJson<T>(
-	path: string,
-	options: RequestInit = {},
-	baseUrl = DEFAULT_API_BASE,
-): Promise<T> {
-	const res = await fetch(buildApiUrl(path, baseUrl), {
-		headers: {
-			"Content-Type": "application/json",
-			...(options.headers ?? {}),
-		},
-		...options,
-	});
-
-	if (!res.ok) {
-		const text = await res.text();
-		throw new Error(text || `Request failed (${res.status})`);
-	}
-
-	return res.json() as Promise<T>;
-}
-
 export const settingsApi = {
-	getProjectSettings(baseUrl?: string): Promise<ProjectSettings> {
-		return requestJson<ProjectSettings>("/settings", {}, baseUrl);
+	getProjectSettings(_baseUrl?: string): Promise<ProjectSettings> {
+		return rpcCall<ProjectSettings>("settings.get_project");
 	},

-	putProjectSettings(
+	async putProjectSettings(
 		settings: ProjectSettings,
-		baseUrl?: string,
+		_baseUrl?: string,
 	): Promise<ProjectSettings> {
-		return requestJson<ProjectSettings>(
-			"/settings",
-			{ method: "PUT", body: JSON.stringify(settings) },
-			baseUrl,
-		);
+		const params: ProjectSettingsPayload = settings;
+		return rpcCall<ProjectSettingsPayload>("settings.put_project", params);
 	},

-	getEditorCommand(baseUrl?: string): Promise<EditorSettings> {
-		return requestJson<EditorSettings>("/settings/editor", {}, baseUrl);
+	getEditorCommand(_baseUrl?: string): Promise<EditorSettings> {
+		return rpcCall<EditorSettings>("settings.get_editor");
 	},

-	setEditorCommand(
+	async setEditorCommand(
 		command: string | null,
-		baseUrl?: string,
+		_baseUrl?: string,
 	): Promise<EditorSettings> {
-		return requestJson<EditorSettings>(
-			"/settings/editor",
-			{
-				method: "PUT",
-				body: JSON.stringify({ editor_command: command }),
-			},
-			baseUrl,
-		);
+		const params: PutEditorParams = { editor_command: command };
+		const r = await rpcCall<EditorSettingsResult>("settings.put_editor", params);
+		return { editor_command: r.editor_command };
 	},

-	openFile(
+	async openFile(
 		path: string,
 		line?: number,
-		baseUrl?: string,
+		_baseUrl?: string,
 	): Promise<OpenFileResult> {
-		const params = new URLSearchParams({ path });
-		if (line !== undefined) {
-			params.set("line", String(line));
-		}
-		return requestJson<OpenFileResult>(
-			`/settings/open-file?${params.toString()}`,
-			{ method: "POST" },
-			baseUrl,
-		);
+		const params: OpenFileParams = { path, line: line ?? null };
+		const r = await rpcCall<OkResult>("settings.open_file", params);
+		return { success: r.ok };
 	},
 };
@@ -0,0 +1,112 @@
+/** Agent logs card sub-component for WorkItemDetailPanel. */
+
+import type { AgentInfo, AgentStatusValue } from "../api/agents";
+import { STATUS_COLORS } from "./workItemDetailPanelUtils";
+
+interface AgentLogsSectionProps {
+	agentInfo: AgentInfo | null;
+	agentStatus: AgentStatusValue | null;
+	agentLog: string[];
+}
+
+/**
+ * Renders the "Agent Logs" card when an agent is active, or a placeholder
+ * when no agent is assigned to the story.
+ */
+export function AgentLogsSection({
+	agentInfo,
+	agentStatus,
+	agentLog,
+}: AgentLogsSectionProps) {
+	if (!agentInfo) {
+		return (
+			<div
+				data-testid="placeholder-agent-logs"
+				style={{
+					border: "1px solid #2a2a2a",
+					borderRadius: "8px",
+					padding: "10px 12px",
+					background: "#161616",
+				}}
+			>
+				<div
+					style={{
+						fontWeight: 600,
+						fontSize: "0.8em",
+						color: "#555",
+						marginBottom: "4px",
+					}}
+				>
+					Agent Logs
+				</div>
+				<div style={{ fontSize: "0.75em", color: "#444" }}>Coming soon</div>
+			</div>
+		);
+	}
+
+	return (
+		<div
+			data-testid="agent-logs-section"
+			style={{
+				border: "1px solid #2a2a2a",
+				borderRadius: "8px",
+				padding: "10px 12px",
+				background: "#161616",
+			}}
+		>
+			<div
+				style={{
+					display: "flex",
+					alignItems: "center",
+					justifyContent: "space-between",
+					marginBottom: "6px",
+				}}
+			>
+				<div
+					style={{
+						fontWeight: 600,
+						fontSize: "0.8em",
+						color: "#888",
+					}}
+				>
+					Agent Logs
+				</div>
+				{agentStatus && (
+					<div
+						data-testid="agent-status-badge"
+						style={{
+							fontSize: "0.7em",
+							color: STATUS_COLORS[agentStatus],
+							fontWeight: 600,
+						}}
+					>
+						{agentInfo.agent_name} — {agentStatus}
+					</div>
+				)}
+			</div>
+			{agentLog.length > 0 ? (
+				<div
+					data-testid="agent-log-output"
+					style={{
+						fontSize: "0.75em",
+						fontFamily: "monospace",
+						color: "#ccc",
+						whiteSpace: "pre-wrap",
+						wordBreak: "break-word",
+						lineHeight: "1.5",
+						maxHeight: "200px",
+						overflowY: "auto",
+					}}
+				>
+					{agentLog.join("")}
+				</div>
+			) : (
+				<div style={{ fontSize: "0.75em", color: "#444" }}>
+					{agentStatus === "running" || agentStatus === "pending"
+						? "Waiting for output..."
+						: "No output."}
+				</div>
+			)}
+		</div>
+	);
+}
@@ -0,0 +1,530 @@
+import {
+	act,
+	fireEvent,
+	render,
+	screen,
+	waitFor,
+} from "@testing-library/react";
+
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { api } from "../api/client";
+import type { Message } from "../types";
+import { Chat } from "./Chat";
+
+// Module-level store for the WebSocket handlers captured during connect().
+type WsHandlers = {
+	onToken: (content: string) => void;
+	onUpdate: (history: Message[]) => void;
+	onSessionId: (sessionId: string) => void;
+	onError: (message: string) => void;
+	onActivity: (toolName: string) => void;
+	onReconciliationProgress: (
+		storyId: string,
+		status: string,
+		message: string,
+	) => void;
+};
+let capturedWsHandlers: WsHandlers | null = null;
+
+vi.mock("../api/client", () => {
+	const api = {
+		getOllamaModels: vi.fn(),
+		getAnthropicApiKeyExists: vi.fn(),
+		getAnthropicModels: vi.fn(),
+		getModelPreference: vi.fn(),
+		setModelPreference: vi.fn(),
+		cancelChat: vi.fn(),
+		setAnthropicApiKey: vi.fn(),
+		readFile: vi.fn(),
+		listProjectFiles: vi.fn(),
+		botCommand: vi.fn(),
+	};
+	class ChatWebSocket {
+		connect(handlers: WsHandlers) {
+			capturedWsHandlers = handlers;
+		}
+		close() {}
+		sendChat() {}
+		cancel() {}
+	}
+	return { api, ChatWebSocket };
+});
+
+const mockedApi = {
+	getOllamaModels: vi.mocked(api.getOllamaModels),
+	getAnthropicApiKeyExists: vi.mocked(api.getAnthropicApiKeyExists),
+	getAnthropicModels: vi.mocked(api.getAnthropicModels),
+	getModelPreference: vi.mocked(api.getModelPreference),
+	setModelPreference: vi.mocked(api.setModelPreference),
+	cancelChat: vi.mocked(api.cancelChat),
+	setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
+	readFile: vi.mocked(api.readFile),
+	listProjectFiles: vi.mocked(api.listProjectFiles),
+	botCommand: vi.mocked(api.botCommand),
+};
+
+function setupMocks() {
+	mockedApi.getOllamaModels.mockResolvedValue(["llama3.1"]);
+	mockedApi.getAnthropicApiKeyExists.mockResolvedValue(true);
+	mockedApi.getAnthropicModels.mockResolvedValue([]);
+	mockedApi.getModelPreference.mockResolvedValue(null);
+	mockedApi.setModelPreference.mockResolvedValue(true);
+	mockedApi.readFile.mockResolvedValue("");
+	mockedApi.listProjectFiles.mockResolvedValue([]);
+	mockedApi.cancelChat.mockResolvedValue(true);
+	mockedApi.setAnthropicApiKey.mockResolvedValue(true);
+	mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
+}
+
+describe("Chat activity status indicator (Bug 140)", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("shows activity label when tool activity fires during streaming content", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Simulate sending a message to set loading=true
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Read my file" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Simulate tokens arriving (streamingContent becomes non-empty)
+		await act(async () => {
+			capturedWsHandlers?.onToken("I'll read that file for you.");
+		});
+
+		// Now simulate a tool activity event while streamingContent is non-empty
+		await act(async () => {
+			capturedWsHandlers?.onActivity("read_file");
+		});
+
+		// The activity indicator should be visible with the tool activity label
+		const indicator = await screen.findByTestId("activity-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Reading file...");
+	});
+
+	it("shows Thinking... fallback when loading with no streaming and no activity", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Simulate sending a message to set loading=true
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// No tokens, no activity — should show "Thinking..."
+		const indicator = await screen.findByTestId("activity-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Thinking...");
+	});
+
+	it("hides Thinking... when streaming content is present but no tool activity", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Simulate sending a message to set loading=true
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Tokens arrive — streamingContent is non-empty, no activity
+		await act(async () => {
+			capturedWsHandlers?.onToken("Here is my response...");
+		});
+
+		// The activity indicator should NOT be visible (just streaming bubble)
+		expect(screen.queryByTestId("activity-indicator")).not.toBeInTheDocument();
+	});
+
+	it("shows activity label for Claude Code tool names (Read, Bash, etc.)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Simulate sending a message to set loading=true
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Read my file" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Simulate tokens arriving
+		await act(async () => {
+			capturedWsHandlers?.onToken("Let me read that.");
+		});
+
+		// Claude Code sends tool name "Read" (not "read_file")
+		await act(async () => {
+			capturedWsHandlers?.onActivity("Read");
+		});
+
+		const indicator = await screen.findByTestId("activity-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Reading file...");
+	});
+
+	it("shows activity label for Claude Code Bash tool", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Run the tests" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await act(async () => {
+			capturedWsHandlers?.onToken("Running tests now.");
+		});
+
+		await act(async () => {
+			capturedWsHandlers?.onActivity("Bash");
+		});
+
+		const indicator = await screen.findByTestId("activity-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Executing command...");
+	});
+
+	it("shows generic label for unknown tool names", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Do something" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await act(async () => {
+			capturedWsHandlers?.onToken("Working on it.");
+		});
+
+		await act(async () => {
+			capturedWsHandlers?.onActivity("SomeCustomTool");
+		});
+
+		const indicator = await screen.findByTestId("activity-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Using SomeCustomTool...");
+	});
+});
+
+describe("Chat message queue (Story 155)", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("shows queued message indicator when submitting while loading (AC1, AC2)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Send first message to put the chat in loading state
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First message" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Now type and submit a second message while loading is true
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Queued message" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// The queued message indicator should appear
+		const indicator = await screen.findByTestId("queued-message-indicator");
+		expect(indicator).toBeInTheDocument();
+		expect(indicator).toHaveTextContent("Queued");
+		expect(indicator).toHaveTextContent("Queued message");
+
+		// Input should be cleared after queuing
+		expect((input as HTMLTextAreaElement).value).toBe("");
+	});
+
+	it("auto-sends queued message when agent response completes (AC4)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue a second message while loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Auto-send this" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Verify it's queued
+		expect(
+			await screen.findByTestId("queued-message-indicator"),
+		).toBeInTheDocument();
+
+		// Simulate agent response completing (loading → false)
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "First" },
+				{ role: "assistant", content: "Done." },
+			]);
+		});
+
+		// The queued indicator should disappear (message was sent)
+		await waitFor(() => {
+			expect(
+				screen.queryByTestId("queued-message-indicator"),
+			).not.toBeInTheDocument();
+		});
+	});
+
+	it("cancel button discards the queued message (AC3, AC6)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message to start loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue a second message
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Discard me" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		const indicator = await screen.findByTestId("queued-message-indicator");
+		expect(indicator).toBeInTheDocument();
+
+		// Click the ✕ cancel button
+		const cancelBtn = screen.getByTitle("Cancel queued message");
+		await act(async () => {
+			fireEvent.click(cancelBtn);
+		});
+
+		// Indicator should be gone
+		expect(
+			screen.queryByTestId("queued-message-indicator"),
+		).not.toBeInTheDocument();
+	});
+
+	it("edit button puts queued message back into input (AC3)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message to start loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue a second message
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Edit me back" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await screen.findByTestId("queued-message-indicator");
+
+		// Click the Edit button
+		const editBtn = screen.getByTitle("Edit queued message");
+		await act(async () => {
+			fireEvent.click(editBtn);
+		});
+
+		// Indicator should be gone and message back in input
+		expect(
+			screen.queryByTestId("queued-message-indicator"),
+		).not.toBeInTheDocument();
+		expect((input as HTMLTextAreaElement).value).toBe("Edit me back");
+	});
+
+	it("subsequent submissions are appended to the queue (Bug 168)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message to start loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue first message
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Queue 1" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await screen.findByTestId("queued-message-indicator");
+
+		// Queue second message — should be appended, not overwrite the first
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Queue 2" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Both messages should be visible
+		const indicators = await screen.findAllByTestId("queued-message-indicator");
+		expect(indicators).toHaveLength(2);
+		expect(indicators[0]).toHaveTextContent("Queue 1");
+		expect(indicators[1]).toHaveTextContent("Queue 2");
+	});
+
+	it("all queued messages are drained at once when agent responds (Story 199)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message to start loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue two messages while loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Second" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Third" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Both messages should be visible in order
+		const indicators = await screen.findAllByTestId("queued-message-indicator");
+		expect(indicators).toHaveLength(2);
+		expect(indicators[0]).toHaveTextContent("Second");
+		expect(indicators[1]).toHaveTextContent("Third");
+
+		// Simulate first response completing — both "Second" and "Third" are drained at once
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "First" },
+				{ role: "assistant", content: "Response 1." },
+			]);
+		});
+
+		// Both queued indicators should be gone — entire queue drained in one shot
+		await waitFor(() => {
+			const remaining = screen.queryAllByTestId("queued-message-indicator");
+			expect(remaining).toHaveLength(0);
+		});
+	});
+
+	it("does not auto-send queued message when generation is cancelled (AC6)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		// Send first message to start loading
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "First" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Queue a second message
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Should not send" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await screen.findByTestId("queued-message-indicator");
+
+		// Click the stop button (■) — but input is empty so button is stop
+		// Actually simulate cancel by clicking the stop button (which requires empty input)
+		// We need to use the send button when input is empty (stop mode)
+		// Simulate cancel via the cancelGeneration path: the button when loading && !input
+		// At this point input is empty (was cleared after queuing)
+		const stopButton = screen.getByRole("button", { name: "■" });
+		await act(async () => {
+			fireEvent.click(stopButton);
+		});
+
+		// Queued indicator should be gone (cancelled)
+		await waitFor(() => {
+			expect(
+				screen.queryByTestId("queued-message-indicator"),
+			).not.toBeInTheDocument();
+		});
+	});
+});
@@ -0,0 +1,511 @@
+import {
+	act,
+	fireEvent,
+	render,
+	screen,
+	waitFor,
+} from "@testing-library/react";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { api } from "../api/client";
+import type { Message } from "../types";
+import { Chat } from "./Chat";
+
+// Module-level store for the WebSocket handlers captured during connect().
+type WsHandlers = {
+	onToken: (content: string) => void;
+	onUpdate: (history: Message[]) => void;
+	onSessionId: (sessionId: string) => void;
+	onError: (message: string) => void;
+	onActivity: (toolName: string) => void;
+	onReconciliationProgress: (
+		storyId: string,
+		status: string,
+		message: string,
+	) => void;
+};
+let capturedWsHandlers: WsHandlers | null = null;
+// Captures the last sendChat call's arguments for assertion.
+let lastSendChatArgs: { messages: Message[]; config: unknown } | null = null;
+
+vi.mock("../api/client", () => {
+	const api = {
+		getOllamaModels: vi.fn(),
+		getAnthropicApiKeyExists: vi.fn(),
+		getAnthropicModels: vi.fn(),
+		getModelPreference: vi.fn(),
+		setModelPreference: vi.fn(),
+		cancelChat: vi.fn(),
+		setAnthropicApiKey: vi.fn(),
+		readFile: vi.fn(),
+		listProjectFiles: vi.fn(),
+		botCommand: vi.fn(),
+	};
+	class ChatWebSocket {
+		connect(handlers: WsHandlers) {
+			capturedWsHandlers = handlers;
+		}
+		close() {}
+		sendChat(messages: Message[], config: unknown) {
+			lastSendChatArgs = { messages, config };
+		}
+		cancel() {}
+	}
+	return { api, ChatWebSocket };
+});
+
+const mockedApi = {
+	getOllamaModels: vi.mocked(api.getOllamaModels),
+	getAnthropicApiKeyExists: vi.mocked(api.getAnthropicApiKeyExists),
+	getAnthropicModels: vi.mocked(api.getAnthropicModels),
+	getModelPreference: vi.mocked(api.getModelPreference),
+	setModelPreference: vi.mocked(api.setModelPreference),
+	cancelChat: vi.mocked(api.cancelChat),
+	setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
+	readFile: vi.mocked(api.readFile),
+	listProjectFiles: vi.mocked(api.listProjectFiles),
+	botCommand: vi.mocked(api.botCommand),
+};
+
+function setupMocks() {
+	mockedApi.getOllamaModels.mockResolvedValue(["llama3.1"]);
+	mockedApi.getAnthropicApiKeyExists.mockResolvedValue(true);
+	mockedApi.getAnthropicModels.mockResolvedValue([]);
+	mockedApi.getModelPreference.mockResolvedValue(null);
+	mockedApi.setModelPreference.mockResolvedValue(true);
+	mockedApi.readFile.mockResolvedValue("");
+	mockedApi.listProjectFiles.mockResolvedValue([]);
+	mockedApi.cancelChat.mockResolvedValue(true);
+	mockedApi.setAnthropicApiKey.mockResolvedValue(true);
+	mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
+}
+
+describe("Remove bubble styling from streaming messages (Story 163)", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("AC1: streaming assistant message uses transparent background, no extra padding, no border-radius", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Send a message to put chat into loading state
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Simulate streaming tokens arriving
+		await act(async () => {
+			capturedWsHandlers?.onToken("Streaming response text");
+		});
+
+		// Find the streaming message container (the inner div wrapping the Markdown)
+		const streamingText = await screen.findByText("Streaming response text");
+		// The markdown-body wrapper is the parent, and the styled div is its parent
+		const styledDiv = streamingText.closest(".markdown-body")
+			?.parentElement as HTMLElement;
+
+		expect(styledDiv).toBeTruthy();
+		const styleAttr = styledDiv.getAttribute("style") ?? "";
+		expect(styleAttr).toContain("background: transparent");
+		expect(styleAttr).toContain("padding: 0px");
+		expect(styleAttr).toContain("border-radius: 0px");
+		expect(styleAttr).toContain("max-width: 100%");
+	});
+
+	it("AC1: streaming message wraps Markdown in markdown-body class", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await act(async () => {
+			capturedWsHandlers?.onToken("Some markdown content");
+		});
+
+		const streamingText = await screen.findByText("Some markdown content");
+		const markdownBody = streamingText.closest(".markdown-body");
+		expect(markdownBody).toBeTruthy();
+	});
+
+	it("AC2: no visual change when streaming ends and message transitions to completed", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Send a message to start streaming
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Simulate streaming tokens
+		await act(async () => {
+			capturedWsHandlers?.onToken("Final response");
+		});
+
+		// Capture streaming message style attribute
+		const streamingText = await screen.findByText("Final response");
+		const streamingStyledDiv = streamingText.closest(".markdown-body")
+			?.parentElement as HTMLElement;
+		const streamingStyleAttr = streamingStyledDiv.getAttribute("style") ?? "";
+
+		// Transition: onUpdate completes the message
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "Hello" },
+				{ role: "assistant", content: "Final response" },
+			]);
+		});
+
+		// Find the completed message — it should have the same styling
+		const completedText = await screen.findByText("Final response");
+		const completedMarkdownBody = completedText.closest(".markdown-body");
+		const completedStyledDiv =
+			completedMarkdownBody?.parentElement as HTMLElement;
+
+		expect(completedStyledDiv).toBeTruthy();
+		const completedStyleAttr = completedStyledDiv.getAttribute("style") ?? "";
+
+		// Both streaming and completed use transparent bg, 0 padding, 0 border-radius
+		expect(completedStyleAttr).toContain("background: transparent");
+		expect(completedStyleAttr).toContain("padding: 0px");
+		expect(completedStyleAttr).toContain("border-radius: 0px");
+		expect(streamingStyleAttr).toContain("background: transparent");
+		expect(streamingStyleAttr).toContain("padding: 0px");
+		expect(streamingStyleAttr).toContain("border-radius: 0px");
+
+		// Both have the markdown-body class wrapper
+		expect(streamingStyledDiv.querySelector(".markdown-body")).toBeTruthy();
+	});
+
+	it("AC3: completed assistant messages retain transparent background and no border-radius", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "Hi" },
+				{ role: "assistant", content: "Hello there!" },
+			]);
+		});
+
+		const assistantText = await screen.findByText("Hello there!");
+		const markdownBody = assistantText.closest(".markdown-body");
+		const styledDiv = markdownBody?.parentElement as HTMLElement;
+
+		expect(styledDiv).toBeTruthy();
+		const styleAttr = styledDiv.getAttribute("style") ?? "";
+		expect(styleAttr).toContain("background: transparent");
+		expect(styleAttr).toContain("padding: 0px");
+		expect(styleAttr).toContain("border-radius: 0px");
+		expect(styleAttr).toContain("max-width: 100%");
+	});
+
+	it("AC3: completed user messages still have their bubble styling", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "I am a user message" },
+				{ role: "assistant", content: "I am a response" },
+			]);
+		});
+
+		// findByText finds the text element; traverse up to the styled bubble div
+		const userText = await screen.findByText("I am a user message");
+		// User messages are rendered via markdown, so text is inside a <p> inside .user-markdown-body
+		// Walk up to find the styled bubble container
+		const bubbleDiv = userText.closest("[style*='padding: 10px 16px']");
+		expect(bubbleDiv).toBeTruthy();
+		const styleAttr = bubbleDiv?.getAttribute("style") ?? "";
+		// User messages retain bubble: distinct background, padding, rounded corners
+		expect(styleAttr).toContain("padding: 10px 16px");
+		expect(styleAttr).toContain("border-radius: 20px");
+		expect(styleAttr).not.toContain("background: transparent");
+	});
+});
+
+describe("Slash command handling (Story 374)", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		lastSendChatArgs = null;
+		setupMocks();
+	});
+
+	afterEach(() => {
+		vi.clearAllMocks();
+	});
+
+	it("AC: /status calls botCommand and displays response", async () => {
+		mockedApi.botCommand.mockResolvedValue({ response: "Pipeline: 3 active" });
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/status" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith(
+				"status",
+				"",
+			);
+		});
+		expect(await screen.findByText("Pipeline: 3 active")).toBeInTheDocument();
+		// Should NOT go to LLM
+		expect(lastSendChatArgs).toBeNull();
+	});
+
+	it("AC: /status <number> passes args to botCommand", async () => {
+		mockedApi.botCommand.mockResolvedValue({ response: "Story 42 details" });
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/status 42" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith(
+				"status",
+				"42",
+			);
+		});
+	});
+
+	it("AC: /start <number> calls botCommand", async () => {
+		mockedApi.botCommand.mockResolvedValue({ response: "Started agent" });
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/start 42 opus" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith(
+				"start",
+				"42 opus",
+			);
+		});
+		expect(await screen.findByText("Started agent")).toBeInTheDocument();
+	});
+
+	it("AC: /git calls botCommand", async () => {
+		mockedApi.botCommand.mockResolvedValue({ response: "On branch main" });
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/git" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith("git", "");
+		});
+	});
+
+	it("AC: /cost calls botCommand", async () => {
+		mockedApi.botCommand.mockResolvedValue({ response: "$1.23 today" });
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/cost" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith("cost", "");
+		});
+	});
+
+	it("AC: /reset clears messages and session without LLM", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// First add a message so there is history to clear
+		await act(async () => {
+			capturedWsHandlers?.onUpdate([
+				{ role: "user", content: "hello" },
+				{ role: "assistant", content: "world" },
+			]);
+		});
+		expect(await screen.findByText("world")).toBeInTheDocument();
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/reset" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// LLM must NOT be invoked
+		expect(lastSendChatArgs).toBeNull();
+		// botCommand must NOT be invoked (reset is frontend-only)
+		expect(mockedApi.botCommand).not.toHaveBeenCalled();
+		// Confirmation message should appear
+		expect(await screen.findByText(/Session reset/)).toBeInTheDocument();
+	});
+
+	it("AC: unrecognised slash command shows error message", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/foobar" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		expect(await screen.findByText(/Unknown command/)).toBeInTheDocument();
+		// Should NOT go to LLM
+		expect(lastSendChatArgs).toBeNull();
+		// Should NOT call botCommand
+		expect(mockedApi.botCommand).not.toHaveBeenCalled();
+	});
+
+	it("AC: /help calls botCommand and displays response", async () => {
+		mockedApi.botCommand.mockResolvedValue({
+			response: "Available commands: status, help, ...",
+		});
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/help" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect(mockedApi.botCommand).toHaveBeenCalledWith("help", "");
+		});
+		expect(lastSendChatArgs).toBeNull();
+	});
+
+	it("AC: botCommand API error shows error message in chat", async () => {
+		mockedApi.botCommand.mockRejectedValue(new Error("Server error"));
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "/git" } });
+		});
+		// First Enter selects the command from the picker; second Enter submits it
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		expect(
+			await screen.findByText(/Error running command/),
+		).toBeInTheDocument();
+	});
+});
+
+describe("Story 1058: WebSocket errors do not appear in chat", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("does not add a chat message when onError is called", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onError("Something went wrong on the server.");
+		});
+
+		expect(
+			screen.queryByText("Something went wrong on the server."),
+		).not.toBeInTheDocument();
+	});
+
+	it("does not add a chat message for errors containing a URL", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onError(
+				"OAuth login required. Please visit: https://example.com/oauth/login",
+			);
+		});
+
+		expect(
+			screen.queryByRole("link", {
+				name: /https:\/\/example\.com\/oauth\/login/,
+			}),
+		).not.toBeInTheDocument();
+	});
+});
@@ -0,0 +1,264 @@
+import {
+	act,
+	fireEvent,
+	render,
+	screen,
+	waitFor,
+} from "@testing-library/react";
+
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { api } from "../api/client";
+import type { Message } from "../types";
+import { Chat } from "./Chat";
+
+// Module-level store for the WebSocket handlers captured during connect().
+type WsHandlers = {
+	onToken: (content: string) => void;
+	onUpdate: (history: Message[]) => void;
+	onSessionId: (sessionId: string) => void;
+	onError: (message: string) => void;
+	onActivity: (toolName: string) => void;
+	onReconciliationProgress: (
+		storyId: string,
+		status: string,
+		message: string,
+	) => void;
+};
+let capturedWsHandlers: WsHandlers | null = null;
+
+vi.mock("../api/client", () => {
+	const api = {
+		getOllamaModels: vi.fn(),
+		getAnthropicApiKeyExists: vi.fn(),
+		getAnthropicModels: vi.fn(),
+		getModelPreference: vi.fn(),
+		setModelPreference: vi.fn(),
+		cancelChat: vi.fn(),
+		setAnthropicApiKey: vi.fn(),
+		readFile: vi.fn(),
+		listProjectFiles: vi.fn(),
+		botCommand: vi.fn(),
+	};
+	class ChatWebSocket {
+		connect(handlers: WsHandlers) {
+			capturedWsHandlers = handlers;
+		}
+		close() {}
+		sendChat() {}
+		cancel() {}
+	}
+	return { api, ChatWebSocket };
+});
+
+const mockedApi = {
+	getOllamaModels: vi.mocked(api.getOllamaModels),
+	getAnthropicApiKeyExists: vi.mocked(api.getAnthropicApiKeyExists),
+	getAnthropicModels: vi.mocked(api.getAnthropicModels),
+	getModelPreference: vi.mocked(api.getModelPreference),
+	setModelPreference: vi.mocked(api.setModelPreference),
+	cancelChat: vi.mocked(api.cancelChat),
+	setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
+	readFile: vi.mocked(api.readFile),
+	listProjectFiles: vi.mocked(api.listProjectFiles),
+	botCommand: vi.mocked(api.botCommand),
+};
+
+function setupMocks() {
+	mockedApi.getOllamaModels.mockResolvedValue(["llama3.1"]);
+	mockedApi.getAnthropicApiKeyExists.mockResolvedValue(true);
+	mockedApi.getAnthropicModels.mockResolvedValue([]);
+	mockedApi.getModelPreference.mockResolvedValue(null);
+	mockedApi.setModelPreference.mockResolvedValue(true);
+	mockedApi.readFile.mockResolvedValue("");
+	mockedApi.listProjectFiles.mockResolvedValue([]);
+	mockedApi.cancelChat.mockResolvedValue(true);
+	mockedApi.setAnthropicApiKey.mockResolvedValue(true);
+	mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
+}
+
+describe("Chat two-column layout", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("renders left and right column containers (AC1, AC2)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		expect(await screen.findByTestId("chat-content-area")).toBeInTheDocument();
+		expect(await screen.findByTestId("chat-left-column")).toBeInTheDocument();
+		expect(await screen.findByTestId("chat-right-column")).toBeInTheDocument();
+	});
+
+	it("renders chat input inside the left column (AC2, AC5)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		const leftColumn = await screen.findByTestId("chat-left-column");
+		const input = screen.getByPlaceholderText("Send a message...");
+		expect(leftColumn).toContainElement(input);
+	});
+
+	it("renders panels inside the right column (AC2)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		const rightColumn = await screen.findByTestId("chat-right-column");
+		const agentsPanel = await screen.findByText("Agents");
+		expect(rightColumn).toContainElement(agentsPanel);
+	});
+
+	it("uses row flex-direction on wide screens (AC3)", async () => {
+		Object.defineProperty(window, "innerWidth", {
+			writable: true,
+			configurable: true,
+			value: 1200,
+		});
+		window.dispatchEvent(new Event("resize"));
+
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		const contentArea = await screen.findByTestId("chat-content-area");
+		expect(contentArea).toHaveStyle({ flexDirection: "row" });
+	});
+
+	it("uses column flex-direction on narrow screens (AC4)", async () => {
+		Object.defineProperty(window, "innerWidth", {
+			writable: true,
+			configurable: true,
+			value: 600,
+		});
+		window.dispatchEvent(new Event("resize"));
+
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		const contentArea = await screen.findByTestId("chat-content-area");
+		expect(contentArea).toHaveStyle({ flexDirection: "column" });
+
+		// Restore wide width for subsequent tests
+		Object.defineProperty(window, "innerWidth", {
+			writable: true,
+			configurable: true,
+			value: 1024,
+		});
+	});
+});
+
+describe("Chat input Shift+Enter behavior", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("renders a textarea element for the chat input (AC3)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		const input = screen.getByPlaceholderText("Send a message...");
+		expect(input.tagName.toLowerCase()).toBe("textarea");
+	});
+
+	it("sends message on Enter key press without Shift (AC2)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => {
+			expect((input as HTMLTextAreaElement).value).toBe("");
+		});
+	});
+
+	it("does not send message on Shift+Enter (AC1)", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		const input = screen.getByPlaceholderText("Send a message...");
+
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Hello" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: true });
+		});
+
+		expect((input as HTMLTextAreaElement).value).toBe("Hello");
+	});
+});
+
+describe("Chat reconciliation banner", () => {
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		setupMocks();
+	});
+
+	it("shows banner when a non-done reconciliation event is received", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onReconciliationProgress(
+				"42_story_test",
+				"checking",
+				"Checking for committed work in 2_current/",
+			);
+		});
+
+		expect(
+			await screen.findByTestId("reconciliation-banner"),
+		).toBeInTheDocument();
+		expect(
+			await screen.findByText("Reconciling startup state..."),
+		).toBeInTheDocument();
+	});
+
+	it("shows event message in the banner", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onReconciliationProgress(
+				"42_story_test",
+				"gates_running",
+				"Running acceptance gates…",
+			);
+		});
+
+		expect(
+			await screen.findByText(/Running acceptance gates/),
+		).toBeInTheDocument();
+	});
+
+	it("dismisses banner when done event is received", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onReconciliationProgress(
+				"42_story_test",
+				"checking",
+				"Checking for committed work",
+			);
+		});
+
+		expect(
+			await screen.findByTestId("reconciliation-banner"),
+		).toBeInTheDocument();
+
+		await act(async () => {
+			capturedWsHandlers?.onReconciliationProgress(
+				"",
+				"done",
+				"Startup reconciliation complete.",
+			);
+		});
+
+		await waitFor(() => {
+			expect(
+				screen.queryByTestId("reconciliation-banner"),
+			).not.toBeInTheDocument();
+		});
+	});
+});
@@ -0,0 +1,461 @@
+import {
+	act,
+	fireEvent,
+	render,
+	screen,
+	waitFor,
+} from "@testing-library/react";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { api } from "../api/client";
+import type { Message } from "../types";
+import { Chat } from "./Chat";
+
+// Module-level store for the WebSocket handlers captured during connect().
+type WsHandlers = {
+	onToken: (content: string) => void;
+	onUpdate: (history: Message[]) => void;
+	onSessionId: (sessionId: string) => void;
+	onError: (message: string) => void;
+	onActivity: (toolName: string) => void;
+	onReconciliationProgress: (
+		storyId: string,
+		status: string,
+		message: string,
+	) => void;
+};
+let capturedWsHandlers: WsHandlers | null = null;
+// Captures the last sendChat call's arguments for assertion.
+let lastSendChatArgs: { messages: Message[]; config: unknown } | null = null;
+
+vi.mock("../api/client", () => {
+	const api = {
+		getOllamaModels: vi.fn(),
+		getAnthropicApiKeyExists: vi.fn(),
+		getAnthropicModels: vi.fn(),
+		getModelPreference: vi.fn(),
+		setModelPreference: vi.fn(),
+		cancelChat: vi.fn(),
+		setAnthropicApiKey: vi.fn(),
+		readFile: vi.fn(),
+		listProjectFiles: vi.fn(),
+		botCommand: vi.fn(),
+	};
+	class ChatWebSocket {
+		connect(handlers: WsHandlers) {
+			capturedWsHandlers = handlers;
+		}
+		close() {}
+		sendChat(messages: Message[], config: unknown) {
+			lastSendChatArgs = { messages, config };
+		}
+		cancel() {}
+	}
+	return { api, ChatWebSocket };
+});
+
+const mockedApi = {
+	getOllamaModels: vi.mocked(api.getOllamaModels),
+	getAnthropicApiKeyExists: vi.mocked(api.getAnthropicApiKeyExists),
+	getAnthropicModels: vi.mocked(api.getAnthropicModels),
+	getModelPreference: vi.mocked(api.getModelPreference),
+	setModelPreference: vi.mocked(api.setModelPreference),
+	cancelChat: vi.mocked(api.cancelChat),
+	setAnthropicApiKey: vi.mocked(api.setAnthropicApiKey),
+	readFile: vi.mocked(api.readFile),
+	listProjectFiles: vi.mocked(api.listProjectFiles),
+	botCommand: vi.mocked(api.botCommand),
+};
+
+function setupMocks() {
+	mockedApi.getOllamaModels.mockResolvedValue(["llama3.1"]);
+	mockedApi.getAnthropicApiKeyExists.mockResolvedValue(true);
+	mockedApi.getAnthropicModels.mockResolvedValue([]);
+	mockedApi.getModelPreference.mockResolvedValue(null);
+	mockedApi.setModelPreference.mockResolvedValue(true);
+	mockedApi.readFile.mockResolvedValue("");
+	mockedApi.listProjectFiles.mockResolvedValue([]);
+	mockedApi.cancelChat.mockResolvedValue(true);
+	mockedApi.setAnthropicApiKey.mockResolvedValue(true);
+	mockedApi.botCommand.mockResolvedValue({ response: "Bot response" });
+}
+
+describe("Chat localStorage persistence (Story 145)", () => {
+	const PROJECT_PATH = "/tmp/project";
+	const STORAGE_KEY = `storykit-chat-history:${PROJECT_PATH}`;
+
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		localStorage.clear();
+		setupMocks();
+	});
+
+	afterEach(() => {
+		localStorage.clear();
+	});
+
+	it("AC1: restores persisted messages on mount", async () => {
+		const saved: Message[] = [
+			{ role: "user", content: "Previously saved question" },
+			{ role: "assistant", content: "Previously saved answer" },
+		];
+		localStorage.setItem(STORAGE_KEY, JSON.stringify(saved));
+
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		expect(
+			await screen.findByText("Previously saved question"),
+		).toBeInTheDocument();
+		expect(
+			await screen.findByText("Previously saved answer"),
+		).toBeInTheDocument();
+	});
+
+	it("AC2: persists messages when WebSocket onUpdate fires", async () => {
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const history: Message[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "assistant", content: "Hi there!" },
+		];
+
+		await act(async () => {
+			capturedWsHandlers?.onUpdate(history);
+		});
+
+		const stored = JSON.parse(localStorage.getItem(STORAGE_KEY) ?? "[]");
+		expect(stored).toEqual(history);
+	});
+
+	it("AC3: clears localStorage when New Session is clicked", async () => {
+		const saved: Message[] = [
+			{ role: "user", content: "Old message" },
+			{ role: "assistant", content: "Old reply" },
+		];
+		localStorage.setItem(STORAGE_KEY, JSON.stringify(saved));
+
+		// Stub window.confirm to auto-approve the clear dialog
+		const confirmSpy = vi.spyOn(window, "confirm").mockReturnValue(true);
+
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		// Wait for the persisted message to appear
+		expect(await screen.findByText("Old message")).toBeInTheDocument();
+
+		// Click "New Session" button
+		const newSessionBtn = screen.getByText(/New Session/);
+		await act(async () => {
+			fireEvent.click(newSessionBtn);
+		});
+
+		// localStorage should be cleared
+		expect(localStorage.getItem(STORAGE_KEY)).toBeNull();
+
+		// Messages should be gone from the UI
+		expect(screen.queryByText("Old message")).not.toBeInTheDocument();
+
+		confirmSpy.mockRestore();
+	});
+
+	it("Bug 245: messages survive unmount/remount cycle (page refresh)", async () => {
+		// Step 1: Render Chat and populate messages via WebSocket onUpdate
+		const { unmount } = render(
+			<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />,
+		);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const history: Message[] = [
+			{ role: "user", content: "Persist me across refresh" },
+			{ role: "assistant", content: "I should survive a reload" },
+		];
+
+		await act(async () => {
+			capturedWsHandlers?.onUpdate(history);
+		});
+
+		// Verify messages are persisted to localStorage
+		expect(localStorage.getItem(STORAGE_KEY)).not.toBeNull();
+		const storedBefore = JSON.parse(localStorage.getItem(STORAGE_KEY) ?? "[]");
+		expect(storedBefore).toEqual(history);
+
+		// Step 2: Unmount the Chat component (simulates page unload)
+		unmount();
+
+		// Verify localStorage was NOT cleared by unmount
+		expect(localStorage.getItem(STORAGE_KEY)).not.toBeNull();
+		const storedAfterUnmount = JSON.parse(
+			localStorage.getItem(STORAGE_KEY) ?? "[]",
+		);
+		expect(storedAfterUnmount).toEqual(history);
+
+		// Step 3: Remount the Chat component (simulates page reload)
+		capturedWsHandlers = null;
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		// Verify messages are restored from localStorage
+		expect(
+			await screen.findByText("Persist me across refresh"),
+		).toBeInTheDocument();
+		expect(
+			await screen.findByText("I should survive a reload"),
+		).toBeInTheDocument();
+
+		// Verify localStorage still has the messages
+		const storedAfterRemount = JSON.parse(
+			localStorage.getItem(STORAGE_KEY) ?? "[]",
+		);
+		expect(storedAfterRemount).toEqual(history);
+	});
+
+	it("Bug 245: after refresh, sendChat includes full prior history", async () => {
+		// Step 1: Render, populate messages via onUpdate, then unmount (simulate refresh)
+		const { unmount } = render(
+			<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />,
+		);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const priorHistory: Message[] = [
+			{ role: "user", content: "What is Rust?" },
+			{ role: "assistant", content: "Rust is a systems programming language." },
+		];
+		await act(async () => {
+			capturedWsHandlers?.onUpdate(priorHistory);
+		});
+
+		// Verify localStorage has the prior history
+		const stored = JSON.parse(localStorage.getItem(STORAGE_KEY) ?? "[]");
+		expect(stored).toEqual(priorHistory);
+
+		unmount();
+
+		// Step 2: Remount (simulates page reload) — messages load from localStorage
+		capturedWsHandlers = null;
+		lastSendChatArgs = null;
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Verify prior messages are displayed
+		expect(await screen.findByText("What is Rust?")).toBeInTheDocument();
+
+		// Step 3: Send a new message — sendChat should include the full prior history
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Tell me more" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		// Verify sendChat was called with ALL prior messages + the new one
+		expect(lastSendChatArgs).not.toBeNull();
+		const args = lastSendChatArgs as unknown as {
+			messages: Message[];
+			config: unknown;
+		};
+		expect(args.messages).toHaveLength(3);
+		expect(args.messages[0]).toEqual({
+			role: "user",
+			content: "What is Rust?",
+		});
+		expect(args.messages[1]).toEqual({
+			role: "assistant",
+			content: "Rust is a systems programming language.",
+		});
+		expect(args.messages[2]).toEqual({
+			role: "user",
+			content: "Tell me more",
+		});
+	});
+
+	it("AC5: uses project-scoped storage key", async () => {
+		const otherKey = "storykit-chat-history:/other/project";
+		localStorage.setItem(
+			otherKey,
+			JSON.stringify([{ role: "user", content: "Other project msg" }]),
+		);
+
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		// Should NOT show the other project's messages
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+		expect(screen.queryByText("Other project msg")).not.toBeInTheDocument();
+
+		// Other project's data should still be in storage
+		expect(localStorage.getItem(otherKey)).not.toBeNull();
+	});
+});
+
+describe("Bug 264: Claude Code session ID persisted across browser refresh", () => {
+	const PROJECT_PATH = "/tmp/project";
+	const SESSION_KEY = `storykit-claude-session-id:${PROJECT_PATH}`;
+	const STORAGE_KEY = `storykit-chat-history:${PROJECT_PATH}`;
+
+	beforeEach(() => {
+		capturedWsHandlers = null;
+		lastSendChatArgs = null;
+		localStorage.clear();
+		setupMocks();
+	});
+
+	afterEach(() => {
+		localStorage.clear();
+	});
+
+	it("AC1: session_id is persisted to localStorage when onSessionId fires", async () => {
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onSessionId("test-session-abc");
+		});
+
+		await waitFor(() => {
+			expect(localStorage.getItem(SESSION_KEY)).toBe("test-session-abc");
+		});
+	});
+
+	it("AC2: after remount, next sendChat includes session_id from localStorage", async () => {
+		// Step 1: Render, receive a session ID, then unmount (simulate refresh)
+		localStorage.setItem(SESSION_KEY, "persisted-session-xyz");
+		localStorage.setItem(
+			STORAGE_KEY,
+			JSON.stringify([
+				{ role: "user", content: "Prior message" },
+				{ role: "assistant", content: "Prior reply" },
+			]),
+		);
+
+		const { unmount } = render(
+			<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />,
+		);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+		unmount();
+
+		// Step 2: Remount (simulates page reload)
+		capturedWsHandlers = null;
+		lastSendChatArgs = null;
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		// Prior messages should be visible
+		expect(await screen.findByText("Prior message")).toBeInTheDocument();
+
+		// Step 3: Send a new message — config should include session_id
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "Continue" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		expect(lastSendChatArgs).not.toBeNull();
+		expect(
+			(
+				(
+					lastSendChatArgs as unknown as {
+						messages: Message[];
+						config: unknown;
+					}
+				)?.config as Record<string, unknown>
+			).session_id,
+		).toBe("persisted-session-xyz");
+	});
+
+	it("AC3: clearing the session also clears the persisted session_id", async () => {
+		localStorage.setItem(SESSION_KEY, "session-to-clear");
+
+		const confirmSpy = vi.spyOn(window, "confirm").mockReturnValue(true);
+
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const newSessionBtn = screen.getByText(/New Session/);
+		await act(async () => {
+			fireEvent.click(newSessionBtn);
+		});
+
+		expect(localStorage.getItem(SESSION_KEY)).toBeNull();
+
+		confirmSpy.mockRestore();
+	});
+
+	it("AC1: storage key is scoped to project path", async () => {
+		const otherPath = "/other/project";
+		const otherKey = `storykit-claude-session-id:${otherPath}`;
+		localStorage.setItem(otherKey, "other-session");
+
+		render(<Chat projectPath={PROJECT_PATH} onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		await act(async () => {
+			capturedWsHandlers?.onSessionId("my-session");
+		});
+
+		await waitFor(() => {
+			expect(localStorage.getItem(SESSION_KEY)).toBe("my-session");
+		});
+
+		// Other project's session should be untouched
+		expect(localStorage.getItem(otherKey)).toBe("other-session");
+	});
+});
+
+describe("File reference expansion (Story 269 AC4)", () => {
+	beforeEach(() => {
+		vi.clearAllMocks();
+		capturedWsHandlers = null;
+		lastSendChatArgs = null;
+		setupMocks();
+	});
+
+	it("includes file contents as context when message contains @file reference", async () => {
+		mockedApi.readFile.mockResolvedValue('fn main() { println!("hello"); }');
+
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "explain @src/main.rs" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => expect(lastSendChatArgs).not.toBeNull());
+		const sentMessages = (
+			lastSendChatArgs as NonNullable<typeof lastSendChatArgs>
+		).messages;
+		const userMsg = sentMessages[sentMessages.length - 1];
+		expect(userMsg.content).toContain("explain @src/main.rs");
+		expect(userMsg.content).toContain("[File: src/main.rs]");
+		expect(userMsg.content).toContain("fn main()");
+	});
+
+	it("sends message without modification when no @file references are present", async () => {
+		render(<Chat projectPath="/tmp/project" onCloseProject={vi.fn()} />);
+		await waitFor(() => expect(capturedWsHandlers).not.toBeNull());
+
+		const input = screen.getByPlaceholderText("Send a message...");
+		await act(async () => {
+			fireEvent.change(input, { target: { value: "hello world" } });
+		});
+		await act(async () => {
+			fireEvent.keyDown(input, { key: "Enter", shiftKey: false });
+		});
+
+		await waitFor(() => expect(lastSendChatArgs).not.toBeNull());
+		const sentMessages = (
+			lastSendChatArgs as NonNullable<typeof lastSendChatArgs>
+		).messages;
+		const userMsg = sentMessages[sentMessages.length - 1];
+		expect(userMsg.content).toBe("hello world");
+		expect(mockedApi.readFile).not.toHaveBeenCalled();
+	});
+});
@@ -84,6 +84,8 @@ export function Chat({
 	const {
 		wsRef,
 		wsConnected,
+		wsConnectivity,
+		wsDisconnectedAt,
 		streamingContent,
 		setStreamingContent,
 		streamingThinking,
@@ -106,6 +108,7 @@ export function Chat({
 		setSideQuestion,
 		serverLogs,
 		storyTokenCosts,
+		statusEvents,
 	} = useChatWebSocket({
 		setMessages,
 		setLoading,
@@ -375,6 +378,8 @@ export function Chat({
 				enableTools={enableTools}
 				onToggleTools={setEnableTools}
 				wsConnected={wsConnected}
+				wsConnectivity={wsConnectivity}
+				wsDisconnectedAt={wsDisconnectedAt}
 				oauthStatus={oauthStatus}
 				onShowBotConfig={() => setView("bot-config")}
 				onShowSettings={() => setView("settings")}
@@ -384,9 +389,7 @@ export function Chat({
 				<BotConfigPage onBack={() => setView("chat")} />
 			)}

-			{view === "settings" && (
-				<SettingsPage onBack={() => setView("chat")} />
-			)}
+			{view === "settings" && <SettingsPage onBack={() => setView("chat")} />}

 			<div
 				data-testid="chat-content-area"
@@ -449,6 +452,7 @@ export function Chat({
 					busyAgentNames={busyAgentNames}
 					selectedWorkItemId={selectedWorkItemId}
 					serverLogs={serverLogs}
+					statusEvents={statusEvents}
 					onSelectWorkItem={setSelectedWorkItemId}
 					onCloseWorkItem={() => setSelectedWorkItemId(null)}
 					onStartAgent={handleStartAgent}
@@ -1,5 +1,6 @@
 import { fireEvent, render, screen, waitFor } from "@testing-library/react";
 import { describe, expect, it, vi } from "vitest";
+import type { WsConnectivity } from "../hooks/useChatWebSocket";
 import { ChatHeader } from "./ChatHeader";

 vi.mock("../api/client", () => ({
@@ -21,6 +22,8 @@ interface ChatHeaderProps {
 	enableTools: boolean;
 	onToggleTools: (enabled: boolean) => void;
 	wsConnected: boolean;
+	wsConnectivity?: WsConnectivity;
+	wsDisconnectedAt?: Date | null;
 }

 function makeProps(overrides: Partial<ChatHeaderProps> = {}): ChatHeaderProps {
@@ -289,6 +292,53 @@ describe("ChatHeader", () => {
 		});
 	});

+	// ── Connectivity indicator ────────────────────────────────────────────────
+
+	it("does not render connectivity dot when wsConnectivity is not provided", () => {
+		render(<ChatHeader {...makeProps()} />);
+		expect(screen.queryByTestId("ws-connectivity-dot")).not.toBeInTheDocument();
+	});
+
+	it("renders green dot with title 'Connected' when connected", () => {
+		render(<ChatHeader {...makeProps({ wsConnectivity: "connected" })} />);
+		const dot = screen.getByTestId("ws-connectivity-dot");
+		expect(dot).toBeInTheDocument();
+		expect(dot).toHaveAttribute("title", "Connected");
+		expect(dot.style.backgroundColor).toBe("rgb(76, 175, 80)");
+	});
+
+	it("renders amber dot with title 'Reconnecting…' when reconnecting", () => {
+		render(<ChatHeader {...makeProps({ wsConnectivity: "reconnecting" })} />);
+		const dot = screen.getByTestId("ws-connectivity-dot");
+		expect(dot).toHaveAttribute("title", "Reconnecting…");
+		expect(dot.style.backgroundColor).toBe("rgb(245, 166, 35)");
+	});
+
+	it("renders amber dot with title 'Connecting…' when connecting", () => {
+		render(<ChatHeader {...makeProps({ wsConnectivity: "connecting" })} />);
+		const dot = screen.getByTestId("ws-connectivity-dot");
+		expect(dot).toHaveAttribute("title", "Connecting…");
+		expect(dot.style.backgroundColor).toBe("rgb(245, 166, 35)");
+	});
+
+	it("renders red dot with title 'Disconnected' when failed with no timestamp", () => {
+		render(<ChatHeader {...makeProps({ wsConnectivity: "failed" })} />);
+		const dot = screen.getByTestId("ws-connectivity-dot");
+		expect(dot).toHaveAttribute("title", "Disconnected");
+		expect(dot.style.backgroundColor).toBe("rgb(229, 57, 53)");
+	});
+
+	it("renders red dot with 'Disconnected since HH:MM' when failed with timestamp", () => {
+		const disconnectedAt = new Date("2026-05-14T14:30:00");
+		render(
+			<ChatHeader
+				{...makeProps({ wsConnectivity: "failed", wsDisconnectedAt: disconnectedAt })}
+			/>,
+		);
+		const dot = screen.getByTestId("ws-connectivity-dot");
+		expect(dot.getAttribute("title")).toMatch(/Disconnected since/);
+	});
+
 	it("clears reconnecting state when wsConnected transitions to true", async () => {
 		const { api } = await import("../api/client");
 		vi.mocked(api.rebuildAndRestart).mockRejectedValue(
@@ -1,6 +1,7 @@
 import * as React from "react";
 import type { OAuthStatus } from "../api/client";
 import { api } from "../api/client";
+import type { WsConnectivity } from "../hooks/useChatWebSocket";

 const { useState, useEffect } = React;

@@ -33,6 +34,8 @@ interface ChatHeaderProps {
 	enableTools: boolean;
 	onToggleTools: (enabled: boolean) => void;
 	wsConnected: boolean;
+	wsConnectivity?: WsConnectivity;
+	wsDisconnectedAt?: Date | null;
 	oauthStatus?: OAuthStatus | null;
 	onShowBotConfig?: () => void;
 	onShowSettings?: () => void;
@@ -59,6 +62,8 @@ export function ChatHeader({
 	enableTools,
 	onToggleTools,
 	wsConnected,
+	wsConnectivity,
+	wsDisconnectedAt,
 	oauthStatus = null,
 	onShowBotConfig,
 	onShowSettings,
@@ -117,6 +122,28 @@ export function ChatHeader({
 	const rebuildButtonDisabled =
 		rebuildStatus === "building" || rebuildStatus === "reconnecting";

+	const connectivityDotColor =
+		wsConnectivity === "connected"
+			? "#4caf50"
+			: wsConnectivity === "failed"
+				? "#e53935"
+				: wsConnectivity !== undefined
+					? "#f5a623"
+					: undefined;
+
+	const connectivityTitle =
+		wsConnectivity === "connected"
+			? "Connected"
+			: wsConnectivity === "reconnecting"
+				? "Reconnecting…"
+				: wsConnectivity === "failed"
+					? wsDisconnectedAt
+						? `Disconnected since ${wsDisconnectedAt.toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" })}`
+						: "Disconnected"
+					: wsConnectivity === "connecting"
+						? "Connecting…"
+						: undefined;
+
 	return (
 		<>
 			{/* Confirmation dialog overlay */}
@@ -347,6 +374,20 @@ export function ChatHeader({
 				</div>

 				<div style={{ display: "flex", alignItems: "center", gap: "16px" }}>
+					{connectivityDotColor !== undefined && (
+						<div
+							data-testid="ws-connectivity-dot"
+							title={connectivityTitle}
+							style={{
+								width: "8px",
+								height: "8px",
+								borderRadius: "50%",
+								backgroundColor: connectivityDotColor,
+								flexShrink: 0,
+								cursor: "default",
+							}}
+						/>
+					)}
 					{oauthStatus !== null &&
 						(!oauthStatus.authenticated || oauthStatus.expired) && (
 							<button
@@ -1,5 +1,9 @@
 import type { AgentConfigInfo } from "../api/agents";
-import type { PipelineStageItem, PipelineState } from "../api/client";
+import type {
+	PipelineStageItem,
+	PipelineState,
+	StatusEvent,
+} from "../api/client";
 import { AgentPanel } from "./AgentPanel";
 import { LozengeFlyProvider } from "./LozengeFlyContext";
 import type { LogEntry } from "./ServerLogsPanel";
@@ -7,6 +11,25 @@ import { ServerLogsPanel } from "./ServerLogsPanel";
 import { StagePanel } from "./StagePanel";
 import { WorkItemDetailPanel } from "./WorkItemDetailPanel";

+/** Format a structured StatusEvent into a human-readable display string.
+ * This conversion happens at render time, not at the WebSocket boundary,
+ * so the original StatusEvent structure is preserved in state. */
+function formatStatusEventMessage(event: StatusEvent): string {
+	const name = event.story_name || event.story_id;
+	switch (event.type) {
+		case "stage_transition":
+			return `${name} — ${event.from_stage} → ${event.to_stage}`;
+		case "merge_failure":
+			return `✗ ${name} — ${event.reason}`;
+		case "story_blocked":
+			return `⊘ ${name} — BLOCKED: ${event.reason}`;
+		case "rate_limit_warning":
+			return `⚠ ${name} — ${event.agent_name} hit an API rate limit`;
+		case "rate_limit_hard_block":
+			return `⊗ ${name} — ${event.agent_name} hard rate-limited until ${event.reset_at}`;
+	}
+}
+
 interface ChatPipelinePanelProps {
 	isNarrowScreen: boolean;
 	pipeline: PipelineState;
@@ -18,6 +41,8 @@ interface ChatPipelinePanelProps {
 	busyAgentNames: Set<string>;
 	selectedWorkItemId: string | null;
 	serverLogs: LogEntry[];
+	/** Structured pipeline status events forwarded from the status broadcaster. */
+	statusEvents: Array<{ receivedAt: string; event: StatusEvent }>;
 	onSelectWorkItem: (id: string) => void;
 	onCloseWorkItem: () => void;
 	onStartAgent: (storyId: string, agentName?: string) => void;
@@ -36,12 +61,28 @@ export function ChatPipelinePanel({
 	busyAgentNames,
 	selectedWorkItemId,
 	serverLogs,
+	statusEvents,
 	onSelectWorkItem,
 	onCloseWorkItem,
 	onStartAgent,
 	onStopAgent,
 	onDeleteItem,
 }: ChatPipelinePanelProps) {
+	// Convert structured status events to LogEntry format for display in the
+	// existing log area. Structure is preserved in the statusEvents array itself.
+	const statusLogEntries: LogEntry[] = statusEvents.map(
+		({ receivedAt, event }) => ({
+			timestamp: receivedAt,
+			level:
+				event.type === "merge_failure" ||
+				event.type === "story_blocked" ||
+				event.type === "rate_limit_hard_block"
+					? "WARN"
+					: "INFO",
+			message: formatStatusEventMessage(event),
+		}),
+	);
+	const combinedLogs = [...statusLogEntries, ...serverLogs];
 	return (
 		<div
 			data-testid="chat-right-column"
@@ -69,53 +110,68 @@ export function ChatPipelinePanel({
 							configVersion={agentConfigVersion}
 							stateVersion={agentStateVersion}
 						/>
-						<StagePanel
-							title="Done"
-							items={pipeline.done ?? []}
-							costs={storyTokenCosts}
-							onItemClick={(item) => onSelectWorkItem(item.story_id)}
-							onStopAgent={onStopAgent}
-							onDeleteItem={onDeleteItem}
-						/>
-						<StagePanel
-							title="To Merge"
-							items={pipeline.merge}
-							costs={storyTokenCosts}
-							onItemClick={(item) => onSelectWorkItem(item.story_id)}
-							onStopAgent={onStopAgent}
-							onDeleteItem={onDeleteItem}
-						/>
-						<StagePanel
-							title="QA"
-							items={pipeline.qa}
-							costs={storyTokenCosts}
-							onItemClick={(item) => onSelectWorkItem(item.story_id)}
-							onStopAgent={onStopAgent}
-							onDeleteItem={onDeleteItem}
-						/>
-						<StagePanel
-							title="Current"
-							items={pipeline.current}
-							costs={storyTokenCosts}
-							onItemClick={(item) => onSelectWorkItem(item.story_id)}
-							agentRoster={agentRoster}
-							busyAgentNames={busyAgentNames}
-							onStartAgent={onStartAgent}
-							onStopAgent={onStopAgent}
-							onDeleteItem={onDeleteItem}
-						/>
-						<StagePanel
-							title="Backlog"
-							items={pipeline.backlog}
-							costs={storyTokenCosts}
-							onItemClick={(item) => onSelectWorkItem(item.story_id)}
-							agentRoster={agentRoster}
-							busyAgentNames={busyAgentNames}
-							onStartAgent={onStartAgent}
-							onStopAgent={onStopAgent}
-							onDeleteItem={onDeleteItem}
-						/>
-						<ServerLogsPanel logs={serverLogs} />
+						{(() => {
+							const mergesInFlight = new Set(
+								pipeline.deterministic_merges_in_flight ?? [],
+							);
+							return (
+								<>
+									<StagePanel
+										title="Done"
+										items={pipeline.done ?? []}
+										costs={storyTokenCosts}
+										onItemClick={(item) => onSelectWorkItem(item.story_id)}
+										onStopAgent={onStopAgent}
+										onDeleteItem={onDeleteItem}
+										mergesInFlight={mergesInFlight}
+									/>
+									<StagePanel
+										title="To Merge"
+										items={pipeline.merge}
+										costs={storyTokenCosts}
+										onItemClick={(item) => onSelectWorkItem(item.story_id)}
+										onStopAgent={onStopAgent}
+										onDeleteItem={onDeleteItem}
+										mergesInFlight={mergesInFlight}
+										isMergeStage
+									/>
+									<StagePanel
+										title="QA"
+										items={pipeline.qa}
+										costs={storyTokenCosts}
+										onItemClick={(item) => onSelectWorkItem(item.story_id)}
+										onStopAgent={onStopAgent}
+										onDeleteItem={onDeleteItem}
+										mergesInFlight={mergesInFlight}
+									/>
+									<StagePanel
+										title="Current"
+										items={pipeline.current}
+										costs={storyTokenCosts}
+										onItemClick={(item) => onSelectWorkItem(item.story_id)}
+										agentRoster={agentRoster}
+										busyAgentNames={busyAgentNames}
+										onStartAgent={onStartAgent}
+										onStopAgent={onStopAgent}
+										onDeleteItem={onDeleteItem}
+										mergesInFlight={mergesInFlight}
+									/>
+									<StagePanel
+										title="Backlog"
+										items={pipeline.backlog}
+										costs={storyTokenCosts}
+										onItemClick={(item) => onSelectWorkItem(item.story_id)}
+										agentRoster={agentRoster}
+										busyAgentNames={busyAgentNames}
+										onStartAgent={onStartAgent}
+										onStopAgent={onStopAgent}
+										onDeleteItem={onDeleteItem}
+										mergesInFlight={mergesInFlight}
+									/>
+								</>
+							);
+						})()}
+						<ServerLogsPanel logs={combinedLogs} />
 					</>
 				)}
 			</LozengeFlyProvider>
@@ -0,0 +1,73 @@
+/** React error boundary that catches render-time exceptions and shows a
+ * recoverable error UI instead of a white screen. */
+import * as React from "react";
+
+interface Props {
+	children: React.ReactNode;
+}
+
+interface State {
+	error: Error | null;
+}
+
+/** Catches uncaught render exceptions in its subtree and displays a message. */
+export class ErrorBoundary extends React.Component<Props, State> {
+	constructor(props: Props) {
+		super(props);
+		this.state = { error: null };
+	}
+
+	static getDerivedStateFromError(error: Error): State {
+		return { error };
+	}
+
+	handleReset = () => {
+		this.setState({ error: null });
+	};
+
+	render() {
+		if (this.state.error) {
+			return (
+				<div
+					style={{
+						display: "flex",
+						flexDirection: "column",
+						alignItems: "center",
+						justifyContent: "center",
+						height: "100vh",
+						background: "#0d1117",
+						color: "#e6edf3",
+						fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif",
+						gap: "16px",
+						padding: "32px",
+						textAlign: "center",
+					}}
+				>
+					<div style={{ fontSize: "2em" }}>⚠</div>
+					<div style={{ fontWeight: 600, fontSize: "1.1em" }}>
+						Something went wrong
+					</div>
+					<div style={{ color: "#8b949e", fontSize: "0.9em", maxWidth: "480px" }}>
+						{this.state.error.message}
+					</div>
+					<button
+						type="button"
+						onClick={this.handleReset}
+						style={{
+							padding: "8px 18px",
+							borderRadius: "6px",
+							border: "1px solid #30363d",
+							background: "#21262d",
+							color: "#e6edf3",
+							cursor: "pointer",
+							fontSize: "0.9em",
+						}}
+					>
+						Try again
+					</button>
+				</div>
+			);
+		}
+		return this.props.children;
+	}
+}
@@ -0,0 +1,166 @@
+/** Tests for GatewayPanel — verifies story id and name rendering in the gateway aggregate view. */
+import { render, screen } from "@testing-library/react";
+import { describe, expect, it } from "vitest";
+import type { PipelineItem } from "../api/gateway";
+import { StoryRow } from "./GatewayPanel";
+
+describe("StoryRow", () => {
+	it("renders #id prefix before the story name", () => {
+		const item: PipelineItem = {
+			story_id: "42_story_add_feature",
+			name: "Add Feature",
+			stage: "current",
+		};
+		const { container } = render(<StoryRow item={item} />);
+		expect(container).toMatchSnapshot();
+	});
+
+	it("renders #id prefix for a backlogged story", () => {
+		const item: PipelineItem = {
+			story_id: "7_bug_fix_crash",
+			name: "Fix crash on startup",
+			stage: "qa",
+		};
+		const { container } = render(<StoryRow item={item} />);
+		expect(container).toMatchSnapshot();
+	});
+
+	it("renders awaiting-slot badge for merge item with no agent", () => {
+		const item: PipelineItem = {
+			story_id: "no-number-id",
+			name: "Mystery Story",
+			stage: "merge",
+		};
+		const { container } = render(<StoryRow item={item} />);
+		expect(container).toMatchSnapshot();
+		expect(screen.getByText("awaiting-slot")).toBeInTheDocument();
+	});
+
+	// AC1: active mergemaster is visually distinct
+	it("shows MERGING badge for merge item with running mergemaster (active)", () => {
+		const item: PipelineItem = {
+			story_id: "70_story_merging_active",
+			name: "Merging Active",
+			stage: "merge",
+			agent: { agent_name: "mergemaster", model: "claude", status: "running" },
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("▶ MERGING")).toBeInTheDocument();
+	});
+
+	// AC2: awaiting-slot with queue position labels
+	it("shows NEXT IN QUEUE for first awaiting-slot merge item", () => {
+		const item: PipelineItem = {
+			story_id: "71_story_next_in_queue",
+			name: "Next in Queue",
+			stage: "merge",
+		};
+		render(<StoryRow item={item} mergeQueuePos={1} />);
+		expect(screen.getByText("NEXT IN QUEUE")).toBeInTheDocument();
+	});
+
+	it("shows awaiting-slot with position for subsequent queued merge items", () => {
+		const item: PipelineItem = {
+			story_id: "72_story_second_in_queue",
+			name: "Second in Queue",
+			stage: "merge",
+		};
+		render(<StoryRow item={item} mergeQueuePos={2} />);
+		expect(screen.getByText("awaiting-slot (#2)")).toBeInTheDocument();
+	});
+
+	// AC2: failure kind labels derived from merge_failure string
+	it("shows ConflictDetected for merge_failure with conflict text", () => {
+		const item: PipelineItem = {
+			story_id: "73_story_conflict",
+			name: "Conflict Story",
+			stage: "merge",
+			blocked: true,
+			merge_failure: "Merge conflict: conflicts detected",
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("ConflictDetected")).toBeInTheDocument();
+	});
+
+	it("shows GatesFailed for merge_failure with quality gates text", () => {
+		const item: PipelineItem = {
+			story_id: "74_story_gates",
+			name: "Gates Failed Story",
+			stage: "merge",
+			blocked: true,
+			merge_failure: "Quality gates failed: cargo test failed",
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("GatesFailed")).toBeInTheDocument();
+	});
+
+	it("shows RECOVERING badge for merge_failure item with running mergemaster", () => {
+		const item: PipelineItem = {
+			story_id: "60_story_merge_recovering",
+			name: "Merge Recovering",
+			stage: "merge",
+			merge_failure: "Squash merge failed",
+			agent: { agent_name: "mergemaster", model: "claude", status: "running" },
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("⟳ RECOVERING")).toBeInTheDocument();
+	});
+
+	it("shows QUEUED badge for merge_failure item with pending mergemaster", () => {
+		const item: PipelineItem = {
+			story_id: "61_story_merge_queued",
+			name: "Merge Queued",
+			stage: "merge",
+			merge_failure: "Squash merge failed",
+			agent: { agent_name: "mergemaster", model: "claude", status: "pending" },
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("⏳ QUEUED")).toBeInTheDocument();
+	});
+
+	it("shows FAILED badge for merge_failure item with no recovery agent", () => {
+		const item: PipelineItem = {
+			story_id: "62_story_merge_final",
+			name: "Merge Final",
+			stage: "merge",
+			merge_failure: "Squash merge failed",
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("✕ FAILED")).toBeInTheDocument();
+	});
+
+	it("shows RECOVERING badge for blocked item with running recovery agent", () => {
+		const item: PipelineItem = {
+			story_id: "63_story_blocked_recovering",
+			name: "Blocked Recovering",
+			stage: "current",
+			blocked: true,
+			agent: { agent_name: "coder", model: "claude", status: "running" },
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("⟳ RECOVERING")).toBeInTheDocument();
+	});
+
+	it("shows QUEUED badge for blocked item with pending recovery agent", () => {
+		const item: PipelineItem = {
+			story_id: "64_story_blocked_queued",
+			name: "Blocked Queued",
+			stage: "current",
+			blocked: true,
+			agent: { agent_name: "coder", model: "claude", status: "pending" },
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("⏳ QUEUED")).toBeInTheDocument();
+	});
+
+	it("shows BLOCKED badge for blocked item with no recovery agent", () => {
+		const item: PipelineItem = {
+			story_id: "65_story_blocked_human",
+			name: "Blocked Human",
+			stage: "current",
+			blocked: true,
+		};
+		render(<StoryRow item={item} />);
+		expect(screen.getByText("⊘ BLOCKED")).toBeInTheDocument();
+	});
+});
@@ -49,23 +49,85 @@ const STATUS_LABELS: Record<AgentStatus, string> = {
 };

 const STAGE_COLORS: Record<string, string> = {
+	backlog: "#8b949e",
 	current: "#3fb950",
 	qa: "#d2a679",
 	merge: "#79c0ff",
 	done: "#6e7681",
+	archived: "#6e7681",
 };

 const STAGE_LABELS: Record<string, string> = {
+	backlog: "Backlog",
 	current: "In Progress",
 	qa: "QA",
 	merge: "Merging",
 	done: "Done",
+	archived: "Archived",
 };

+/// Derive a short label from a merge failure string based on the failure kind.
+function mergeFailureKindLabel(failure: string): string {
+	if (failure.includes("Merge conflict") || failure.includes("CONFLICT")) {
+		return "ConflictDetected";
+	}
+	if (failure.includes("Quality gates failed") || failure.includes("gates failed")) {
+		return "GatesFailed";
+	}
+	if (failure.includes("no code changes") || failure.includes("empty diff")) {
+		return "EmptyDiff";
+	}
+	if (failure.includes("No commits")) {
+		return "NoCommits";
+	}
+	return "✕ FAILED";
+}
+
 /// A single story row inside a project pipeline card.
-function StoryRow({ item }: { item: PipelineItem }) {
-	const color = STAGE_COLORS[item.stage] ?? "#8b949e";
-	const label = STAGE_LABELS[item.stage] ?? item.stage;
+/** Render one story row in a gateway-aggregate panel: `#<id> <name>` with stage badge. */
+export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQueuePos?: number }) {
+	const isStuck = item.merge_failure != null || item.blocked;
+	const isMergeActive = item.stage === "merge" && !isStuck && item.agent?.status === "running";
+
+	let color: string;
+	let label: string;
+
+	if (isMergeActive) {
+		color = "#58a6ff";
+		label = "▶ MERGING";
+	} else if (isStuck) {
+		const agentStatus = item.agent?.status;
+		if (agentStatus === "running") {
+			color = "#e3b341";
+			label = "⟳ RECOVERING";
+		} else if (agentStatus === "pending") {
+			color = "#e3b341";
+			label = "⏳ QUEUED";
+		} else if (item.merge_failure != null) {
+			color = "#f85149";
+			label = mergeFailureKindLabel(item.merge_failure);
+		} else {
+			color = "#f85149";
+			label = "⊘ BLOCKED";
+		}
+	} else if (item.stage === "merge" && item.agent?.status === "pending") {
+		color = "#e3b341";
+		label = "⏳ QUEUED";
+	} else if (item.stage === "merge") {
+		color = "#6e7681";
+		if (mergeQueuePos === 1) {
+			label = "NEXT IN QUEUE";
+		} else if (mergeQueuePos != null) {
+			label = `awaiting-slot (#${mergeQueuePos})`;
+		} else {
+			label = "awaiting-slot";
+		}
+	} else {
+		color = STAGE_COLORS[item.stage] ?? "#8b949e";
+		label = STAGE_LABELS[item.stage] ?? item.stage;
+	}
+
+	const idNum = item.story_id.match(/^(\d+)/)?.[1];

 	return (
 		<div
@@ -75,6 +137,10 @@ function StoryRow({ item }: { item: PipelineItem }) {
 				gap: "8px",
 				padding: "4px 0",
 				fontSize: "0.82em",
+				background: isMergeActive ? "#58a6ff0a" : undefined,
+				borderRadius: isMergeActive ? "4px" : undefined,
+				paddingLeft: isMergeActive ? "4px" : undefined,
+				paddingRight: isMergeActive ? "4px" : undefined,
 			}}
 		>
 			<span
@@ -91,83 +157,13 @@ function StoryRow({ item }: { item: PipelineItem }) {
 				{label}
 			</span>
 			<span style={{ color: "#e6edf3", overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap" }}>
+				{idNum && <span style={{ color: "#8b949e", fontFamily: "monospace" }}>#{idNum}{" "}</span>}
 				{item.name}
 			</span>
 		</div>
 	);
 }

-/// Pipeline status card for a single project.
-function ProjectPipelineCard({
-	name,
-	pipeline,
-	isActive,
-	onSwitch,
-}: {
-	name: string;
-	pipeline: AllProjectsPipeline["projects"][string];
-	isActive: boolean;
-	onSwitch: (name: string) => void;
-}) {
-	const activeItems = pipeline.active ?? [];
-	const backlogCount = pipeline.backlog_count ?? 0;
-	const hasError = Boolean(pipeline.error);
-
-	return (
-		<div
-			data-testid={`pipeline-card-${name}`}
-			onClick={() => onSwitch(name)}
-			style={{
-				padding: "12px 16px",
-				background: "#161b22",
-				border: `1px solid ${isActive ? "#238636" : "#30363d"}`,
-				borderRadius: "8px",
-				marginBottom: "8px",
-				cursor: "pointer",
-			}}
-		>
-			<div
-				style={{
-					display: "flex",
-					alignItems: "center",
-					gap: "8px",
-					marginBottom: activeItems.length > 0 ? "8px" : 0,
-				}}
-			>
-				<span style={{ fontWeight: 600, color: "#e6edf3" }}>{name}</span>
-				{isActive && (
-					<span
-						style={{
-							fontSize: "0.7em",
-							padding: "1px 6px",
-							borderRadius: "10px",
-							background: "#23863622",
-							color: "#3fb950",
-							border: "1px solid #23863644",
-						}}
-					>
-						active
-					</span>
-				)}
-				<span style={{ marginLeft: "auto", fontSize: "0.75em", color: "#6e7681" }}>
-					{backlogCount > 0 ? `${backlogCount} in backlog` : ""}
-				</span>
-			</div>
-
-			{hasError ? (
-				<div style={{ fontSize: "0.8em", color: "#f85149" }}>{pipeline.error}</div>
-			) : activeItems.length === 0 ? (
-				<div style={{ fontSize: "0.8em", color: "#6e7681" }}>No active stories</div>
-			) : (
-				<div>
-					{activeItems.map((item) => (
-						<StoryRow key={item.story_id} item={item} />
-					))}
-				</div>
-			)}
-		</div>
-	);
-}

 function TokenDisplay({ token }: { token: string }) {
 	const [copied, setCopied] = useState(false);
@@ -359,6 +355,291 @@ function AgentRow({
 	);
 }

+type TabKey = "backlog" | "in-progress" | "done" | "archived";
+
+const TAB_STORAGE_KEY = "gateway_selected_tab";
+
+/// Read the persisted tab from localStorage, defaulting to "in-progress".
+function readStoredTab(): TabKey {
+	const stored = localStorage.getItem(TAB_STORAGE_KEY);
+	if (
+		stored === "backlog" ||
+		stored === "in-progress" ||
+		stored === "done" ||
+		stored === "archived"
+	) {
+		return stored;
+	}
+	return "in-progress";
+}
+
+/// Aggregate pipeline items from all projects for a given tab.
+function aggregateItems(
+	pipeline: AllProjectsPipeline,
+	tab: TabKey,
+): { project: string; items: PipelineItem[] }[] {
+	return Object.entries(pipeline.projects)
+		.map(([project, status]) => {
+			if (status.error) return { project, items: [] };
+			if (tab === "backlog") {
+				return {
+					project,
+					items: (status.backlog ?? []).map((b) => ({
+						story_id: b.story_id,
+						name: b.name,
+						stage: "backlog",
+					})),
+				};
+			}
+			if (tab === "in-progress") {
+				return {
+					project,
+					items: (status.active ?? []).filter(
+						(i) => i.stage !== "done",
+					),
+				};
+			}
+			if (tab === "done") {
+				return {
+					project,
+					items: (status.active ?? []).filter((i) => i.stage === "done"),
+				};
+			}
+			// archived
+			return { project, items: status.archived ?? [] };
+		})
+		.filter((g) => g.items.length > 0);
+}
+
+/// Count total items across all projects for a given tab.
+function tabCount(pipeline: AllProjectsPipeline, tab: TabKey): number {
+	return Object.values(pipeline.projects).reduce((sum, status) => {
+		if (status.error) return sum;
+		if (tab === "backlog") return sum + (status.backlog_count ?? 0);
+		if (tab === "in-progress") {
+			return (
+				sum +
+				(status.active ?? []).filter((i) => i.stage !== "done").length
+			);
+		}
+		if (tab === "done") {
+			return (
+				sum + (status.active ?? []).filter((i) => i.stage === "done").length
+			);
+		}
+		return sum + (status.archived ?? []).length;
+	}, 0);
+}
+
+/// Tab bar button.
+function TabButton({
+	label,
+	count,
+	active,
+	onClick,
+}: {
+	label: string;
+	count: number;
+	active: boolean;
+	onClick: () => void;
+}) {
+	return (
+		<button
+			type="button"
+			onClick={onClick}
+			style={{
+				padding: "8px 16px",
+				borderRadius: "6px 6px 0 0",
+				border: "1px solid",
+				borderColor: active ? "#30363d" : "transparent",
+				borderBottomColor: active ? "#0d1117" : "transparent",
+				background: active ? "#0d1117" : "none",
+				color: active ? "#e6edf3" : "#8b949e",
+				cursor: "pointer",
+				fontSize: "0.9em",
+				fontWeight: active ? 600 : 400,
+				display: "flex",
+				alignItems: "center",
+				gap: "6px",
+			}}
+		>
+			{label}
+			{count > 0 && (
+				<span
+					style={{
+						padding: "1px 6px",
+						borderRadius: "10px",
+						background: active ? "#21262d" : "#161b22",
+						color: active ? "#e6edf3" : "#6e7681",
+						fontSize: "0.8em",
+					}}
+				>
+					{count}
+				</span>
+			)}
+		</button>
+	);
+}
+
+/// A project-labelled story row used in the aggregate tab view.
+function ProjectStoryRow({
+	project,
+	item,
+	showProject,
+	mergeQueuePos,
+}: {
+	project: string;
+	item: PipelineItem;
+	showProject: boolean;
+	mergeQueuePos?: number;
+}) {
+	return (
+		<div style={{ display: "flex", alignItems: "center", gap: "8px" }}>
+			{showProject && (
+				<span
+					style={{
+						fontSize: "0.75em",
+						padding: "1px 6px",
+						borderRadius: "10px",
+						background: "#161b22",
+						color: "#8b949e",
+						border: "1px solid #30363d",
+						whiteSpace: "nowrap",
+						flexShrink: 0,
+					}}
+				>
+					{project}
+				</span>
+			)}
+			<div style={{ flex: 1, minWidth: 0 }}>
+				<StoryRow item={item} mergeQueuePos={mergeQueuePos} />
+			</div>
+		</div>
+	);
+}
+
+const IN_PROGRESS_STAGE_LABELS: Record<string, string> = {
+	current: "Coding",
+	qa: "QA",
+	merge: "Merging",
+};
+
+/// In Progress tab content — items grouped by stage (coding / qa / merging).
+function InProgressTabContent({
+	groups,
+}: {
+	groups: { project: string; items: PipelineItem[] }[];
+}) {
+	const allItems = groups.flatMap((g) =>
+		g.items.map((item) => ({ project: g.project, item })),
+	);
+	const multiProject = new Set(allItems.map((x) => x.project)).size > 1;
+
+	const byStage = {
+		current: allItems.filter((x) => x.item.stage === "current"),
+		qa: allItems.filter((x) => x.item.stage === "qa"),
+		merge: allItems.filter((x) => x.item.stage === "merge"),
+	};
+
+	const stages = (["current", "qa", "merge"] as const).filter(
+		(s) => byStage[s].length > 0,
+	);
+
+	// Compute queue position among clean awaiting merge items (Stage::Merge, no failure, no running agent).
+	const mergeQueuePosMap = new Map<string, number>();
+	let queuePos = 0;
+	for (const { project, item } of byStage.merge) {
+		if (
+			!item.blocked &&
+			!item.merge_failure &&
+			item.agent?.status !== "running"
+		) {
+			queuePos += 1;
+			mergeQueuePosMap.set(`${project}:${item.story_id}`, queuePos);
+		}
+	}
+
+	if (allItems.length === 0) {
+		return (
+			<p style={{ color: "#6e7681", padding: "16px 0" }}>
+				No items in progress.
+			</p>
+		);
+	}
+
+	return (
+		<div>
+			{stages.map((stage) => (
+				<div key={stage} style={{ marginBottom: "20px" }}>
+					<div
+						style={{
+							fontSize: "0.8em",
+							fontWeight: 600,
+							color: STAGE_COLORS[stage] ?? "#8b949e",
+							textTransform: "uppercase",
+							letterSpacing: "0.06em",
+							marginBottom: "8px",
+							paddingBottom: "4px",
+							borderBottom: `1px solid ${STAGE_COLORS[stage] ?? "#8b949e"}33`,
+						}}
+					>
+						{IN_PROGRESS_STAGE_LABELS[stage]}{" "}
+						<span style={{ color: "#6e7681" }}>
+							({byStage[stage].length})
+						</span>
+					</div>
+					{byStage[stage].map(({ project, item }) => (
+						<ProjectStoryRow
+							key={`${project}:${item.story_id}`}
+							project={project}
+							item={item}
+							showProject={multiProject}
+							mergeQueuePos={
+								stage === "merge"
+									? mergeQueuePosMap.get(`${project}:${item.story_id}`)
+									: undefined
+							}
+						/>
+					))}
+				</div>
+			))}
+		</div>
+	);
+}
+
+/// Flat list tab content for Backlog, Done, and Archived.
+function FlatTabContent({
+	groups,
+	emptyMessage,
+}: {
+	groups: { project: string; items: PipelineItem[] }[];
+	emptyMessage: string;
+}) {
+	const allItems = groups.flatMap((g) =>
+		g.items.map((item) => ({ project: g.project, item })),
+	);
+	const multiProject = new Set(allItems.map((x) => x.project)).size > 1;
+
+	if (allItems.length === 0) {
+		return (
+			<p style={{ color: "#6e7681", padding: "16px 0" }}>{emptyMessage}</p>
+		);
+	}
+
+	return (
+		<div>
+			{allItems.map(({ project, item }) => (
+				<ProjectStoryRow
+					key={`${project}:${item.story_id}`}
+					project={project}
+					item={item}
+					showProject={multiProject}
+				/>
+			))}
+		</div>
+	);
+}
+
 /// Gateway management panel — rendered when running in `--gateway` mode.
 export function GatewayPanel() {
 	const [agents, setAgents] = useState<JoinedAgent[]>([]);
@@ -367,11 +648,7 @@ export function GatewayPanel() {
 	const [generating, setGenerating] = useState(false);
 	const [error, setError] = useState<string | null>(null);
 	const [pipeline, setPipeline] = useState<AllProjectsPipeline | null>(null);
-
-	// Add-project form state
-	const [newProjectName, setNewProjectName] = useState("");
-	const [newProjectUrl, setNewProjectUrl] = useState("");
-	const [addingProject, setAddingProject] = useState(false);
+	const [selectedTab, setSelectedTab] = useState<TabKey>(readStoredTab);

 	// Keep stable refs so polling intervals don't recreate on state changes.
 	const setAgentsRef = useRef(setAgents);
@@ -447,52 +724,11 @@ export function GatewayPanel() {
 		[],
 	);

-	const handleAddProject = useCallback(async () => {
-		const name = newProjectName.trim();
-		const url = newProjectUrl.trim();
-		if (!name || !url) return;
-		setAddingProject(true);
-		setError(null);
-		try {
-			const created = await gatewayApi.addProject(name, url);
-			setProjects((prev) => [...prev, created]);
-			setNewProjectName("");
-			setNewProjectUrl("");
-		} catch (e) {
-			setError(e instanceof Error ? e.message : String(e));
-		} finally {
-			setAddingProject(false);
-		}
-	}, [newProjectName, newProjectUrl]);
-
-	const handleSwitchProject = useCallback(async (name: string) => {
-		setError(null);
-		try {
-			const result = await gatewayApi.switchProject(name);
-			if (!result.ok) {
-				setError(result.error ?? "Failed to switch project");
-				return;
-			}
-			// Refresh pipeline to reflect new active project.
-			const updated = await gatewayApi.getAllProjectsPipeline();
-			setPipeline(updated);
-		} catch (e) {
-			setError(e instanceof Error ? e.message : String(e));
-		}
+	const handleSelectTab = useCallback((tab: TabKey) => {
+		setSelectedTab(tab);
+		localStorage.setItem(TAB_STORAGE_KEY, tab);
 	}, []);

-	const handleRemoveProject = useCallback(async (name: string) => {
-		if (!window.confirm(`Remove project "${name}"? This cannot be undone.`)) {
-			return;
-		}
-		setError(null);
-		try {
-			await gatewayApi.removeProject(name);
-			setProjects((prev) => prev.filter((p) => p.name !== name));
-		} catch (e) {
-			setError(e instanceof Error ? e.message : String(e));
-		}
-	}, []);

 	return (
 		<div
@@ -512,29 +748,62 @@ export function GatewayPanel() {
 					Manage build agents connected to this gateway.
 				</p>

-				{/* Cross-project pipeline status */}
+				{/* Cross-project pipeline tabs */}
 				<section style={{ marginBottom: "32px" }}>
-					<h2
+					{/* Tab bar */}
+					<div
 						style={{
-							fontSize: "1.1em",
-							fontWeight: 600,
-							marginBottom: "12px",
-							borderBottom: "1px solid #21262d",
-							paddingBottom: "8px",
+							display: "flex",
+							gap: "2px",
+							borderBottom: "1px solid #30363d",
+							marginBottom: "16px",
 						}}
 					>
-						Pipeline Status
-					</h2>
-					{pipeline ? (
-						Object.entries(pipeline.projects).map(([name, status]) => (
-							<ProjectPipelineCard
-								key={name}
-								name={name}
-								pipeline={status}
-								isActive={name === pipeline.active}
-								onSwitch={handleSwitchProject}
+						{(
+							[
+								{ key: "backlog", label: "Backlog" },
+								{ key: "in-progress", label: "In Progress" },
+								{ key: "done", label: "Done" },
+								{ key: "archived", label: "Archived" },
+							] as { key: TabKey; label: string }[]
+						).map(({ key, label }) => (
+							<TabButton
+								key={key}
+								label={label}
+								count={pipeline ? tabCount(pipeline, key) : 0}
+								active={selectedTab === key}
+								onClick={() => handleSelectTab(key)}
 							/>
-						))
+						))}
+					</div>
+
+					{/* Tab content */}
+					{pipeline ? (
+						<>
+							{selectedTab === "backlog" && (
+								<FlatTabContent
+									groups={aggregateItems(pipeline, "backlog")}
+									emptyMessage="No items in backlog."
+								/>
+							)}
+							{selectedTab === "in-progress" && (
+								<InProgressTabContent
+									groups={aggregateItems(pipeline, "in-progress")}
+								/>
+							)}
+							{selectedTab === "done" && (
+								<FlatTabContent
+									groups={aggregateItems(pipeline, "done")}
+									emptyMessage="No completed items."
+								/>
+							)}
+							{selectedTab === "archived" && (
+								<FlatTabContent
+									groups={aggregateItems(pipeline, "archived")}
+									emptyMessage="No archived items."
+								/>
+							)}
+						</>
 					) : (
 						<p style={{ color: "#6e7681" }}>Loading pipeline status…</p>
 					)}
@@ -657,97 +926,8 @@ export function GatewayPanel() {
 								<div style={{ fontWeight: 600, color: "#e6edf3" }}>{p.name}</div>
 								<div style={{ fontSize: "0.8em", color: "#8b949e" }}>{p.url}</div>
 							</div>
-							<button
-								type="button"
-								data-testid={`remove-project-${p.name}`}
-								onClick={() => handleRemoveProject(p.name)}
-								style={{
-									fontSize: "0.8em",
-									padding: "4px 10px",
-									borderRadius: "4px",
-									border: "1px solid #f85149",
-									background: "none",
-									color: "#f85149",
-									cursor: "pointer",
-								}}
-							>
-								Remove
-							</button>
-						</div>
+							</div>
 					))}
-
-					{/* Add project form */}
-					<div
-						style={{
-							marginTop: "12px",
-							display: "flex",
-							gap: "8px",
-							alignItems: "flex-end",
-							flexWrap: "wrap",
-						}}
-					>
-						<div style={{ flex: "1 1 140px" }}>
-							<div style={{ fontSize: "0.75em", color: "#8b949e", marginBottom: "4px" }}>
-								Name
-							</div>
-							<input
-								data-testid="new-project-name"
-								type="text"
-								placeholder="my-project"
-								value={newProjectName}
-								onChange={(e) => setNewProjectName(e.target.value)}
-								style={{
-									width: "100%",
-									padding: "6px 10px",
-									borderRadius: "4px",
-									border: "1px solid #30363d",
-									background: "#0d1117",
-									color: "#e6edf3",
-									fontSize: "0.85em",
-								}}
-							/>
-						</div>
-						<div style={{ flex: "2 1 200px" }}>
-							<div style={{ fontSize: "0.75em", color: "#8b949e", marginBottom: "4px" }}>
-								Container URL
-							</div>
-							<input
-								data-testid="new-project-url"
-								type="text"
-								placeholder="http://localhost:3001"
-								value={newProjectUrl}
-								onChange={(e) => setNewProjectUrl(e.target.value)}
-								style={{
-									width: "100%",
-									padding: "6px 10px",
-									borderRadius: "4px",
-									border: "1px solid #30363d",
-									background: "#0d1117",
-									color: "#e6edf3",
-									fontSize: "0.85em",
-								}}
-							/>
-						</div>
-						<button
-							type="button"
-							data-testid="add-project-button"
-							onClick={handleAddProject}
-							disabled={addingProject || !newProjectName.trim() || !newProjectUrl.trim()}
-							style={{
-								padding: "6px 14px",
-								borderRadius: "4px",
-								border: "1px solid #238636",
-								background: addingProject ? "#1a2f1a" : "#238636",
-								color: "#fff",
-								cursor: addingProject ? "not-allowed" : "pointer",
-								fontWeight: 600,
-								fontSize: "0.85em",
-								whiteSpace: "nowrap",
-							}}
-						>
-							{addingProject ? "Adding…" : "Add Project"}
-						</button>
-					</div>
 				</section>

 				{error && (
@@ -0,0 +1,149 @@
+import { render } from "@testing-library/react";
+import * as React from "react";
+import { describe, expect, it } from "vitest";
+import type { PipelineState } from "../api/client";
+import { LozengeFlyProvider } from "./LozengeFlyContext";
+import { StagePanel } from "./StagePanel";
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makePipeline(overrides: Partial<PipelineState> = {}): PipelineState {
+	return {
+		backlog: [],
+		current: [],
+		qa: [],
+		merge: [],
+		done: [],
+		deterministic_merges_in_flight: [],
+		...overrides,
+	};
+}
+
+function Wrapper({
+	pipeline,
+	children,
+}: {
+	pipeline: PipelineState;
+	children: React.ReactNode;
+}) {
+	return (
+		<LozengeFlyProvider pipeline={pipeline}>{children}</LozengeFlyProvider>
+	);
+}
+
+// ─── Agent lozenge fixed intrinsic width ──────────────────────────────────────
+
+describe("AgentLozenge fixed intrinsic width", () => {
+	it("has align-self: flex-start so it never stretches inside a flex column", () => {
+		const items = [
+			{
+				story_id: "74_width_test",
+				name: "Width Test",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder-1", model: "sonnet", status: "running" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const pipeline = makePipeline({ current: items });
+		const { container } = render(
+			<Wrapper pipeline={pipeline}>
+				<StagePanel title="Current" items={items} />
+			</Wrapper>,
+		);
+
+		const lozenge = container.querySelector(
+			'[data-testid="slot-lozenge-74_width_test"]',
+		) as HTMLElement;
+		expect(lozenge).toBeInTheDocument();
+		expect(lozenge.style.alignSelf).toBe("flex-start");
+	});
+});
+
+// ─── Idle vs active visual distinction ────────────────────────────────────────
+
+describe("AgentLozenge idle vs active appearance", () => {
+	it("running agent lozenge uses the green active color", () => {
+		const items = [
+			{
+				story_id: "74_running_color",
+				name: "Running",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder-1", model: null, status: "running" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const { container } = render(
+			<Wrapper pipeline={makePipeline({ current: items })}>
+				<StagePanel title="Current" items={items} />
+			</Wrapper>,
+		);
+
+		const lozenge = container.querySelector(
+			'[data-testid="slot-lozenge-74_running_color"]',
+		) as HTMLElement;
+		expect(lozenge).toBeInTheDocument();
+		// Green: rgb(63, 185, 80) = #3fb950
+		expect(lozenge.style.color).toBe("rgb(63, 185, 80)");
+	});
+
+	it("pending agent lozenge uses the yellow pending color", () => {
+		const items = [
+			{
+				story_id: "74_pending_color",
+				name: "Pending",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder-1", model: null, status: "pending" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const { container } = render(
+			<Wrapper pipeline={makePipeline({ current: items })}>
+				<StagePanel title="Current" items={items} />
+			</Wrapper>,
+		);
+
+		const lozenge = container.querySelector(
+			'[data-testid="slot-lozenge-74_pending_color"]',
+		) as HTMLElement;
+		expect(lozenge).toBeInTheDocument();
+		// Yellow: rgb(227, 179, 65) = #e3b341
+		expect(lozenge.style.color).toBe("rgb(227, 179, 65)");
+	});
+
+	it("running lozenge has a pulsing dot child element", () => {
+		const items = [
+			{
+				story_id: "74_pulse_dot",
+				name: "Pulse",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder-1", model: null, status: "running" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const { container } = render(
+			<Wrapper pipeline={makePipeline({ current: items })}>
+				<StagePanel title="Current" items={items} />
+			</Wrapper>,
+		);
+
+		const lozenge = container.querySelector(
+			'[data-testid="slot-lozenge-74_pulse_dot"]',
+		) as HTMLElement;
+		// The pulse dot is a child span with animation: pulse
+		const dot = lozenge.querySelector("span");
+		expect(dot).not.toBeNull();
+		expect(dot?.style.animation).toContain("pulse");
+	});
+});
@@ -0,0 +1,404 @@
+import { act, render, screen } from "@testing-library/react";
+import * as React from "react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { PipelineState } from "../api/client";
+import { LozengeFlyProvider, useLozengeFly } from "./LozengeFlyContext";
+import { StagePanel } from "./StagePanel";
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makePipeline(overrides: Partial<PipelineState> = {}): PipelineState {
+	return {
+		backlog: [],
+		current: [],
+		qa: [],
+		merge: [],
+		done: [],
+		deterministic_merges_in_flight: [],
+		...overrides,
+	};
+}
+
+/** A minimal roster element fixture that registers itself with the context. */
+function RosterFixture({ agentName }: { agentName: string }) {
+	const { registerRosterEl } = useLozengeFly();
+	const ref = React.useRef<HTMLSpanElement>(null);
+	React.useEffect(() => {
+		const el = ref.current;
+		if (el) registerRosterEl(agentName, el);
+		return () => registerRosterEl(agentName, null);
+	}, [agentName, registerRosterEl]);
+	return (
+		<span
+			ref={ref}
+			data-testid={`roster-${agentName}`}
+			style={{ position: "fixed", top: 10, left: 20, width: 80, height: 20 }}
+		/>
+	);
+}
+
+function Wrapper({
+	pipeline,
+	children,
+}: {
+	pipeline: PipelineState;
+	children: React.ReactNode;
+}) {
+	return (
+		<LozengeFlyProvider pipeline={pipeline}>{children}</LozengeFlyProvider>
+	);
+}
+
+// ─── Fly-in: slot lozenge visibility ─────────────────────────────────────────
+
+describe("LozengeFlyProvider fly-in visibility", () => {
+	beforeEach(() => {
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.restoreAllMocks();
+	});
+
+	it("slot lozenge starts hidden when a matching roster element exists", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_hidden_test",
+					name: "Hidden Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={noPipeline}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		// Rerender with the agent assigned
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={withAgent.current} />
+				</Wrapper>,
+			);
+		});
+
+		const lozenge = screen.getByTestId("slot-lozenge-74_hidden_test");
+		// Hidden while fly-in is in progress
+		expect(lozenge.style.opacity).toBe("0");
+	});
+
+	it("slot lozenge is visible when no roster element is registered", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_no_roster",
+					name: "No Roster",
+					error: null,
+					merge_failure: null,
+					agent: {
+						agent_name: "unknown-agent",
+						model: null,
+						status: "running",
+					},
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={noPipeline}>
+				{/* No RosterFixture for "unknown-agent" */}
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withAgent}>
+					<StagePanel title="Current" items={withAgent.current} />
+				</Wrapper>,
+			);
+		});
+
+		const lozenge = screen.getByTestId("slot-lozenge-74_no_roster");
+		// Immediately visible because no fly-in animation is possible
+		expect(lozenge.style.opacity).toBe("1");
+	});
+});
+
+// ─── Fly-in: flying clone in document.body portal ────────────────────────────
+
+describe("LozengeFlyProvider fly-in clone", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("renders a fixed-position clone in document.body when fly-in triggers", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_portal_test",
+					name: "Portal Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: "sonnet", status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={noPipeline}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={withAgent.current} />
+				</Wrapper>,
+			);
+			vi.runAllTimers();
+		});
+
+		// Clone is in document.body (portal), not inside the component container
+		const clone = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		) as HTMLElement | null;
+		expect(clone).not.toBeNull();
+		expect(clone?.style.position).toBe("fixed");
+		expect(Number(clone?.style.zIndex)).toBeGreaterThanOrEqual(9999);
+		expect(clone?.style.pointerEvents).toBe("none");
+	});
+
+	it("clone is removed from document.body after 500 ms", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_clone_remove",
+					name: "Clone Remove",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={noPipeline}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={withAgent.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Clone should exist before timeout
+		const cloneBefore = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		);
+		expect(cloneBefore).not.toBeNull();
+
+		// Advance past the 500ms cleanup timeout
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		const cloneAfter = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		);
+		expect(cloneAfter).toBeNull();
+	});
+
+	it("slot lozenge becomes visible (opacity 1) after 500 ms timeout", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_reveal_test",
+					name: "Reveal Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={noPipeline}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={withAgent.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Initially hidden
+		const lozenge = screen.getByTestId("slot-lozenge-74_reveal_test");
+		expect(lozenge.style.opacity).toBe("0");
+
+		// After 500ms the slot becomes visible
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		expect(lozenge.style.opacity).toBe("1");
+	});
+});
+
+// ─── Flying clone renders in initial (non-flying) state ───────────────────
+
+describe("FlyingLozengeClone initial non-flying render", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("clone has transition: none before rAF fires", async () => {
+		// Collect rAF callbacks instead of firing them immediately
+		const rafCallbacks: FrameRequestCallback[] = [];
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			rafCallbacks.push(cb);
+			return rafCallbacks.length;
+		});
+
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "109_nontransition_test",
+					name: "Non-transition Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={noPipeline}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={[]} />
+			</LozengeFlyProvider>,
+		);
+
+		// Trigger fly-in but don't flush rAF callbacks
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={withAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={withAgent.current} />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		// Clone should exist in its initial (non-flying) state
+		const clone = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		) as HTMLElement | null;
+		expect(clone).not.toBeNull();
+		expect(clone?.style.transition).toBe("none");
+
+		// Now flush rAF callbacks to trigger the flying state
+		await act(async () => {
+			for (const cb of rafCallbacks) cb(0);
+			rafCallbacks.length = 0;
+			// Flush inner rAF callbacks too
+			for (const cb of rafCallbacks) cb(0);
+		});
+	});
+});
@@ -0,0 +1,483 @@
+import { act, render, screen } from "@testing-library/react";
+import * as React from "react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { PipelineState } from "../api/client";
+import { LozengeFlyProvider, useLozengeFly } from "./LozengeFlyContext";
+import { StagePanel } from "./StagePanel";
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makePipeline(overrides: Partial<PipelineState> = {}): PipelineState {
+	return {
+		backlog: [],
+		current: [],
+		qa: [],
+		merge: [],
+		done: [],
+		deterministic_merges_in_flight: [],
+		...overrides,
+	};
+}
+
+/** A minimal roster element fixture that registers itself with the context. */
+function RosterFixture({ agentName }: { agentName: string }) {
+	const { registerRosterEl } = useLozengeFly();
+	const ref = React.useRef<HTMLSpanElement>(null);
+	React.useEffect(() => {
+		const el = ref.current;
+		if (el) registerRosterEl(agentName, el);
+		return () => registerRosterEl(agentName, null);
+	}, [agentName, registerRosterEl]);
+	return (
+		<span
+			ref={ref}
+			data-testid={`roster-${agentName}`}
+			style={{ position: "fixed", top: 10, left: 20, width: 80, height: 20 }}
+		/>
+	);
+}
+
+/** Reads hiddenRosterAgents from context and exposes it via a data attribute. */
+function HiddenAgentsProbe() {
+	const { hiddenRosterAgents } = useLozengeFly();
+	return (
+		<div
+			data-testid="hidden-agents-probe"
+			data-hidden={[...hiddenRosterAgents].join(",")}
+		/>
+	);
+}
+
+function Wrapper({
+	pipeline,
+	children,
+}: {
+	pipeline: PipelineState;
+	children: React.ReactNode;
+}) {
+	return (
+		<LozengeFlyProvider pipeline={pipeline}>{children}</LozengeFlyProvider>
+	);
+}
+
+// ─── Fly-out animation ────────────────────────────────────────────────────────
+
+describe("LozengeFlyProvider fly-out", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("creates a fly-out clone in document.body when agent is removed", async () => {
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_fly_out_test",
+					name: "Fly Out Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: "haiku", status: "completed" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={withAgent}>
+				<RosterFixture agentName="coder-1" />
+				<StagePanel title="Current" items={withAgent.current} />
+			</Wrapper>,
+		);
+
+		// Advance past initial fly-in animation to get a clean state
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		// Remove the agent from the pipeline
+		const noAgent = makePipeline({
+			current: [
+				{
+					story_id: "74_fly_out_test",
+					name: "Fly Out Test",
+					error: null,
+					merge_failure: null,
+					agent: null,
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={noAgent}>
+					<RosterFixture agentName="coder-1" />
+					<StagePanel title="Current" items={noAgent.current} />
+				</Wrapper>,
+			);
+		});
+
+		// A fly-out clone should now be in document.body
+		const clone = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-out"]',
+		);
+		expect(clone).not.toBeNull();
+	});
+});
+
+// ─── Agent swap (name change) triggers both fly-out and fly-in ────────────
+
+describe("LozengeFlyProvider agent swap (name change)", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("detects agent name change as both fly-out (old) and fly-in (new)", async () => {
+		const withCoder1 = makePipeline({
+			current: [
+				{
+					story_id: "109_swap_test",
+					name: "Swap Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: "sonnet", status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const withCoder2 = makePipeline({
+			current: [
+				{
+					story_id: "109_swap_test",
+					name: "Swap Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-2", model: "haiku", status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={withCoder1}>
+				<RosterFixture agentName="coder-1" />
+				<RosterFixture agentName="coder-2" />
+				<HiddenAgentsProbe />
+				<StagePanel title="Current" items={withCoder1.current} />
+			</LozengeFlyProvider>,
+		);
+
+		// Advance past initial fly-in
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		// Swap agent: coder-1 → coder-2
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={withCoder2}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<HiddenAgentsProbe />
+					<StagePanel title="Current" items={withCoder2.current} />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		// A fly-out clone for coder-1 should appear (old agent leaves)
+		const flyOut = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-out"]',
+		);
+		expect(flyOut).not.toBeNull();
+
+		// A fly-in clone for coder-2 should appear (new agent arrives)
+		const flyIn = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		);
+		expect(flyIn).not.toBeNull();
+	});
+});
+
+// ─── Fly-out without a roster element (null rosterRect fallback) ──────────
+
+describe("LozengeFlyProvider fly-out without roster element", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 200,
+			top: 100,
+			right: 280,
+			bottom: 120,
+			width: 80,
+			height: 20,
+			x: 200,
+			y: 100,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("fly-out still works when no roster element is registered (uses fallback coords)", async () => {
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "109_no_roster_flyout",
+					name: "No Roster Flyout",
+					error: null,
+					merge_failure: null,
+					agent: {
+						agent_name: "orphan-agent",
+						model: null,
+						status: "completed",
+					},
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const noAgent = makePipeline({
+			current: [
+				{
+					story_id: "109_no_roster_flyout",
+					name: "No Roster Flyout",
+					error: null,
+					merge_failure: null,
+					agent: null,
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={withAgent}>
+				{/* No RosterFixture for orphan-agent */}
+				<StagePanel title="Current" items={withAgent.current} />
+			</LozengeFlyProvider>,
+		);
+
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={noAgent}>
+					<StagePanel title="Current" items={noAgent.current} />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		// Fly-out clone should still appear even without roster element
+		const clone = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-out"]',
+		);
+		expect(clone).not.toBeNull();
+	});
+});
+
+// ─── hiddenRosterAgents: fly-out keeps agent hidden until clone lands ─────
+
+describe("hiddenRosterAgents: fly-out keeps agent hidden until clone lands", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("agent stays hidden in roster during fly-out (0–499 ms)", async () => {
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "85_flyout_hidden",
+					name: "Fly-out Hidden",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "completed" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const noAgent = makePipeline({
+			current: [
+				{
+					story_id: "85_flyout_hidden",
+					name: "Fly-out Hidden",
+					error: null,
+					merge_failure: null,
+					agent: null,
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={withAgent}>
+				<RosterFixture agentName="coder-1" />
+				<HiddenAgentsProbe />
+				<StagePanel title="Current" items={withAgent.current} />
+			</LozengeFlyProvider>,
+		);
+
+		// Advance past the initial fly-in
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		// Remove agent — fly-out starts
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={noAgent}>
+					<RosterFixture agentName="coder-1" />
+					<HiddenAgentsProbe />
+					<StagePanel title="Current" items={noAgent.current} />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		// Agent should still be hidden (fly-out clone is in flight)
+		const probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toContain("coder-1");
+	});
+
+	it("agent reappears in roster after fly-out clone lands (500 ms)", async () => {
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "85_flyout_reveal",
+					name: "Fly-out Reveal",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "completed" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const noAgent = makePipeline({
+			current: [
+				{
+					story_id: "85_flyout_reveal",
+					name: "Fly-out Reveal",
+					error: null,
+					merge_failure: null,
+					agent: null,
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={withAgent}>
+				<RosterFixture agentName="coder-1" />
+				<HiddenAgentsProbe />
+				<StagePanel title="Current" items={withAgent.current} />
+			</LozengeFlyProvider>,
+		);
+
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={noAgent}>
+					<RosterFixture agentName="coder-1" />
+					<HiddenAgentsProbe />
+					<StagePanel title="Current" items={noAgent.current} />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		// Advance past fly-out animation
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		// Agent should now be visible in roster
+		const probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toBe("");
+	});
+});
@@ -0,0 +1,467 @@
+import { act, render, screen } from "@testing-library/react";
+import * as React from "react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { PipelineState } from "../api/client";
+import { LozengeFlyProvider, useLozengeFly } from "./LozengeFlyContext";
+import { StagePanel } from "./StagePanel";
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makePipeline(overrides: Partial<PipelineState> = {}): PipelineState {
+	return {
+		backlog: [],
+		current: [],
+		qa: [],
+		merge: [],
+		done: [],
+		deterministic_merges_in_flight: [],
+		...overrides,
+	};
+}
+
+/** A minimal roster element fixture that registers itself with the context. */
+function RosterFixture({ agentName }: { agentName: string }) {
+	const { registerRosterEl } = useLozengeFly();
+	const ref = React.useRef<HTMLSpanElement>(null);
+	React.useEffect(() => {
+		const el = ref.current;
+		if (el) registerRosterEl(agentName, el);
+		return () => registerRosterEl(agentName, null);
+	}, [agentName, registerRosterEl]);
+	return (
+		<span
+			ref={ref}
+			data-testid={`roster-${agentName}`}
+			style={{ position: "fixed", top: 10, left: 20, width: 80, height: 20 }}
+		/>
+	);
+}
+
+/** Reads hiddenRosterAgents from context and exposes it via a data attribute. */
+function HiddenAgentsProbe() {
+	const { hiddenRosterAgents } = useLozengeFly();
+	return (
+		<div
+			data-testid="hidden-agents-probe"
+			data-hidden={[...hiddenRosterAgents].join(",")}
+		/>
+	);
+}
+
+function Wrapper({
+	pipeline,
+	children,
+}: {
+	pipeline: PipelineState;
+	children: React.ReactNode;
+}) {
+	return (
+		<LozengeFlyProvider pipeline={pipeline}>{children}</LozengeFlyProvider>
+	);
+}
+
+// ─── hiddenRosterAgents: no-duplicate guarantee ───────────────────────────────
+
+describe("hiddenRosterAgents: assigned agents are absent from roster", () => {
+	it("is empty when no agents are in the pipeline", () => {
+		render(
+			<LozengeFlyProvider pipeline={makePipeline()}>
+				<HiddenAgentsProbe />
+			</LozengeFlyProvider>,
+		);
+		const probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toBe("");
+	});
+
+	it("includes agent name when agent is assigned to a current story", () => {
+		const pipeline = makePipeline({
+			current: [
+				{
+					story_id: "85_assign_test",
+					name: "Assign Test",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		render(
+			<LozengeFlyProvider pipeline={pipeline}>
+				<HiddenAgentsProbe />
+			</LozengeFlyProvider>,
+		);
+		const probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toContain("coder-1");
+	});
+
+	it("excludes agent name when it has no assignment in the pipeline", () => {
+		const pipeline = makePipeline({
+			current: [
+				{
+					story_id: "85_no_agent",
+					name: "No Agent",
+					error: null,
+					merge_failure: null,
+					agent: null,
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		render(
+			<LozengeFlyProvider pipeline={pipeline}>
+				<HiddenAgentsProbe />
+			</LozengeFlyProvider>,
+		);
+		const probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toBe("");
+	});
+
+	it("updates to include agent when pipeline transitions from no-agent to assigned", async () => {
+		const noPipeline = makePipeline();
+		const withAgent = makePipeline({
+			current: [
+				{
+					story_id: "85_transition_test",
+					name: "Transition",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<LozengeFlyProvider pipeline={noPipeline}>
+				<HiddenAgentsProbe />
+			</LozengeFlyProvider>,
+		);
+
+		let probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toBe("");
+
+		await act(async () => {
+			rerender(
+				<LozengeFlyProvider pipeline={withAgent}>
+					<HiddenAgentsProbe />
+				</LozengeFlyProvider>,
+			);
+		});
+
+		probe = screen.getByTestId("hidden-agents-probe");
+		expect(probe.dataset.hidden).toContain("coder-1");
+	});
+});
+
+// ─── Bug 137: Race condition on rapid pipeline updates ────────────────────
+
+describe("Bug 137: no animation actions lost during rapid pipeline updates", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("rapid agent swap: first timeout does not prematurely reveal slot lozenge", async () => {
+		const empty = makePipeline();
+		const withCoder1 = makePipeline({
+			current: [
+				{
+					story_id: "137_rapid_swap",
+					name: "Rapid Swap",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: "sonnet", status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const withCoder2 = makePipeline({
+			current: [
+				{
+					story_id: "137_rapid_swap",
+					name: "Rapid Swap",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-2", model: "haiku", status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={empty}>
+				<RosterFixture agentName="coder-1" />
+				<RosterFixture agentName="coder-2" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		// First update: assign coder-1 → fly-in animation #1 starts
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withCoder1}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<StagePanel title="Current" items={withCoder1.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Slot should be hidden (fly-in in progress)
+		const lozenge = screen.getByTestId("slot-lozenge-137_rapid_swap");
+		expect(lozenge.style.opacity).toBe("0");
+
+		// Rapid swap at 200ms: coder-1 → coder-2 (before first animation's 500ms timeout)
+		await act(async () => {
+			vi.advanceTimersByTime(200);
+		});
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withCoder2}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<StagePanel title="Current" items={withCoder2.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Slot should still be hidden (new fly-in for coder-2 is in progress)
+		expect(lozenge.style.opacity).toBe("0");
+
+		// At 300ms after first animation started (500ms total from start),
+		// the FIRST animation's timeout fires. It must NOT reveal the slot.
+		await act(async () => {
+			vi.advanceTimersByTime(300);
+		});
+
+		// BUG: Without fix, the first timeout clears pendingFlyIns for this story,
+		// revealing the slot while coder-2's fly-in is still in progress.
+		expect(lozenge.style.opacity).toBe("0");
+	});
+
+	it("slot lozenge reveals correctly after the LAST animation completes", async () => {
+		const empty = makePipeline();
+		const withCoder1 = makePipeline({
+			current: [
+				{
+					story_id: "137_reveal_last",
+					name: "Reveal Last",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-1", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+		const withCoder2 = makePipeline({
+			current: [
+				{
+					story_id: "137_reveal_last",
+					name: "Reveal Last",
+					error: null,
+					merge_failure: null,
+					agent: { agent_name: "coder-2", model: null, status: "running" },
+					review_hold: null,
+					qa: null,
+					depends_on: null,
+				},
+			],
+		});
+
+		const { rerender } = render(
+			<Wrapper pipeline={empty}>
+				<RosterFixture agentName="coder-1" />
+				<RosterFixture agentName="coder-2" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		// First animation
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withCoder1}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<StagePanel title="Current" items={withCoder1.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Swap at 200ms
+		await act(async () => {
+			vi.advanceTimersByTime(200);
+		});
+
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={withCoder2}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<StagePanel title="Current" items={withCoder2.current} />
+				</Wrapper>,
+			);
+		});
+
+		const lozenge = screen.getByTestId("slot-lozenge-137_reveal_last");
+
+		// After the second animation's full 500ms, slot should reveal
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		expect(lozenge.style.opacity).toBe("1");
+	});
+});
+
+describe("Bug 137: animations remain functional through sustained agent activity", () => {
+	beforeEach(() => {
+		vi.useFakeTimers();
+		Element.prototype.getBoundingClientRect = vi.fn().mockReturnValue({
+			left: 100,
+			top: 50,
+			right: 180,
+			bottom: 70,
+			width: 80,
+			height: 20,
+			x: 100,
+			y: 50,
+			toJSON: () => ({}),
+		});
+		vi.spyOn(window, "requestAnimationFrame").mockImplementation((cb) => {
+			cb(0);
+			return 0;
+		});
+	});
+
+	afterEach(() => {
+		vi.useRealTimers();
+		vi.restoreAllMocks();
+	});
+
+	it("fly-in still works after multiple rapid swaps have completed", async () => {
+		const empty = makePipeline();
+		const makeWith = (agentName: string) =>
+			makePipeline({
+				current: [
+					{
+						story_id: "137_sustained",
+						name: "Sustained",
+						error: null,
+						merge_failure: null,
+						agent: { agent_name: agentName, model: null, status: "running" },
+						review_hold: null,
+						qa: null,
+						depends_on: null,
+					},
+				],
+			});
+
+		const { rerender } = render(
+			<Wrapper pipeline={empty}>
+				<RosterFixture agentName="coder-1" />
+				<RosterFixture agentName="coder-2" />
+				<RosterFixture agentName="coder-3" />
+				<StagePanel title="Current" items={[]} />
+			</Wrapper>,
+		);
+
+		// Rapid-fire: assign coder-1, then swap to coder-2 at 100ms
+		const p1 = makeWith("coder-1");
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={p1}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<RosterFixture agentName="coder-3" />
+					<StagePanel title="Current" items={p1.current} />
+				</Wrapper>,
+			);
+		});
+
+		await act(async () => {
+			vi.advanceTimersByTime(100);
+		});
+
+		const p2 = makeWith("coder-2");
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={p2}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<RosterFixture agentName="coder-3" />
+					<StagePanel title="Current" items={p2.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Let all animations complete
+		await act(async () => {
+			vi.advanceTimersByTime(1000);
+		});
+
+		const lozenge = screen.getByTestId("slot-lozenge-137_sustained");
+		expect(lozenge.style.opacity).toBe("1");
+
+		// Now assign coder-3 — a fresh fly-in should still work
+		const p3 = makeWith("coder-3");
+		await act(async () => {
+			rerender(
+				<Wrapper pipeline={p3}>
+					<RosterFixture agentName="coder-1" />
+					<RosterFixture agentName="coder-2" />
+					<RosterFixture agentName="coder-3" />
+					<StagePanel title="Current" items={p3.current} />
+				</Wrapper>,
+			);
+		});
+
+		// Slot should be hidden again for the new fly-in
+		expect(lozenge.style.opacity).toBe("0");
+
+		// A flying clone should exist
+		const clone = document.body.querySelector(
+			'[data-testid^="flying-lozenge-fly-in"]',
+		);
+		expect(clone).not.toBeNull();
+
+		// After animation completes, slot reveals
+		await act(async () => {
+			vi.advanceTimersByTime(600);
+		});
+
+		expect(lozenge.style.opacity).toBe("1");
+	});
+});
@@ -0,0 +1,196 @@
+/**
+ * Frontend seam test: drive a real React component against a fixture derived
+ * from the actual RPC response (the canonical `CONTRACT_FIXTURES` shared with
+ * the Rust side via the snapshot file).
+ *
+ * The first test renders `SettingsPage` against the well-formed fixture and
+ * asserts the form populates with values from the RPC response — proving the
+ * backend ↔ frontend wire shape lines up end-to-end without hand-rolled
+ * fixtures.
+ *
+ * The second test feeds a *malformed* RPC response (a frame missing the
+ * required envelope `ok` field) and asserts the `rpc.ts` client surfaces a
+ * visible error in the rendered UI instead of leaving the page empty.
+ */
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { fireEvent, render, screen, waitFor } from "@testing-library/react";
+import { SettingsPage } from "./SettingsPage";
+import { CONTRACT_FIXTURES } from "../api/rpcContract";
+import snapshot from "../api/rpcContract.snapshot.json";
+
+afterEach(() => {
+	vi.restoreAllMocks();
+});
+
+interface MockSocket {
+	url: string;
+	onopen: ((ev: Event) => void) | null;
+	onmessage: ((ev: { data: string }) => void) | null;
+	onerror: ((ev: Event) => void) | null;
+	onclose: ((ev: CloseEvent) => void) | null;
+	readyState: number;
+	send(data: string): void;
+	close(): void;
+}
+
+/**
+ * Install a `WebSocket` shim that hands each registered method a single
+ * canned frame.  Callers register either a normal RPC result or a
+ * deliberately malformed frame body (returned verbatim — i.e. the body
+ * literally has no `ok` field, simulating a server bug).
+ */
+function installSeamWs(replies: {
+	[method: string]: { kind: "ok"; result: unknown } | { kind: "raw"; body: object };
+}) {
+	const instances: MockSocket[] = [];
+	class SeamWs implements MockSocket {
+		static readonly CONNECTING = 0;
+		static readonly OPEN = 1;
+		static readonly CLOSING = 2;
+		static readonly CLOSED = 3;
+		url: string;
+		onopen: ((ev: Event) => void) | null = null;
+		onmessage: ((ev: { data: string }) => void) | null = null;
+		onerror: ((ev: Event) => void) | null = null;
+		onclose: ((ev: CloseEvent) => void) | null = null;
+		readyState = 0;
+		constructor(url: string) {
+			this.url = url;
+			instances.push(this);
+			queueMicrotask(() => {
+				this.readyState = 1;
+				this.onopen?.(new Event("open"));
+			});
+		}
+		send(data: string) {
+			let frame: {
+				correlation_id?: string;
+				method?: string;
+			};
+			try {
+				frame = JSON.parse(data);
+			} catch {
+				return;
+			}
+			const { correlation_id, method } = frame;
+			if (!correlation_id || !method) return;
+			queueMicrotask(() => {
+				const reply = replies[method];
+				if (!reply) {
+					this.onmessage?.({
+						data: JSON.stringify({
+							kind: "rpc_response",
+							version: 1,
+							correlation_id,
+							ok: false,
+							error: `no fixture for ${method}`,
+							code: "NOT_FOUND",
+						}),
+					});
+					return;
+				}
+				if (reply.kind === "ok") {
+					this.onmessage?.({
+						data: JSON.stringify({
+							kind: "rpc_response",
+							version: 1,
+							correlation_id,
+							ok: true,
+							result: reply.result,
+						}),
+					});
+					return;
+				}
+				// raw: deliberately malformed envelope (no `ok` field)
+				this.onmessage?.({
+					data: JSON.stringify({
+						kind: "rpc_response",
+						version: 1,
+						correlation_id,
+						...reply.body,
+					}),
+				});
+			});
+		}
+		close() {
+			this.readyState = 3;
+		}
+	}
+	vi.stubGlobal("WebSocket", SeamWs);
+	return instances;
+}
+
+describe("SettingsPage seam test", () => {
+	it("renders ProjectSettings from the typed RPC contract fixture", async () => {
+		// Sanity: the in-source fixture mirrors the on-disk snapshot file. If
+		// this trips, the contract has drifted from the Rust side.
+		expect(CONTRACT_FIXTURES["settings.put_project"].result).toEqual(
+			snapshot["settings.put_project"].result,
+		);
+
+		const fixture = CONTRACT_FIXTURES["settings.put_project"].result;
+		installSeamWs({
+			"settings.get_project": { kind: "ok", result: fixture },
+		});
+
+		const onBack = vi.fn();
+		render(<SettingsPage onBack={onBack} />);
+
+		await waitFor(() => {
+			expect(screen.getByDisplayValue(String(fixture.max_retries))).toBeInTheDocument();
+		});
+
+		// Field driven directly by the RPC payload populates the form.
+		expect(
+			screen.getByDisplayValue(String(fixture.watcher_sweep_interval_secs)),
+		).toBeInTheDocument();
+		expect(
+			screen.getByDisplayValue(String(fixture.watcher_done_retention_secs)),
+		).toBeInTheDocument();
+	});
+
+	it("shows a visible error when the RPC response is malformed", async () => {
+		// `body` lacks the envelope `ok` field. The fixed `rpc.ts` client
+		// should reject loudly with a `MALFORMED` error instead of letting
+		// the page render empty.
+		installSeamWs({
+			"settings.get_project": {
+				kind: "raw",
+				body: { result: { not_actually_settings: true } },
+			},
+		});
+
+		const onBack = vi.fn();
+		render(<SettingsPage onBack={onBack} />);
+
+		await waitFor(() => {
+			expect(screen.getByText(/Malformed RPC response/i)).toBeInTheDocument();
+		});
+
+		// And critically — no empty form is rendered.
+		expect(screen.queryByText(/default qa/i)).not.toBeInTheDocument();
+	});
+
+	it("user can edit and the new value flows through settings.put_project RPC", async () => {
+		const fixture = CONTRACT_FIXTURES["settings.put_project"].result;
+		const updated = { ...fixture, max_retries: 9 };
+		installSeamWs({
+			"settings.get_project": { kind: "ok", result: fixture },
+			"settings.put_project": { kind: "ok", result: updated },
+		});
+
+		const onBack = vi.fn();
+		render(<SettingsPage onBack={onBack} />);
+
+		const maxRetriesInput = (await screen.findByDisplayValue(
+			String(fixture.max_retries),
+		)) as HTMLInputElement;
+
+		fireEvent.change(maxRetriesInput, { target: { value: "9" } });
+		fireEvent.click(screen.getByRole("button", { name: /save/i }));
+
+		await waitFor(() => {
+			expect(screen.getByDisplayValue("9")).toBeInTheDocument();
+		});
+	});
+});
@@ -64,7 +64,13 @@ interface TextFieldProps {
 	placeholder?: string;
 }

-function TextField({ label, description, value, onChange, placeholder }: TextFieldProps) {
+function TextField({
+	label,
+	description,
+	value,
+	onChange,
+	placeholder,
+}: TextFieldProps) {
 	return (
 		<div style={fieldStyle}>
 			<label style={labelStyle}>{label}</label>
@@ -90,7 +96,14 @@ interface NumberFieldProps {
 	placeholder?: string;
 }

-function NumberField({ label, description, value, onChange, min, placeholder }: NumberFieldProps) {
+function NumberField({
+	label,
+	description,
+	value,
+	onChange,
+	min,
+	placeholder,
+}: NumberFieldProps) {
 	return (
 		<div style={fieldStyle}>
 			<label style={labelStyle}>{label}</label>
@@ -122,7 +135,12 @@ interface CheckboxFieldProps {
 	onChange: (v: boolean) => void;
 }

-function CheckboxField({ label, description, checked, onChange }: CheckboxFieldProps) {
+function CheckboxField({
+	label,
+	description,
+	checked,
+	onChange,
+}: CheckboxFieldProps) {
 	return (
 		<div style={fieldStyle}>
 			{description && <span style={descStyle}>{description}</span>}
@@ -152,9 +170,13 @@ const QA_MODES = ["server", "agent", "human"] as const;
 /** Settings page — form-based editor for project.toml scalar settings. */
 export function SettingsPage({ onBack }: SettingsPageProps) {
 	const [settings, setSettings] = useState<ProjectSettings | null>(null);
-	const [status, setStatus] = useState<"idle" | "loading" | "saving" | "saved" | "error">("loading");
+	const [status, setStatus] = useState<
+		"idle" | "loading" | "saving" | "saved" | "error"
+	>("loading");
 	const [errorMsg, setErrorMsg] = useState<string | null>(null);
-	const [validationErrors, setValidationErrors] = useState<Record<string, string>>({});
+	const [validationErrors, setValidationErrors] = useState<
+		Record<string, string>
+	>({});

 	useEffect(() => {
 		settingsApi
@@ -251,7 +273,9 @@ export function SettingsPage({ onBack }: SettingsPageProps) {
 				>
 					← Back
 				</button>
-				<span style={{ fontWeight: 700, fontSize: "1em" }}>Project Settings</span>
+				<span style={{ fontWeight: 700, fontSize: "1em" }}>
+					Project Settings
+				</span>
 			</div>

 			{/* Body */}
@@ -284,8 +308,8 @@ export function SettingsPage({ onBack }: SettingsPageProps) {
 							<div style={fieldStyle}>
 								<label style={labelStyle}>Default QA Mode</label>
 								<span style={descStyle}>
-									How stories are QA-reviewed after the coder stage.
-									Default: server.
+									How stories are QA-reviewed after the coder stage. Default:
+									server.
 								</span>
 								<select
 									value={s.default_qa}
@@ -346,9 +370,7 @@ export function SettingsPage({ onBack }: SettingsPageProps) {
 								label="Base Branch"
 								description="Overrides auto-detection of the merge target branch (e.g. main, master, develop)."
 								value={s.base_branch ?? ""}
-								onChange={(v) =>
-									patch({ base_branch: v.trim() || null })
-								}
+								onChange={(v) => patch({ base_branch: v.trim() || null })}
 								placeholder="e.g. master"
 							/>
 						</div>
@@ -431,11 +453,9 @@ export function SettingsPage({ onBack }: SettingsPageProps) {
 									padding: "8px 24px",
 									borderRadius: "6px",
 									border: "none",
-									background:
-										status === "saved" ? "#1a5c2a" : "#2563eb",
+									background: status === "saved" ? "#1a5c2a" : "#2563eb",
 									color: "#fff",
-									cursor:
-										status === "saving" ? "not-allowed" : "pointer",
+									cursor: status === "saving" ? "not-allowed" : "pointer",
 									fontSize: "0.9em",
 									fontWeight: 600,
 									opacity: status === "saving" ? 0.7 : 1,
@@ -1,7 +1,6 @@
 import { useCallback, useState } from "react";
 import type { WizardStateData, WizardStepInfo } from "../api/client";
-
-const API_BASE = "/api";
+import { rpcCall } from "../api/rpc";

 interface SetupWizardProps {
 	wizardState: WizardStateData;
@@ -50,27 +49,17 @@ function stepBorder(status: string, isActive: boolean): string {
 /** Messages sent to the chat to trigger agent generation for each step. */
 const STEP_PROMPTS: Record<string, string> = {
 	context:
-		"Read the codebase and generate .huskies/specs/00_CONTEXT.md with a project context spec. Include High-Level Goal, Core Features, Domain Definition, and Glossary sections. Then call the wizard API to store the content: PUT /api/wizard/step/context/content",
+		"Read the codebase and generate .huskies/specs/00_CONTEXT.md with a project context spec. Include High-Level Goal, Core Features, Domain Definition, and Glossary sections. Then call the wizard MCP tool `wizard_generate` with step=context to store the content.",
 	stack:
-		"Read the tech stack and generate .huskies/specs/tech/STACK.md with a tech stack spec. Include Core Stack, Coding Standards, Quality Gates, and Libraries sections. Then call the wizard API to store the content: PUT /api/wizard/step/stack/content",
+		"Read the tech stack and generate .huskies/specs/tech/STACK.md with a tech stack spec. Include Core Stack, Coding Standards, Quality Gates, and Libraries sections. Then call the wizard MCP tool `wizard_generate` with step=stack to store the content.",
 	test_script:
-		"Read the project structure and create script/test — a bash script that runs the project's actual test suite. Then call the wizard API: PUT /api/wizard/step/test_script/content",
+		"Read the project structure and create script/test — a bash script that runs the project's actual test suite. Then call the wizard MCP tool `wizard_generate` with step=test_script to store the content.",
 	release_script:
-		"Read the project's deployment setup and create script/release tailored to the project. Then call the wizard API: PUT /api/wizard/step/release_script/content",
+		"Read the project's deployment setup and create script/release tailored to the project. Then call the wizard MCP tool `wizard_generate` with step=release_script to store the content.",
 	test_coverage:
-		"If the stack supports coverage reporting, create script/test_coverage. Then call the wizard API: PUT /api/wizard/step/test_coverage/content",
+		"If the stack supports coverage reporting, create script/test_coverage. Then call the wizard MCP tool `wizard_generate` with step=test_coverage to store the content.",
 };

-async function apiPost(path: string): Promise<WizardStateData | null> {
-	try {
-		const resp = await fetch(`${API_BASE}${path}`, { method: "POST" });
-		if (!resp.ok) return null;
-		return (await resp.json()) as WizardStateData;
-	} catch {
-		return null;
-	}
-}
-
 function StepCard({
 	step,
 	isActive,
@@ -272,10 +261,14 @@ export default function SetupWizard({

 	const handleConfirm = useCallback(
 		async (step: WizardStepInfo) => {
-			const result = await apiPost(`/wizard/step/${step.step}/confirm`);
-			if (result) {
+			try {
+				const result = await rpcCall<WizardStateData>("wizard.confirm_step", {
+					step: step.step,
+				});
 				onWizardUpdate(result);
 				setRefreshKey((k) => k + 1);
+			} catch {
+				// ignore — state remains unchanged
 			}
 		},
 		[onWizardUpdate],
@@ -283,10 +276,14 @@ export default function SetupWizard({

 	const handleSkip = useCallback(
 		async (step: WizardStepInfo) => {
-			const result = await apiPost(`/wizard/step/${step.step}/skip`);
-			if (result) {
+			try {
+				const result = await rpcCall<WizardStateData>("wizard.skip_step", {
+					step: step.step,
+				});
 				onWizardUpdate(result);
 				setRefreshKey((k) => k + 1);
+			} catch {
+				// ignore — state remains unchanged
 			}
 		},
 		[onWizardUpdate],
@@ -1,4 +1,5 @@
 import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
 import { describe, expect, it } from "vitest";
 import type { PipelineStageItem } from "../api/client";
 import { StagePanel } from "./StagePanel";
@@ -113,7 +114,7 @@ describe("StagePanel", () => {
 		const items: PipelineStageItem[] = [
 			{
 				story_id: "1_story_bad",
-				name: null,
+				name: "",
 				error: "Missing front matter",
 				merge_failure: null,
 				agent: null,
@@ -324,4 +325,249 @@ describe("StagePanel", () => {
 			screen.queryByTestId("merge-failure-reason-31_story_no_failure"),
 		).not.toBeInTheDocument();
 	});
+
+	it("shows merge-in-flight icon when story is in mergesInFlight set", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "40_story_merging",
+				name: "Merging Story",
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const mergesInFlight = new Set(["40_story_merging"]);
+		render(
+			<StagePanel title="To Merge" items={items} mergesInFlight={mergesInFlight} />,
+		);
+		expect(
+			screen.getByTestId("merge-in-flight-icon-40_story_merging"),
+		).toBeInTheDocument();
+	});
+
+	it("does not show merge-in-flight icon when story is not in mergesInFlight set", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "41_story_not_merging",
+				name: "Idle Story",
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		const mergesInFlight = new Set(["99_story_other"]);
+		render(
+			<StagePanel
+				title="To Merge"
+				items={items}
+				mergesInFlight={mergesInFlight}
+			/>,
+		);
+		expect(
+			screen.queryByTestId("merge-in-flight-icon-41_story_not_merging"),
+		).not.toBeInTheDocument();
+	});
+
+	it("does not show merge-in-flight icon when mergesInFlight prop is absent", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "42_story_no_prop",
+				name: "No Prop Story",
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		render(<StagePanel title="To Merge" items={items} />);
+		expect(
+			screen.queryByTestId("merge-in-flight-icon-42_story_no_prop"),
+		).not.toBeInTheDocument();
+	});
+
+	it("shows spinning RECOVERING badge for blocked item with running recovery agent", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "50_story_blocked_recovering",
+				name: "Blocked Recovering Story",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder", model: "claude", status: "running" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+				blocked: true,
+			},
+		];
+		render(<StagePanel title="Current" items={items} />);
+		const badge = screen.getByTestId("blocked-badge-50_story_blocked_recovering");
+		expect(badge).toBeInTheDocument();
+		expect(badge).toHaveTextContent("RECOVERING");
+	});
+
+	it("shows QUEUED badge for blocked item with pending recovery agent", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "51_story_blocked_queued",
+				name: "Blocked Queued Story",
+				error: null,
+				merge_failure: null,
+				agent: { agent_name: "coder", model: "claude", status: "pending" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+				blocked: true,
+			},
+		];
+		render(<StagePanel title="Current" items={items} />);
+		const badge = screen.getByTestId("blocked-badge-51_story_blocked_queued");
+		expect(badge).toBeInTheDocument();
+		expect(badge).toHaveTextContent("QUEUED");
+	});
+
+	it("shows red BLOCKED badge for blocked item with no recovery agent", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "52_story_blocked_human",
+				name: "Blocked Human Story",
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+				blocked: true,
+			},
+		];
+		render(<StagePanel title="Current" items={items} />);
+		const badge = screen.getByTestId("blocked-badge-52_story_blocked_human");
+		expect(badge).toBeInTheDocument();
+		expect(badge).toHaveTextContent("BLOCKED");
+	});
+
+	it("shows spinning icon for merge_failure item with running mergemaster", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "53_story_merge_recovering",
+				name: "Merge Recovering Story",
+				error: null,
+				merge_failure: "Squash merge failed: conflicts",
+				agent: { agent_name: "mergemaster", model: "claude", status: "running" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		render(<StagePanel title="Merge" items={items} />);
+		const icon = screen.getByTestId("merge-failure-icon-53_story_merge_recovering");
+		expect(icon).toBeInTheDocument();
+		expect(icon).toHaveTextContent("⟳");
+	});
+
+	it("shows hourglass icon for merge_failure item with pending mergemaster", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "54_story_merge_queued",
+				name: "Merge Queued Story",
+				error: null,
+				merge_failure: "Squash merge failed: conflicts",
+				agent: { agent_name: "mergemaster", model: "claude", status: "pending" },
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		render(<StagePanel title="Merge" items={items} />);
+		const icon = screen.getByTestId("merge-failure-icon-54_story_merge_queued");
+		expect(icon).toBeInTheDocument();
+		expect(icon).toHaveTextContent("⏳");
+	});
+
+	it("renders gate output in a bounded box with expand and copy controls", () => {
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "60_story_gate_output",
+				name: "Gate Output Story",
+				error: null,
+				merge_failure: "Quality gates failed: cargo test output here",
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		render(<StagePanel title="Merge" items={items} />);
+		expect(screen.getByTestId("gate-output-text")).toHaveTextContent(
+			"Quality gates failed: cargo test output here",
+		);
+		expect(screen.getByTestId("gate-output-toggle")).toBeInTheDocument();
+		expect(screen.getByTestId("gate-output-copy")).toBeInTheDocument();
+	});
+
+	it("expand toggle changes label from Expand to Collapse", async () => {
+		const user = userEvent.setup();
+		const items: PipelineStageItem[] = [
+			{
+				story_id: "61_story_expand",
+				name: "Expand Story",
+				error: null,
+				merge_failure: "A".repeat(1000),
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		render(<StagePanel title="Merge" items={items} />);
+		const toggle = screen.getByTestId("gate-output-toggle");
+		expect(toggle).toHaveTextContent("Expand");
+		await user.click(toggle);
+		expect(toggle).toHaveTextContent("Collapse");
+		await user.click(toggle);
+		expect(toggle).toHaveTextContent("Expand");
+	});
+});
+
+describe("StagePanel - defensive rendering", () => {
+	it("renders without exception when a story is missing its name field", () => {
+		const items = [
+			{
+				story_id: "60_story_no_name",
+				name: undefined as unknown as string,
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		expect(() => render(<StagePanel title="Current" items={items} />)).not.toThrow();
+		expect(screen.getByTestId("card-60_story_no_name")).toBeInTheDocument();
+	});
+
+	it("renders without exception when a story is missing its story_id field", () => {
+		const items = [
+			{
+				story_id: undefined as unknown as string,
+				name: "Orphaned Story",
+				error: null,
+				merge_failure: null,
+				agent: null,
+				review_hold: null,
+				qa: null,
+				depends_on: null,
+			},
+		];
+		expect(() => render(<StagePanel title="Current" items={items} />)).not.toThrow();
+		expect(screen.getByText("Orphaned Story")).toBeInTheDocument();
+	});
 });
@@ -5,6 +5,82 @@ import { useLozengeFly } from "./LozengeFlyContext";

 const { useLayoutEffect, useRef, useState } = React;

+/** Renders merge-failure gate output in a bounded scroll region with expand and copy controls. */
+function GateOutputBox({ text }: { text: string }) {
+	const [expanded, setExpanded] = useState(false);
+	const [copied, setCopied] = useState(false);
+
+	const handleToggle = (e: React.MouseEvent) => {
+		e.stopPropagation();
+		setExpanded((prev) => !prev);
+	};
+
+	const handleCopy = (e: React.MouseEvent) => {
+		e.stopPropagation();
+		navigator.clipboard.writeText(text).then(() => {
+			setCopied(true);
+			setTimeout(() => setCopied(false), 1500);
+		});
+	};
+
+	const btnStyle: React.CSSProperties = {
+		background: "transparent",
+		border: "1px solid #444",
+		borderRadius: "4px",
+		color: "#aaa",
+		cursor: "pointer",
+		fontSize: "0.75em",
+		padding: "1px 6px",
+		lineHeight: 1.4,
+	};
+
+	return (
+		<div style={{ marginTop: "4px" }}>
+			<div
+				data-testid="gate-output-text"
+				style={{
+					fontSize: "0.8em",
+					color: "#f85149",
+					whiteSpace: "pre-wrap",
+					wordBreak: "break-word",
+					fontFamily: "monospace",
+					background: "#1a0808",
+					borderRadius: "4px",
+					padding: "6px 8px",
+					maxHeight: expanded ? "none" : "10rem",
+					overflowY: expanded ? "visible" : "auto",
+				}}
+			>
+				{text}
+			</div>
+			<div
+				style={{
+					display: "flex",
+					gap: "6px",
+					marginTop: "4px",
+				}}
+			>
+				<button
+					type="button"
+					data-testid="gate-output-toggle"
+					onClick={handleToggle}
+					style={btnStyle}
+				>
+					{expanded ? "▲ Collapse" : "▼ Expand"}
+				</button>
+				<button
+					type="button"
+					data-testid="gate-output-copy"
+					onClick={handleCopy}
+					style={btnStyle}
+				>
+					{copied ? "✓ Copied" : "⎘ Copy"}
+				</button>
+			</div>
+		</div>
+	);
+}
+
 type WorkItemType = "story" | "bug" | "spike" | "refactor" | "unknown";

 const TYPE_COLORS: Record<WorkItemType, string> = {
@@ -53,6 +129,10 @@ interface StagePanelProps {
 	busyAgentNames?: Set<string>;
 	/** Called when the user requests to start an agent on a story. */
 	onStartAgent?: (storyId: string, agentName?: string) => void;
+	/** Set of story IDs that currently have a deterministic merge in progress. */
+	mergesInFlight?: Set<string>;
+	/** True when this panel shows merge-stage items — enables the mergemaster robot icon. */
+	isMergeStage?: boolean;
 }

 function AgentLozenge({
@@ -259,6 +339,8 @@ export function StagePanel({
 	agentRoster,
 	busyAgentNames,
 	onStartAgent,
+	mergesInFlight,
+	isMergeStage,
 }: StagePanelProps) {
 	const showStartButton =
 		Boolean(onStartAgent) &&
@@ -307,8 +389,10 @@ export function StagePanel({
 					}}
 				>
 					{items.map((item) => {
-						const itemNumber = item.story_id.match(/^(\d+)/)?.[1];
-						const itemType = getWorkItemType(item.story_id);
+						const itemNumber = item.story_id?.match(/^(\d+)/)?.[1];
+						const itemType = item.story_id
+							? getWorkItemType(item.story_id)
+							: "unknown";
 						const borderColor = TYPE_COLORS[itemType];
 						const typeLabel = TYPE_LABELS[itemType];
 						const hasMergeFailure = Boolean(item.merge_failure);
@@ -342,17 +426,77 @@ export function StagePanel({
 							<>
 								<div style={{ flex: 1 }}>
 									<div style={{ fontWeight: 600, fontSize: "0.9em" }}>
-										{hasMergeFailure && (
+										{hasMergeFailure &&
+											(() => {
+												const agentStatus = item.agent?.status;
+												if (agentStatus === "running") {
+													return (
+														<span
+															data-testid={`merge-failure-icon-${item.story_id}`}
+															title="Merge recovery in progress — no human action needed"
+															style={{
+																display: "inline-block",
+																color: "#e3b341",
+																marginRight: "6px",
+																animation: "spin 1s linear infinite",
+															}}
+														>
+															⟳
+														</span>
+													);
+												}
+												if (agentStatus === "pending") {
+													return (
+														<span
+															data-testid={`merge-failure-icon-${item.story_id}`}
+															title="Merge recovery scheduled — waiting for a slot"
+															style={{
+																color: "#e3b341",
+																marginRight: "6px",
+																fontStyle: "normal",
+															}}
+														>
+															⏳
+														</span>
+													);
+												}
+												return (
+													<span
+														data-testid={`merge-failure-icon-${item.story_id}`}
+														title="Merge failed — needs human"
+														style={{
+															color: "#f85149",
+															marginRight: "6px",
+															fontStyle: "normal",
+														}}
+													>
+														✕
+													</span>
+												);
+											})()}
+										{isMergeStage &&
+											item.agent?.status === "running" && (
+												<span
+													data-testid={`mergemaster-icon-${item.story_id}`}
+													title="Mergemaster recovery agent running"
+													style={{
+														marginRight: "4px",
+													}}
+												>
+													🤖
+												</span>
+											)}
+										{mergesInFlight?.has(item.story_id) && (
 											<span
-												data-testid={`merge-failure-icon-${item.story_id}`}
-												title="Merge failed"
+												data-testid={`merge-in-flight-icon-${item.story_id}`}
+												title="Deterministic merge in progress"
 												style={{
-													color: "#f85149",
+													display: "inline-block",
 													marginRight: "6px",
-													fontStyle: "normal",
+													animation: "spin 1s linear infinite",
 												}}
 											>
-												✕
+												⟳
 											</span>
 										)}
 										{itemNumber && (
@@ -380,6 +524,93 @@ export function StagePanel({
 												{typeLabel}
 											</span>
 										)}
+										{item.blocked &&
+											!item.merge_failure &&
+											(() => {
+												const agentStatus = item.agent?.status;
+												if (agentStatus === "running") {
+													return (
+														<span
+															data-testid={`blocked-badge-${item.story_id}`}
+															title="Recovery coder running — no human action needed"
+															style={{
+																display: "inline-block",
+																fontSize: "0.65em",
+																fontWeight: 700,
+																color: "#e3b341",
+																background: "#2a1f0a",
+																border: "1px solid #6e4a00",
+																borderRadius: "4px",
+																padding: "1px 4px",
+																marginRight: "8px",
+																letterSpacing: "0.05em",
+																animation: "spin 1s linear infinite",
+															}}
+														>
+															⟳ RECOVERING
+														</span>
+													);
+												}
+												if (agentStatus === "pending") {
+													return (
+														<span
+															data-testid={`blocked-badge-${item.story_id}`}
+															title="Recovery coder queued — waiting for a slot"
+															style={{
+																fontSize: "0.65em",
+																fontWeight: 700,
+																color: "#e3b341",
+																background: "#2a1f0a",
+																border: "1px solid #6e4a00",
+																borderRadius: "4px",
+																padding: "1px 4px",
+																marginRight: "8px",
+																letterSpacing: "0.05em",
+															}}
+														>
+															⏳ QUEUED
+														</span>
+													);
+												}
+												return (
+													<span
+														data-testid={`blocked-badge-${item.story_id}`}
+														title="Blocked — awaiting human unblock"
+														style={{
+															fontSize: "0.65em",
+															fontWeight: 700,
+															color: "#f85149",
+															background: "#2a1010",
+															border: "1px solid #6e1b1b",
+															borderRadius: "4px",
+															padding: "1px 4px",
+															marginRight: "8px",
+															letterSpacing: "0.05em",
+														}}
+													>
+														⊘ BLOCKED
+													</span>
+												);
+											})()}
+										{item.frozen && (
+											<span
+												data-testid={`frozen-badge-${item.story_id}`}
+												title="Frozen — auto-assign paused"
+												style={{
+													fontSize: "0.65em",
+													fontWeight: 700,
+													color: "#58a6ff",
+													background: "#0d1f36",
+													border: "1px solid #1a3a6e",
+													borderRadius: "4px",
+													padding: "1px 4px",
+													marginRight: "8px",
+													letterSpacing: "0.05em",
+												}}
+											>
+												❄ FROZEN
+											</span>
+										)}
 										{costs?.has(item.story_id) && (
 											<span
 												data-testid={`cost-badge-${item.story_id}`}
@@ -393,7 +624,7 @@ export function StagePanel({
 												${costs.get(item.story_id)?.toFixed(2)}
 											</span>
 										)}
-										{item.name ?? item.story_id}
+										{item.name || item.story_id}
 									</div>
 									{item.error && (
 										<div
@@ -409,15 +640,8 @@ export function StagePanel({
 									{item.merge_failure && (
 										<div
 											data-testid={`merge-failure-reason-${item.story_id}`}
-											style={{
-												fontSize: "0.8em",
-												color: "#f85149",
-												marginTop: "4px",
-												whiteSpace: "pre-wrap",
-												wordBreak: "break-word",
-											}}
 										>
-											{item.merge_failure}
+											<GateOutputBox text={item.merge_failure} />
 										</div>
 									)}
 									{item.depends_on && item.depends_on.length > 0 && (
@@ -483,10 +707,10 @@ export function StagePanel({
 									<button
 										type="button"
 										data-testid={`delete-btn-${item.story_id}`}
-										title={`Delete ${item.name ?? item.story_id}`}
+										title={`Delete ${item.name || item.story_id}`}
 										onClick={(e) => {
 											e.stopPropagation();
-											const label = item.name ?? item.story_id;
+											const label = item.name || item.story_id;
 											if (
 												window.confirm(
 													`Delete "${label}"? This cannot be undone.`,
@@ -0,0 +1,141 @@
+/** Test results card sub-components for WorkItemDetailPanel. */
+
+import type { TestCaseResult, TestResultsResponse } from "../api/client";
+
+function TestCaseRow({ tc }: { tc: TestCaseResult }) {
+	const isPassing = tc.status === "pass";
+	return (
+		<div
+			data-testid={`test-case-${tc.name}`}
+			style={{
+				display: "flex",
+				flexDirection: "column",
+				gap: "2px",
+				padding: "4px 0",
+			}}
+		>
+			<div style={{ display: "flex", alignItems: "center", gap: "6px" }}>
+				<span
+					data-testid={`test-status-${tc.name}`}
+					style={{
+						fontSize: "0.85em",
+						color: isPassing ? "#3fb950" : "#f85149",
+					}}
+				>
+					{isPassing ? "PASS" : "FAIL"}
+				</span>
+				<span style={{ fontSize: "0.82em", color: "#ccc" }}>{tc.name}</span>
+			</div>
+			{tc.details && (
+				<div
+					data-testid={`test-details-${tc.name}`}
+					style={{
+						fontSize: "0.75em",
+						color: "#888",
+						paddingLeft: "22px",
+						whiteSpace: "pre-wrap",
+						wordBreak: "break-word",
+					}}
+				>
+					{tc.details}
+				</div>
+			)}
+		</div>
+	);
+}
+
+function TestSection({
+	title,
+	tests,
+	testId,
+}: {
+	title: string;
+	tests: TestCaseResult[];
+	testId: string;
+}) {
+	const passCount = tests.filter((t) => t.status === "pass").length;
+	const failCount = tests.length - passCount;
+	return (
+		<div data-testid={testId}>
+			<div
+				style={{
+					fontSize: "0.78em",
+					fontWeight: 600,
+					color: "#aaa",
+					marginBottom: "6px",
+				}}
+			>
+				{title} ({passCount} passed, {failCount} failed)
+			</div>
+			{tests.length === 0 ? (
+				<div style={{ fontSize: "0.75em", color: "#555", fontStyle: "italic" }}>
+					No tests recorded
+				</div>
+			) : (
+				tests.map((tc) => <TestCaseRow key={tc.name} tc={tc} />)
+			)}
+		</div>
+	);
+}
+
+/** Renders the "Test Results" card in the detail panel. */
+export function TestResultsSection({
+	testResults,
+}: {
+	testResults: TestResultsResponse | null;
+}) {
+	const hasTestResults =
+		testResults &&
+		(testResults.unit.length > 0 || testResults.integration.length > 0);
+
+	return (
+		<div
+			data-testid="test-results-section"
+			style={{
+				border: "1px solid #2a2a2a",
+				borderRadius: "8px",
+				padding: "10px 12px",
+				background: "#161616",
+			}}
+		>
+			<div
+				style={{
+					fontWeight: 600,
+					fontSize: "0.8em",
+					color: "#555",
+					marginBottom: "8px",
+				}}
+			>
+				Test Results
+			</div>
+			{hasTestResults ? (
+				<div
+					data-testid="test-results-content"
+					style={{
+						display: "flex",
+						flexDirection: "column",
+						gap: "12px",
+					}}
+				>
+					<TestSection
+						title="Unit Tests"
+						tests={testResults.unit}
+						testId="test-section-unit"
+					/>
+					<TestSection
+						title="Integration Tests"
+						tests={testResults.integration}
+						testId="test-section-integration"
+					/>
+				</div>
+			) : (
+				<div
+					data-testid="test-results-empty"
+					style={{ fontSize: "0.75em", color: "#444" }}
+				>
+					No test results recorded
+				</div>
+			)}
+		</div>
+	);
+}
@@ -0,0 +1,101 @@
+/** Token cost card sub-component for WorkItemDetailPanel. */
+
+import type { AgentCostEntry, TokenCostResponse } from "../api/client";
+
+/** Renders the "Token Cost" card in the detail panel. */
+export function TokenCostSection({
+	tokenCost,
+}: {
+	tokenCost: TokenCostResponse | null;
+}) {
+	return (
+		<div
+			data-testid="token-cost-section"
+			style={{
+				border: "1px solid #2a2a2a",
+				borderRadius: "8px",
+				padding: "10px 12px",
+				background: "#161616",
+			}}
+		>
+			<div
+				style={{
+					fontWeight: 600,
+					fontSize: "0.8em",
+					color: "#555",
+					marginBottom: "8px",
+				}}
+			>
+				Token Cost
+			</div>
+			{tokenCost && tokenCost.agents.length > 0 ? (
+				<div data-testid="token-cost-content">
+					<div
+						style={{
+							fontSize: "0.75em",
+							color: "#888",
+							marginBottom: "8px",
+						}}
+					>
+						Total:{" "}
+						<span data-testid="token-cost-total" style={{ color: "#ccc" }}>
+							${tokenCost.total_cost_usd.toFixed(6)}
+						</span>
+					</div>
+					{tokenCost.agents.map((agent: AgentCostEntry) => (
+						<div
+							key={agent.agent_name}
+							data-testid={`token-cost-agent-${agent.agent_name}`}
+							style={{
+								fontSize: "0.75em",
+								color: "#888",
+								padding: "4px 0",
+								borderTop: "1px solid #222",
+							}}
+						>
+							<div
+								style={{
+									display: "flex",
+									justifyContent: "space-between",
+									marginBottom: "2px",
+								}}
+							>
+								<span style={{ color: "#ccc", fontWeight: 600 }}>
+									{agent.agent_name}
+									{agent.model ? (
+										<span
+											style={{ color: "#666", fontWeight: 400 }}
+										>{` (${agent.model})`}</span>
+									) : null}
+								</span>
+								<span style={{ color: "#aaa" }}>
+									${agent.total_cost_usd.toFixed(6)}
+								</span>
+							</div>
+							<div style={{ color: "#555" }}>
+								in {agent.input_tokens.toLocaleString()} / out{" "}
+								{agent.output_tokens.toLocaleString()}
+								{(agent.cache_creation_input_tokens > 0 ||
+									agent.cache_read_input_tokens > 0) && (
+									<>
+										{" "}
+										/ cache +
+										{agent.cache_creation_input_tokens.toLocaleString()}{" "}
+										read {agent.cache_read_input_tokens.toLocaleString()}
+									</>
+								)}
+							</div>
+						</div>
+					))}
+				</div>
+			) : (
+				<div
+					data-testid="token-cost-empty"
+					style={{ fontSize: "0.75em", color: "#444" }}
+				>
+					No token data recorded
+				</div>
+			)}
+		</div>
+	);
+}
@@ -0,0 +1,379 @@
+import { act, render, screen } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { AgentEvent, AgentInfo } from "../api/agents";
+
+vi.mock("../api/client", async () => {
+	const actual =
+		await vi.importActual<typeof import("../api/client")>("../api/client");
+	return {
+		...actual,
+		api: {
+			...actual.api,
+			getWorkItemContent: vi.fn(),
+			getTestResults: vi.fn(),
+			getTokenCost: vi.fn(),
+		},
+	};
+});
+
+vi.mock("../api/agents", () => ({
+	agentsApi: {
+		listAgents: vi.fn(),
+		getAgentConfig: vi.fn(),
+		stopAgent: vi.fn(),
+		startAgent: vi.fn(),
+	},
+	subscribeAgentStream: vi.fn(() => () => {}),
+}));
+
+import { agentsApi, subscribeAgentStream } from "../api/agents";
+import { api } from "../api/client";
+
+const { WorkItemDetailPanel } = await import("./WorkItemDetailPanel");
+
+const mockedGetWorkItemContent = vi.mocked(api.getWorkItemContent);
+const mockedGetTestResults = vi.mocked(api.getTestResults);
+const mockedGetTokenCost = vi.mocked(api.getTokenCost);
+const mockedListAgents = vi.mocked(agentsApi.listAgents);
+const mockedGetAgentConfig = vi.mocked(agentsApi.getAgentConfig);
+const mockedSubscribeAgentStream = vi.mocked(subscribeAgentStream);
+
+const DEFAULT_CONTENT = {
+	content: "# Big Title\n\nSome content here.",
+	stage: "current",
+	name: "Big Title Story",
+	agent: null,
+};
+
+beforeEach(() => {
+	vi.clearAllMocks();
+	mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
+	mockedGetTestResults.mockResolvedValue(null);
+	mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
+	mockedListAgents.mockResolvedValue([]);
+	mockedGetAgentConfig.mockResolvedValue([]);
+	mockedSubscribeAgentStream.mockReturnValue(() => {});
+});
+
+afterEach(() => {
+	vi.restoreAllMocks();
+});
+
+describe("WorkItemDetailPanel - Agent Logs", () => {
+	it("shows placeholder when no agent is assigned to the story", async () => {
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+		await screen.findByTestId("detail-panel-content");
+		const placeholder = screen.getByTestId("placeholder-agent-logs");
+		expect(placeholder).toBeInTheDocument();
+		expect(placeholder).toHaveTextContent("Coming soon");
+	});
+
+	it("shows agent name and running status when agent is running", async () => {
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		const statusBadge = await screen.findByTestId("agent-status-badge");
+		expect(statusBadge).toHaveTextContent("coder-1");
+		expect(statusBadge).toHaveTextContent("running");
+	});
+
+	it("shows log output when agent emits output events", async () => {
+		let emitEvent: ((e: AgentEvent) => void) | null = null;
+		mockedSubscribeAgentStream.mockImplementation(
+			(_storyId, _agentName, onEvent) => {
+				emitEvent = onEvent;
+				return () => {};
+			},
+		);
+
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await screen.findByTestId("agent-status-badge");
+
+		await act(async () => {
+			emitEvent?.({
+				type: "output",
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				text: "Writing tests...",
+			});
+		});
+
+		const logOutput = screen.getByTestId("agent-log-output");
+		expect(logOutput).toHaveTextContent("Writing tests...");
+	});
+
+	it("appends multiple output events to the log", async () => {
+		let emitEvent: ((e: AgentEvent) => void) | null = null;
+		mockedSubscribeAgentStream.mockImplementation(
+			(_storyId, _agentName, onEvent) => {
+				emitEvent = onEvent;
+				return () => {};
+			},
+		);
+
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await screen.findByTestId("agent-status-badge");
+
+		await act(async () => {
+			emitEvent?.({
+				type: "output",
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				text: "Line one\n",
+			});
+		});
+
+		await act(async () => {
+			emitEvent?.({
+				type: "output",
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				text: "Line two\n",
+			});
+		});
+
+		const logOutput = screen.getByTestId("agent-log-output");
+		expect(logOutput.textContent).toContain("Line one");
+		expect(logOutput.textContent).toContain("Line two");
+	});
+
+	it("updates status to completed after done event", async () => {
+		let emitEvent: ((e: AgentEvent) => void) | null = null;
+		mockedSubscribeAgentStream.mockImplementation(
+			(_storyId, _agentName, onEvent) => {
+				emitEvent = onEvent;
+				return () => {};
+			},
+		);
+
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await screen.findByTestId("agent-status-badge");
+
+		await act(async () => {
+			emitEvent?.({
+				type: "done",
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				session_id: "session-123",
+			});
+		});
+
+		const statusBadge = screen.getByTestId("agent-status-badge");
+		expect(statusBadge).toHaveTextContent("completed");
+	});
+
+	it("shows failed status after error event", async () => {
+		let emitEvent: ((e: AgentEvent) => void) | null = null;
+		mockedSubscribeAgentStream.mockImplementation(
+			(_storyId, _agentName, onEvent) => {
+				emitEvent = onEvent;
+				return () => {};
+			},
+		);
+
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await screen.findByTestId("agent-status-badge");
+
+		await act(async () => {
+			emitEvent?.({
+				type: "error",
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				message: "Process failed",
+			});
+		});
+
+		const statusBadge = screen.getByTestId("agent-status-badge");
+		expect(statusBadge).toHaveTextContent("failed");
+
+		const logOutput = screen.getByTestId("agent-log-output");
+		expect(logOutput.textContent).toContain("[ERROR] Process failed");
+	});
+
+	it("shows completed agent status without subscribing to stream", async () => {
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "completed",
+				session_id: "session-123",
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		const statusBadge = await screen.findByTestId("agent-status-badge");
+		expect(statusBadge).toHaveTextContent("completed");
+		expect(mockedSubscribeAgentStream).not.toHaveBeenCalled();
+	});
+
+	it("shows failed agent status for a failed agent without subscribing to stream", async () => {
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "failed",
+				session_id: null,
+				worktree_path: null,
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		const statusBadge = await screen.findByTestId("agent-status-badge");
+		expect(statusBadge).toHaveTextContent("failed");
+		expect(mockedSubscribeAgentStream).not.toHaveBeenCalled();
+	});
+
+	it("shows agent logs section (not placeholder) when agent is assigned", async () => {
+		const agentList: AgentInfo[] = [
+			{
+				story_id: "42_story_test",
+				agent_name: "coder-1",
+				status: "running",
+				session_id: null,
+				worktree_path: "/tmp/wt",
+				base_branch: "master",
+				log_session_id: null,
+			},
+		];
+		mockedListAgents.mockResolvedValue(agentList);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_test"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await screen.findByTestId("agent-logs-section");
+
+		expect(
+			screen.queryByTestId("placeholder-agent-logs"),
+		).not.toBeInTheDocument();
+	});
+});
@@ -0,0 +1,329 @@
+import { render, screen, waitFor } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { TestResultsResponse, TokenCostResponse } from "../api/client";
+
+vi.mock("../api/client", async () => {
+	const actual =
+		await vi.importActual<typeof import("../api/client")>("../api/client");
+	return {
+		...actual,
+		api: {
+			...actual.api,
+			getWorkItemContent: vi.fn(),
+			getTestResults: vi.fn(),
+			getTokenCost: vi.fn(),
+		},
+	};
+});
+
+vi.mock("../api/agents", () => ({
+	agentsApi: {
+		listAgents: vi.fn(),
+		getAgentConfig: vi.fn(),
+		stopAgent: vi.fn(),
+		startAgent: vi.fn(),
+	},
+	subscribeAgentStream: vi.fn(() => () => {}),
+}));
+
+import { agentsApi, subscribeAgentStream } from "../api/agents";
+import { api } from "../api/client";
+
+const { WorkItemDetailPanel } = await import("./WorkItemDetailPanel");
+
+const mockedGetWorkItemContent = vi.mocked(api.getWorkItemContent);
+const mockedGetTestResults = vi.mocked(api.getTestResults);
+const mockedGetTokenCost = vi.mocked(api.getTokenCost);
+const mockedListAgents = vi.mocked(agentsApi.listAgents);
+const mockedGetAgentConfig = vi.mocked(agentsApi.getAgentConfig);
+const mockedSubscribeAgentStream = vi.mocked(subscribeAgentStream);
+
+const DEFAULT_CONTENT = {
+	content: "# Big Title\n\nSome content here.",
+	stage: "current",
+	name: "Big Title Story",
+	agent: null,
+};
+
+const sampleTestResults: TestResultsResponse = {
+	unit: [
+		{ name: "test_add", status: "pass", details: null },
+		{ name: "test_subtract", status: "fail", details: "expected 3, got 4" },
+	],
+	integration: [{ name: "test_api_endpoint", status: "pass", details: null }],
+};
+
+beforeEach(() => {
+	vi.clearAllMocks();
+	mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
+	mockedGetTestResults.mockResolvedValue(null);
+	mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
+	mockedListAgents.mockResolvedValue([]);
+	mockedGetAgentConfig.mockResolvedValue([]);
+	mockedSubscribeAgentStream.mockReturnValue(() => {});
+});
+
+afterEach(() => {
+	vi.restoreAllMocks();
+});
+
+describe("WorkItemDetailPanel - Test Results", () => {
+	it("shows empty test results message when no results exist", async () => {
+		mockedGetTestResults.mockResolvedValue(null);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(screen.getByTestId("test-results-empty")).toBeInTheDocument();
+		});
+		expect(screen.getByText("No test results recorded")).toBeInTheDocument();
+	});
+
+	it("shows unit and integration test results when available", async () => {
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(screen.getByTestId("test-results-content")).toBeInTheDocument();
+		});
+
+		// Unit test section
+		expect(screen.getByTestId("test-section-unit")).toBeInTheDocument();
+		expect(
+			screen.getByText("Unit Tests (1 passed, 1 failed)"),
+		).toBeInTheDocument();
+
+		// Integration test section
+		expect(screen.getByTestId("test-section-integration")).toBeInTheDocument();
+		expect(
+			screen.getByText("Integration Tests (1 passed, 0 failed)"),
+		).toBeInTheDocument();
+	});
+
+	it("shows pass/fail status and details for each test", async () => {
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(screen.getByTestId("test-case-test_add")).toBeInTheDocument();
+		});
+
+		// Passing test
+		expect(screen.getByTestId("test-status-test_add")).toHaveTextContent(
+			"PASS",
+		);
+		expect(screen.getByText("test_add")).toBeInTheDocument();
+
+		// Failing test with details
+		expect(screen.getByTestId("test-status-test_subtract")).toHaveTextContent(
+			"FAIL",
+		);
+		expect(screen.getByText("test_subtract")).toBeInTheDocument();
+		expect(screen.getByTestId("test-details-test_subtract")).toHaveTextContent(
+			"expected 3, got 4",
+		);
+
+		// Integration test
+		expect(
+			screen.getByTestId("test-status-test_api_endpoint"),
+		).toHaveTextContent("PASS");
+	});
+
+	it("re-fetches test results when pipelineVersion changes", async () => {
+		mockedGetTestResults.mockResolvedValue(null);
+
+		const { rerender } = render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(mockedGetTestResults).toHaveBeenCalledTimes(1);
+		});
+
+		// Update with new results and bump pipelineVersion.
+		mockedGetTestResults.mockResolvedValue(sampleTestResults);
+
+		rerender(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={1}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(mockedGetTestResults).toHaveBeenCalledTimes(2);
+		});
+
+		await waitFor(() => {
+			expect(screen.getByTestId("test-results-content")).toBeInTheDocument();
+		});
+	});
+});
+
+describe("WorkItemDetailPanel - Token Cost", () => {
+	const sampleTokenCost: TokenCostResponse = {
+		total_cost_usd: 0.012345,
+		agents: [
+			{
+				agent_name: "coder-1",
+				model: "claude-sonnet-4-6",
+				input_tokens: 1000,
+				output_tokens: 500,
+				cache_creation_input_tokens: 200,
+				cache_read_input_tokens: 100,
+				total_cost_usd: 0.009,
+			},
+			{
+				agent_name: "coder-2",
+				model: null,
+				input_tokens: 800,
+				output_tokens: 300,
+				cache_creation_input_tokens: 0,
+				cache_read_input_tokens: 0,
+				total_cost_usd: 0.003345,
+			},
+		],
+	};
+
+	it("shows empty state when no token data exists", async () => {
+		mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(screen.getByTestId("token-cost-empty")).toBeInTheDocument();
+		});
+		expect(screen.getByText("No token data recorded")).toBeInTheDocument();
+	});
+
+	it("shows per-agent breakdown and total cost when data exists", async () => {
+		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(screen.getByTestId("token-cost-content")).toBeInTheDocument();
+		});
+
+		expect(screen.getByTestId("token-cost-total")).toHaveTextContent(
+			"$0.012345",
+		);
+		expect(screen.getByTestId("token-cost-agent-coder-1")).toBeInTheDocument();
+		expect(screen.getByTestId("token-cost-agent-coder-2")).toBeInTheDocument();
+	});
+
+	it("shows agent name and model when model is present", async () => {
+		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(
+				screen.getByTestId("token-cost-agent-coder-1"),
+			).toBeInTheDocument();
+		});
+
+		const agentRow = screen.getByTestId("token-cost-agent-coder-1");
+		expect(agentRow).toHaveTextContent("coder-1");
+		expect(agentRow).toHaveTextContent("claude-sonnet-4-6");
+	});
+
+	it("shows agent name without model when model is null", async () => {
+		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
+
+		render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(
+				screen.getByTestId("token-cost-agent-coder-2"),
+			).toBeInTheDocument();
+		});
+
+		const agentRow = screen.getByTestId("token-cost-agent-coder-2");
+		expect(agentRow).toHaveTextContent("coder-2");
+		expect(agentRow).not.toHaveTextContent("null");
+	});
+
+	it("re-fetches token cost when pipelineVersion changes", async () => {
+		mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
+
+		const { rerender } = render(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={0}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(mockedGetTokenCost).toHaveBeenCalledTimes(1);
+		});
+
+		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
+
+		rerender(
+			<WorkItemDetailPanel
+				storyId="42_story_foo"
+				pipelineVersion={1}
+				onClose={() => {}}
+			/>,
+		);
+
+		await waitFor(() => {
+			expect(mockedGetTokenCost).toHaveBeenCalledTimes(2);
+		});
+
+		await waitFor(() => {
+			expect(screen.getByTestId("token-cost-content")).toBeInTheDocument();
+		});
+	});
+});
@@ -1,7 +1,5 @@
-import { act, render, screen, waitFor } from "@testing-library/react";
+import { render, screen, waitFor } from "@testing-library/react";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
-import type { AgentEvent, AgentInfo } from "../api/agents";
-import type { TestResultsResponse, TokenCostResponse } from "../api/client";

 vi.mock("../api/client", async () => {
 	const actual =
@@ -46,14 +44,6 @@ const DEFAULT_CONTENT = {
 	agent: null,
 };

-const sampleTestResults: TestResultsResponse = {
-	unit: [
-		{ name: "test_add", status: "pass", details: null },
-		{ name: "test_subtract", status: "fail", details: "expected 3, got 4" },
-	],
-	integration: [{ name: "test_api_endpoint", status: "pass", details: null }],
-};
-
 beforeEach(() => {
 	vi.clearAllMocks();
 	mockedGetWorkItemContent.mockResolvedValue(DEFAULT_CONTENT);
@@ -214,325 +204,6 @@ describe("WorkItemDetailPanel", () => {
 	});
 });

-describe("WorkItemDetailPanel - Agent Logs", () => {
-	it("shows placeholder when no agent is assigned to the story", async () => {
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-		await screen.findByTestId("detail-panel-content");
-		const placeholder = screen.getByTestId("placeholder-agent-logs");
-		expect(placeholder).toBeInTheDocument();
-		expect(placeholder).toHaveTextContent("Coming soon");
-	});
-
-	it("shows agent name and running status when agent is running", async () => {
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		const statusBadge = await screen.findByTestId("agent-status-badge");
-		expect(statusBadge).toHaveTextContent("coder-1");
-		expect(statusBadge).toHaveTextContent("running");
-	});
-
-	it("shows log output when agent emits output events", async () => {
-		let emitEvent: ((e: AgentEvent) => void) | null = null;
-		mockedSubscribeAgentStream.mockImplementation(
-			(_storyId, _agentName, onEvent) => {
-				emitEvent = onEvent;
-				return () => {};
-			},
-		);
-
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await screen.findByTestId("agent-status-badge");
-
-		await act(async () => {
-			emitEvent?.({
-				type: "output",
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				text: "Writing tests...",
-			});
-		});
-
-		const logOutput = screen.getByTestId("agent-log-output");
-		expect(logOutput).toHaveTextContent("Writing tests...");
-	});
-
-	it("appends multiple output events to the log", async () => {
-		let emitEvent: ((e: AgentEvent) => void) | null = null;
-		mockedSubscribeAgentStream.mockImplementation(
-			(_storyId, _agentName, onEvent) => {
-				emitEvent = onEvent;
-				return () => {};
-			},
-		);
-
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await screen.findByTestId("agent-status-badge");
-
-		await act(async () => {
-			emitEvent?.({
-				type: "output",
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				text: "Line one\n",
-			});
-		});
-
-		await act(async () => {
-			emitEvent?.({
-				type: "output",
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				text: "Line two\n",
-			});
-		});
-
-		const logOutput = screen.getByTestId("agent-log-output");
-		expect(logOutput.textContent).toContain("Line one");
-		expect(logOutput.textContent).toContain("Line two");
-	});
-
-	it("updates status to completed after done event", async () => {
-		let emitEvent: ((e: AgentEvent) => void) | null = null;
-		mockedSubscribeAgentStream.mockImplementation(
-			(_storyId, _agentName, onEvent) => {
-				emitEvent = onEvent;
-				return () => {};
-			},
-		);
-
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await screen.findByTestId("agent-status-badge");
-
-		await act(async () => {
-			emitEvent?.({
-				type: "done",
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				session_id: "session-123",
-			});
-		});
-
-		const statusBadge = screen.getByTestId("agent-status-badge");
-		expect(statusBadge).toHaveTextContent("completed");
-	});
-
-	it("shows failed status after error event", async () => {
-		let emitEvent: ((e: AgentEvent) => void) | null = null;
-		mockedSubscribeAgentStream.mockImplementation(
-			(_storyId, _agentName, onEvent) => {
-				emitEvent = onEvent;
-				return () => {};
-			},
-		);
-
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await screen.findByTestId("agent-status-badge");
-
-		await act(async () => {
-			emitEvent?.({
-				type: "error",
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				message: "Process failed",
-			});
-		});
-
-		const statusBadge = screen.getByTestId("agent-status-badge");
-		expect(statusBadge).toHaveTextContent("failed");
-
-		const logOutput = screen.getByTestId("agent-log-output");
-		expect(logOutput.textContent).toContain("[ERROR] Process failed");
-	});
-
-	it("shows completed agent status without subscribing to stream", async () => {
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "completed",
-				session_id: "session-123",
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		const statusBadge = await screen.findByTestId("agent-status-badge");
-		expect(statusBadge).toHaveTextContent("completed");
-		expect(mockedSubscribeAgentStream).not.toHaveBeenCalled();
-	});
-
-	it("shows failed agent status for a failed agent without subscribing to stream", async () => {
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "failed",
-				session_id: null,
-				worktree_path: null,
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		const statusBadge = await screen.findByTestId("agent-status-badge");
-		expect(statusBadge).toHaveTextContent("failed");
-		expect(mockedSubscribeAgentStream).not.toHaveBeenCalled();
-	});
-
-	it("shows agent logs section (not placeholder) when agent is assigned", async () => {
-		const agentList: AgentInfo[] = [
-			{
-				story_id: "42_story_test",
-				agent_name: "coder-1",
-				status: "running",
-				session_id: null,
-				worktree_path: "/tmp/wt",
-				base_branch: "master",
-				log_session_id: null,
-			},
-		];
-		mockedListAgents.mockResolvedValue(agentList);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_test"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await screen.findByTestId("agent-logs-section");
-
-		expect(
-			screen.queryByTestId("placeholder-agent-logs"),
-		).not.toBeInTheDocument();
-	});
-});
-
 describe("WorkItemDetailPanel - Assigned Agent", () => {
 	it("shows assigned agent name when agent front matter field is set", async () => {
 		mockedGetWorkItemContent.mockResolvedValue({
@@ -586,264 +257,3 @@ describe("WorkItemDetailPanel - Assigned Agent", () => {
 		expect(agentEl).not.toHaveTextContent("assigned");
 	});
 });
-
-describe("WorkItemDetailPanel - Test Results", () => {
-	it("shows empty test results message when no results exist", async () => {
-		mockedGetTestResults.mockResolvedValue(null);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(screen.getByTestId("test-results-empty")).toBeInTheDocument();
-		});
-		expect(screen.getByText("No test results recorded")).toBeInTheDocument();
-	});
-
-	it("shows unit and integration test results when available", async () => {
-		mockedGetTestResults.mockResolvedValue(sampleTestResults);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(screen.getByTestId("test-results-content")).toBeInTheDocument();
-		});
-
-		// Unit test section
-		expect(screen.getByTestId("test-section-unit")).toBeInTheDocument();
-		expect(
-			screen.getByText("Unit Tests (1 passed, 1 failed)"),
-		).toBeInTheDocument();
-
-		// Integration test section
-		expect(screen.getByTestId("test-section-integration")).toBeInTheDocument();
-		expect(
-			screen.getByText("Integration Tests (1 passed, 0 failed)"),
-		).toBeInTheDocument();
-	});
-
-	it("shows pass/fail status and details for each test", async () => {
-		mockedGetTestResults.mockResolvedValue(sampleTestResults);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(screen.getByTestId("test-case-test_add")).toBeInTheDocument();
-		});
-
-		// Passing test
-		expect(screen.getByTestId("test-status-test_add")).toHaveTextContent(
-			"PASS",
-		);
-		expect(screen.getByText("test_add")).toBeInTheDocument();
-
-		// Failing test with details
-		expect(screen.getByTestId("test-status-test_subtract")).toHaveTextContent(
-			"FAIL",
-		);
-		expect(screen.getByText("test_subtract")).toBeInTheDocument();
-		expect(screen.getByTestId("test-details-test_subtract")).toHaveTextContent(
-			"expected 3, got 4",
-		);
-
-		// Integration test
-		expect(
-			screen.getByTestId("test-status-test_api_endpoint"),
-		).toHaveTextContent("PASS");
-	});
-
-	it("re-fetches test results when pipelineVersion changes", async () => {
-		mockedGetTestResults.mockResolvedValue(null);
-
-		const { rerender } = render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(mockedGetTestResults).toHaveBeenCalledTimes(1);
-		});
-
-		// Update with new results and bump pipelineVersion.
-		mockedGetTestResults.mockResolvedValue(sampleTestResults);
-
-		rerender(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={1}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(mockedGetTestResults).toHaveBeenCalledTimes(2);
-		});
-
-		await waitFor(() => {
-			expect(screen.getByTestId("test-results-content")).toBeInTheDocument();
-		});
-	});
-});
-
-describe("WorkItemDetailPanel - Token Cost", () => {
-	const sampleTokenCost: TokenCostResponse = {
-		total_cost_usd: 0.012345,
-		agents: [
-			{
-				agent_name: "coder-1",
-				model: "claude-sonnet-4-6",
-				input_tokens: 1000,
-				output_tokens: 500,
-				cache_creation_input_tokens: 200,
-				cache_read_input_tokens: 100,
-				total_cost_usd: 0.009,
-			},
-			{
-				agent_name: "coder-2",
-				model: null,
-				input_tokens: 800,
-				output_tokens: 300,
-				cache_creation_input_tokens: 0,
-				cache_read_input_tokens: 0,
-				total_cost_usd: 0.003345,
-			},
-		],
-	};
-
-	it("shows empty state when no token data exists", async () => {
-		mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(screen.getByTestId("token-cost-empty")).toBeInTheDocument();
-		});
-		expect(screen.getByText("No token data recorded")).toBeInTheDocument();
-	});
-
-	it("shows per-agent breakdown and total cost when data exists", async () => {
-		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(screen.getByTestId("token-cost-content")).toBeInTheDocument();
-		});
-
-		expect(screen.getByTestId("token-cost-total")).toHaveTextContent(
-			"$0.012345",
-		);
-		expect(screen.getByTestId("token-cost-agent-coder-1")).toBeInTheDocument();
-		expect(screen.getByTestId("token-cost-agent-coder-2")).toBeInTheDocument();
-	});
-
-	it("shows agent name and model when model is present", async () => {
-		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(
-				screen.getByTestId("token-cost-agent-coder-1"),
-			).toBeInTheDocument();
-		});
-
-		const agentRow = screen.getByTestId("token-cost-agent-coder-1");
-		expect(agentRow).toHaveTextContent("coder-1");
-		expect(agentRow).toHaveTextContent("claude-sonnet-4-6");
-	});
-
-	it("shows agent name without model when model is null", async () => {
-		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
-
-		render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(
-				screen.getByTestId("token-cost-agent-coder-2"),
-			).toBeInTheDocument();
-		});
-
-		const agentRow = screen.getByTestId("token-cost-agent-coder-2");
-		expect(agentRow).toHaveTextContent("coder-2");
-		expect(agentRow).not.toHaveTextContent("null");
-	});
-
-	it("re-fetches token cost when pipelineVersion changes", async () => {
-		mockedGetTokenCost.mockResolvedValue({ total_cost_usd: 0, agents: [] });
-
-		const { rerender } = render(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={0}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(mockedGetTokenCost).toHaveBeenCalledTimes(1);
-		});
-
-		mockedGetTokenCost.mockResolvedValue(sampleTokenCost);
-
-		rerender(
-			<WorkItemDetailPanel
-				storyId="42_story_foo"
-				pipelineVersion={1}
-				onClose={() => {}}
-			/>,
-		);
-
-		await waitFor(() => {
-			expect(mockedGetTokenCost).toHaveBeenCalledTimes(2);
-		});
-
-		await waitFor(() => {
-			expect(screen.getByTestId("token-cost-content")).toBeInTheDocument();
-		});
-	});
-});
@@ -8,71 +8,18 @@ import type {
 } from "../api/agents";
 import { agentsApi, subscribeAgentStream } from "../api/agents";
 import type {
-	AgentCostEntry,
-	TestCaseResult,
 	TestResultsResponse,
 	TokenCostResponse,
 } from "../api/client";
 import { api } from "../api/client";
+import { AgentLogsSection } from "./AgentLogsSection";
+import { TestResultsSection } from "./TestResultsSection";
+import { TokenCostSection } from "./TokenCostSection";
+import { WorkItemDetailPanelHeader } from "./WorkItemDetailPanelHeader";
+import { stripDisplayContent } from "./workItemDetailPanelUtils";

 const { useCallback, useEffect, useRef, useState } = React;

-/**
- * Strip YAML front matter and the first H1 heading from story content before
- * rendering. The panel header already shows the story ID/title, so rendering
- * them again inside the markdown body creates duplicate information.
- */
-function stripDisplayContent(content: string): string {
-	let text = content;
-	// Strip YAML front matter (--- ... ---)
-	if (text.startsWith("---")) {
-		const eol = text.indexOf("\n");
-		if (eol !== -1) {
-			const closeIdx = text.indexOf("\n---", eol);
-			if (closeIdx !== -1) {
-				text = text.slice(closeIdx + 4);
-			}
-		}
-	}
-	// Trim leading blank lines left by the front matter
-	text = text.trimStart();
-	// Strip the first H1 heading — it duplicates the panel header title
-	if (text.startsWith("# ")) {
-		const eol = text.indexOf("\n");
-		text = eol !== -1 ? text.slice(eol + 1).trimStart() : "";
-	}
-	return text;
-}
-
-/**
- * Format the story ID/title line shown in the panel header.
- * Produces e.g. "Story 454: My Story Name" or "Bug 12: Crash on startup".
- * Falls back to name or storyId when the pattern doesn't match.
- */
-function formatStoryTitle(storyId: string, name: string | null): string {
-	const match = storyId.match(/^(\d+)_([a-z]+)_/);
-	if (!match || !name) return name ?? storyId;
-	const [, number, type] = match;
-	const typeLabel = type.charAt(0).toUpperCase() + type.slice(1);
-	return `${typeLabel} ${number}: ${name}`;
-}
-
-const STAGE_LABELS: Record<string, string> = {
-	backlog: "Backlog",
-	current: "Current",
-	qa: "QA",
-	merge: "To Merge",
-	done: "Done",
-	archived: "Archived",
-};
-
-const STATUS_COLORS: Record<AgentStatusValue, string> = {
-	running: "#3fb950",
-	pending: "#e3b341",
-	completed: "#aaa",
-	failed: "#f85149",
-};
-
 interface WorkItemDetailPanelProps {
 	storyId: string;
 	pipelineVersion: number;
@@ -81,82 +28,6 @@ interface WorkItemDetailPanelProps {
 	reviewHold?: boolean;
 }

-function TestCaseRow({ tc }: { tc: TestCaseResult }) {
-	const isPassing = tc.status === "pass";
-	return (
-		<div
-			data-testid={`test-case-${tc.name}`}
-			style={{
-				display: "flex",
-				flexDirection: "column",
-				gap: "2px",
-				padding: "4px 0",
-			}}
-		>
-			<div style={{ display: "flex", alignItems: "center", gap: "6px" }}>
-				<span
-					data-testid={`test-status-${tc.name}`}
-					style={{
-						fontSize: "0.85em",
-						color: isPassing ? "#3fb950" : "#f85149",
-					}}
-				>
-					{isPassing ? "PASS" : "FAIL"}
-				</span>
-				<span style={{ fontSize: "0.82em", color: "#ccc" }}>{tc.name}</span>
-			</div>
-			{tc.details && (
-				<div
-					data-testid={`test-details-${tc.name}`}
-					style={{
-						fontSize: "0.75em",
-						color: "#888",
-						paddingLeft: "22px",
-						whiteSpace: "pre-wrap",
-						wordBreak: "break-word",
-					}}
-				>
-					{tc.details}
-				</div>
-			)}
-		</div>
-	);
-}
-
-function TestSection({
-	title,
-	tests,
-	testId,
-}: {
-	title: string;
-	tests: TestCaseResult[];
-	testId: string;
-}) {
-	const passCount = tests.filter((t) => t.status === "pass").length;
-	const failCount = tests.length - passCount;
-	return (
-		<div data-testid={testId}>
-			<div
-				style={{
-					fontSize: "0.78em",
-					fontWeight: 600,
-					color: "#aaa",
-					marginBottom: "6px",
-				}}
-			>
-				{title} ({passCount} passed, {failCount} failed)
-			</div>
-			{tests.length === 0 ? (
-				<div style={{ fontSize: "0.75em", color: "#555", fontStyle: "italic" }}>
-					No tests recorded
-				</div>
-			) : (
-				tests.map((tc) => <TestCaseRow key={tc.name} tc={tc} />)
-			)}
-		</div>
-	);
-}
-
 export function WorkItemDetailPanel({
 	storyId,
 	pipelineVersion,
@@ -302,17 +173,6 @@ export function WorkItemDetailPanel({
 			});
 	}, []);

-	// Map pipeline stage → agent stage filter.
-	const STAGE_TO_AGENT_STAGE: Record<string, string> = {
-		current: "coder",
-		qa: "qa",
-		merge: "mergemaster",
-	};
-
-	const filteredAgents = agentConfig.filter(
-		(a) => a.stage === STAGE_TO_AGENT_STAGE[stage],
-	);
-
 	// The currently active agent name for this story (running or pending).
 	const activeAgentName =
 		agentInfo && (agentStatus === "running" || agentStatus === "pending")
@@ -343,11 +203,6 @@ export function WorkItemDetailPanel({
 		[storyId, activeAgentName],
 	);

-	const stageLabel = STAGE_LABELS[stage] ?? stage;
-	const hasTestResults =
-		testResults &&
-		(testResults.unit.length > 0 || testResults.integration.length > 0);
-
 	return (
 		<div
 			data-testid="work-item-detail-panel"
@@ -362,138 +217,19 @@ export function WorkItemDetailPanel({
 				border: "1px solid #333",
 			}}
 		>
-			{/* Header */}
-			<div
-				style={{
-					display: "flex",
-					alignItems: "center",
-					justifyContent: "space-between",
-					padding: "12px 16px",
-					borderBottom: "1px solid #333",
-					flexShrink: 0,
-				}}
-			>
-				<div
-					style={{
-						display: "flex",
-						flexDirection: "column",
-						gap: "2px",
-						minWidth: 0,
-					}}
-				>
-					<div
-						data-testid="detail-panel-title"
-						style={{
-							fontWeight: 600,
-							fontSize: "0.95em",
-							color: "#ececec",
-							overflow: "hidden",
-							textOverflow: "ellipsis",
-							whiteSpace: "nowrap",
-						}}
-					>
-						{formatStoryTitle(storyId, name)}
-					</div>
-					{stage && (
-						<div
-							data-testid="detail-panel-stage"
-							style={{ fontSize: "0.75em", color: "#888" }}
-						>
-							{stageLabel}
-						</div>
-					)}
-					{filteredAgents.length > 0 && (
-						<div
-							data-testid="detail-panel-agent-assignment"
-							style={{
-								display: "flex",
-								alignItems: "center",
-								gap: "6px",
-								marginTop: "4px",
-							}}
-						>
-							<span style={{ fontSize: "0.75em", color: "#666" }}>Agent:</span>
-							<select
-								data-testid="agent-assignment-dropdown"
-								disabled={assigning}
-								value={activeAgentName ?? assignedAgent ?? ""}
-								onChange={(e) => handleAgentAssign(e.target.value)}
-								style={{
-									background: "#1a1a1a",
-									border: "1px solid #444",
-									borderRadius: "4px",
-									color: "#ccc",
-									cursor: assigning ? "not-allowed" : "pointer",
-									fontSize: "0.75em",
-									padding: "2px 6px",
-									opacity: assigning ? 0.6 : 1,
-								}}
-							>
-								<option value="">— none —</option>
-								{filteredAgents.map((a) => {
-									const isRunning =
-										agentInfo?.agent_name === a.name &&
-										agentStatus === "running";
-									const isPending =
-										agentInfo?.agent_name === a.name &&
-										agentStatus === "pending";
-									const statusLabel = isRunning
-										? " — running"
-										: isPending
-											? " — pending"
-											: " — idle";
-									const modelPart = a.model ? ` (${a.model})` : "";
-									return (
-										<option key={a.name} value={a.name}>
-											{a.name}
-											{modelPart}
-											{statusLabel}
-										</option>
-									);
-								})}
-							</select>
-							{assigning && (
-								<span style={{ fontSize: "0.7em", color: "#888" }}>
-									Assigning…
-								</span>
-							)}
-							{assignError && (
-								<span
-									data-testid="agent-assignment-error"
-									style={{ fontSize: "0.7em", color: "#f85149" }}
-								>
-									{assignError}
-								</span>
-							)}
-						</div>
-					)}
-					{filteredAgents.length === 0 && assignedAgent ? (
-						<div
-							data-testid="detail-panel-assigned-agent"
-							style={{ fontSize: "0.75em", color: "#888" }}
-						>
-							Agent: {assignedAgent}
-						</div>
-					) : null}
-				</div>
-				<button
-					type="button"
-					data-testid="detail-panel-close"
-					onClick={onClose}
-					style={{
-						background: "none",
-						border: "1px solid #444",
-						borderRadius: "6px",
-						color: "#aaa",
-						cursor: "pointer",
-						padding: "4px 10px",
-						fontSize: "0.8em",
-						flexShrink: 0,
-					}}
-				>
-					Close
-				</button>
-			</div>
+			<WorkItemDetailPanelHeader
+				storyId={storyId}
+				name={name}
+				stage={stage}
+				assignedAgent={assignedAgent}
+				agentConfig={agentConfig}
+				agentInfo={agentInfo}
+				agentStatus={agentStatus}
+				assigning={assigning}
+				assignError={assignError}
+				onAgentAssign={handleAgentAssign}
+				onClose={onClose}
+			/>

 			{/* Scrollable content area */}
 			<div
@@ -522,7 +258,7 @@ export function WorkItemDetailPanel({
 						{error}
 					</div>
 				)}
-				{!loading && !error && content !== null && (
+				{!loading && !error && content != null && (
 					<div
 						data-testid="detail-panel-content"
 						className="markdown-body"
@@ -549,145 +285,9 @@ export function WorkItemDetailPanel({
 					</div>
 				)}

-				{/* Token Cost section */}
-				<div
-					data-testid="token-cost-section"
-					style={{
-						border: "1px solid #2a2a2a",
-						borderRadius: "8px",
-						padding: "10px 12px",
-						background: "#161616",
-					}}
-				>
-					<div
-						style={{
-							fontWeight: 600,
-							fontSize: "0.8em",
-							color: "#555",
-							marginBottom: "8px",
-						}}
-					>
-						Token Cost
-					</div>
-					{tokenCost && tokenCost.agents.length > 0 ? (
-						<div data-testid="token-cost-content">
-							<div
-								style={{
-									fontSize: "0.75em",
-									color: "#888",
-									marginBottom: "8px",
-								}}
-							>
-								Total:{" "}
-								<span data-testid="token-cost-total" style={{ color: "#ccc" }}>
-									${tokenCost.total_cost_usd.toFixed(6)}
-								</span>
-							</div>
-							{tokenCost.agents.map((agent: AgentCostEntry) => (
-								<div
-									key={agent.agent_name}
-									data-testid={`token-cost-agent-${agent.agent_name}`}
-									style={{
-										fontSize: "0.75em",
-										color: "#888",
-										padding: "4px 0",
-										borderTop: "1px solid #222",
-									}}
-								>
-									<div
-										style={{
-											display: "flex",
-											justifyContent: "space-between",
-											marginBottom: "2px",
-										}}
-									>
-										<span style={{ color: "#ccc", fontWeight: 600 }}>
-											{agent.agent_name}
-											{agent.model ? (
-												<span
-													style={{ color: "#666", fontWeight: 400 }}
-												>{` (${agent.model})`}</span>
-											) : null}
-										</span>
-										<span style={{ color: "#aaa" }}>
-											${agent.total_cost_usd.toFixed(6)}
-										</span>
-									</div>
-									<div style={{ color: "#555" }}>
-										in {agent.input_tokens.toLocaleString()} / out{" "}
-										{agent.output_tokens.toLocaleString()}
-										{(agent.cache_creation_input_tokens > 0 ||
-											agent.cache_read_input_tokens > 0) && (
-											<>
-												{" "}
-												/ cache +
-												{agent.cache_creation_input_tokens.toLocaleString()}{" "}
-												read {agent.cache_read_input_tokens.toLocaleString()}
-											</>
-										)}
-									</div>
-								</div>
-							))}
-						</div>
-					) : (
-						<div
-							data-testid="token-cost-empty"
-							style={{ fontSize: "0.75em", color: "#444" }}
-						>
-							No token data recorded
-						</div>
-					)}
-				</div>
+				<TokenCostSection tokenCost={tokenCost} />

-				{/* Test Results section */}
-				<div
-					data-testid="test-results-section"
-					style={{
-						border: "1px solid #2a2a2a",
-						borderRadius: "8px",
-						padding: "10px 12px",
-						background: "#161616",
-					}}
-				>
-					<div
-						style={{
-							fontWeight: 600,
-							fontSize: "0.8em",
-							color: "#555",
-							marginBottom: "8px",
-						}}
-					>
-						Test Results
-					</div>
-					{hasTestResults ? (
-						<div
-							data-testid="test-results-content"
-							style={{
-								display: "flex",
-								flexDirection: "column",
-								gap: "12px",
-							}}
-						>
-							<TestSection
-								title="Unit Tests"
-								tests={testResults.unit}
-								testId="test-section-unit"
-							/>
-							<TestSection
-								title="Integration Tests"
-								tests={testResults.integration}
-								testId="test-section-integration"
-							/>
-						</div>
-					) : (
-						<div
-							data-testid="test-results-empty"
-							style={{ fontSize: "0.75em", color: "#444" }}
-						>
-							No test results recorded
-						</div>
-					)}
-				</div>
+				<TestResultsSection testResults={testResults} />

 				<div
 					style={{
@@ -696,97 +296,11 @@ export function WorkItemDetailPanel({
 						gap: "8px",
 					}}
 				>
-					{/* Agent Logs section */}
-					{!agentInfo && (
-						<div
-							data-testid="placeholder-agent-logs"
-							style={{
-								border: "1px solid #2a2a2a",
-								borderRadius: "8px",
-								padding: "10px 12px",
-								background: "#161616",
-							}}
-						>
-							<div
-								style={{
-									fontWeight: 600,
-									fontSize: "0.8em",
-									color: "#555",
-									marginBottom: "4px",
-								}}
-							>
-								Agent Logs
-							</div>
-							<div style={{ fontSize: "0.75em", color: "#444" }}>
-								Coming soon
-							</div>
-						</div>
-					)}
-					{agentInfo && (
-						<div
-							data-testid="agent-logs-section"
-							style={{
-								border: "1px solid #2a2a2a",
-								borderRadius: "8px",
-								padding: "10px 12px",
-								background: "#161616",
-							}}
-						>
-							<div
-								style={{
-									display: "flex",
-									alignItems: "center",
-									justifyContent: "space-between",
-									marginBottom: "6px",
-								}}
-							>
-								<div
-									style={{
-										fontWeight: 600,
-										fontSize: "0.8em",
-										color: "#888",
-									}}
-								>
-									Agent Logs
-								</div>
-								{agentStatus && (
-									<div
-										data-testid="agent-status-badge"
-										style={{
-											fontSize: "0.7em",
-											color: STATUS_COLORS[agentStatus],
-											fontWeight: 600,
-										}}
-									>
-										{agentInfo.agent_name} — {agentStatus}
-									</div>
-								)}
-							</div>
-							{agentLog.length > 0 ? (
-								<div
-									data-testid="agent-log-output"
-									style={{
-										fontSize: "0.75em",
-										fontFamily: "monospace",
-										color: "#ccc",
-										whiteSpace: "pre-wrap",
-										wordBreak: "break-word",
-										lineHeight: "1.5",
-										maxHeight: "200px",
-										overflowY: "auto",
-									}}
-								>
-									{agentLog.join("")}
-								</div>
-							) : (
-								<div style={{ fontSize: "0.75em", color: "#444" }}>
-									{agentStatus === "running" || agentStatus === "pending"
-										? "Waiting for output..."
-										: "No output."}
-								</div>
-							)}
-						</div>
-					)}
+					<AgentLogsSection
+						agentInfo={agentInfo}
+						agentStatus={agentStatus}
+						agentLog={agentLog}
+					/>

 					{/* Placeholder sections for future content */}
 					{(
@@ -0,0 +1,184 @@
+/** Header sub-component for WorkItemDetailPanel. */
+
+import type { AgentConfigInfo, AgentInfo, AgentStatusValue } from "../api/agents";
+import { STAGE_LABELS, formatStoryTitle } from "./workItemDetailPanelUtils";
+
+const STAGE_TO_AGENT_STAGE: Record<string, string> = {
+	current: "coder",
+	qa: "qa",
+	merge: "mergemaster",
+};
+
+interface WorkItemDetailPanelHeaderProps {
+	storyId: string;
+	name: string | null;
+	stage: string;
+	assignedAgent: string | null;
+	agentConfig: AgentConfigInfo[];
+	agentInfo: AgentInfo | null;
+	agentStatus: AgentStatusValue | null;
+	assigning: boolean;
+	assignError: string | null;
+	onAgentAssign: (agentName: string) => Promise<void>;
+	onClose: () => void;
+}
+
+/**
+ * Panel header: title, stage label, agent assignment dropdown, and close button.
+ */
+export function WorkItemDetailPanelHeader({
+	storyId,
+	name,
+	stage,
+	assignedAgent,
+	agentConfig,
+	agentInfo,
+	agentStatus,
+	assigning,
+	assignError,
+	onAgentAssign,
+	onClose,
+}: WorkItemDetailPanelHeaderProps) {
+	const stageLabel = STAGE_LABELS[stage] ?? stage;
+	const filteredAgents = agentConfig.filter(
+		(a) => a.stage === STAGE_TO_AGENT_STAGE[stage],
+	);
+	const activeAgentName =
+		agentInfo && (agentStatus === "running" || agentStatus === "pending")
+			? agentInfo.agent_name
+			: null;
+
+	return (
+		<div
+			style={{
+				display: "flex",
+				alignItems: "center",
+				justifyContent: "space-between",
+				padding: "12px 16px",
+				borderBottom: "1px solid #333",
+				flexShrink: 0,
+			}}
+		>
+			<div
+				style={{
+					display: "flex",
+					flexDirection: "column",
+					gap: "2px",
+					minWidth: 0,
+				}}
+			>
+				<div
+					data-testid="detail-panel-title"
+					style={{
+						fontWeight: 600,
+						fontSize: "0.95em",
+						color: "#ececec",
+						overflow: "hidden",
+						textOverflow: "ellipsis",
+						whiteSpace: "nowrap",
+					}}
+				>
+					{formatStoryTitle(storyId, name)}
+				</div>
+				{stage && (
+					<div
+						data-testid="detail-panel-stage"
+						style={{ fontSize: "0.75em", color: "#888" }}
+					>
+						{stageLabel}
+					</div>
+				)}
+				{filteredAgents.length > 0 && (
+					<div
+						data-testid="detail-panel-agent-assignment"
+						style={{
+							display: "flex",
+							alignItems: "center",
+							gap: "6px",
+							marginTop: "4px",
+						}}
+					>
+						<span style={{ fontSize: "0.75em", color: "#666" }}>Agent:</span>
+						<select
+							data-testid="agent-assignment-dropdown"
+							disabled={assigning}
+							value={activeAgentName ?? assignedAgent ?? ""}
+							onChange={(e) => onAgentAssign(e.target.value)}
+							style={{
+								background: "#1a1a1a",
+								border: "1px solid #444",
+								borderRadius: "4px",
+								color: "#ccc",
+								cursor: assigning ? "not-allowed" : "pointer",
+								fontSize: "0.75em",
+								padding: "2px 6px",
+								opacity: assigning ? 0.6 : 1,
+							}}
+						>
+							<option value="">— none —</option>
+							{filteredAgents.map((a) => {
+								const isRunning =
+									agentInfo?.agent_name === a.name &&
+									agentStatus === "running";
+								const isPending =
+									agentInfo?.agent_name === a.name &&
+									agentStatus === "pending";
+								const statusLabel = isRunning
+									? " — running"
+									: isPending
+										? " — pending"
+										: " — idle";
+								const modelPart = a.model ? ` (${a.model})` : "";
+								return (
+									<option key={a.name} value={a.name}>
+										{a.name}
+										{modelPart}
+										{statusLabel}
+									</option>
+								);
+							})}
+						</select>
+						{assigning && (
+							<span style={{ fontSize: "0.7em", color: "#888" }}>
+								Assigning…
+							</span>
+						)}
+						{assignError && (
+							<span
+								data-testid="agent-assignment-error"
+								style={{ fontSize: "0.7em", color: "#f85149" }}
+							>
+								{assignError}
+							</span>
+						)}
+					</div>
+				)}
+				{filteredAgents.length === 0 && assignedAgent ? (
+					<div
+						data-testid="detail-panel-assigned-agent"
+						style={{ fontSize: "0.75em", color: "#888" }}
+					>
+						Agent: {assignedAgent}
+					</div>
+				) : null}
+			</div>
+			<button
+				type="button"
+				data-testid="detail-panel-close"
+				onClick={onClose}
+				style={{
+					background: "none",
+					border: "1px solid #444",
+					borderRadius: "6px",
+					color: "#aaa",
+					cursor: "pointer",
+					padding: "4px 10px",
+					fontSize: "0.8em",
+					flexShrink: 0,
+				}}
+			>
+				Close
+			</button>
+		</div>
+	);
+}
@@ -0,0 +1,72 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`StoryRow > renders #id prefix before the story name 1`] = `
+<div>
+  <div
+    style="display: flex; align-items: center; gap: 8px; padding-top: 4px; padding-bottom: 4px; font-size: 0.82em;"
+  >
+    <span
+      style="padding: 1px 6px; border-radius: 10px; background: rgba(63, 185, 80, 0.133); color: rgb(63, 185, 80); border: 1px solid rgba(63, 185, 80, 0.267); white-space: nowrap; flex-shrink: 0;"
+    >
+      In Progress
+    </span>
+    <span
+      style="color: rgb(230, 237, 243); overflow: hidden; text-overflow: ellipsis; white-space: nowrap;"
+    >
+      <span
+        style="color: rgb(139, 148, 158); font-family: monospace;"
+      >
+        #
+        42
+         
+      </span>
+      Add Feature
+    </span>
+  </div>
+</div>
+`;
+
+exports[`StoryRow > renders #id prefix for a backlogged story 1`] = `
+<div>
+  <div
+    style="display: flex; align-items: center; gap: 8px; padding-top: 4px; padding-bottom: 4px; font-size: 0.82em;"
+  >
+    <span
+      style="padding: 1px 6px; border-radius: 10px; background: rgba(210, 166, 121, 0.133); color: rgb(210, 166, 121); border: 1px solid rgba(210, 166, 121, 0.267); white-space: nowrap; flex-shrink: 0;"
+    >
+      QA
+    </span>
+    <span
+      style="color: rgb(230, 237, 243); overflow: hidden; text-overflow: ellipsis; white-space: nowrap;"
+    >
+      <span
+        style="color: rgb(139, 148, 158); font-family: monospace;"
+      >
+        #
+        7
+         
+      </span>
+      Fix crash on startup
+    </span>
+  </div>
+</div>
+`;
+
+exports[`StoryRow > renders awaiting-slot badge for merge item with no agent 1`] = `
+<div>
+  <div
+    style="display: flex; align-items: center; gap: 8px; padding-top: 4px; padding-bottom: 4px; font-size: 0.82em;"
+  >
+    <span
+      style="padding: 1px 6px; border-radius: 10px; background: rgba(110, 118, 129, 0.133); color: rgb(110, 118, 129); border: 1px solid rgba(110, 118, 129, 0.267); white-space: nowrap; flex-shrink: 0;"
+    >
+      awaiting-slot
+    </span>
+    <span
+      style="color: rgb(230, 237, 243); overflow: hidden; text-overflow: ellipsis; white-space: nowrap;"
+    >
+      Mystery Story
+    </span>
+  </div>
+</div>
+`;
@@ -0,0 +1,62 @@
+/** Shared utility functions and constants for WorkItemDetailPanel sub-components. */
+
+import type { AgentStatusValue } from "../api/agents";
+
+export const STAGE_LABELS: Record<string, string> = {
+	backlog: "Backlog",
+	current: "Current",
+	qa: "QA",
+	merge: "To Merge",
+	done: "Done",
+	archived: "Archived",
+};
+
+export const STATUS_COLORS: Record<AgentStatusValue, string> = {
+	running: "#3fb950",
+	pending: "#e3b341",
+	completed: "#aaa",
+	failed: "#f85149",
+};
+
+/**
+ * Strip YAML front matter and the first H1 heading from story content before
+ * rendering. The panel header already shows the story ID/title, so rendering
+ * them again inside the markdown body creates duplicate information.
+ */
+export function stripDisplayContent(content: string): string {
+	// Guard: content may be undefined/null at runtime if the server response is
+	// missing the field (e.g. a tombstoned story returns an error object).
+	if (!content) return "";
+	let text = content;
+	// Strip YAML front matter (--- ... ---)
+	if (text.startsWith("---")) {
+		const eol = text.indexOf("\n");
+		if (eol !== -1) {
+			const closeIdx = text.indexOf("\n---", eol);
+			if (closeIdx !== -1) {
+				text = text.slice(closeIdx + 4);
+			}
+		}
+	}
+	// Trim leading blank lines left by the front matter
+	text = text.trimStart();
+	// Strip the first H1 heading — it duplicates the panel header title
+	if (text.startsWith("# ")) {
+		const eol = text.indexOf("\n");
+		text = eol !== -1 ? text.slice(eol + 1).trimStart() : "";
+	}
+	return text;
+}
+
+/**
+ * Format the story ID/title line shown in the panel header.
+ * Produces e.g. "Story 454: My Story Name" or "Bug 12: Crash on startup".
+ * Falls back to name or storyId when the pattern doesn't match.
+ */
+export function formatStoryTitle(storyId: string, name: string | null): string {
+	const match = storyId.match(/^(\d+)_([a-z]+)_/);
+	if (!match || !name) return name ?? storyId;
+	const [, number, type] = match;
+	const typeLabel = type.charAt(0).toUpperCase() + type.slice(1);
+	return `${typeLabel} ${number}: ${name}`;
+}
@@ -125,7 +125,7 @@ export function useChatSend({
 							{ role: "user", content: messageText },
 						]);
 						try {
-							const result = await api.botCommand(cmd, args, undefined);
+							const result = await api.botCommand(cmd, args);
 							setMessages((prev: Message[]) => [
 								...prev,
 								{ role: "assistant", content: result.response },
@@ -1,5 +1,9 @@
 import * as React from "react";
-import type { PipelineState, WizardStateData } from "../api/client";
+import type {
+	PipelineState,
+	StatusEvent,
+	WizardStateData,
+} from "../api/client";
 import { api, ChatWebSocket } from "../api/client";
 import type { LogEntry } from "../components/ServerLogsPanel";
 import type { Message } from "../types";
@@ -7,6 +11,9 @@ import { formatToolActivity } from "../utils/chatUtils";

 const { useEffect, useRef, useState } = React;

+/** Connectivity state of the WebSocket connection. */
+export type WsConnectivity = "connecting" | "connected" | "reconnecting" | "failed";
+
 type SetState<T> = React.Dispatch<React.SetStateAction<T>>;

 interface UseChatWebSocketParams {
@@ -28,6 +35,8 @@ interface ReconciliationEvent {
 export interface UseChatWebSocketResult {
 	wsRef: React.MutableRefObject<ChatWebSocket | null>;
 	wsConnected: boolean;
+	wsConnectivity: WsConnectivity;
+	wsDisconnectedAt: Date | null;
 	streamingContent: string;
 	setStreamingContent: SetState<string>;
 	streamingThinking: string;
@@ -68,6 +77,9 @@ export interface UseChatWebSocketResult {
 	} | null>;
 	serverLogs: LogEntry[];
 	storyTokenCosts: Map<string, number>;
+	/** Structured pipeline status events. Each entry preserves the full
+	 * StatusEvent so future UI stories can render per-type icons or filters. */
+	statusEvents: Array<{ receivedAt: string; event: StatusEvent }>;
 }

 export function useChatWebSocket({
@@ -80,6 +92,9 @@ export function useChatWebSocket({
 }: UseChatWebSocketParams): UseChatWebSocketResult {
 	const wsRef = useRef<ChatWebSocket | null>(null);
 	const [wsConnected, setWsConnected] = useState(false);
+	const [wsConnectivity, setWsConnectivity] = useState<WsConnectivity>("connecting");
+	const [wsDisconnectedAt, setWsDisconnectedAt] = useState<Date | null>(null);
+	const failedTimerRef = useRef<number | undefined>(undefined);
 	const [streamingContent, setStreamingContent] = useState("");
 	const [streamingThinking, setStreamingThinking] = useState("");
 	const [activityStatus, setActivityStatus] = useState<string | null>(null);
@@ -96,6 +111,7 @@ export function useChatWebSocket({
 		qa: [],
 		merge: [],
 		done: [],
+		deterministic_merges_in_flight: [],
 	});
 	const [pipelineVersion, setPipelineVersion] = useState(0);
 	const [reconciliationActive, setReconciliationActive] = useState(false);
@@ -116,6 +132,9 @@ export function useChatWebSocket({
 	const [storyTokenCosts, setStoryTokenCosts] = useState<Map<string, number>>(
 		new Map(),
 	);
+	const [statusEvents, setStatusEvents] = useState<
+		Array<{ receivedAt: string; event: StatusEvent }>
+	>([]);

 	useEffect(() => {
 		const ws = new ChatWebSocket();
@@ -151,14 +170,6 @@ export function useChatWebSocket({
 				console.error("WebSocket error:", message);
 				setLoading(false);
 				setActivityStatus(null);
-				const markdownMessage = message.replace(
-					/(https?:\/\/[^\s]+)/g,
-					"[$1]($1)",
-				);
-				setMessages((prev) => [
-					...prev,
-					{ role: "assistant", content: markdownMessage },
-				]);
 				if (queuedMessagesRef.current.length > 0) {
 					const batch = queuedMessagesRef.current.map((item) => item.text);
 					queuedMessagesRef.current = [];
@@ -240,20 +251,44 @@ export function useChatWebSocket({
 			onLogEntry: (timestamp, level, message) => {
 				setServerLogs((prev) => [...prev, { timestamp, level, message }]);
 			},
+			onStatusUpdate: (event) => {
+				// Preserve the structured event and receive timestamp so future stories
+				// can render per-type icons, banners, or filters without format changes.
+				setStatusEvents((prev) => [
+					...prev,
+					{ receivedAt: new Date().toISOString(), event },
+				]);
+			},
 			onConnected: () => {
 				setWsConnected(true);
+				setWsConnectivity("connected");
+				setWsDisconnectedAt(null);
+				window.clearTimeout(failedTimerRef.current);
+				failedTimerRef.current = undefined;
+			},
+			onDisconnected: () => {
+				setWsConnectivity("reconnecting");
+				setWsDisconnectedAt(new Date());
+				window.clearTimeout(failedTimerRef.current);
+				failedTimerRef.current = window.setTimeout(() => {
+					setWsConnectivity("failed");
+				}, 30_000);
 			},
 		});

 		return () => {
 			ws.close();
 			wsRef.current = null;
+			window.clearTimeout(failedTimerRef.current);
+			failedTimerRef.current = undefined;
 		};
 	}, []);

 	return {
 		wsRef,
 		wsConnected,
+		wsConnectivity,
+		wsDisconnectedAt,
 		streamingContent,
 		setStreamingContent,
 		streamingThinking,
@@ -276,5 +311,6 @@ export function useChatWebSocket({
 		setSideQuestion,
 		serverLogs,
 		storyTokenCosts,
+		statusEvents,
 	};
 }
@@ -1,9 +1,12 @@
 import * as React from "react";
 import ReactDOM from "react-dom/client";
 import App from "./App";
+import { ErrorBoundary } from "./components/ErrorBoundary";

 ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render(
 	<React.StrictMode>
-		<App />
+		<ErrorBoundary>
+			<App />
+		</ErrorBoundary>
 	</React.StrictMode>,
 );
@@ -1,6 +1,31 @@
 import "@testing-library/jest-dom";
 import { beforeEach, vi } from "vitest";

+// Default WebSocket stub: every `new WebSocket(...)` immediately fires
+// `onerror` + `onclose` on the next microtask. Without this, `rpcCall` from
+// `./api/rpc` (added by 770's HTTP→read-RPC migration) opens a real jsdom
+// WebSocket that hangs ~9s before firing its connection-failure error,
+// making any test that mounts a component calling `listAgents()` time out.
+// Tests that need real WS responses should override per-test with
+// `vi.stubGlobal("WebSocket", ...)`.
+class FailingWebSocket {
+	onopen: ((ev: Event) => void) | null = null;
+	onmessage: ((ev: MessageEvent) => void) | null = null;
+	onerror: ((ev: Event) => void) | null = null;
+	onclose: ((ev: CloseEvent) => void) | null = null;
+	readyState = 0;
+	constructor(_url: string) {
+		queueMicrotask(() => {
+			this.readyState = 3;
+			this.onerror?.(new Event("error"));
+			this.onclose?.(new CloseEvent("close"));
+		});
+	}
+	send(_data: string) {}
+	close() {}
+}
+vi.stubGlobal("WebSocket", FailingWebSocket);
+
 // Provide a default fetch mock so components that call API endpoints on mount
 // don't throw URL-parse errors in the jsdom test environment.  Tests that need
 // specific responses should mock the relevant `api.*` method as usual.
@@ -10,7 +35,7 @@ beforeEach(() => {
 		vi.fn((input: string | URL | Request) => {
 			const url = typeof input === "string" ? input : input.toString();
 			// Endpoints that return arrays need [] not {} to avoid "not iterable" errors.
-			const arrayEndpoints = ["/agents", "/agents/config"];
+			const arrayEndpoints = ["/agents/config"];
 			const body = arrayEndpoints.some((ep) => url.endsWith(ep))
 				? JSON.stringify([])
 				: JSON.stringify({});
--- a/Show More
+++ b/Show More