Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fb82bd7bca | |||
| b7df5cbe4e | |||
| fe9804b32c | |||
| 8446ab1c71 | |||
| b5054b08d3 | |||
| df32a1542b | |||
| e82602db77 | |||
| 2d6105c778 | |||
| d89940e85b | |||
| 60fceee204 | |||
| 13f7dab5f0 | |||
| f7413cc711 | |||
| b053f14d58 | |||
| 56179d712e | |||
| a06bf6778b | |||
| 1506141155 | |||
| ae69cd50b1 | |||
| 0c23d209a0 | |||
| eac5763e03 | |||
| 6530eeab6d | |||
| 5eb8f2f8a7 | |||
| f9b140add9 | |||
| d4db96f709 | |||
| 5f08573db8 | |||
| da83fcb78d | |||
| f04bdd1f14 | |||
| bb6a6063e8 | |||
| bf813d910b | |||
| 374aa77f27 |
Executable
+23
@@ -0,0 +1,23 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Pre-commit hook installed by huskies.
|
||||
# Runs script/check (fmt-check, clippy, cargo check, source-map-check)
|
||||
# before every commit. Aborts if any gate fails.
|
||||
#
|
||||
# Emergency bypass: git commit --no-verify (see AGENT.md — avoid this)
|
||||
|
||||
REPO_ROOT="$(git rev-parse --show-toplevel)"
|
||||
|
||||
printf '[pre-commit] Running script/check ...\n'
|
||||
OUTPUT=$("$REPO_ROOT/script/check" 2>&1)
|
||||
STATUS=$?
|
||||
|
||||
if [ "$STATUS" -ne 0 ]; then
|
||||
printf '\n=== PRE-COMMIT HOOK FAILED ===\n\n'
|
||||
printf '%s\n' "$OUTPUT"
|
||||
printf '\nFix the issues above, then re-validate with:\n'
|
||||
printf ' script/check\n'
|
||||
printf '\nEmergency bypass (see AGENT.md -- avoid this):\n'
|
||||
printf ' git commit --no-verify\n\n'
|
||||
exit 1
|
||||
fi
|
||||
@@ -29,6 +29,7 @@ timers.json
|
||||
|
||||
# Misc
|
||||
wishlist.md
|
||||
double_timmy_log.md
|
||||
|
||||
# Database
|
||||
pipeline.db
|
||||
|
||||
@@ -172,6 +172,8 @@
|
||||
"interface WizardStepInfo",
|
||||
"interface WizardStateData",
|
||||
"interface AgentAssignment",
|
||||
"type Pipeline",
|
||||
"type Status",
|
||||
"interface PipelineStageItem",
|
||||
"interface PipelineState",
|
||||
"type WsResponse",
|
||||
@@ -200,6 +202,8 @@
|
||||
"interface JoinedAgent",
|
||||
"interface GatewayProject",
|
||||
"interface GatewayInfo",
|
||||
"type Pipeline",
|
||||
"type Status",
|
||||
"interface PipelineItem",
|
||||
"interface ProjectPipelineStatus",
|
||||
"interface AllProjectsPipeline",
|
||||
@@ -517,6 +521,7 @@
|
||||
],
|
||||
"server/src/agents/merge/squash/tests_advanced.rs": [],
|
||||
"server/src/agents/merge/squash/tests_basic.rs": [],
|
||||
"server/src/agents/merge/squash/tests_changelog.rs": [],
|
||||
"server/src/agents/mod.rs": [
|
||||
"mod gates",
|
||||
"mod lifecycle",
|
||||
@@ -558,9 +563,11 @@
|
||||
"fn assign_merge_stage"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/merge_failure_block_subscriber.rs": [
|
||||
"fn reconcile_merge_failure_block",
|
||||
"fn spawn_merge_failure_block_subscriber"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/merge_failure_subscriber.rs": [
|
||||
"fn reconcile_merge_failure",
|
||||
"fn spawn_merge_failure_subscriber"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/mod.rs": [
|
||||
@@ -612,6 +619,7 @@
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/watchdog/tests/orphan_tests.rs": [],
|
||||
"server/src/agents/pool/cost_rollup_subscriber.rs": [
|
||||
"fn reconcile_cost_rollup",
|
||||
"fn spawn_cost_rollup_subscriber",
|
||||
"fn on_terminal_transition"
|
||||
],
|
||||
@@ -730,6 +738,8 @@
|
||||
"server/src/agents/pool/worktree_lifecycle.rs": [
|
||||
"fn spawn_worktree_create_subscriber",
|
||||
"fn spawn_worktree_cleanup_subscriber",
|
||||
"fn reconcile_worktree_create",
|
||||
"fn reconcile_worktree_cleanup",
|
||||
"fn on_coding_transition",
|
||||
"fn on_terminal_transition"
|
||||
],
|
||||
@@ -1390,6 +1400,7 @@
|
||||
"fn qa_mode",
|
||||
"fn item_type",
|
||||
"fn epic",
|
||||
"fn origin",
|
||||
"fn for_test",
|
||||
"type PipelineItemView",
|
||||
"struct NodePresenceView",
|
||||
@@ -1416,6 +1427,7 @@
|
||||
"fn set_agent",
|
||||
"fn set_qa_mode",
|
||||
"fn set_plan_state",
|
||||
"fn set_origin",
|
||||
"fn write_item",
|
||||
"fn write_item_str",
|
||||
"fn set_retry_count",
|
||||
@@ -1548,11 +1560,14 @@
|
||||
"fn recover_half_written_items"
|
||||
],
|
||||
"server/src/db/shadow_write.rs": [
|
||||
"struct UnknownMigration",
|
||||
"fn get_shared_pool",
|
||||
"struct PipelineWriteMsg",
|
||||
"struct PipelineDb",
|
||||
"static PIPELINE_DB",
|
||||
"fn init"
|
||||
"fn init",
|
||||
"fn backup_pre_pipeline_status",
|
||||
"fn check_schema_drift"
|
||||
],
|
||||
"server/src/gateway/mod.rs": [
|
||||
"fn build_gateway_route",
|
||||
@@ -1734,7 +1749,9 @@
|
||||
"fn tool_list_epics",
|
||||
"fn tool_show_epic"
|
||||
],
|
||||
"server/src/http/mcp/story_tools/mod.rs": [],
|
||||
"server/src/http/mcp/story_tools/mod.rs": [
|
||||
"fn build_origin"
|
||||
],
|
||||
"server/src/http/mcp/story_tools/refactor.rs": [
|
||||
"fn tool_create_refactor",
|
||||
"fn tool_list_refactors"
|
||||
@@ -2193,7 +2210,6 @@
|
||||
"server/src/pipeline_state/events.rs": [
|
||||
"fn subscribe_transitions",
|
||||
"fn try_broadcast",
|
||||
"fn replay_current_pipeline_state",
|
||||
"struct TransitionFired",
|
||||
"trait TransitionSubscriber",
|
||||
"struct EventBus",
|
||||
@@ -2210,6 +2226,7 @@
|
||||
"server/src/pipeline_state/subscribers.rs": [
|
||||
"fn format_audit_entry",
|
||||
"struct AuditLogSubscriber",
|
||||
"fn reconcile_audit_log",
|
||||
"fn spawn_audit_log_subscriber",
|
||||
"struct MatrixBotSubscriber",
|
||||
"struct FileRendererSubscriber",
|
||||
@@ -2243,6 +2260,12 @@
|
||||
"enum ArchiveReason",
|
||||
"fn dir_name",
|
||||
"fn from_dir",
|
||||
"enum Pipeline",
|
||||
"fn as_str",
|
||||
"enum Status",
|
||||
"fn as_str",
|
||||
"fn pipeline",
|
||||
"fn status",
|
||||
"enum ExecutionState",
|
||||
"struct PipelineItem",
|
||||
"fn retry_count",
|
||||
@@ -2579,7 +2602,9 @@
|
||||
"fn format_oauth_accounts_exhausted",
|
||||
"fn format_agent_started_notification",
|
||||
"fn format_agent_completed_notification",
|
||||
"fn merge_failure_snippet"
|
||||
"fn format_new_item_notification",
|
||||
"const MERGE_FAILURE_TAIL_LINES",
|
||||
"fn truncate_gate_output"
|
||||
],
|
||||
"server/src/service/notifications/io/listener.rs": [
|
||||
"fn spawn_notification_listener"
|
||||
@@ -2965,6 +2990,7 @@
|
||||
"fn spawn_tick_loop",
|
||||
"fn spawn_gateway_relay",
|
||||
"fn spawn_event_trigger_subscriber",
|
||||
"fn run_reconcile_pass",
|
||||
"fn spawn_startup_reconciliation"
|
||||
],
|
||||
"server/src/state.rs": [
|
||||
|
||||
@@ -50,6 +50,29 @@ export interface AgentAssignment {
|
||||
status: string;
|
||||
}
|
||||
|
||||
/** Display column for a work item — derived server-side from `Stage::pipeline()` (story 1085). */
|
||||
export type Pipeline =
|
||||
| "backlog"
|
||||
| "coding"
|
||||
| "qa"
|
||||
| "merge"
|
||||
| "done"
|
||||
| "closed"
|
||||
| "archived";
|
||||
|
||||
/** Badge/indicator for a work item — derived server-side from `Stage::status()` (story 1085). */
|
||||
export type Status =
|
||||
| "active"
|
||||
| "frozen"
|
||||
| "review-hold"
|
||||
| "blocked"
|
||||
| "merge-failure"
|
||||
| "merge-failure-final"
|
||||
| "abandoned"
|
||||
| "superseded"
|
||||
| "rejected"
|
||||
| "done";
|
||||
|
||||
/** A single item in any pipeline stage (backlog, current, QA, merge, or done). */
|
||||
export interface PipelineStageItem {
|
||||
story_id: string;
|
||||
@@ -57,6 +80,10 @@ export interface PipelineStageItem {
|
||||
error: string | null;
|
||||
merge_failure: string | null;
|
||||
agent: AgentAssignment | null;
|
||||
/** Display column (story 1085); falls back to the bucket name on legacy servers. */
|
||||
pipeline?: Pipeline;
|
||||
/** Display badge (story 1085); falls back to derived `blocked`/`frozen` on legacy servers. */
|
||||
status?: Status;
|
||||
review_hold: boolean | null;
|
||||
qa: string | null;
|
||||
depends_on: number[] | null;
|
||||
@@ -214,6 +241,8 @@ export interface WorkItemContent {
|
||||
stage: string;
|
||||
name: string;
|
||||
agent: string | null;
|
||||
/** Origin JSON string (story 1088), or null for pre-origin items. */
|
||||
origin: string | null;
|
||||
}
|
||||
|
||||
/** Result for a single test case from the server's test runner. */
|
||||
|
||||
@@ -24,10 +24,38 @@ export interface GatewayInfo {
|
||||
projects: GatewayProject[];
|
||||
}
|
||||
|
||||
/** Display column for a work item — derived server-side from `Stage::pipeline()` (story 1085). */
|
||||
export type Pipeline =
|
||||
| "backlog"
|
||||
| "coding"
|
||||
| "qa"
|
||||
| "merge"
|
||||
| "done"
|
||||
| "closed"
|
||||
| "archived";
|
||||
|
||||
/** Badge/indicator for a work item — derived server-side from `Stage::status()` (story 1085). */
|
||||
export type Status =
|
||||
| "active"
|
||||
| "frozen"
|
||||
| "review-hold"
|
||||
| "blocked"
|
||||
| "merge-failure"
|
||||
| "merge-failure-final"
|
||||
| "abandoned"
|
||||
| "superseded"
|
||||
| "rejected"
|
||||
| "done";
|
||||
|
||||
export interface PipelineItem {
|
||||
story_id: string;
|
||||
name: string;
|
||||
/** Legacy stage string (kept for back-compat); prefer `pipeline` + `status`. */
|
||||
stage: string;
|
||||
/** Display column (story 1085). Optional until all servers are upgraded. */
|
||||
pipeline?: Pipeline;
|
||||
/** Display badge (story 1085). Optional until all servers are upgraded. */
|
||||
status?: Status;
|
||||
agent?: { agent_name: string; model: string; status: string } | null;
|
||||
blocked?: boolean;
|
||||
retry_count?: number;
|
||||
|
||||
@@ -69,29 +69,34 @@ describe("StoryRow", () => {
|
||||
expect(screen.getByText("awaiting-slot (#2)")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// AC2: failure kind labels derived from merge_failure string
|
||||
it("shows ConflictDetected for merge_failure with conflict text", () => {
|
||||
// Story 1085: failure kind no longer derived from substring. Items in
|
||||
// the merge_failure / merge_failure_final status get a generic FAILED badge;
|
||||
// the kind detail is exposed via the typed `status` field for callers that
|
||||
// need it (instead of being squeezed into the badge text).
|
||||
it("shows ✕ FAILED badge for merge-failure status", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "73_story_conflict",
|
||||
name: "Conflict Story",
|
||||
stage: "merge",
|
||||
blocked: true,
|
||||
pipeline: "merge",
|
||||
status: "merge-failure",
|
||||
merge_failure: "Merge conflict: conflicts detected",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("ConflictDetected")).toBeInTheDocument();
|
||||
expect(screen.getByText("✕ FAILED")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("shows GatesFailed for merge_failure with quality gates text", () => {
|
||||
it("shows ⛔ FAILED (FINAL) badge for merge-failure-final status", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "74_story_gates",
|
||||
name: "Gates Failed Story",
|
||||
stage: "merge",
|
||||
blocked: true,
|
||||
pipeline: "merge",
|
||||
status: "merge-failure-final",
|
||||
merge_failure: "Quality gates failed: cargo test failed",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("GatesFailed")).toBeInTheDocument();
|
||||
expect(screen.getByText("⛔ FAILED (FINAL)")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("shows RECOVERING badge for merge_failure item with running mergemaster", () => {
|
||||
@@ -163,4 +168,36 @@ describe("StoryRow", () => {
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("⊘ BLOCKED")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Story 1085 AC 4 — Frozen items remain visible in their underlying column
|
||||
// with a frozen indicator. The server hands us `pipeline: "coding"` for a
|
||||
// frozen-while-coding story and the badge is decorated separately.
|
||||
it("shows ❄ FROZEN badge for a frozen item (column stays as underlying pipeline)", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "70_story_frozen_coding",
|
||||
name: "Paused Coding Story",
|
||||
stage: "current",
|
||||
pipeline: "coding",
|
||||
status: "frozen",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("❄ FROZEN")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Story 1085 AC 4 (subsumes 1052) — Done items must never get a
|
||||
// MergeFailure indicator, even if a stale `merge_failure` string is present.
|
||||
it("done items render Done badge, never MergeFailure", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "71_story_done",
|
||||
name: "Completed Story",
|
||||
stage: "done",
|
||||
pipeline: "done",
|
||||
status: "done",
|
||||
merge_failure: "ignored stale string",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("Done")).toBeInTheDocument();
|
||||
expect(screen.queryByText("✕ FAILED")).not.toBeInTheDocument();
|
||||
expect(screen.queryByText(/FAILED/)).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -14,9 +14,42 @@ import {
|
||||
type JoinedAgent,
|
||||
type GatewayProject,
|
||||
type AllProjectsPipeline,
|
||||
type Pipeline,
|
||||
type PipelineItem,
|
||||
type Status,
|
||||
} from "../api/gateway";
|
||||
|
||||
/// Resolve an item's pipeline column. Servers running the new (story 1085)
|
||||
/// backend send `pipeline`; older servers only send `stage` so we fall back to
|
||||
/// mapping the bucket name onto the new column vocabulary.
|
||||
function itemPipeline(item: PipelineItem): Pipeline {
|
||||
if (item.pipeline) return item.pipeline;
|
||||
switch (item.stage) {
|
||||
case "current":
|
||||
return "coding";
|
||||
case "qa":
|
||||
return "qa";
|
||||
case "merge":
|
||||
return "merge";
|
||||
case "done":
|
||||
return "done";
|
||||
case "archived":
|
||||
return "archived";
|
||||
default:
|
||||
return "backlog";
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve an item's badge. Falls back to `merge_failure`/`blocked` on
|
||||
/// legacy servers that don't yet emit `status`.
|
||||
function itemStatus(item: PipelineItem): Status {
|
||||
if (item.status) return item.status;
|
||||
if (item.merge_failure) return "merge-failure";
|
||||
if (item.blocked) return "blocked";
|
||||
if (item.stage === "done") return "done";
|
||||
return "active";
|
||||
}
|
||||
|
||||
const { useCallback, useEffect, useRef, useState } = React;
|
||||
|
||||
/// Seconds of silence before an agent is considered disconnected.
|
||||
@@ -48,72 +81,86 @@ const STATUS_LABELS: Record<AgentStatus, string> = {
|
||||
disconnected: "Disconnected",
|
||||
};
|
||||
|
||||
const STAGE_COLORS: Record<string, string> = {
|
||||
const PIPELINE_COLORS: Record<Pipeline, string> = {
|
||||
backlog: "#8b949e",
|
||||
current: "#3fb950",
|
||||
coding: "#3fb950",
|
||||
qa: "#d2a679",
|
||||
merge: "#79c0ff",
|
||||
done: "#6e7681",
|
||||
closed: "#6e7681",
|
||||
archived: "#6e7681",
|
||||
};
|
||||
|
||||
const STAGE_LABELS: Record<string, string> = {
|
||||
const PIPELINE_LABELS: Record<Pipeline, string> = {
|
||||
backlog: "Backlog",
|
||||
current: "In Progress",
|
||||
coding: "In Progress",
|
||||
qa: "QA",
|
||||
merge: "Merging",
|
||||
done: "Done",
|
||||
closed: "Closed",
|
||||
archived: "Archived",
|
||||
};
|
||||
|
||||
/// Derive a short label from a merge failure string based on the failure kind.
|
||||
function mergeFailureKindLabel(failure: string): string {
|
||||
if (failure.includes("Merge conflict") || failure.includes("CONFLICT")) {
|
||||
return "ConflictDetected";
|
||||
}
|
||||
if (failure.includes("Quality gates failed") || failure.includes("gates failed")) {
|
||||
return "GatesFailed";
|
||||
}
|
||||
if (failure.includes("no code changes") || failure.includes("empty diff")) {
|
||||
return "EmptyDiff";
|
||||
}
|
||||
if (failure.includes("No commits")) {
|
||||
return "NoCommits";
|
||||
}
|
||||
return "✕ FAILED";
|
||||
}
|
||||
|
||||
/// A single story row inside a project pipeline card.
|
||||
/** Render one story row in a gateway-aggregate panel: `#<id> <name>` with stage badge. */
|
||||
/** Render one story row in a gateway-aggregate panel: `#<id> <name>` with status badge. */
|
||||
export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQueuePos?: number }) {
|
||||
const isStuck = item.merge_failure != null || item.blocked;
|
||||
const isMergeActive = item.stage === "merge" && !isStuck && item.agent?.status === "running";
|
||||
const pipeline = itemPipeline(item);
|
||||
const status = itemStatus(item);
|
||||
const agentStatus = item.agent?.status;
|
||||
|
||||
let color: string;
|
||||
let label: string;
|
||||
let frozenPrefix = "";
|
||||
|
||||
if (isMergeActive) {
|
||||
color = "#58a6ff";
|
||||
label = "▶ MERGING";
|
||||
} else if (isStuck) {
|
||||
const agentStatus = item.agent?.status;
|
||||
// Frozen items keep their underlying pipeline column but get a ❄️ badge.
|
||||
// (AC 4 — story 1085, subsumes the freeze-hides-item bug.)
|
||||
if (status === "frozen") {
|
||||
color = "#79c0ff";
|
||||
label = "❄ FROZEN";
|
||||
frozenPrefix = "❄ ";
|
||||
} else if (status === "merge-failure" || status === "merge-failure-final") {
|
||||
// Done items never reach this branch — `Stage::status()` returns
|
||||
// `Status::Done` for done items (AC 4).
|
||||
if (agentStatus === "running") {
|
||||
color = "#e3b341";
|
||||
label = "⟳ RECOVERING";
|
||||
} else if (agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else if (item.merge_failure != null) {
|
||||
} else {
|
||||
color = "#f85149";
|
||||
label = mergeFailureKindLabel(item.merge_failure);
|
||||
label = status === "merge-failure-final" ? "⛔ FAILED (FINAL)" : "✕ FAILED";
|
||||
}
|
||||
} else if (status === "blocked") {
|
||||
if (agentStatus === "running") {
|
||||
color = "#e3b341";
|
||||
label = "⟳ RECOVERING";
|
||||
} else if (agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else {
|
||||
color = "#f85149";
|
||||
label = "⊘ BLOCKED";
|
||||
}
|
||||
} else if (item.stage === "merge" && item.agent?.status === "pending") {
|
||||
} else if (status === "review-hold") {
|
||||
color = "#d2a679";
|
||||
label = "REVIEW HOLD";
|
||||
} else if (status === "abandoned") {
|
||||
color = "#6e7681";
|
||||
label = "ABANDONED";
|
||||
} else if (status === "superseded") {
|
||||
color = "#6e7681";
|
||||
label = "SUPERSEDED";
|
||||
} else if (status === "rejected") {
|
||||
color = "#f85149";
|
||||
label = "REJECTED";
|
||||
} else if (pipeline === "merge" && agentStatus === "running") {
|
||||
color = "#58a6ff";
|
||||
label = "▶ MERGING";
|
||||
} else if (pipeline === "merge" && agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else if (item.stage === "merge") {
|
||||
} else if (pipeline === "merge") {
|
||||
color = "#6e7681";
|
||||
if (mergeQueuePos === 1) {
|
||||
label = "NEXT IN QUEUE";
|
||||
@@ -123,10 +170,11 @@ export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQue
|
||||
label = "awaiting-slot";
|
||||
}
|
||||
} else {
|
||||
color = STAGE_COLORS[item.stage] ?? "#8b949e";
|
||||
label = STAGE_LABELS[item.stage] ?? item.stage;
|
||||
color = PIPELINE_COLORS[pipeline] ?? "#8b949e";
|
||||
label = PIPELINE_LABELS[pipeline] ?? pipeline;
|
||||
}
|
||||
|
||||
const isMergeActive = pipeline === "merge" && status === "active" && agentStatus === "running";
|
||||
const idNum = item.story_id.match(/^(\d+)/)?.[1];
|
||||
|
||||
return (
|
||||
@@ -158,7 +206,7 @@ export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQue
|
||||
</span>
|
||||
<span style={{ color: "#e6edf3", overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap" }}>
|
||||
{idNum && <span style={{ color: "#8b949e", fontFamily: "monospace" }}>#{idNum}{" "}</span>}
|
||||
{item.name}
|
||||
{frozenPrefix}{item.name}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
@@ -388,6 +436,8 @@ function aggregateItems(
|
||||
story_id: b.story_id,
|
||||
name: b.name,
|
||||
stage: "backlog",
|
||||
pipeline: "backlog" as Pipeline,
|
||||
status: "active" as Status,
|
||||
})),
|
||||
};
|
||||
}
|
||||
@@ -395,14 +445,14 @@ function aggregateItems(
|
||||
return {
|
||||
project,
|
||||
items: (status.active ?? []).filter(
|
||||
(i) => i.stage !== "done",
|
||||
(i) => itemPipeline(i) !== "done",
|
||||
),
|
||||
};
|
||||
}
|
||||
if (tab === "done") {
|
||||
return {
|
||||
project,
|
||||
items: (status.active ?? []).filter((i) => i.stage === "done"),
|
||||
items: (status.active ?? []).filter((i) => itemPipeline(i) === "done"),
|
||||
};
|
||||
}
|
||||
// archived
|
||||
@@ -419,12 +469,12 @@ function tabCount(pipeline: AllProjectsPipeline, tab: TabKey): number {
|
||||
if (tab === "in-progress") {
|
||||
return (
|
||||
sum +
|
||||
(status.active ?? []).filter((i) => i.stage !== "done").length
|
||||
(status.active ?? []).filter((i) => itemPipeline(i) !== "done").length
|
||||
);
|
||||
}
|
||||
if (tab === "done") {
|
||||
return (
|
||||
sum + (status.active ?? []).filter((i) => i.stage === "done").length
|
||||
sum + (status.active ?? []).filter((i) => itemPipeline(i) === "done").length
|
||||
);
|
||||
}
|
||||
return sum + (status.archived ?? []).length;
|
||||
@@ -518,13 +568,16 @@ function ProjectStoryRow({
|
||||
);
|
||||
}
|
||||
|
||||
const IN_PROGRESS_STAGE_LABELS: Record<string, string> = {
|
||||
current: "Coding",
|
||||
const IN_PROGRESS_PIPELINE_LABELS: Record<"coding" | "qa" | "merge", string> = {
|
||||
coding: "Coding",
|
||||
qa: "QA",
|
||||
merge: "Merging",
|
||||
};
|
||||
|
||||
/// In Progress tab content — items grouped by stage (coding / qa / merging).
|
||||
/// In Progress tab content — items grouped by their `pipeline` column.
|
||||
///
|
||||
/// Frozen items appear in the column corresponding to their underlying
|
||||
/// `Stage::resume_to` (server-side), so they always show up in-place.
|
||||
function InProgressTabContent({
|
||||
groups,
|
||||
}: {
|
||||
@@ -535,25 +588,22 @@ function InProgressTabContent({
|
||||
);
|
||||
const multiProject = new Set(allItems.map((x) => x.project)).size > 1;
|
||||
|
||||
const byStage = {
|
||||
current: allItems.filter((x) => x.item.stage === "current"),
|
||||
qa: allItems.filter((x) => x.item.stage === "qa"),
|
||||
merge: allItems.filter((x) => x.item.stage === "merge"),
|
||||
const byPipeline = {
|
||||
coding: allItems.filter((x) => itemPipeline(x.item) === "coding"),
|
||||
qa: allItems.filter((x) => itemPipeline(x.item) === "qa"),
|
||||
merge: allItems.filter((x) => itemPipeline(x.item) === "merge"),
|
||||
};
|
||||
|
||||
const stages = (["current", "qa", "merge"] as const).filter(
|
||||
(s) => byStage[s].length > 0,
|
||||
const pipelines = (["coding", "qa", "merge"] as const).filter(
|
||||
(p) => byPipeline[p].length > 0,
|
||||
);
|
||||
|
||||
// Compute queue position among clean awaiting merge items (Stage::Merge, no failure, no running agent).
|
||||
// Compute queue position among "clean" awaiting-merge items: pipeline=merge,
|
||||
// status=active, and no agent currently running.
|
||||
const mergeQueuePosMap = new Map<string, number>();
|
||||
let queuePos = 0;
|
||||
for (const { project, item } of byStage.merge) {
|
||||
if (
|
||||
!item.blocked &&
|
||||
!item.merge_failure &&
|
||||
item.agent?.status !== "running"
|
||||
) {
|
||||
for (const { project, item } of byPipeline.merge) {
|
||||
if (itemStatus(item) === "active" && item.agent?.status !== "running") {
|
||||
queuePos += 1;
|
||||
mergeQueuePosMap.set(`${project}:${item.story_id}`, queuePos);
|
||||
}
|
||||
@@ -569,33 +619,33 @@ function InProgressTabContent({
|
||||
|
||||
return (
|
||||
<div>
|
||||
{stages.map((stage) => (
|
||||
<div key={stage} style={{ marginBottom: "20px" }}>
|
||||
{pipelines.map((p) => (
|
||||
<div key={p} style={{ marginBottom: "20px" }}>
|
||||
<div
|
||||
style={{
|
||||
fontSize: "0.8em",
|
||||
fontWeight: 600,
|
||||
color: STAGE_COLORS[stage] ?? "#8b949e",
|
||||
color: PIPELINE_COLORS[p] ?? "#8b949e",
|
||||
textTransform: "uppercase",
|
||||
letterSpacing: "0.06em",
|
||||
marginBottom: "8px",
|
||||
paddingBottom: "4px",
|
||||
borderBottom: `1px solid ${STAGE_COLORS[stage] ?? "#8b949e"}33`,
|
||||
borderBottom: `1px solid ${PIPELINE_COLORS[p] ?? "#8b949e"}33`,
|
||||
}}
|
||||
>
|
||||
{IN_PROGRESS_STAGE_LABELS[stage]}{" "}
|
||||
{IN_PROGRESS_PIPELINE_LABELS[p]}{" "}
|
||||
<span style={{ color: "#6e7681" }}>
|
||||
({byStage[stage].length})
|
||||
({byPipeline[p].length})
|
||||
</span>
|
||||
</div>
|
||||
{byStage[stage].map(({ project, item }) => (
|
||||
{byPipeline[p].map(({ project, item }) => (
|
||||
<ProjectStoryRow
|
||||
key={`${project}:${item.story_id}`}
|
||||
project={project}
|
||||
item={item}
|
||||
showProject={multiProject}
|
||||
mergeQueuePos={
|
||||
stage === "merge"
|
||||
p === "merge"
|
||||
? mergeQueuePosMap.get(`${project}:${item.story_id}`)
|
||||
: undefined
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
|
||||
@@ -43,6 +43,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
const sampleTestResults: TestResultsResponse = {
|
||||
|
||||
@@ -42,6 +42,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
@@ -127,6 +128,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -146,6 +148,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -164,6 +167,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -186,6 +190,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
|
||||
@@ -20,6 +20,26 @@ import { stripDisplayContent } from "./workItemDetailPanelUtils";
|
||||
|
||||
const { useCallback, useEffect, useRef, useState } = React;
|
||||
|
||||
/** Parse and format an origin JSON string for display. */
|
||||
function formatOrigin(origin: string | null): string {
|
||||
if (!origin) return "unknown";
|
||||
try {
|
||||
const obj = JSON.parse(origin) as {
|
||||
kind?: string;
|
||||
id?: string;
|
||||
ts?: number;
|
||||
};
|
||||
const kind = obj.kind ?? "unknown";
|
||||
const id = obj.id ? ` (${obj.id})` : "";
|
||||
const ts = obj.ts
|
||||
? ` at ${new Date(obj.ts * 1000).toISOString().replace("T", " ").slice(0, 19)}Z`
|
||||
: "";
|
||||
return `${kind}${id}${ts}`;
|
||||
} catch {
|
||||
return origin;
|
||||
}
|
||||
}
|
||||
|
||||
interface WorkItemDetailPanelProps {
|
||||
storyId: string;
|
||||
pipelineVersion: number;
|
||||
@@ -38,6 +58,7 @@ export function WorkItemDetailPanel({
|
||||
const [stage, setStage] = useState<string>("");
|
||||
const [name, setName] = useState<string | null>(null);
|
||||
const [assignedAgent, setAssignedAgent] = useState<string | null>(null);
|
||||
const [origin, setOrigin] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [agentInfo, setAgentInfo] = useState<AgentInfo | null>(null);
|
||||
@@ -63,6 +84,7 @@ export function WorkItemDetailPanel({
|
||||
setStage(data.stage);
|
||||
setName(data.name);
|
||||
setAssignedAgent(data.agent);
|
||||
setOrigin(data.origin);
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
setError(err instanceof Error ? err.message : "Failed to load content");
|
||||
@@ -289,6 +311,19 @@ export function WorkItemDetailPanel({
|
||||
|
||||
<TestResultsSection testResults={testResults} />
|
||||
|
||||
{!loading && (
|
||||
<div
|
||||
data-testid="detail-panel-origin"
|
||||
style={{
|
||||
fontSize: "0.75em",
|
||||
color: "#555",
|
||||
fontFamily: "monospace",
|
||||
}}
|
||||
>
|
||||
origin: {formatOrigin(origin)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div
|
||||
style={{
|
||||
display: "flex",
|
||||
|
||||
+30
-6
@@ -124,19 +124,43 @@ else
|
||||
fi
|
||||
|
||||
# Categorise merged work items and format names.
|
||||
# Supports two subject formats (after stripping the "huskies: merge " prefix):
|
||||
# New: "1063 story Human Readable Name"
|
||||
# Old: "1063_story_human_readable_name"
|
||||
FEATURES=""
|
||||
FIXES=""
|
||||
REFACTORS=""
|
||||
while IFS= read -r item; do
|
||||
[ -z "$item" ] && continue
|
||||
# Strip the numeric prefix and type to get the human name.
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+_(story|bug|refactor|spike)_//' | tr '_' ' ')
|
||||
|
||||
# Extract the leading numeric ID (present in both formats).
|
||||
id=$(echo "$item" | grep -oE '^[0-9]+')
|
||||
|
||||
# Detect format and extract human name + type word.
|
||||
if echo "$item" | grep -qE '^[0-9]+ (story|bug|refactor|spike|epic) '; then
|
||||
# New format: "1063 story Human Name Here"
|
||||
type_word=$(echo "$item" | sed -E 's/^[0-9]+ ([a-z]+) .*/\1/')
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+ [a-z]+ //')
|
||||
else
|
||||
# Legacy slug format: "1063_story_human_name_here"
|
||||
type_word=$(echo "$item" | sed -E 's/^[0-9]+_([a-z]+)_.*/\1/')
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+_(story|bug|refactor|spike|epic)_//' | tr '_' ' ')
|
||||
fi
|
||||
|
||||
# Capitalise first letter.
|
||||
name="$(echo "${name:0:1}" | tr '[:lower:]' '[:upper:]')${name:1}"
|
||||
case "$item" in
|
||||
*_bug_*) FIXES="${FIXES}- ${name}\n" ;;
|
||||
*_refactor_*) REFACTORS="${REFACTORS}- ${name}\n" ;;
|
||||
*) FEATURES="${FEATURES}- ${name}\n" ;;
|
||||
|
||||
# Format as "Name (ID)" when a numeric ID was found, plain name otherwise.
|
||||
if [ -n "$id" ]; then
|
||||
entry="${name} (${id})"
|
||||
else
|
||||
entry="${name}"
|
||||
fi
|
||||
|
||||
case "$type_word" in
|
||||
bug) FIXES="${FIXES}- ${entry}\n" ;;
|
||||
refactor) REFACTORS="${REFACTORS}- ${entry}\n" ;;
|
||||
*) FEATURES="${FEATURES}- ${entry}\n" ;;
|
||||
esac
|
||||
done <<< "$MERGED_RAW"
|
||||
|
||||
|
||||
+16
-1
@@ -53,7 +53,22 @@ cargo run --manifest-path "$PROJECT_ROOT/Cargo.toml" -p source-map-gen --bin sou
|
||||
echo "=== Building frontend ==="
|
||||
if [ -d "$PROJECT_ROOT/frontend" ]; then
|
||||
cd "$PROJECT_ROOT/frontend"
|
||||
npm install
|
||||
# The merge gate runs in workspaces whose pre-existing `node_modules` was
|
||||
# populated by an earlier `npm install --omit=dev` (or a partial install).
|
||||
# In that state `npm install` reports "up to date, audited N packages"
|
||||
# without actually adding the missing devDependencies, so the subsequent
|
||||
# `tsc && vite build` fails with `sh: 1: tsc: not found`.
|
||||
#
|
||||
# Repair the install when typescript isn't reachable (story 1086 merge gate
|
||||
# regression). We probe the on-disk binary rather than relying on PATH so
|
||||
# this also covers the case where `node_modules/.bin/` is missing.
|
||||
if [ ! -x node_modules/typescript/bin/tsc ]; then
|
||||
echo "[script/test] node_modules missing typescript; performing clean install."
|
||||
rm -rf node_modules
|
||||
npm install --include=dev
|
||||
else
|
||||
npm install --include=dev
|
||||
fi
|
||||
npm run build
|
||||
cd "$PROJECT_ROOT"
|
||||
else
|
||||
|
||||
@@ -17,6 +17,20 @@ fn run(cmd: &str, args: &[&str], dir: &Path) {
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
println!("cargo:rerun-if-env-changed=PROFILE");
|
||||
|
||||
// Embed the current git commit hash at compile time so `get_version` always
|
||||
// reflects the binary that is actually running, not a potentially-stale file.
|
||||
println!("cargo:rerun-if-changed=../.git/HEAD");
|
||||
println!("cargo:rerun-if-changed=../.git/refs/");
|
||||
let git_hash = std::process::Command::new("git")
|
||||
.args(["rev-parse", "--short", "HEAD"])
|
||||
.output()
|
||||
.ok()
|
||||
.filter(|o| o.status.success())
|
||||
.and_then(|o| String::from_utf8(o.stdout).ok())
|
||||
.map(|s| s.trim().to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
println!("cargo:rustc-env=BUILD_GIT_HASH={git_hash}");
|
||||
println!("cargo:rerun-if-changed=../frontend/package.json");
|
||||
println!("cargo:rerun-if-changed=../frontend/package-lock.json");
|
||||
println!("cargo:rerun-if-changed=../frontend/vite.config.ts");
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
-- Story 1087: split the legacy `stage` column on `pipeline_items` into a
|
||||
-- `(pipeline, status)` pair so the read side no longer needs to re-derive the
|
||||
-- display column and badge from the stage string.
|
||||
--
|
||||
-- The migration is additive: `stage` is retained for backwards compatibility
|
||||
-- while remaining Step E callers are migrated. The backup of `pipeline.db`
|
||||
-- written by `shadow_write::init` immediately before this migration runs is
|
||||
-- the recovery path if the backfill produces an unexpected projection.
|
||||
|
||||
ALTER TABLE pipeline_items ADD COLUMN pipeline TEXT NOT NULL DEFAULT '';
|
||||
ALTER TABLE pipeline_items ADD COLUMN status TEXT NOT NULL DEFAULT '';
|
||||
|
||||
-- Backfill `pipeline` from the existing `stage` column. Every wire-form
|
||||
-- stage string emitted by `stage_dir_name` maps to exactly one of the seven
|
||||
-- Pipeline columns defined in `pipeline_state::types::Pipeline::as_str`.
|
||||
-- Legacy directory strings (`1_backlog`, `2_current`, ...) are also handled
|
||||
-- so that databases predating story 934 migrate cleanly.
|
||||
UPDATE pipeline_items SET pipeline = CASE stage
|
||||
WHEN 'upcoming' THEN 'backlog'
|
||||
WHEN 'backlog' THEN 'backlog'
|
||||
WHEN '1_backlog' THEN 'backlog'
|
||||
WHEN 'coding' THEN 'coding'
|
||||
WHEN 'blocked' THEN 'coding'
|
||||
WHEN '2_current' THEN 'coding'
|
||||
WHEN 'qa' THEN 'qa'
|
||||
WHEN 'review_hold' THEN 'qa'
|
||||
WHEN '3_qa' THEN 'qa'
|
||||
WHEN 'merge' THEN 'merge'
|
||||
WHEN 'merge_failure' THEN 'merge'
|
||||
WHEN 'merge_failure_final' THEN 'merge'
|
||||
WHEN '4_merge' THEN 'merge'
|
||||
WHEN 'done' THEN 'done'
|
||||
WHEN '5_done' THEN 'done'
|
||||
WHEN 'abandoned' THEN 'closed'
|
||||
WHEN 'superseded' THEN 'closed'
|
||||
WHEN 'rejected' THEN 'closed'
|
||||
WHEN 'archived' THEN 'archived'
|
||||
WHEN '6_archived' THEN 'archived'
|
||||
WHEN 'frozen' THEN 'coding'
|
||||
ELSE ''
|
||||
END;
|
||||
|
||||
-- Backfill `status` (badge) from the existing `stage` column.
|
||||
UPDATE pipeline_items SET status = CASE stage
|
||||
WHEN 'frozen' THEN 'frozen'
|
||||
WHEN 'review_hold' THEN 'review-hold'
|
||||
WHEN 'blocked' THEN 'blocked'
|
||||
WHEN 'merge_failure' THEN 'merge-failure'
|
||||
WHEN 'merge_failure_final' THEN 'merge-failure-final'
|
||||
WHEN 'abandoned' THEN 'abandoned'
|
||||
WHEN 'superseded' THEN 'superseded'
|
||||
WHEN 'rejected' THEN 'rejected'
|
||||
WHEN 'done' THEN 'done'
|
||||
WHEN '5_done' THEN 'done'
|
||||
ELSE 'active'
|
||||
END;
|
||||
@@ -198,10 +198,13 @@ pub async fn run(
|
||||
)
|
||||
};
|
||||
|
||||
// Replay current pipeline state so subscribers (worktree lifecycle, merge-failure
|
||||
// auto-spawn) react to any stories already in active stages, then auto-assign.
|
||||
slog!("[agent-mode] Replaying current pipeline state.");
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// Reconcile subscriber side effects for the current CRDT state without
|
||||
// flooding the broadcast channel (replaces the former replay_current_pipeline_state call).
|
||||
slog!("[agent-mode] Running startup reconcile pass.");
|
||||
let done_retention = crate::config::ProjectConfig::load(&project_root)
|
||||
.map(|c| std::time::Duration::from_secs(c.watcher.done_retention_secs))
|
||||
.unwrap_or_else(|_| std::time::Duration::from_secs(4 * 3600));
|
||||
crate::startup::tick_loop::run_reconcile_pass(&project_root, &agents, done_retention).await;
|
||||
|
||||
// Run initial auto-assign.
|
||||
slog!("[agent-mode] Initial auto-assign scan.");
|
||||
|
||||
@@ -10,10 +10,12 @@
|
||||
//! - `.huskies/README.md`
|
||||
//! - `.huskies/specs/00_CONTEXT.md`
|
||||
//! - `.huskies/AGENT.md`
|
||||
//! - `.huskies/source-map.json` (up to 200 KB; truncated with a log if larger)
|
||||
//!
|
||||
//! `STACK.md` is intentionally excluded — it is large and changes often; agents
|
||||
//! should grep it on demand.
|
||||
//! `STACK.md` and `.huskies/source-map.json` are intentionally excluded — they
|
||||
//! are large and change often; agents should grep on demand instead. Earlier
|
||||
//! versions of this bundle inlined the source map, which ballooned the orientation
|
||||
//! to ~96 KB and drowned out the workflow rules in AGENT.md; the file is still
|
||||
//! kept on disk for the merge-time `source-map-check` doc-coverage gate.
|
||||
//!
|
||||
//! Behaviour contract:
|
||||
//! - Files that are missing or empty are skipped silently (no error, no section).
|
||||
@@ -33,12 +35,6 @@ const ORIENTATION_FILES: &[&str] = &[
|
||||
".huskies/AGENT.md",
|
||||
];
|
||||
|
||||
/// Path to the source map (relative to project root), appended after AGENT.md.
|
||||
const SOURCE_MAP_REL: &str = ".huskies/source-map.json";
|
||||
|
||||
/// Maximum bytes of source-map content to embed in the prompt.
|
||||
const SOURCE_MAP_BYTE_CAP: usize = 200 * 1024;
|
||||
|
||||
/// Attempt to load the project-local agent prompt by concatenating orientation
|
||||
/// files from the project root.
|
||||
///
|
||||
@@ -60,14 +56,11 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
sections.push((rel_path, trimmed.to_string()));
|
||||
}
|
||||
|
||||
// Read source-map.json (after AGENT.md) with a byte cap.
|
||||
let source_map_content = read_source_map_section(project_root);
|
||||
|
||||
if sections.is_empty() && source_map_content.is_none() {
|
||||
if sections.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut included_files: Vec<&str> = sections.iter().map(|(name, _)| *name).collect();
|
||||
let included_files: Vec<&str> = sections.iter().map(|(name, _)| *name).collect();
|
||||
let mut bundle = String::new();
|
||||
for (i, (name, content)) in sections.iter().enumerate() {
|
||||
if i > 0 {
|
||||
@@ -77,15 +70,6 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
bundle.push_str(content);
|
||||
}
|
||||
|
||||
if let Some(sm) = source_map_content {
|
||||
if !bundle.is_empty() {
|
||||
bundle.push('\n');
|
||||
}
|
||||
bundle.push_str(&format!("=== {SOURCE_MAP_REL} ===\n"));
|
||||
bundle.push_str(&sm);
|
||||
included_files.push(SOURCE_MAP_REL);
|
||||
}
|
||||
|
||||
crate::slog!(
|
||||
"[agents] orientation bundle: {} bytes, files: [{}]",
|
||||
bundle.len(),
|
||||
@@ -95,39 +79,6 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
Some(bundle)
|
||||
}
|
||||
|
||||
/// Read `.huskies/source-map.json` from `project_root`, applying a byte cap.
|
||||
///
|
||||
/// Returns `None` when the file is absent, unreadable, or empty.
|
||||
/// When the content exceeds [`SOURCE_MAP_BYTE_CAP`], truncates at a char
|
||||
/// boundary and logs the truncation.
|
||||
#[allow(clippy::string_slice)] // cap is walked back to a char boundary before slicing
|
||||
fn read_source_map_section(project_root: &Path) -> Option<String> {
|
||||
let path = project_root.join(SOURCE_MAP_REL);
|
||||
let Ok(content) = std::fs::read_to_string(&path) else {
|
||||
return None;
|
||||
};
|
||||
let trimmed = content.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed.len() > SOURCE_MAP_BYTE_CAP {
|
||||
let mut cap = SOURCE_MAP_BYTE_CAP;
|
||||
while cap > 0 && !trimmed.is_char_boundary(cap) {
|
||||
cap -= 1;
|
||||
}
|
||||
crate::slog!(
|
||||
"[agents] source-map.json truncated: {} bytes > {} byte cap; \
|
||||
including first {} bytes",
|
||||
trimmed.len(),
|
||||
SOURCE_MAP_BYTE_CAP,
|
||||
cap
|
||||
);
|
||||
Some(trimmed[..cap].to_string())
|
||||
} else {
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -310,10 +261,13 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── source-map.json tests ────────────────────────────────────────────────
|
||||
// ── source-map.json must NOT be inlined into the bundle ──────────────────
|
||||
// The file is kept on disk for the merge-time source-map-check gate, but
|
||||
// inlining it into every agent spawn ballooned the orientation past 96 KB
|
||||
// and drowned out the workflow rules in AGENT.md.
|
||||
|
||||
#[test]
|
||||
fn source_map_included_after_agent_md() {
|
||||
fn source_map_not_included_even_when_present() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
write_file(
|
||||
@@ -324,92 +278,12 @@ mod tests {
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
result.contains("=== .huskies/source-map.json ==="),
|
||||
"source-map delimiter must be present: {result}"
|
||||
!result.contains("=== .huskies/source-map.json ==="),
|
||||
"source-map must not appear as an orientation section: {result}"
|
||||
);
|
||||
assert!(
|
||||
result.contains(r#""src/lib.rs""#),
|
||||
"source-map content must be present: {result}"
|
||||
);
|
||||
// source-map section must appear after AGENT.md section
|
||||
let agent_pos = result.find("=== .huskies/AGENT.md ===").unwrap();
|
||||
let sm_pos = result.find("=== .huskies/source-map.json ===").unwrap();
|
||||
assert!(
|
||||
sm_pos > agent_pos,
|
||||
"source-map section must come after AGENT.md section"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_missing_skipped_silently() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
// source-map.json intentionally absent
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
!result.contains("source-map.json"),
|
||||
"absent source-map must not create a section: {result}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_empty_skipped_silently() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
write_file(tmp.path(), ".huskies/source-map.json", "");
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
!result.contains("source-map.json"),
|
||||
"empty source-map must not create a section: {result}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_only_returns_some() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
// Only source-map.json present; all orientation files absent.
|
||||
write_file(
|
||||
tmp.path(),
|
||||
".huskies/source-map.json",
|
||||
r#"{"src/main.rs": {}}"#,
|
||||
);
|
||||
|
||||
let result = read_project_local_prompt(tmp.path());
|
||||
assert!(
|
||||
result.is_some(),
|
||||
"source-map alone must produce Some bundle"
|
||||
);
|
||||
assert!(
|
||||
result.unwrap().contains("=== .huskies/source-map.json ==="),
|
||||
"bundle must contain source-map section"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::string_slice)] // sm_start is derived from str::find — always a char boundary
|
||||
fn source_map_truncated_at_byte_cap() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent");
|
||||
// Build content larger than SOURCE_MAP_BYTE_CAP (200 KB).
|
||||
let big = "x".repeat(SOURCE_MAP_BYTE_CAP + 1024);
|
||||
write_file(tmp.path(), ".huskies/source-map.json", &big);
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
result.contains("=== .huskies/source-map.json ==="),
|
||||
"truncated source-map must still produce a section: {result}"
|
||||
);
|
||||
// The content length of just the source-map section must be <= SOURCE_MAP_BYTE_CAP.
|
||||
let sm_start = result.find("=== .huskies/source-map.json ===").unwrap()
|
||||
+ "=== .huskies/source-map.json ===\n".len();
|
||||
let sm_content = &result[sm_start..];
|
||||
assert!(
|
||||
sm_content.len() <= SOURCE_MAP_BYTE_CAP,
|
||||
"source-map section content must be <= {} bytes, got {}",
|
||||
SOURCE_MAP_BYTE_CAP,
|
||||
sm_content.len()
|
||||
!result.contains("src/lib.rs"),
|
||||
"source-map content must not be inlined: {result}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,7 +124,15 @@ pub(crate) fn run_squash_merge(
|
||||
|
||||
// ── Commit in the temporary worktree ──────────────────────────
|
||||
all_output.push_str("=== git commit ===\n");
|
||||
let commit_msg = format!("huskies: merge {story_id}");
|
||||
// Include human-readable name and item type when the CRDT is available.
|
||||
// Falls back to the bare ID when running outside the server (e.g. in tests).
|
||||
let story_label = crate::crdt_state::read_item(story_id)
|
||||
.map(|item| {
|
||||
let type_str = item.item_type().map(|t| t.as_str()).unwrap_or("story");
|
||||
format!(" {} {}", type_str, item.name())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let commit_msg = format!("huskies: merge {story_id}{story_label}");
|
||||
let commit = Command::new("git")
|
||||
.args(["commit", "-m", &commit_msg])
|
||||
.current_dir(&merge_wt_path)
|
||||
@@ -507,3 +515,5 @@ fn run_merge_quality_gates(
|
||||
mod tests_advanced;
|
||||
#[cfg(test)]
|
||||
mod tests_basic;
|
||||
#[cfg(test)]
|
||||
mod tests_changelog;
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
//! Regression tests for changelog entry parsing — both legacy-slug and new-format
|
||||
//! merge commit subjects must resolve to a human-readable "Name (ID)" entry.
|
||||
|
||||
/// Parse a single merge commit subject (after stripping the `huskies: merge ` prefix)
|
||||
/// into `(id, type_word, human_name)`.
|
||||
///
|
||||
/// Returns `None` for subjects that are not recognised merge items.
|
||||
fn parse_changelog_entry(item: &str) -> Option<(String, String, String)> {
|
||||
let item = item.trim();
|
||||
if item.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Extract leading numeric ID present in both formats.
|
||||
let id: String = item.chars().take_while(|c| c.is_ascii_digit()).collect();
|
||||
if id.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Detect format by the character immediately following the digits.
|
||||
// id contains only ASCII digits so id.len() is a valid char boundary.
|
||||
let rest = item.get(id.len()..).unwrap_or("");
|
||||
if let Some(space_rest) = rest.strip_prefix(' ') {
|
||||
// New format: "1063 story Human Name Here"
|
||||
let mut words = space_rest.splitn(2, ' ');
|
||||
let type_word = words.next().unwrap_or("story").to_string();
|
||||
let name = words.next().unwrap_or("").trim().to_string();
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((id, type_word, name))
|
||||
} else if let Some(slug_rest) = rest.strip_prefix('_') {
|
||||
// Legacy slug format: "1063_story_human_name_here"
|
||||
let mut parts = slug_rest.splitn(2, '_');
|
||||
let type_word = parts.next().unwrap_or("story").to_string();
|
||||
let slug = parts.next().unwrap_or("").replace('_', " ");
|
||||
if slug.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((id, type_word, slug))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a parsed entry as "Human Name (ID)".
|
||||
fn format_entry(id: &str, name: &str) -> String {
|
||||
let mut chars = name.chars();
|
||||
let capitalised = match chars.next() {
|
||||
None => String::new(),
|
||||
Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
|
||||
};
|
||||
format!("{capitalised} ({id})")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_story_resolves_to_name_and_id() {
|
||||
let item = "1063 story Tee pipeline events into gateway context";
|
||||
let (id, _type_word, name) = parse_changelog_entry(item).expect("should parse new format");
|
||||
assert_eq!(id, "1063");
|
||||
assert_eq!(
|
||||
format_entry(&id, &name),
|
||||
"Tee pipeline events into gateway context (1063)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_bug_resolves_to_name_and_id() {
|
||||
let item = "999 bug Fix the broken auth token";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse new-format bug");
|
||||
assert_eq!(id, "999");
|
||||
assert_eq!(type_word, "bug");
|
||||
assert_eq!(format_entry(&id, &name), "Fix the broken auth token (999)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_refactor_resolves_to_name_and_id() {
|
||||
let item = "777 refactor Extract config parsing";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse refactor");
|
||||
assert_eq!(type_word, "refactor");
|
||||
assert_eq!(format_entry(&id, &name), "Extract config parsing (777)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_legacy_slug_story_resolves_to_name_and_id() {
|
||||
let item = "1063_story_tee_pipeline_events_into_gateway_context";
|
||||
let (id, _type_word, name) = parse_changelog_entry(item).expect("should parse legacy slug");
|
||||
assert_eq!(id, "1063");
|
||||
assert_eq!(
|
||||
format_entry(&id, &name),
|
||||
"Tee pipeline events into gateway context (1063)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_legacy_slug_bug_resolves_to_name_and_id() {
|
||||
let item = "999_bug_fix_the_broken_auth_token";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse legacy bug slug");
|
||||
assert_eq!(id, "999");
|
||||
assert_eq!(type_word, "bug");
|
||||
assert_eq!(format_entry(&id, &name), "Fix the broken auth token (999)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_mixed_fixture_all_entries_have_human_names() {
|
||||
// Fixture: a mix of legacy-slug and new-format subjects (as they appear
|
||||
// after stripping the "huskies: merge " prefix from the git log).
|
||||
let fixture = [
|
||||
// Legacy slug formats (pre-migration)
|
||||
"1001_story_add_matrix_transport",
|
||||
"1002_bug_fix_crdt_sync_disconnect",
|
||||
"1003_refactor_extract_gateway_config",
|
||||
// New format (post-story-1069)
|
||||
"1050 story Add agent pool auto-assign",
|
||||
"1063 story Tee pipeline events into gateway context",
|
||||
"1064 bug Stop lagged handler re-emitting via same channel",
|
||||
"1065 refactor Move squash merge into own module",
|
||||
];
|
||||
|
||||
for item in &fixture {
|
||||
let result = parse_changelog_entry(item);
|
||||
assert!(result.is_some(), "failed to parse merge subject: {item:?}");
|
||||
let (id, _type_word, name) = result.unwrap();
|
||||
let entry = format_entry(&id, &name);
|
||||
// Every entry must contain the numeric ID in parentheses.
|
||||
assert!(
|
||||
entry.contains(&format!("({id})")),
|
||||
"entry missing numeric ID: {entry:?}"
|
||||
);
|
||||
// Name must not be empty or just whitespace.
|
||||
assert!(
|
||||
!name.trim().is_empty(),
|
||||
"empty human name for item: {item:?}"
|
||||
);
|
||||
// Name must not be a raw slug (contains underscores as word separators).
|
||||
// (Underscores are OK inside words like "auto-assign" but not as spaces.)
|
||||
assert!(
|
||||
!name.contains('_'),
|
||||
"name still contains underscores (slug not decoded): {name:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -569,14 +569,15 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── AC4: startup event replay + pool reconstruction ──────────────────
|
||||
// ── AC4: startup reconcile + pool reconstruction ──────────────────
|
||||
|
||||
/// AC4: Simulates a server restart by seeding the CRDT with a story in
|
||||
/// Coding stage, calling `replay_current_pipeline_state` (the new startup
|
||||
/// path), then `auto_assign_available_work`. Asserts the pool ends in the
|
||||
/// expected state: exactly one agent assigned to the story.
|
||||
/// Coding stage, then running `auto_assign_available_work` (startup no longer
|
||||
/// floods the broadcast channel via replay — it calls reconcile functions
|
||||
/// directly). Asserts the pool ends in the expected state: exactly one agent
|
||||
/// assigned to the story, and a second pass does not double-spawn.
|
||||
#[tokio::test]
|
||||
async fn startup_replay_followed_by_auto_assign_assigns_agent_once() {
|
||||
async fn startup_auto_assign_assigns_agent_once() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".huskies");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
@@ -597,8 +598,7 @@ mod tests {
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
|
||||
// Simulate startup: replay current state, then auto-assign.
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// First auto-assign pass.
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let count_after_first = {
|
||||
@@ -612,8 +612,7 @@ mod tests {
|
||||
.count()
|
||||
};
|
||||
|
||||
// AC3 (idempotency): replaying twice must not double-spawn agents.
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// Second pass (idempotency): must not double-spawn agents.
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let count_after_second = {
|
||||
@@ -629,11 +628,11 @@ mod tests {
|
||||
|
||||
assert!(
|
||||
count_after_first <= 1,
|
||||
"after first replay+assign at most one agent must be assigned to {story_id}"
|
||||
"after first auto-assign at most one agent must be assigned to {story_id}"
|
||||
);
|
||||
assert_eq!(
|
||||
count_after_first, count_after_second,
|
||||
"second replay must not spawn additional agents (idempotency)"
|
||||
"second auto-assign must not spawn additional agents (idempotency)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,29 +1,39 @@
|
||||
//! Backlog promotion: scan `1_backlog/` and promote stories whose `depends_on` are all met.
|
||||
//! Backlog promotion: scan items in `Pipeline::Backlog` and promote stories whose `depends_on` are all met.
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::Pipeline;
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::super::AgentPool;
|
||||
use super::scan::scan_stage_items;
|
||||
use super::story_checks::{check_archived_dependencies, has_unmet_dependencies};
|
||||
|
||||
impl AgentPool {
|
||||
/// Scan `1_backlog/` and promote any story whose `depends_on` are all met.
|
||||
/// Scan items in `Pipeline::Backlog` and promote any story whose `depends_on` are all met.
|
||||
///
|
||||
/// A story is only promoted if it explicitly lists `depends_on` AND every
|
||||
/// listed dependency has reached `5_done` or `6_archived`. Stories with no
|
||||
/// `depends_on` are left in the backlog for human scheduling.
|
||||
/// listed dependency has reached `Pipeline::Done` or `Pipeline::Archived`.
|
||||
/// Stories with no `depends_on` are left in the backlog for human scheduling.
|
||||
///
|
||||
/// **Archived dep semantics:** a dep in `6_archived` counts as satisfied (since
|
||||
/// stories auto-sweep from `5_done` to `6_archived` after 4 hours, and the
|
||||
/// **Archived dep semantics:** a dep in `Pipeline::Archived` counts as satisfied
|
||||
/// (since stories auto-sweep from `Done` to `Archived` after 4 hours, and the
|
||||
/// dependent story would normally already be promoted by then). However, if a
|
||||
/// dep was already in `6_archived` when the dependent story was created (e.g. it
|
||||
/// dep was already archived when the dependent story was created (e.g. it
|
||||
/// was abandoned/superseded before the dependent existed), a prominent warning is
|
||||
/// logged so the user can see the promotion was triggered by an archived dep, not
|
||||
/// a clean completion.
|
||||
pub(super) fn promote_ready_backlog_stories(&self) {
|
||||
let items = scan_stage_items(&Stage::Backlog);
|
||||
// Story 1086: scan by Pipeline column, not Stage variant. Pipeline::Backlog
|
||||
// covers Stage::Upcoming and Stage::Backlog uniformly.
|
||||
let items: Vec<String> = {
|
||||
use std::collections::BTreeSet;
|
||||
let mut ids = BTreeSet::new();
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
if item.stage.pipeline() == Pipeline::Backlog {
|
||||
ids.insert(item.story_id.0.clone());
|
||||
}
|
||||
}
|
||||
ids.into_iter().collect()
|
||||
};
|
||||
for story_id in &items {
|
||||
// Only promote stories that explicitly declare dependencies
|
||||
// (story 929: read from the CRDT register, not YAML).
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, StoryId};
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, Status, StoryId};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -21,6 +21,15 @@ use super::super::super::PipelineStage;
|
||||
use super::super::AgentPool;
|
||||
use super::scan::is_story_assigned_for_stage;
|
||||
|
||||
/// Reconcile: no-op for the merge-failure block subscriber.
|
||||
///
|
||||
/// The block subscriber maintains an in-memory per-story consecutive-failure counter
|
||||
/// that cannot be reconstructed from CRDT state alone (only the current stage is
|
||||
/// stored, not the history of how many times each story failed). Eventual consistency
|
||||
/// is guaranteed by the live subscriber reacting to each new `MergeFailure` event;
|
||||
/// the periodic reconciler cannot add value here without risking spurious blocks.
|
||||
pub(crate) fn reconcile_merge_failure_block() {}
|
||||
|
||||
/// Spawn a background task that blocks stories after N consecutive `MergeFailure` transitions.
|
||||
///
|
||||
/// Subscribes to the pipeline transition broadcast channel and tracks a per-story
|
||||
@@ -86,6 +95,13 @@ fn on_transition(
|
||||
counters: &mut HashMap<StoryId, (u32, MergeFailureKind)>,
|
||||
recovery_running: bool,
|
||||
) {
|
||||
// Story 1086: gate on the typed `Status` projection — `Status::MergeFailure`
|
||||
// is precisely the set of stages we count toward the block threshold. We
|
||||
// still need the variant pattern below to read `kind`.
|
||||
if fired.after.status() != Status::MergeFailure {
|
||||
counters.remove(&fired.story_id);
|
||||
return;
|
||||
}
|
||||
match &fired.after {
|
||||
Stage::MergeFailure { kind, .. } => {
|
||||
if recovery_running {
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::pipeline_state::{MergeFailureKind, Stage};
|
||||
use crate::pipeline_state::{MergeFailureKind, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -17,6 +17,35 @@ use super::super::super::PipelineStage;
|
||||
use super::super::AgentPool;
|
||||
use super::scan::{find_free_agent_for_stage, is_story_assigned_for_stage};
|
||||
|
||||
/// Reconcile: for each story currently in `MergeFailure { kind: ConflictDetected }`,
|
||||
/// ensure a mergemaster agent is running.
|
||||
///
|
||||
/// Idempotent — `on_merge_failure_transition` guards against double-spawning via
|
||||
/// `is_story_assigned_for_stage`. Called by the periodic reconciler so that a Lagged
|
||||
/// startup event never leaves a ConflictDetected story without a recovery agent.
|
||||
pub(crate) async fn reconcile_merge_failure(pool: &Arc<AgentPool>, project_root: &Path) {
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, TransitionFired};
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
// Story 1086: scan via the Status projection; the variant pattern is
|
||||
// still needed to read `kind`.
|
||||
if item.stage.status() != Status::MergeFailure {
|
||||
continue;
|
||||
}
|
||||
if let Stage::MergeFailure { ref kind, .. } = item.stage
|
||||
&& matches!(kind, MergeFailureKind::ConflictDetected(_))
|
||||
{
|
||||
let fired = TransitionFired {
|
||||
story_id: item.story_id.clone(),
|
||||
before: item.stage.clone(),
|
||||
after: item.stage.clone(),
|
||||
event: PipelineEvent::MergeFailed { kind: kind.clone() },
|
||||
at: chrono::Utc::now(),
|
||||
};
|
||||
on_merge_failure_transition(pool, project_root, &fired).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn a background task that auto-spawns mergemaster agents on
|
||||
/// `Stage::MergeFailure { kind: ConflictDetected(_) }` transitions.
|
||||
///
|
||||
@@ -49,6 +78,11 @@ async fn on_merge_failure_transition(
|
||||
project_root: &Path,
|
||||
fired: &crate::pipeline_state::TransitionFired,
|
||||
) {
|
||||
// Story 1086: gate on the typed `Status` projection first; only the
|
||||
// `MergeFailure` kind extraction needs the variant pattern.
|
||||
if fired.after.status() != Status::MergeFailure {
|
||||
return;
|
||||
}
|
||||
let Stage::MergeFailure { ref kind, .. } = fired.after else {
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -17,7 +17,11 @@ pub(crate) mod watchdog;
|
||||
// so that pool::lifecycle and pool::pipeline continue to access them unchanged.
|
||||
pub(super) use scan::{find_free_agent_for_stage, is_agent_free};
|
||||
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_block_subscriber::reconcile_merge_failure_block;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_block_subscriber::spawn_merge_failure_block_subscriber;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_subscriber::reconcile_merge_failure;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_subscriber::spawn_merge_failure_subscriber;
|
||||
|
||||
@@ -187,13 +187,14 @@ pub(super) fn check_agent_limits(
|
||||
),
|
||||
};
|
||||
|
||||
// Mark agent as Failed with termination reason.
|
||||
if let Ok(mut lock) = agents.lock()
|
||||
&& let Some(agent) = lock.get_mut(key)
|
||||
{
|
||||
agent.status = AgentStatus::Failed;
|
||||
agent.termination_reason = Some(reason.clone());
|
||||
}
|
||||
// NOTE: agent status is intentionally NOT updated here. Setting
|
||||
// `status = Failed` before the kill (the previous behaviour)
|
||||
// opened a window where the `start_agent` idempotency check
|
||||
// (which whitelists Running/Pending) would let a fresh spawn
|
||||
// through while the prior PTY child was still alive — directly
|
||||
// causing the concurrent-agents bug we hit on story 1086
|
||||
// (2026-05-15). The caller (`run_watchdog_pass`) is responsible
|
||||
// for: (1) verifying the kill, (2) THEN updating the agent record.
|
||||
|
||||
slog!("[watchdog] Terminating agent '{key}': {reason_str}.");
|
||||
|
||||
|
||||
@@ -9,8 +9,11 @@ mod tests;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use crate::agents::AgentStatus;
|
||||
use crate::config::ProjectConfig;
|
||||
use crate::process_kill::{pids_matching, sigkill_pids_and_verify};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::super::AgentPool;
|
||||
use limits::check_agent_limits;
|
||||
@@ -42,15 +45,71 @@ impl AgentPool {
|
||||
if let Some(root) = project_root {
|
||||
let terminated = check_agent_limits(&self.agents, root);
|
||||
let config = ProjectConfig::load(root).unwrap_or_default();
|
||||
for (key, _reason) in &terminated {
|
||||
// Kill the PTY child and abort the task, same as stop_agent.
|
||||
for (key, reason) in &terminated {
|
||||
// Step 1: snapshot the agent's worktree path so we can find every
|
||||
// process running in it (claude + any subprocesses). This must
|
||||
// happen BEFORE we mutate the agent record so we can read the
|
||||
// worktree info safely.
|
||||
let worktree_path = self.agents.lock().ok().and_then(|lock| {
|
||||
lock.get(key)
|
||||
.and_then(|a| a.worktree_info.as_ref().map(|wt| wt.path.clone()))
|
||||
});
|
||||
|
||||
// Step 2: SIGKILL every process running in the worktree and
|
||||
// BLOCK until verified gone. The previous mechanism — portable_pty's
|
||||
// `ChildKiller::kill()` — sends SIGHUP, which claude-code
|
||||
// ignores, leaving the process alive while the agent record
|
||||
// was being marked terminated; that gap let a fresh spawn race
|
||||
// in alongside the surviving one. SIGKILL is uncatchable;
|
||||
// [`sigkill_pids_and_verify`] only returns once the kernel has
|
||||
// reaped each pid.
|
||||
if let Some(wt_path) = worktree_path.as_ref() {
|
||||
let pids = pids_matching(&wt_path.display().to_string());
|
||||
if pids.is_empty() {
|
||||
// Nothing in this worktree — agent likely already
|
||||
// exited on its own before the watchdog noticed.
|
||||
} else {
|
||||
match sigkill_pids_and_verify(&pids) {
|
||||
Ok(n) => slog!(
|
||||
"[watchdog] SIGKILL'd {n} process(es) in worktree {} for '{key}'.",
|
||||
wt_path.display()
|
||||
),
|
||||
Err(survivors) => slog_warn!(
|
||||
"[watchdog] SIGKILL incomplete for '{key}': pids still alive: {survivors:?}. \
|
||||
Proceeding with cleanup; concurrent spawn protection may be weakened."
|
||||
),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[watchdog] No worktree path recorded for '{key}'; cannot tree-kill, \
|
||||
falling back to portable_pty SIGHUP (likely no-op for claude-code)."
|
||||
);
|
||||
self.kill_child_for_key(key);
|
||||
}
|
||||
|
||||
// Step 3: NOW update the agent record. The process is verified
|
||||
// gone (or we logged that SIGKILL didn't take effect, which is
|
||||
// exceptional), so flipping status away from Running can no
|
||||
// longer open a window for a concurrent spawn.
|
||||
if let Ok(mut lock) = self.agents.lock()
|
||||
&& let Some(agent) = lock.get_mut(key)
|
||||
&& let Some(handle) = agent.task_handle.take()
|
||||
{
|
||||
agent.status = AgentStatus::Failed;
|
||||
agent.termination_reason = Some(reason.clone());
|
||||
if let Some(handle) = agent.task_handle.take() {
|
||||
// Best-effort abort of the outer tokio task. The PTY
|
||||
// blocking thread already returned (claude is dead),
|
||||
// so this is bookkeeping rather than load-bearing.
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: drop the (now-stale) child_killers entry — the
|
||||
// process it pointed at is gone.
|
||||
if let Ok(mut killers) = self.child_killers.lock() {
|
||||
killers.remove(key);
|
||||
}
|
||||
|
||||
// Use the retry mechanism: increment retry_count and only block
|
||||
// when the limit is exceeded, matching the pipeline's behaviour.
|
||||
|
||||
@@ -9,10 +9,19 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Reconcile: re-populate the CostRollup register from disk for all known stories.
|
||||
///
|
||||
/// Idempotent — `init_from_disk` scans all existing token-usage JSONL files and
|
||||
/// overwrites the in-memory register. Called by the periodic reconciler so that
|
||||
/// a Lagged event can never leave a story with a stale or absent cost entry.
|
||||
pub(crate) fn reconcile_cost_rollup(project_root: &Path) {
|
||||
crate::service::agents::cost_rollup::init_from_disk(project_root);
|
||||
}
|
||||
|
||||
/// Spawn a background task that maintains the CostRollup register.
|
||||
///
|
||||
/// On every terminal stage transition (Done, Archived, Abandoned, Superseded,
|
||||
@@ -41,17 +50,15 @@ pub(crate) fn spawn_cost_rollup_subscriber(project_root: PathBuf) {
|
||||
/// Returns `true` if `stage` is a terminal pipeline stage.
|
||||
///
|
||||
/// Terminal stages are those from which no further work is expected:
|
||||
/// Done, Archived, Abandoned, Superseded, Rejected.
|
||||
/// MergeFailure variants are NOT terminal — stories can recover from them.
|
||||
/// Done, Archived, Abandoned, Superseded, Rejected. Story 1086 routes the
|
||||
/// classification through the [`Status`] / [`Pipeline`] projection so future
|
||||
/// Stage variants automatically participate. MergeFailure variants are NOT
|
||||
/// terminal — stories can recover from them.
|
||||
fn is_terminal(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded | Status::Rejected
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
/// Snapshot the cost data for `fired.story_id` into the register when
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
//! Process management — kills orphaned PTY child processes on server shutdown.
|
||||
//!
|
||||
//! See [`crate::process_kill`] for the general process-termination primitives
|
||||
//! this module's existing methods (`kill_all_children`, `kill_child_for_key`)
|
||||
//! should eventually be migrated to. Those methods currently use
|
||||
//! `portable_pty::ChildKiller::kill()`, which sends `SIGHUP` — a signal
|
||||
//! claude-code ignores — so they leave orphans on every shutdown/stop. The
|
||||
//! migration is tracked in a separate story to keep its diff focused.
|
||||
use crate::slog;
|
||||
|
||||
use super::AgentPool;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! Agent stop — terminates a running agent while preserving its worktree.
|
||||
use crate::process_kill::{pids_matching, sigkill_pids_and_verify};
|
||||
use crate::slog;
|
||||
use crate::slog_error;
|
||||
use crate::slog_warn;
|
||||
use std::path::Path;
|
||||
|
||||
use super::super::{AgentEvent, AgentStatus};
|
||||
@@ -9,6 +11,22 @@ use super::types::composite_key;
|
||||
|
||||
impl AgentPool {
|
||||
/// Stop a running agent. Worktree is preserved for inspection.
|
||||
///
|
||||
/// **Order of operations matters here.** The naive implementation set
|
||||
/// `status = Failed` before killing the process, which opened the same
|
||||
/// idempotency window that produced the 2026-05-15 watchdog
|
||||
/// double-spawn: the `start_agent` check whitelists Running/Pending,
|
||||
/// so flipping status away from Running while the underlying claude
|
||||
/// process was still alive let a fresh spawn race in alongside the
|
||||
/// surviving one. The fix is:
|
||||
///
|
||||
/// 1. Read the worktree path (so we can find every process running
|
||||
/// in it) without mutating the agent record yet.
|
||||
/// 2. SIGKILL the process tree via [`crate::process_kill`] and BLOCK
|
||||
/// until verified gone. While this is in progress, status stays
|
||||
/// Running and `start_agent` continues to reject duplicate spawns.
|
||||
/// 3. Now that the process is gone, mutate the agent record (status,
|
||||
/// handle abort, removal).
|
||||
pub async fn stop_agent(
|
||||
&self,
|
||||
_project_root: &Path,
|
||||
@@ -17,27 +35,62 @@ impl AgentPool {
|
||||
) -> Result<(), String> {
|
||||
let key = composite_key(story_id, agent_name);
|
||||
|
||||
let (worktree_info, task_handle, tx) = {
|
||||
// Step 1: snapshot the worktree path (no status mutation yet).
|
||||
let worktree_info = {
|
||||
let agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
let agent = agents
|
||||
.get(&key)
|
||||
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
|
||||
agent.worktree_info.clone()
|
||||
};
|
||||
|
||||
// Step 2: SIGKILL every process running in the worktree, verify gone.
|
||||
// We do this BEFORE updating the agent record so the idempotency check
|
||||
// in `start_agent` keeps rejecting duplicate spawns until the slot is
|
||||
// legitimately free. Replaces the prior `kill_child_for_key` path,
|
||||
// which sent SIGHUP via portable_pty (ignored by claude-code).
|
||||
if let Some(wt) = worktree_info.as_ref() {
|
||||
let pids = pids_matching(&wt.path.display().to_string());
|
||||
if !pids.is_empty() {
|
||||
match sigkill_pids_and_verify(&pids) {
|
||||
Ok(n) => slog!(
|
||||
"[stop_agent] SIGKILL'd {n} process(es) in worktree {} for '{key}'.",
|
||||
wt.path.display()
|
||||
),
|
||||
Err(survivors) => slog_warn!(
|
||||
"[stop_agent] SIGKILL incomplete for '{key}': pids still alive: {survivors:?}. \
|
||||
Proceeding with record cleanup anyway; concurrent spawn protection may be weakened."
|
||||
),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[stop_agent] No worktree path recorded for '{key}'; cannot tree-kill, \
|
||||
falling back to portable_pty SIGHUP (likely no-op for claude-code)."
|
||||
);
|
||||
self.kill_child_for_key(&key);
|
||||
}
|
||||
|
||||
// Step 3: now safe to mutate. Status flip, handle abort, drop the
|
||||
// child_killers entry.
|
||||
let (task_handle, tx) = {
|
||||
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
let agent = agents
|
||||
.get_mut(&key)
|
||||
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
|
||||
|
||||
let wt = agent.worktree_info.clone();
|
||||
let handle = agent.task_handle.take();
|
||||
let tx = agent.tx.clone();
|
||||
agent.status = AgentStatus::Failed;
|
||||
(wt, handle, tx)
|
||||
(handle, tx)
|
||||
};
|
||||
|
||||
// Abort the task and kill the PTY child process.
|
||||
// Note: aborting a spawn_blocking task handle does not interrupt the blocking
|
||||
// thread, so we must also kill the child process directly via the killer registry.
|
||||
if let Some(handle) = task_handle {
|
||||
handle.abort();
|
||||
let _ = handle.await;
|
||||
}
|
||||
self.kill_child_for_key(&key);
|
||||
if let Ok(mut killers) = self.child_killers.lock() {
|
||||
killers.remove(&key);
|
||||
}
|
||||
|
||||
// Preserve worktree for inspection — don't destroy agent's work on stop.
|
||||
if let Some(ref wt) = worktree_info {
|
||||
@@ -53,7 +106,7 @@ impl AgentPool {
|
||||
status: "stopped".to_string(),
|
||||
});
|
||||
|
||||
// Remove from map
|
||||
// Remove from map.
|
||||
{
|
||||
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
agents.remove(&key);
|
||||
|
||||
@@ -6,10 +6,20 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Story 1086: matches the set of terminal stages used by the worktree-cleanup
|
||||
/// subscriber via the typed [`Status`] / [`Pipeline`] projections. Excludes
|
||||
/// `Status::Rejected` so rejected stories keep their worktree for human review.
|
||||
fn is_cleanup_terminal(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
/// Spawn a background task that creates a git worktree when a story enters `Stage::Coding`.
|
||||
///
|
||||
/// Subscribes to the pipeline transition broadcast channel. On each
|
||||
@@ -22,7 +32,14 @@ pub(crate) fn spawn_worktree_create_subscriber(project_root: PathBuf, port: u16)
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
if matches!(fired.after, Stage::Coding { .. }) {
|
||||
// Story 1086: classify by Pipeline column. `Pipeline::Coding`
|
||||
// covers `Stage::Coding` and `Stage::Blocked` — but Blocked has
|
||||
// no worktree to create, so we still need the Stage::Coding
|
||||
// payload check. Use a layered match: pipeline first for fast
|
||||
// skip, then variant guard.
|
||||
if fired.after.pipeline() == Pipeline::Coding
|
||||
&& matches!(fired.after, Stage::Coding { .. })
|
||||
{
|
||||
on_coding_transition(&project_root, port, &fired.story_id.0).await;
|
||||
}
|
||||
}
|
||||
@@ -50,13 +67,7 @@ pub(crate) fn spawn_worktree_cleanup_subscriber(project_root: PathBuf) {
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
if matches!(
|
||||
fired.after,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
) {
|
||||
if is_cleanup_terminal(&fired.after) {
|
||||
on_terminal_transition(&project_root, &fired.story_id.0).await;
|
||||
}
|
||||
}
|
||||
@@ -72,6 +83,36 @@ pub(crate) fn spawn_worktree_cleanup_subscriber(project_root: PathBuf) {
|
||||
});
|
||||
}
|
||||
|
||||
/// Reconcile worktree creation: for each story currently in `Stage::Coding`, ensure its worktree exists.
|
||||
///
|
||||
/// Idempotent — creates worktrees for Coding stories that have no worktree yet, and is
|
||||
/// a no-op for stories whose worktree already exists. Called by the periodic reconciler
|
||||
/// so that Lagged events on the broadcast channel never leave Coding stories without worktrees.
|
||||
pub(crate) async fn reconcile_worktree_create(project_root: &Path, port: u16) {
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
// Story 1086: filter by Pipeline column then narrow to the `Coding`
|
||||
// variant (Blocked is in `Pipeline::Coding` but has no worktree).
|
||||
if item.stage.pipeline() == Pipeline::Coding
|
||||
&& matches!(item.stage, crate::pipeline_state::Stage::Coding { .. })
|
||||
{
|
||||
on_coding_transition(project_root, port, &item.story_id.0).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reconcile worktree cleanup: for each story in a terminal stage, ensure its worktree is removed.
|
||||
///
|
||||
/// Idempotent — removes worktrees for terminal stories that still have one, and is a no-op
|
||||
/// for stories with no worktree. Called by the periodic reconciler so that Lagged events on
|
||||
/// the broadcast channel never leave terminal stories with dangling worktrees.
|
||||
pub(crate) async fn reconcile_worktree_cleanup(project_root: &Path) {
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
if is_cleanup_terminal(&item.stage) {
|
||||
on_terminal_transition(project_root, &item.story_id.0).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the worktree and feature branch for `story_id` when it enters `Stage::Coding`.
|
||||
pub(crate) async fn on_coding_transition(project_root: &Path, port: u16, story_id: &str) {
|
||||
let config = match crate::config::ProjectConfig::load(project_root) {
|
||||
|
||||
@@ -2,37 +2,30 @@
|
||||
|
||||
use crate::agents::{AgentPool, AgentStatus};
|
||||
use crate::config::ProjectConfig;
|
||||
use crate::pipeline_state::{ArchiveReason, PipelineItem, Stage};
|
||||
use crate::pipeline_state::{ArchiveReason, Pipeline, PipelineItem, Stage, Status};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Map a stage to its display section label, or `None` to skip it entirely.
|
||||
///
|
||||
/// This is the single source of truth for the "where does this item appear"
|
||||
/// decision. It mirrors the bucket routing in `http/workflow/pipeline.rs`
|
||||
/// so that chat output and the web UI are always consistent.
|
||||
///
|
||||
/// `Stage::Frozen { resume_to }` is handled recursively: a frozen story
|
||||
/// appears in the same section its `resume_to` stage would land in.
|
||||
/// This routes through [`Stage::pipeline`] so chat output and the web UI use
|
||||
/// the same column derivation. Frozen stories appear in their underlying
|
||||
/// `resume_to` column (handled inside `Stage::pipeline`) and items in
|
||||
/// `Stage::Archived` (with non-Blocked reasons) stay hidden.
|
||||
pub(crate) fn display_section(s: &Stage) -> Option<&'static str> {
|
||||
match s {
|
||||
Stage::Upcoming | Stage::Backlog => Some("Backlog"),
|
||||
Stage::Coding { .. }
|
||||
| Stage::Blocked { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Some("In Progress"),
|
||||
Stage::Qa | Stage::ReviewHold { .. } => Some("QA"),
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. } => {
|
||||
Some("Merge")
|
||||
}
|
||||
Stage::Done { .. } => Some("Done"),
|
||||
Stage::Frozen { resume_to } => display_section(resume_to),
|
||||
Stage::Abandoned { .. } | Stage::Superseded { .. } | Stage::Rejected { .. } => {
|
||||
Some("Closed")
|
||||
}
|
||||
Stage::Archived { .. } => None, // Completed/MergeFailed/ReviewHeld stay hidden
|
||||
// Archived items with non-Blocked reasons are hidden from chat output.
|
||||
if matches!(s, Stage::Archived { reason, .. } if !matches!(reason, ArchiveReason::Blocked { .. }))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some(match s.pipeline() {
|
||||
Pipeline::Backlog => "Backlog",
|
||||
Pipeline::Coding => "In Progress",
|
||||
Pipeline::Qa => "QA",
|
||||
Pipeline::Merge => "Merge",
|
||||
Pipeline::Done => "Done",
|
||||
Pipeline::Closed => "Closed",
|
||||
Pipeline::Archived => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check which dependency numbers from `item.depends_on` are unmet.
|
||||
@@ -114,10 +107,10 @@ pub(crate) fn build_status_from_items(
|
||||
|
||||
let config = ProjectConfig::load(project_root).ok();
|
||||
|
||||
// Pre-fetch working tree state for all Coding-stage items whose worktrees exist.
|
||||
// Pre-fetch working tree state for all Coding-column items whose worktrees exist.
|
||||
let dirty_files_by_story: HashMap<String, crate::service::git_ops::DirtyFiles> = items
|
||||
.iter()
|
||||
.filter(|i| matches!(i.stage, Stage::Coding { .. }))
|
||||
.filter(|i| i.stage.pipeline() == Pipeline::Coding && i.stage.status() == Status::Active)
|
||||
.filter_map(|i| {
|
||||
let wt = crate::worktree::worktree_path(project_root, &i.story_id.0);
|
||||
if wt.is_dir() {
|
||||
@@ -137,10 +130,13 @@ pub(crate) fn build_status_from_items(
|
||||
.into_iter()
|
||||
.collect();
|
||||
// Merge-failure detail now lives on the typed MergeJob CRDT entry
|
||||
// (story 929 — CRDT is the sole source of metadata).
|
||||
// (story 929 — CRDT is the sole source of metadata). Only items in the
|
||||
// Merge column with an Active status (i.e. `Stage::Merge { .. }`) need a
|
||||
// pre-fetched failure snippet; MergeFailure(Final) items render their
|
||||
// own snippet from the typed kind.
|
||||
let merge_failures: HashMap<String, String> = items
|
||||
.iter()
|
||||
.filter(|i| matches!(i.stage, Stage::Merge { .. }))
|
||||
.filter(|i| i.stage.pipeline() == Pipeline::Merge && i.stage.status() == Status::Active)
|
||||
.filter_map(|i| {
|
||||
let job = crate::crdt_state::read_merge_job(&i.story_id.0)?;
|
||||
let err = job.error?;
|
||||
@@ -215,11 +211,12 @@ pub(crate) fn build_status_from_items(
|
||||
out
|
||||
}
|
||||
|
||||
/// Render the one-line working tree summary for a story with uncommitted changes.
|
||||
/// Return an inline working-tree suffix for a story with uncommitted changes.
|
||||
///
|
||||
/// Returns an empty string when the working tree is clean. File paths are not
|
||||
/// listed here; use `status N` (triage) for the per-file breakdown.
|
||||
fn render_working_tree_lines(info: &crate::service::git_ops::DirtyFiles) -> String {
|
||||
/// Returns an empty string when the working tree is clean. The suffix is
|
||||
/// appended directly to the coder line, e.g. `, Working tree: 3 modified (uncommitted)`.
|
||||
/// File paths are not listed here; use `status N` (triage) for the per-file breakdown.
|
||||
fn working_tree_suffix(info: &crate::service::git_ops::DirtyFiles) -> String {
|
||||
if info.is_clean() {
|
||||
return String::new();
|
||||
}
|
||||
@@ -228,7 +225,7 @@ fn render_working_tree_lines(info: &crate::service::git_ops::DirtyFiles) -> Stri
|
||||
(0, n) => format!("{n} new"),
|
||||
(m, n) => format!("{m} modified, {n} new"),
|
||||
};
|
||||
format!(" Working tree: {summary} (uncommitted)\n")
|
||||
format!(", Working tree: {summary} (uncommitted)")
|
||||
}
|
||||
|
||||
/// Shared lookup tables passed to [`render_item_line`] to keep the argument count manageable.
|
||||
@@ -259,8 +256,10 @@ fn render_item_line(
|
||||
} else {
|
||||
Some(item.name.as_str())
|
||||
};
|
||||
// Use the typed CRDT stage as the sole source of truth (story 945).
|
||||
let frozen = matches!(item.stage, Stage::Frozen { .. });
|
||||
// Use the new Pipeline + Status helpers (story 1085).
|
||||
let pipeline = item.stage.pipeline();
|
||||
let status = item.stage.status();
|
||||
let frozen = status == Status::Frozen;
|
||||
let base_label = super::story_short_label(story_id, name_opt);
|
||||
let display = if frozen {
|
||||
format!("\u{2744}\u{FE0F} {base_label}") // ❄️ prefix
|
||||
@@ -281,41 +280,52 @@ fn render_item_line(
|
||||
format!(" *(waiting on: {})*", nums.join(", "))
|
||||
};
|
||||
|
||||
// Closed-stage items (abandoned / superseded / rejected) each get a
|
||||
// Closed-pipeline items (abandoned / superseded / rejected) each get a
|
||||
// distinct indicator and optionally display their metadata.
|
||||
match &item.stage {
|
||||
Stage::Abandoned { .. } => {
|
||||
match status {
|
||||
Status::Abandoned => {
|
||||
return format!(" \u{1F5D1}\u{FE0F} {display}{cost_suffix}\n"); // 🗑️
|
||||
}
|
||||
Stage::Superseded { superseded_by, .. } => {
|
||||
Status::Superseded => {
|
||||
let superseded_by = match &item.stage {
|
||||
Stage::Superseded { superseded_by, .. } => superseded_by.0.as_str(),
|
||||
_ => "",
|
||||
};
|
||||
return format!(
|
||||
" \u{1F500} {display}{cost_suffix} — superseded by {}\n", // 🔀
|
||||
superseded_by.0
|
||||
" \u{1F500} {display}{cost_suffix} — superseded by {superseded_by}\n", // 🔀
|
||||
);
|
||||
}
|
||||
Stage::Rejected { reason, .. } => {
|
||||
Status::Rejected => {
|
||||
let reason = match &item.stage {
|
||||
Stage::Rejected { reason, .. } => reason.as_str(),
|
||||
_ => "",
|
||||
};
|
||||
let snippet = first_non_empty_snippet(reason, 120);
|
||||
return format!(" \u{1F6AB} {display}{cost_suffix} — {snippet}\n"); // 🚫
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Merge-stage items get dedicated breakdown indicators instead of the
|
||||
// Merge-column items get dedicated breakdown indicators instead of the
|
||||
// generic traffic-light dot. MergeFailure / MergeFailureFinal items
|
||||
// now also appear in the Merge section (in-place) so they are handled
|
||||
// here alongside normal Merge items.
|
||||
if matches!(
|
||||
item.stage,
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. }
|
||||
) {
|
||||
match &item.stage {
|
||||
// appear in the Merge column (in-place) and are handled by the same arm.
|
||||
if pipeline == Pipeline::Merge {
|
||||
match status {
|
||||
// MergeFailureFinal: mergemaster already tried and gave up — always ⛔.
|
||||
Stage::MergeFailureFinal { kind } => {
|
||||
Status::MergeFailureFinal => {
|
||||
let kind = match &item.stage {
|
||||
Stage::MergeFailureFinal { kind } => kind,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let snippet = first_non_empty_snippet(&kind.display_reason(), 120);
|
||||
return format!(" \u{26D4} {display}{cost_suffix}{dep_suffix} — {snippet}\n");
|
||||
}
|
||||
// MergeFailure: a recovery agent may be running or queued.
|
||||
Stage::MergeFailure { kind, .. } => {
|
||||
Status::MergeFailure => {
|
||||
let kind = match &item.stage {
|
||||
Stage::MergeFailure { kind, .. } => kind,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return match agent.map(|a| &a.status) {
|
||||
Some(AgentStatus::Running) => format!(
|
||||
" \u{1F916} {display}{cost_suffix}{dep_suffix} — mergemaster running\n"
|
||||
@@ -352,16 +362,7 @@ fn render_item_line(
|
||||
}
|
||||
}
|
||||
|
||||
let blocked = matches!(
|
||||
item.stage,
|
||||
Stage::Blocked { .. }
|
||||
| Stage::MergeFailure { .. }
|
||||
| Stage::MergeFailureFinal { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
}
|
||||
);
|
||||
let blocked = status == Status::Blocked;
|
||||
// Blocked items with a recovery agent get differentiated indicators.
|
||||
if blocked {
|
||||
return match agent.map(|a| &a.status) {
|
||||
@@ -378,9 +379,9 @@ fn render_item_line(
|
||||
.and_then(|a| a.throttled)
|
||||
.is_some_and(|until| until > chrono::Utc::now());
|
||||
let dot = super::traffic_light_dot(blocked, throttled, agent.is_some());
|
||||
let wt_lines = dirty_files_by_story
|
||||
let wt_suffix = dirty_files_by_story
|
||||
.get(story_id)
|
||||
.map(render_working_tree_lines)
|
||||
.map(working_tree_suffix)
|
||||
.unwrap_or_default();
|
||||
if let Some(agent) = agent {
|
||||
let model_str = config
|
||||
@@ -389,10 +390,10 @@ fn render_item_line(
|
||||
.and_then(|ac| ac.model.as_ref().map(|m| m.as_str()))
|
||||
.unwrap_or("?");
|
||||
format!(
|
||||
" {dot}{display}{cost_suffix}{dep_suffix} — {} ({model_str})\n{wt_lines}",
|
||||
" {dot}{display}{cost_suffix}{dep_suffix} — {} ({model_str}){wt_suffix}\n",
|
||||
agent.agent_name
|
||||
)
|
||||
} else {
|
||||
format!(" {dot}{display}{cost_suffix}{dep_suffix}\n{wt_lines}")
|
||||
format!(" {dot}{display}{cost_suffix}{dep_suffix}{wt_suffix}\n")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,16 @@ pub(in crate::chat::transport::matrix::bot) async fn handle_message(
|
||||
let all_lines: Vec<String> = sled_guard.drain(..).chain(gtw_guard.drain(..)).collect();
|
||||
drop(sled_guard);
|
||||
drop(gtw_guard);
|
||||
format_drained_events(all_lines)
|
||||
slog!(
|
||||
"[matrix-bot] drained {} gateway audit lines for LLM context",
|
||||
all_lines.len()
|
||||
);
|
||||
let prefix = format_drained_events(all_lines);
|
||||
slog!(
|
||||
"[matrix-bot] format_drained_events output: {} bytes",
|
||||
prefix.len()
|
||||
);
|
||||
prefix
|
||||
};
|
||||
|
||||
// The prompt is just the current message with sender attribution.
|
||||
|
||||
@@ -326,21 +326,49 @@ pub async fn run_bot(
|
||||
}
|
||||
|
||||
// Subscribe to gateway-side status events and buffer compact audit lines for
|
||||
// the LLM context. A separate resubscribed receiver is used so both the
|
||||
// buffer task and the room-forwarder task receive every event independently.
|
||||
// the LLM context.
|
||||
//
|
||||
// Investigation log (story 1078) — hypotheses ruled out:
|
||||
// (A) gateway_event_rx is None: impossible — spawn_gateway_bot always passes
|
||||
// Some(state.event_tx.clone()) in gateway mode (gateway/mod.rs:130).
|
||||
// (B) recv() never returns: buf task uses the ORIGINAL event_rx (subscribed
|
||||
// before Matrix init) so any events buffered during init are visible;
|
||||
// future events arrive normally via the shared broadcast channel.
|
||||
// (C) Different Arc: buf and ctx.pending_gateway_events are both clones of
|
||||
// the same Arc<TokioMutex<Vec<String>>> — writes in the buf task are
|
||||
// immediately visible to handle_message.
|
||||
// (D) format_drained_events empty on non-empty input: the function is
|
||||
// pure/tested; the drain slog in handle_message now makes the count
|
||||
// observable so we can confirm it is non-zero when events arrive.
|
||||
//
|
||||
// Bug fixed here: previously the buffer task held `event_rx.resubscribe()`,
|
||||
// which starts at the *current tail* (next unsent message) and silently
|
||||
// discards every event that arrived during the Matrix login / room-join /
|
||||
// cross-signing phase (~5–30 s window). The forwarder now gets the
|
||||
// resubscribed receiver (only needs live events going forward); the buffer
|
||||
// task holds the original `event_rx` so it drains the init-window backlog
|
||||
// on first poll.
|
||||
let pending_gateway_events: Arc<TokioMutex<Vec<String>>> =
|
||||
Arc::new(TokioMutex::new(Vec::new()));
|
||||
let gateway_event_rx_for_forwarder = if let Some(event_rx) = gateway_event_rx {
|
||||
// Buffer task: silently accumulate compact audit lines for Timmy's context.
|
||||
// The forwarder only needs live (future) events — resubscribe is fine.
|
||||
let forwarder_rx = event_rx.resubscribe();
|
||||
// Buffer task: hold the *original* receiver so init-window events are
|
||||
// not lost. Silently accumulate compact audit lines for Timmy's context.
|
||||
{
|
||||
use crate::service::gateway::polling::format_gateway_audit_line;
|
||||
let buf_rx = event_rx.resubscribe();
|
||||
let buf = Arc::clone(&pending_gateway_events);
|
||||
slog!("[matrix-bot] subscribed to gateway events; buffer task starting");
|
||||
tokio::spawn(async move {
|
||||
let mut rx = buf_rx;
|
||||
let mut rx = event_rx;
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(event) => {
|
||||
slog!(
|
||||
"[matrix-bot] buffered audit line for project={} id={}",
|
||||
event.project,
|
||||
event.event.timestamp_ms()
|
||||
);
|
||||
let line = format_gateway_audit_line(&event.project, &event.event);
|
||||
buf.lock().await.push(line);
|
||||
}
|
||||
@@ -352,7 +380,7 @@ pub async fn run_bot(
|
||||
}
|
||||
});
|
||||
}
|
||||
Some(event_rx)
|
||||
Some(forwarder_rx)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -592,4 +620,89 @@ mod tests {
|
||||
assert_eq!(steps[2], 20);
|
||||
assert_eq!(steps[3], 40);
|
||||
}
|
||||
|
||||
/// Regression test (story 1078): gateway broadcast events must reach
|
||||
/// `pending_gateway_events` and produce an `audit ts=…` line in the
|
||||
/// `format_drained_events` output that is prepended to Timmy's prompt.
|
||||
///
|
||||
/// The test spins up a mock `event_tx` broadcaster, sends one
|
||||
/// `StageTransition` event, lets the buffer task process it, drains the
|
||||
/// buffer, and asserts the result contains the expected audit prefix.
|
||||
#[tokio::test]
|
||||
async fn gateway_buffer_task_injects_audit_line_into_context() {
|
||||
use super::super::messages::format_drained_events;
|
||||
use crate::service::events::StoredEvent;
|
||||
use crate::service::gateway::GatewayStatusEvent;
|
||||
use crate::service::gateway::polling::format_gateway_audit_line;
|
||||
|
||||
let (event_tx, event_rx) = tokio::sync::broadcast::channel::<GatewayStatusEvent>(16);
|
||||
|
||||
// pending_gateway_events shared between buffer task and drain site.
|
||||
let pending: Arc<TokioMutex<Vec<String>>> = Arc::new(TokioMutex::new(Vec::new()));
|
||||
|
||||
// Spawn a minimal buffer task — same logic as run_bot uses.
|
||||
{
|
||||
let buf = Arc::clone(&pending);
|
||||
tokio::spawn(async move {
|
||||
let mut rx = event_rx;
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(event) => {
|
||||
let line = format_gateway_audit_line(&event.project, &event.event);
|
||||
buf.lock().await.push(line);
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Send one stage-transition event, as a project node would.
|
||||
let evt = GatewayStatusEvent {
|
||||
project: "huskies".to_string(),
|
||||
event: StoredEvent::StageTransition {
|
||||
story_id: "42_story_feat".to_string(),
|
||||
story_name: String::new(),
|
||||
from_stage: "2_current".to_string(),
|
||||
to_stage: "3_qa".to_string(),
|
||||
timestamp_ms: 1_000_000,
|
||||
},
|
||||
};
|
||||
let receivers = event_tx.send(evt).unwrap_or(0);
|
||||
assert!(
|
||||
receivers > 0,
|
||||
"event must have at least one active receiver"
|
||||
);
|
||||
|
||||
// Wait for the buffer task to process the event.
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2);
|
||||
loop {
|
||||
if !pending.lock().await.is_empty() {
|
||||
break;
|
||||
}
|
||||
assert!(
|
||||
std::time::Instant::now() < deadline,
|
||||
"buffer task did not receive the event within 2 s"
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
|
||||
// Drain and format — mirrors what handle_message does.
|
||||
let lines: Vec<String> = pending.lock().await.drain(..).collect();
|
||||
let prefix = format_drained_events(lines);
|
||||
|
||||
assert!(
|
||||
prefix.contains("audit ts="),
|
||||
"prompt prefix must contain 'audit ts='; got: {prefix}"
|
||||
);
|
||||
assert!(
|
||||
prefix.contains("project=huskies"),
|
||||
"prompt prefix must name the project; got: {prefix}"
|
||||
);
|
||||
assert!(
|
||||
prefix.starts_with("<system-reminder>\n"),
|
||||
"prefix must open with <system-reminder>; got: {prefix}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,6 +161,12 @@ pub struct WatcherConfig {
|
||||
/// moved to `6_archived/`. Default: 14400 (4 hours).
|
||||
#[serde(default = "default_done_retention_secs")]
|
||||
pub done_retention_secs: u64,
|
||||
/// How often (in seconds) the periodic reconciler runs to converge
|
||||
/// subscriber side effects. The reconciler calls each subscriber's
|
||||
/// `reconcile()` entry point so that Lagged events never leave persistent
|
||||
/// state diverged. Default: 30 seconds.
|
||||
#[serde(default = "default_reconcile_interval_secs")]
|
||||
pub reconcile_interval_secs: u64,
|
||||
}
|
||||
|
||||
impl Default for WatcherConfig {
|
||||
@@ -168,6 +174,7 @@ impl Default for WatcherConfig {
|
||||
Self {
|
||||
sweep_interval_secs: default_sweep_interval_secs(),
|
||||
done_retention_secs: default_done_retention_secs(),
|
||||
reconcile_interval_secs: default_reconcile_interval_secs(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -180,6 +187,10 @@ fn default_done_retention_secs() -> u64 {
|
||||
4 * 60 * 60 // 4 hours
|
||||
}
|
||||
|
||||
fn default_reconcile_interval_secs() -> u64 {
|
||||
30
|
||||
}
|
||||
|
||||
fn default_qa() -> String {
|
||||
"server".to_string()
|
||||
}
|
||||
|
||||
@@ -56,7 +56,8 @@ pub use write::{
|
||||
bump_retry_count, migrate_legacy_stage_strings, migrate_merge_job, migrate_names_from_slugs,
|
||||
migrate_node_claims_to_agent_claims, migrate_story_ids_to_numeric, name_from_story_id,
|
||||
purge_done_stage_merge_jobs, set_agent, set_depends_on, set_epic, set_item_type, set_name,
|
||||
set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
set_origin, set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count,
|
||||
write_item,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -29,6 +29,8 @@ pub struct CrdtItemDump {
|
||||
/// Hex-encoded OpId of the list insert op — cross-reference with `crdt_ops`.
|
||||
pub content_index: String,
|
||||
pub is_deleted: bool,
|
||||
/// Origin JSON string, or `None` for items that pre-date story 1088.
|
||||
pub origin: Option<String>,
|
||||
}
|
||||
|
||||
/// Top-level debug dump of the in-memory CRDT state.
|
||||
@@ -149,6 +151,10 @@ pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
|
||||
JsonValue::Number(n) if n > 0.0 => Some(n),
|
||||
_ => None,
|
||||
};
|
||||
let origin = match item_crdt.origin.view() {
|
||||
JsonValue::String(s) if !s.is_empty() => Some(s),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let content_index = op.id.iter().map(|b| format!("{b:02x}")).collect::<String>();
|
||||
|
||||
@@ -163,6 +169,7 @@ pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
|
||||
claim_ts,
|
||||
content_index,
|
||||
is_deleted: op.is_deleted,
|
||||
origin,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -408,6 +415,11 @@ pub(super) fn extract_item_view(item: &PipelineItemCrdt) -> Option<PipelineItemV
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let origin = match item.origin.view() {
|
||||
JsonValue::String(s) if !s.is_empty() => Some(s),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let stage = project_stage_for_view(
|
||||
&stage_str,
|
||||
&story_id,
|
||||
@@ -429,6 +441,7 @@ pub(super) fn extract_item_view(item: &PipelineItemCrdt) -> Option<PipelineItemV
|
||||
qa_mode,
|
||||
item_type,
|
||||
epic,
|
||||
origin,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -585,56 +598,48 @@ fn project_stage_for_view(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether a dependency (by numeric ID prefix) is in `5_done` or `6_archived`
|
||||
/// according to CRDT state.
|
||||
/// Check whether a dependency (by numeric ID prefix) is in `Pipeline::Done` or
|
||||
/// `Pipeline::Archived` according to CRDT state.
|
||||
///
|
||||
/// Returns `true` if the dependency is satisfied (item found in a done stage).
|
||||
/// Matches both legacy slug-form IDs (`"664_story_foo"`) and numeric-only IDs
|
||||
/// (`"664"`) so the check remains correct after the slug→numeric migration.
|
||||
/// See `dep_is_archived_crdt` to distinguish archive-satisfied from cleanly-done.
|
||||
/// Returns `true` if the dependency is satisfied (item found in a Done or
|
||||
/// Archived pipeline column). Matches both legacy slug-form IDs
|
||||
/// (`"664_story_foo"`) and numeric-only IDs (`"664"`) so the check remains
|
||||
/// correct after the slug→numeric migration. Story 1086 routes the check
|
||||
/// through the `Pipeline` projection so that future Stage variants automatically
|
||||
/// participate via [`crate::pipeline_state::Stage::pipeline`]. See
|
||||
/// `dep_is_archived_crdt` to distinguish archive-satisfied from cleanly-done.
|
||||
pub fn dep_is_done_crdt(dep_number: u32) -> bool {
|
||||
use crate::pipeline_state::{Stage, read_all_typed};
|
||||
use crate::pipeline_state::{Pipeline, read_all_typed};
|
||||
let exact = dep_number.to_string();
|
||||
let prefix = format!("{dep_number}_");
|
||||
read_all_typed().into_iter().any(|item| {
|
||||
(item.story_id.0 == exact || item.story_id.0.starts_with(&prefix))
|
||||
&& matches!(
|
||||
item.stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
&& matches!(item.stage.pipeline(), Pipeline::Done | Pipeline::Archived)
|
||||
})
|
||||
}
|
||||
|
||||
/// Check whether a dependency (by numeric ID prefix) is specifically in `6_archived`
|
||||
/// according to CRDT state.
|
||||
/// Check whether a dependency (by numeric ID prefix) is specifically in
|
||||
/// `Pipeline::Archived` according to CRDT state.
|
||||
///
|
||||
/// Used to detect when a dependency is satisfied via archive rather than via a clean
|
||||
/// completion through `5_done`. Returns `false` when the CRDT layer is not initialised.
|
||||
/// Matches both legacy slug-form IDs (`"664_story_foo"`) and numeric-only IDs (`"664"`).
|
||||
/// completion through `Pipeline::Done`. Returns `false` when the CRDT layer is not
|
||||
/// initialised. Matches both legacy slug-form IDs (`"664_story_foo"`) and
|
||||
/// numeric-only IDs (`"664"`).
|
||||
pub fn dep_is_archived_crdt(dep_number: u32) -> bool {
|
||||
use crate::pipeline_state::{Stage, read_all_typed};
|
||||
use crate::pipeline_state::{Pipeline, read_all_typed};
|
||||
let exact = dep_number.to_string();
|
||||
let prefix = format!("{dep_number}_");
|
||||
read_all_typed().into_iter().any(|item| {
|
||||
(item.story_id.0 == exact || item.story_id.0.starts_with(&prefix))
|
||||
&& matches!(
|
||||
item.stage,
|
||||
Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
&& item.stage.pipeline() == Pipeline::Archived
|
||||
})
|
||||
}
|
||||
|
||||
/// Check unmet dependencies for a story by reading its `depends_on` from the
|
||||
/// CRDT document and checking each dependency against CRDT state.
|
||||
///
|
||||
/// Returns the list of dependency numbers that are NOT in `5_done` or `6_archived`.
|
||||
/// Returns the list of dependency numbers whose stage is NOT in `Pipeline::Done`
|
||||
/// or `Pipeline::Archived`.
|
||||
pub fn check_unmet_deps_crdt(story_id: &str) -> Vec<u32> {
|
||||
let item = match read_item(story_id) {
|
||||
Some(i) => i,
|
||||
|
||||
@@ -105,6 +105,26 @@ pub struct PipelineItemCrdt {
|
||||
/// means no merge task is in flight. Projected into `Stage::Merge {
|
||||
/// server_start_time }` so callers never read this register directly.
|
||||
pub merge_server_start: LwwRegisterCrdt<f64>,
|
||||
/// Story 1086: kebab-case wire form of the [`crate::pipeline_state::Pipeline`]
|
||||
/// projection of the current `stage`. Written by `write_item` alongside
|
||||
/// `stage` so display/scan code on remote peers can route by pipeline column
|
||||
/// without re-deriving from the stage string. Empty string means "use the
|
||||
/// value derived from `stage`" (legacy items predating 1086).
|
||||
pub pipeline: LwwRegisterCrdt<String>,
|
||||
/// Story 1086: kebab-case wire form of the [`crate::pipeline_state::Status`]
|
||||
/// projection of the current `stage`. Written alongside `stage` so badge
|
||||
/// renderers can read the status directly without re-projecting from the
|
||||
/// stage string. Empty string means "use the value derived from `stage`"
|
||||
/// (legacy items predating 1086).
|
||||
pub status: LwwRegisterCrdt<String>,
|
||||
/// Story 1088: origin of the work item — who or what created it.
|
||||
///
|
||||
/// Stored as a compact JSON string, e.g.
|
||||
/// `{"kind":"user","id":"","ts":1716768000.0}` or
|
||||
/// `{"kind":"agent","id":"coder-1","ts":1716768000.0}`.
|
||||
/// Empty string on older items that pre-date this register; the typed
|
||||
/// read path surfaces those as `None`, which the UI renders as `"unknown"`.
|
||||
pub origin: LwwRegisterCrdt<String>,
|
||||
}
|
||||
|
||||
/// CRDT node that holds a single peer's presence entry.
|
||||
@@ -203,6 +223,9 @@ pub struct WorkItem {
|
||||
pub(super) item_type: Option<crate::io::story_metadata::ItemType>,
|
||||
/// Epic this item belongs to. `None` when the item has no parent epic.
|
||||
pub(super) epic: Option<EpicId>,
|
||||
/// Origin of the work item (story 1088). `None` for items created before
|
||||
/// the origin register was introduced; those display as `"unknown"`.
|
||||
pub(super) origin: Option<String>,
|
||||
}
|
||||
|
||||
impl WorkItem {
|
||||
@@ -261,6 +284,12 @@ impl WorkItem {
|
||||
self.epic
|
||||
}
|
||||
|
||||
/// Origin of the work item (story 1088), or `None` for items created before
|
||||
/// the origin register was introduced.
|
||||
pub fn origin(&self) -> Option<&str> {
|
||||
self.origin.as_deref()
|
||||
}
|
||||
|
||||
/// Construct a `WorkItem` for use in tests outside `crdt_state::*`.
|
||||
///
|
||||
/// Within `crdt_state` use a struct literal directly (fields are `pub(super)`).
|
||||
@@ -286,6 +315,7 @@ impl WorkItem {
|
||||
qa_mode,
|
||||
item_type,
|
||||
epic,
|
||||
origin: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,6 +235,31 @@ pub fn set_plan_state(story_id: &str, state: crate::pipeline_state::PlanState) -
|
||||
true
|
||||
}
|
||||
|
||||
/// Set the `origin` CRDT register for a pipeline item (story 1088).
|
||||
///
|
||||
/// Writes a compact JSON string describing who or what created the item, e.g.
|
||||
/// `{"kind":"user","id":"","ts":1716768000.0}` or
|
||||
/// `{"kind":"agent","id":"coder-1","ts":1716768000.0}`.
|
||||
///
|
||||
/// Passing an empty string is treated as "no origin set" (equivalent to the
|
||||
/// pre-1088 state for older items). Returns `true` if the item was found and
|
||||
/// the op was applied, `false` otherwise.
|
||||
pub fn set_origin(story_id: &str, origin: &str) -> bool {
|
||||
let Some(state_mutex) = get_crdt() else {
|
||||
return false;
|
||||
};
|
||||
let Ok(mut state) = state_mutex.lock() else {
|
||||
return false;
|
||||
};
|
||||
let Some(&idx) = state.index.get(story_id) else {
|
||||
return false;
|
||||
};
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].origin.set(origin.to_string())
|
||||
});
|
||||
true
|
||||
}
|
||||
|
||||
/// Write a pipeline item state through CRDT operations.
|
||||
///
|
||||
/// If the item exists, updates its registers. If not, inserts a new item
|
||||
@@ -256,6 +281,11 @@ pub fn write_item(
|
||||
merged_at: Option<f64>,
|
||||
) {
|
||||
let stage_str = stage_dir_name(stage);
|
||||
// Story 1086: persist the typed Pipeline + Status projections alongside
|
||||
// the stage register so subscribers/display code on remote peers can route
|
||||
// by them without re-deriving from the stage string.
|
||||
let pipeline_str = stage.pipeline().as_str();
|
||||
let status_str = stage.status().as_str();
|
||||
let claim: Option<&AgentClaim> = match stage {
|
||||
Stage::Coding { claim, .. } => claim.as_ref(),
|
||||
Stage::Merge { claim, .. } => claim.as_ref(),
|
||||
@@ -311,6 +341,14 @@ pub fn write_item(
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].stage.set(stage_str.to_string())
|
||||
});
|
||||
// Story 1086: keep `pipeline` and `status` registers in lock-step with
|
||||
// the stage write so subscribers/display can read them directly.
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].pipeline.set(pipeline_str.to_string())
|
||||
});
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].status.set(status_str.to_string())
|
||||
});
|
||||
|
||||
if let Some(n) = name {
|
||||
apply_and_persist(&mut state, |s| {
|
||||
@@ -394,6 +432,10 @@ pub fn write_item(
|
||||
"resume_to": "",
|
||||
"plan_state": "",
|
||||
"merge_server_start": merge_server_start_val,
|
||||
// Story 1086: typed Pipeline + Status projections written at insert.
|
||||
"pipeline": pipeline_str,
|
||||
"status": status_str,
|
||||
"origin": "",
|
||||
})
|
||||
.into();
|
||||
|
||||
@@ -424,6 +466,10 @@ pub fn write_item(
|
||||
item.resume_to.advance_seq(floor);
|
||||
item.plan_state.advance_seq(floor);
|
||||
item.merge_server_start.advance_seq(floor);
|
||||
// Story 1086.
|
||||
item.pipeline.advance_seq(floor);
|
||||
item.status.advance_seq(floor);
|
||||
item.origin.advance_seq(floor);
|
||||
}
|
||||
|
||||
// Broadcast a CrdtEvent for the new item.
|
||||
|
||||
@@ -10,8 +10,8 @@ mod migrations;
|
||||
mod tests;
|
||||
|
||||
pub use item::{
|
||||
bump_retry_count, set_agent, set_depends_on, set_epic, set_item_type, set_name, set_plan_state,
|
||||
set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
bump_retry_count, set_agent, set_depends_on, set_epic, set_item_type, set_name, set_origin,
|
||||
set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -434,6 +434,7 @@ async fn handle_work_items_get(params: Value) -> Value {
|
||||
"stage": c.stage,
|
||||
"name": c.name,
|
||||
"agent": c.agent,
|
||||
"origin": c.origin,
|
||||
}),
|
||||
Err(e) => serde_json::json!({"error": e.to_string()}),
|
||||
}
|
||||
|
||||
+11
-9
@@ -12,7 +12,7 @@
|
||||
//! zombie entries left over from sessions that predate the subscriber.
|
||||
|
||||
use crate::db::{ContentKey, all_content_ids, delete_content};
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -111,16 +111,18 @@ pub(crate) fn sweep_zombie_content_on_startup() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` when `stage` is one of the five terminal pipeline stages.
|
||||
/// Return `true` when `stage` is one of the terminal pipeline classifications.
|
||||
///
|
||||
/// Story 1086: matches via the [`Status`] projection (Done / Abandoned /
|
||||
/// Superseded / Rejected) plus [`Pipeline::Archived`] for plain archived items
|
||||
/// (which carry `Status::Active`). Future Stage variants automatically
|
||||
/// participate by returning the appropriate Status / Pipeline from
|
||||
/// [`Stage::status`] / [`Stage::pipeline`].
|
||||
fn is_terminal_stage(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded | Status::Rejected
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
+321
-1
@@ -29,7 +29,7 @@ pub mod shadow_write;
|
||||
|
||||
pub use content_store::{ContentKey, all_content_ids, delete_content, read_content, write_content};
|
||||
pub use ops::{ItemMeta, delete_item, move_item_stage, next_item_number, write_item_with_content};
|
||||
pub use shadow_write::{get_shared_pool, init};
|
||||
pub use shadow_write::{check_schema_drift, get_shared_pool, init};
|
||||
|
||||
#[cfg(test)]
|
||||
pub use content_store::ensure_content_store;
|
||||
@@ -395,6 +395,112 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: root cause of the 2026-05-14 21:07 production outage.
|
||||
///
|
||||
/// A headless agent on a feature branch (whose binary includes a new
|
||||
/// sqlx migration) must NEVER apply that migration to the production
|
||||
/// pipeline.db. Verify that opening an agent-local DB and running
|
||||
/// migrations on it leaves the production DB's `_sqlx_migrations` table
|
||||
/// unchanged.
|
||||
///
|
||||
/// The enforcement mechanism is in `init_subsystems(is_agent=true)`, which
|
||||
/// redirects to a temp path. This test validates the SQLite isolation
|
||||
/// property: migrations applied to one file are confined to that file.
|
||||
#[tokio::test]
|
||||
async fn agent_db_isolation_does_not_affect_production_db() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let prod_db_path = tmp.path().join("production.db");
|
||||
let agent_db_path = tmp.path().join("agent_temp.db");
|
||||
|
||||
// Set up the production DB — apply the current compiled-in migrations.
|
||||
let prod_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&prod_db_path)
|
||||
.create_if_missing(true);
|
||||
let prod_pool = sqlx::SqlitePool::connect_with(prod_opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations")
|
||||
.run(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Record the migration versions present in the production DB.
|
||||
let before: Vec<(i64,)> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||
.fetch_all(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Simulate the agent opening its own isolated DB and running migrations.
|
||||
let agent_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&agent_db_path)
|
||||
.create_if_missing(true);
|
||||
let agent_pool = sqlx::SqlitePool::connect_with(agent_opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations")
|
||||
.run(&agent_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Production DB must be completely unaffected by the agent's migration run.
|
||||
let after: Vec<(i64,)> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||
.fetch_all(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
before, after,
|
||||
"agent opening its own DB must not alter the production DB migration table"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `check_schema_drift` returns an empty list when all
|
||||
/// migrations in the database are recognised by this binary.
|
||||
#[tokio::test]
|
||||
async fn check_schema_drift_empty_when_all_known() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("drift_test.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||
assert!(
|
||||
drift.is_empty(),
|
||||
"no drift expected when DB matches the compiled-in migration set"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `check_schema_drift` identifies a manually-inserted
|
||||
/// migration row that is not part of the compiled-in set.
|
||||
#[tokio::test]
|
||||
async fn check_schema_drift_detects_unknown_migration() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("drift_future.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
// Inject a fake "future" migration that no binary compiled today would know.
|
||||
let fake_checksum: Vec<u8> = vec![0u8; 20];
|
||||
sqlx::query(
|
||||
"INSERT INTO _sqlx_migrations \
|
||||
(version, description, installed_on, success, checksum, execution_time) \
|
||||
VALUES (99999999999999, 'future_migration', '2099-01-01T00:00:00Z', 1, ?1, 0)",
|
||||
)
|
||||
.bind(&fake_checksum)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||
assert_eq!(drift.len(), 1, "exactly one unknown migration expected");
|
||||
assert_eq!(drift[0].version, 99999999999999_i64);
|
||||
assert_eq!(drift[0].description, "future_migration");
|
||||
}
|
||||
|
||||
/// Story 864: passing `ItemMeta::default()` against a content blob that
|
||||
/// LOOKS like front-matter must NOT silently extract metadata into the
|
||||
/// CRDT. The whole point of removing the implicit YAML round-trip is
|
||||
@@ -482,4 +588,218 @@ mod tests {
|
||||
"retry_count must reset to 0 on stage transition"
|
||||
);
|
||||
}
|
||||
|
||||
/// Story 1087, AC2: the split-stage migration projects every supported
|
||||
/// wire-form `stage` string into the canonical `(pipeline, status)` pair.
|
||||
/// The fixture covers each Stage variant (and the legacy numeric-prefix
|
||||
/// directory names retained for back-compat).
|
||||
#[tokio::test]
|
||||
async fn split_stage_migration_backfills_pipeline_and_status_for_every_variant() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("pipeline.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
// (stage written by older code, expected pipeline, expected status)
|
||||
let fixture: &[(&str, &str, &str)] = &[
|
||||
("upcoming", "backlog", "active"),
|
||||
("backlog", "backlog", "active"),
|
||||
("coding", "coding", "active"),
|
||||
("blocked", "coding", "blocked"),
|
||||
("qa", "qa", "active"),
|
||||
("review_hold", "qa", "review-hold"),
|
||||
("merge", "merge", "active"),
|
||||
("merge_failure", "merge", "merge-failure"),
|
||||
("merge_failure_final", "merge", "merge-failure-final"),
|
||||
("done", "done", "done"),
|
||||
("abandoned", "closed", "abandoned"),
|
||||
("superseded", "closed", "superseded"),
|
||||
("rejected", "closed", "rejected"),
|
||||
("archived", "archived", "active"),
|
||||
("frozen", "coding", "frozen"),
|
||||
// Legacy numeric-prefix directory names.
|
||||
("1_backlog", "backlog", "active"),
|
||||
("2_current", "coding", "active"),
|
||||
("3_qa", "qa", "active"),
|
||||
("4_merge", "merge", "active"),
|
||||
("5_done", "done", "done"),
|
||||
("6_archived", "archived", "active"),
|
||||
];
|
||||
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
for (idx, (stage, _, _)) in fixture.iter().enumerate() {
|
||||
let id = format!("1087_fixture_{idx}");
|
||||
sqlx::query(
|
||||
"INSERT INTO pipeline_items \
|
||||
(id, name, stage, agent, retry_count, depends_on, content, created_at, updated_at) \
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?8)",
|
||||
)
|
||||
.bind(&id)
|
||||
.bind("fixture")
|
||||
.bind(*stage)
|
||||
.bind(Option::<String>::None)
|
||||
.bind(Option::<i64>::None)
|
||||
.bind(Option::<String>::None)
|
||||
.bind("---\nname: fixture\n---\n")
|
||||
.bind(&now)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Force the split-stage backfill to run against the rows we just
|
||||
// inserted. In production this is `sqlx::migrate!`'s job, but the
|
||||
// sqlx migrator only runs migrations once per DB and they were already
|
||||
// applied at the top of the test before any rows existed. Reissuing
|
||||
// the backfill statements is the migration logic under test.
|
||||
sqlx::query(
|
||||
"UPDATE pipeline_items SET pipeline = CASE stage \
|
||||
WHEN 'upcoming' THEN 'backlog' \
|
||||
WHEN 'backlog' THEN 'backlog' \
|
||||
WHEN '1_backlog' THEN 'backlog' \
|
||||
WHEN 'coding' THEN 'coding' \
|
||||
WHEN 'blocked' THEN 'coding' \
|
||||
WHEN '2_current' THEN 'coding' \
|
||||
WHEN 'qa' THEN 'qa' \
|
||||
WHEN 'review_hold' THEN 'qa' \
|
||||
WHEN '3_qa' THEN 'qa' \
|
||||
WHEN 'merge' THEN 'merge' \
|
||||
WHEN 'merge_failure' THEN 'merge' \
|
||||
WHEN 'merge_failure_final' THEN 'merge' \
|
||||
WHEN '4_merge' THEN 'merge' \
|
||||
WHEN 'done' THEN 'done' \
|
||||
WHEN '5_done' THEN 'done' \
|
||||
WHEN 'abandoned' THEN 'closed' \
|
||||
WHEN 'superseded' THEN 'closed' \
|
||||
WHEN 'rejected' THEN 'closed' \
|
||||
WHEN 'archived' THEN 'archived' \
|
||||
WHEN '6_archived' THEN 'archived' \
|
||||
WHEN 'frozen' THEN 'coding' \
|
||||
ELSE '' END",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"UPDATE pipeline_items SET status = CASE stage \
|
||||
WHEN 'frozen' THEN 'frozen' \
|
||||
WHEN 'review_hold' THEN 'review-hold' \
|
||||
WHEN 'blocked' THEN 'blocked' \
|
||||
WHEN 'merge_failure' THEN 'merge-failure' \
|
||||
WHEN 'merge_failure_final' THEN 'merge-failure-final' \
|
||||
WHEN 'abandoned' THEN 'abandoned' \
|
||||
WHEN 'superseded' THEN 'superseded' \
|
||||
WHEN 'rejected' THEN 'rejected' \
|
||||
WHEN 'done' THEN 'done' \
|
||||
WHEN '5_done' THEN 'done' \
|
||||
ELSE 'active' END",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for (idx, (stage_input, expect_pipeline, expect_status)) in fixture.iter().enumerate() {
|
||||
let id = format!("1087_fixture_{idx}");
|
||||
let row: (String, String) =
|
||||
sqlx::query_as("SELECT pipeline, status FROM pipeline_items WHERE id = ?1")
|
||||
.bind(&id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
row.0, *expect_pipeline,
|
||||
"stage {stage_input:?} should backfill pipeline to {expect_pipeline:?}, got {:?}",
|
||||
row.0
|
||||
);
|
||||
assert_eq!(
|
||||
row.1, *expect_status,
|
||||
"stage {stage_input:?} should backfill status to {expect_status:?}, got {:?}",
|
||||
row.1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Story 1087, AC1: `shadow_write::init` writes a timestamped backup of
|
||||
/// pipeline.db before the split-stage migration applies, and skips the
|
||||
/// backup on subsequent restarts (after the migration is recorded).
|
||||
#[tokio::test]
|
||||
async fn pre_pipeline_status_backup_only_runs_once() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("pipeline.db");
|
||||
|
||||
// Seed a "pre-1087" DB: open without applying the split-stage migration.
|
||||
// We do this by opening with `create_if_missing` and running only the
|
||||
// legacy migrations — but the simplest way to simulate that here is to
|
||||
// hand-craft a DB containing an `_sqlx_migrations` table that lists
|
||||
// every migration EXCEPT the split-stage one.
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
// Apply migrations the normal way, then delete the split-stage row so
|
||||
// the backup branch fires on the next `init`.
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
sqlx::query("DELETE FROM _sqlx_migrations WHERE version = 20260515000000")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
pool.close().await;
|
||||
|
||||
// First call: backup branch fires, side-car file appears.
|
||||
super::shadow_write::backup_pre_pipeline_status(&db_path).await;
|
||||
let backups: Vec<_> = std::fs::read_dir(tmp.path())
|
||||
.unwrap()
|
||||
.filter_map(Result::ok)
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_string_lossy()
|
||||
.contains(".pre-pipeline-status.")
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
backups.len(),
|
||||
1,
|
||||
"expected exactly one .pre-pipeline-status backup, got {}",
|
||||
backups.len()
|
||||
);
|
||||
|
||||
// Re-apply the migration so the marker row is back, simulating a
|
||||
// post-migration server restart.
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(false);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
let fake_checksum: Vec<u8> = vec![0u8; 20];
|
||||
sqlx::query(
|
||||
"INSERT INTO _sqlx_migrations \
|
||||
(version, description, installed_on, success, checksum, execution_time) \
|
||||
VALUES (20260515000000, 'split_stage_into_pipeline_status', '2026-05-15T00:00:00Z', 1, ?1, 0)",
|
||||
)
|
||||
.bind(&fake_checksum)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
pool.close().await;
|
||||
|
||||
// Second call: no new backup written.
|
||||
super::shadow_write::backup_pre_pipeline_status(&db_path).await;
|
||||
let backups_after: Vec<_> = std::fs::read_dir(tmp.path())
|
||||
.unwrap()
|
||||
.filter_map(Result::ok)
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_string_lossy()
|
||||
.contains(".pre-pipeline-status.")
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
backups_after.len(),
|
||||
1,
|
||||
"post-migration init must not create another backup; got {} backups",
|
||||
backups_after.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,10 +11,23 @@ use crate::slog;
|
||||
use sqlx::SqlitePool;
|
||||
use sqlx::sqlite::SqliteConnectOptions;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// One migration row in the live database that is not in the compiled-in set.
|
||||
///
|
||||
/// Returned by [`check_schema_drift`] for each unknown migration.
|
||||
pub struct UnknownMigration {
|
||||
/// sqlx migration version number (derived from the filename timestamp).
|
||||
pub version: i64,
|
||||
/// Human-readable description from the migration filename.
|
||||
pub description: String,
|
||||
/// When the migration was applied, as stored in `_sqlx_migrations.installed_on`.
|
||||
pub installed_on: String,
|
||||
}
|
||||
|
||||
/// The process-global SQLite pool, set once by [`init`].
|
||||
///
|
||||
/// Other modules call [`get_shared_pool`] to access the pool without needing
|
||||
@@ -56,6 +69,13 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Story 1087: before running the migration that splits `stage` into
|
||||
// (`pipeline`, `status`), take a timestamped side-car copy of the live DB
|
||||
// so the pre-split state is recoverable. Skip the copy when the file does
|
||||
// not yet exist (fresh installs) or when the split-stage migration has
|
||||
// already been applied (subsequent restarts).
|
||||
backup_pre_pipeline_status(db_path).await;
|
||||
|
||||
let options = SqliteConnectOptions::new()
|
||||
.filename(db_path)
|
||||
.create_if_missing(true);
|
||||
@@ -133,3 +153,88 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
|
||||
let _ = PIPELINE_DB.set(PipelineDb { tx });
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Story 1087: file name of the split-stage migration. The version prefix is
|
||||
/// the same `i64` sqlx assigns to that migration on `installed_on` rows in
|
||||
/// `_sqlx_migrations`.
|
||||
const SPLIT_STAGE_MIGRATION_VERSION: i64 = 20260515000000;
|
||||
|
||||
/// Story 1087: take a timestamped side-car copy of `pipeline.db` if and only if
|
||||
/// the split-stage migration has not yet been applied. This is the AC1 backup
|
||||
/// — `pipeline.db.pre-pipeline-status.<unix-ts>.bak` next to the live file.
|
||||
///
|
||||
/// Failures are logged but never propagated: a missing backup must not block
|
||||
/// the server from starting (a corrupt source file or a read-only directory
|
||||
/// will be surfaced by the migration step itself).
|
||||
pub(crate) async fn backup_pre_pipeline_status(db_path: &Path) {
|
||||
if !db_path.exists() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cheap pre-check: open the DB read-only and see whether the split-stage
|
||||
// migration version is recorded in `_sqlx_migrations`. If it is, the
|
||||
// backup has already been taken on a previous start and there is nothing
|
||||
// to do.
|
||||
let options = SqliteConnectOptions::new()
|
||||
.filename(db_path)
|
||||
.read_only(true)
|
||||
.create_if_missing(false);
|
||||
|
||||
let probe = SqlitePool::connect_with(options).await;
|
||||
if let Ok(pool) = probe {
|
||||
let already_split: Result<Option<(i64,)>, _> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations WHERE version = ?1 LIMIT 1")
|
||||
.bind(SPLIT_STAGE_MIGRATION_VERSION)
|
||||
.fetch_optional(&pool)
|
||||
.await;
|
||||
pool.close().await;
|
||||
if let Ok(Some(_)) = already_split {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let ts = chrono::Utc::now().timestamp();
|
||||
let mut backup = db_path.as_os_str().to_owned();
|
||||
backup.push(format!(".pre-pipeline-status.{ts}.bak"));
|
||||
let backup_path = std::path::PathBuf::from(backup);
|
||||
|
||||
match tokio::fs::copy(db_path, &backup_path).await {
|
||||
Ok(_) => slog!(
|
||||
"[db] Wrote pre-pipeline-status backup of {} to {}",
|
||||
db_path.display(),
|
||||
backup_path.display(),
|
||||
),
|
||||
Err(e) => slog!(
|
||||
"[db] Failed to write pre-pipeline-status backup of {}: {e}",
|
||||
db_path.display(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare the live `_sqlx_migrations` table against the compiled-in migration
|
||||
/// set and return any rows whose version is not known to this binary.
|
||||
///
|
||||
/// A non-empty result means the database was previously opened by a newer
|
||||
/// binary that applied additional migrations. The server must refuse to start
|
||||
/// in that state because the schema may contain tables or columns that this
|
||||
/// binary does not understand.
|
||||
pub async fn check_schema_drift(pool: &SqlitePool) -> Vec<UnknownMigration> {
|
||||
let migrator = sqlx::migrate!("./migrations");
|
||||
let known: HashSet<i64> = migrator.migrations.iter().map(|m| m.version).collect();
|
||||
|
||||
let rows: Vec<(i64, String, String)> = sqlx::query_as(
|
||||
"SELECT version, description, installed_on FROM _sqlx_migrations ORDER BY version",
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
rows.into_iter()
|
||||
.filter(|(v, _, _)| !known.contains(v))
|
||||
.map(|(version, description, installed_on)| UnknownMigration {
|
||||
version,
|
||||
description,
|
||||
installed_on,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -92,9 +92,20 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| {
|
||||
// Story 1087: emit `pipeline` and `status` alongside `stage` so
|
||||
// crdt-dump consumers can route by column/badge without re-deriving
|
||||
// the projection from the stage string.
|
||||
let (pipeline, status) = item
|
||||
.stage
|
||||
.as_deref()
|
||||
.and_then(crate::pipeline_state::Stage::from_dir)
|
||||
.map(|s| (s.pipeline().as_str(), s.status().as_str()))
|
||||
.unwrap_or(("", ""));
|
||||
json!({
|
||||
"story_id": item.story_id,
|
||||
"stage": item.stage,
|
||||
"pipeline": pipeline,
|
||||
"status": status,
|
||||
"name": item.name,
|
||||
"agent": item.agent,
|
||||
"retry_count": item.retry_count,
|
||||
@@ -103,6 +114,7 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
"claimed_at": item.claim_ts,
|
||||
"content_index": item.content_index,
|
||||
"is_deleted": item.is_deleted,
|
||||
"origin": item.origin,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
@@ -123,11 +135,10 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
|
||||
/// MCP tool: return the server version, build hash, and running port.
|
||||
pub(crate) fn tool_get_version(ctx: &AppContext) -> Result<String, String> {
|
||||
let build_hash =
|
||||
std::fs::read_to_string(".huskies/build_hash").unwrap_or_else(|_| "unknown".to_string());
|
||||
let build_hash = option_env!("BUILD_GIT_HASH").unwrap_or("unknown");
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"version": env!("CARGO_PKG_VERSION"),
|
||||
"build_hash": build_hash.trim(),
|
||||
"build_hash": build_hash,
|
||||
"port": ctx.services.agents.port(),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
@@ -312,4 +323,33 @@ mod tests {
|
||||
let result = tool_get_server_logs(&json!({"lines": 9999})).unwrap();
|
||||
let _ = result;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_get_version_ignores_build_hash_file_and_reports_compile_time_value() {
|
||||
// Regression: get_version must NOT read .huskies/build_hash at runtime.
|
||||
// Write a deliberately wrong value to the file and assert get_version
|
||||
// returns the compile-time hash, not the file content.
|
||||
let dir = tempfile::tempdir().expect("tempdir");
|
||||
let huskies_dir = dir.path().join(".huskies");
|
||||
std::fs::create_dir_all(&huskies_dir).unwrap();
|
||||
std::fs::write(huskies_dir.join("build_hash"), "wrong_hash_sentinel_xyz").unwrap();
|
||||
|
||||
let ctx = crate::http::test_helpers::test_ctx(dir.path());
|
||||
let result = tool_get_version(&ctx).expect("tool_get_version must not fail");
|
||||
let parsed: serde_json::Value = serde_json::from_str(&result).expect("must be valid JSON");
|
||||
|
||||
let returned_hash = parsed["build_hash"]
|
||||
.as_str()
|
||||
.expect("build_hash must be a string");
|
||||
assert_ne!(
|
||||
returned_hash, "wrong_hash_sentinel_xyz",
|
||||
"get_version must not read .huskies/build_hash; got '{returned_hash}'"
|
||||
);
|
||||
// The returned hash must equal the compile-time value.
|
||||
let compile_time_hash = option_env!("BUILD_GIT_HASH").unwrap_or("unknown");
|
||||
assert_eq!(
|
||||
returned_hash, compile_time_hash,
|
||||
"get_version must return compile-time BUILD_GIT_HASH"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,6 +195,9 @@ pub(super) async fn tool_status(args: &Value, ctx: &AppContext) -> Result<String
|
||||
if !deps.is_empty() {
|
||||
front_matter.insert("depends_on".to_string(), json!(deps));
|
||||
}
|
||||
// Story 1088: origin tracking.
|
||||
let origin_str = view.origin().unwrap_or("unknown");
|
||||
front_matter.insert("origin".to_string(), json!(origin_str));
|
||||
let stage_claim = match &typed_item.stage {
|
||||
crate::pipeline_state::Stage::Coding { claim, .. } => claim.as_ref(),
|
||||
crate::pipeline_state::Stage::Merge { claim, .. } => claim.as_ref(),
|
||||
|
||||
@@ -38,6 +38,16 @@ pub(crate) fn tool_create_bug(args: &Value, ctx: &AppContext) -> Result<String,
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&bug_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: bug_id.clone(),
|
||||
item_type: "bug".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created bug: {bug_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ pub(crate) fn tool_create_epic(args: &Value, ctx: &AppContext) -> Result<String,
|
||||
},
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&epic_id, &super::build_origin(args));
|
||||
|
||||
Ok(format!("Created epic: {epic_id}"))
|
||||
}
|
||||
|
||||
@@ -127,10 +129,14 @@ pub(crate) fn tool_show_epic(args: &Value, _ctx: &AppContext) -> Result<String,
|
||||
if matches!(item.stage, Stage::Done { .. }) {
|
||||
done += 1;
|
||||
}
|
||||
// Story 1087: expose pipeline + status alongside the legacy
|
||||
// stage name so epic-show callers can route by column/badge.
|
||||
member_items.push(json!({
|
||||
"story_id": sid,
|
||||
"name": item.name,
|
||||
"stage": stage_name,
|
||||
"pipeline": item.stage.pipeline().as_str(),
|
||||
"status": item.stage.status().as_str(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,33 @@ mod refactor;
|
||||
mod spike;
|
||||
mod story;
|
||||
|
||||
/// Build a compact origin JSON string for a newly-created work item (story 1088).
|
||||
///
|
||||
/// `args` may contain an `"origin"` object with `kind`, `id`, and `ts` fields
|
||||
/// supplied by the caller (e.g. a coder agent passing its own identity). When
|
||||
/// absent the default is `{"kind":"user","id":"","ts":<now>}`.
|
||||
///
|
||||
/// Callers that create items on behalf of system automation (e.g. gate-failure
|
||||
/// auto-filing) should pass `kind = "system"` and `id = "<automation-name>"`.
|
||||
pub(super) fn build_origin(args: &serde_json::Value) -> String {
|
||||
let ts = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs_f64();
|
||||
|
||||
if let Some(origin_obj) = args.get("origin").and_then(|v| v.as_object()) {
|
||||
let kind = origin_obj
|
||||
.get("kind")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("user");
|
||||
let id = origin_obj.get("id").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let ts_val = origin_obj.get("ts").and_then(|v| v.as_f64()).unwrap_or(ts);
|
||||
serde_json::json!({"kind": kind, "id": id, "ts": ts_val}).to_string()
|
||||
} else {
|
||||
serde_json::json!({"kind": "user", "id": "", "ts": ts}).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) use bug::{tool_close_bug, tool_create_bug, tool_list_bugs};
|
||||
pub(crate) use criteria::{
|
||||
tool_add_criterion, tool_check_criterion, tool_edit_criterion, tool_ensure_acceptance,
|
||||
|
||||
@@ -36,6 +36,16 @@ pub(crate) fn tool_create_refactor(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&refactor_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: refactor_id.clone(),
|
||||
item_type: "refactor".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created refactor: {refactor_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,16 @@ pub(crate) fn tool_create_spike(args: &Value, ctx: &AppContext) -> Result<String
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&spike_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: spike_id.clone(),
|
||||
item_type: "spike".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created spike: {spike_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,16 @@ pub(crate) fn tool_create_story(args: &Value, ctx: &AppContext) -> Result<String
|
||||
false,
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&story_id, &super::super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: story_id.clone(),
|
||||
item_type: "story".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
// Bug 503: warn at creation time if any depends_on points at an already-archived story.
|
||||
let archived_deps: Vec<u32> = depends_on_ids
|
||||
.as_deref()
|
||||
|
||||
@@ -39,34 +39,32 @@ pub(crate) fn tool_get_pipeline_status(ctx: &AppContext) -> Result<String, Strin
|
||||
let state = load_pipeline_state(ctx)?;
|
||||
let running_merges = ctx.services.agents.list_running_merges()?;
|
||||
|
||||
fn slim_name(name: &str) -> &str {
|
||||
crate::chat::util::truncate_at_char_boundary(name, 120)
|
||||
}
|
||||
|
||||
fn map_items(items: &[crate::http::workflow::UpcomingStory], stage: &str) -> Vec<Value> {
|
||||
items
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let mut item = json!({
|
||||
"story_id": s.story_id,
|
||||
"name": s.name,
|
||||
"name": slim_name(&s.name),
|
||||
"stage": stage,
|
||||
"pipeline": s.pipeline.as_str(),
|
||||
"status": s.status.as_str(),
|
||||
"agent": s.agent.as_ref().map(|a| json!({
|
||||
"agent_name": a.agent_name,
|
||||
"model": a.model,
|
||||
"status": a.status,
|
||||
})),
|
||||
});
|
||||
// Include blocked/retry_count when present so callers can
|
||||
// identify stories stuck in the pipeline.
|
||||
if let Some(true) = s.blocked {
|
||||
item["blocked"] = json!(true);
|
||||
}
|
||||
if let Some(rc) = s.retry_count {
|
||||
item["retry_count"] = json!(rc);
|
||||
}
|
||||
if let Some(ref mf) = s.merge_failure {
|
||||
item["merge_failure"] = json!(mf);
|
||||
}
|
||||
if let Some(ref epic_id) = s.epic_id {
|
||||
item["epic_id"] = json!(epic_id);
|
||||
}
|
||||
item
|
||||
})
|
||||
.collect()
|
||||
@@ -81,19 +79,21 @@ pub(crate) fn tool_get_pipeline_status(ctx: &AppContext) -> Result<String, Strin
|
||||
let backlog: Vec<Value> = state
|
||||
.backlog
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let mut item = json!({ "story_id": s.story_id, "name": s.name });
|
||||
if let Some(ref epic_id) = s.epic_id {
|
||||
item["epic_id"] = json!(epic_id);
|
||||
}
|
||||
item
|
||||
})
|
||||
.map(|s| json!({ "story_id": s.story_id, "name": slim_name(&s.name) }))
|
||||
.collect();
|
||||
|
||||
let archived: Vec<Value> = state
|
||||
.archived
|
||||
.iter()
|
||||
.map(|s| json!({ "story_id": s.story_id, "name": s.name, "stage": "archived" }))
|
||||
.map(|s| {
|
||||
json!({
|
||||
"story_id": s.story_id,
|
||||
"name": slim_name(&s.name),
|
||||
"stage": "archived",
|
||||
"pipeline": s.pipeline.as_str(),
|
||||
"status": s.status.as_str(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::to_string_pretty(&json!({
|
||||
@@ -248,6 +248,82 @@ mod tests {
|
||||
assert_eq!(item["valid"], true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pipeline_status_50_items_under_10kb() {
|
||||
crate::db::ensure_content_store();
|
||||
let stages = [
|
||||
("1_backlog", "backlog"),
|
||||
("2_current", "current"),
|
||||
("3_qa", "qa"),
|
||||
("4_merge", "merge"),
|
||||
("5_done", "done"),
|
||||
];
|
||||
for (i, (dir, _)) in stages.iter().enumerate() {
|
||||
for j in 0..10 {
|
||||
let id = format!("99{i}{j}0_story_size_test");
|
||||
let name = format!("Pipeline Size Test Story {i}-{j}");
|
||||
crate::db::write_item_with_content(
|
||||
&id,
|
||||
dir,
|
||||
&format!("---\nname: \"{name}\"\n---\n"),
|
||||
crate::db::ItemMeta {
|
||||
name: Some(name),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_get_pipeline_status(&ctx).unwrap();
|
||||
assert!(
|
||||
result.len() < 10 * 1024,
|
||||
"50-item response must be under 10 KB; got {} bytes",
|
||||
result.len()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pipeline_status_per_item_under_500_bytes() {
|
||||
crate::db::ensure_content_store();
|
||||
// Insert one item per active stage with a moderately long name.
|
||||
let stages = [
|
||||
("2_current", "9995_story_peritem_current"),
|
||||
("3_qa", "9996_story_peritem_qa"),
|
||||
("4_merge", "9997_story_peritem_merge"),
|
||||
("5_done", "9998_story_peritem_done"),
|
||||
];
|
||||
for (dir, id) in &stages {
|
||||
let name = "A Reasonably Named Story For Size Testing";
|
||||
crate::db::write_item_with_content(
|
||||
id,
|
||||
dir,
|
||||
&format!("---\nname: \"{name}\"\n---\n"),
|
||||
crate::db::ItemMeta {
|
||||
name: Some(name.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_get_pipeline_status(&ctx).unwrap();
|
||||
let parsed: Value = serde_json::from_str(&result).unwrap();
|
||||
let active = parsed["active"].as_array().unwrap();
|
||||
for item in active {
|
||||
if stages.iter().any(|(_, id)| item["story_id"] == *id) {
|
||||
let item_json = serde_json::to_string(item).unwrap();
|
||||
assert!(
|
||||
item_json.len() < 500,
|
||||
"per-item payload must be under 500 bytes; story_id={} got {} bytes: {}",
|
||||
item["story_id"],
|
||||
item_json.len(),
|
||||
item_json
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_validate_stories_with_invalid_front_matter() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
@@ -574,7 +574,7 @@ pub(super) fn story_tools() -> Vec<Value> {
|
||||
}),
|
||||
json!({
|
||||
"name": "get_pipeline_status",
|
||||
"description": "Return a structured snapshot of the full work item pipeline. Includes all active stages (current, qa, merge, done) with each item's stage, name, and assigned agent. Also includes upcoming backlog items.",
|
||||
"description": "Return a structured snapshot of the full work item pipeline. Each item includes only slim fields: story_id, name (capped at 120 chars), stage, agent (with agent_name/model/status), and optional boolean flags blocked and retry_count. Active stages (current, qa, merge, done) appear in the 'active' array; backlog items in 'backlog'. For full story details, use status(story_id) or dump_crdt.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {}
|
||||
|
||||
@@ -24,6 +24,10 @@ pub struct UpcomingStory {
|
||||
pub merge_failure: Option<String>,
|
||||
/// Active agent working on this item, if any.
|
||||
pub agent: Option<AgentAssignment>,
|
||||
/// Display column (story 1085) — derived from `Stage::pipeline()`.
|
||||
pub pipeline: crate::pipeline_state::Pipeline,
|
||||
/// Display badge/indicator (story 1085) — derived from `Stage::status()`.
|
||||
pub status: crate::pipeline_state::Status,
|
||||
/// True when the item is held in QA for human review.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub review_hold: Option<bool>,
|
||||
@@ -142,6 +146,8 @@ pub fn load_pipeline_state(ctx: &AppContext) -> Result<PipelineState, String> {
|
||||
error: None,
|
||||
merge_failure,
|
||||
agent,
|
||||
pipeline: item.stage.pipeline(),
|
||||
status: item.stage.status(),
|
||||
review_hold,
|
||||
qa,
|
||||
retry_count: if item.retry_count() > 0 {
|
||||
@@ -278,6 +284,8 @@ pub fn load_upcoming_stories(_ctx: &AppContext) -> Result<Vec<UpcomingStory>, St
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: item.stage.pipeline(),
|
||||
status: item.stage.status(),
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: if item_retry_count > 0 {
|
||||
|
||||
@@ -90,4 +90,14 @@ pub enum WatcherEvent {
|
||||
/// `true` if acceptance gates passed; `false` if they failed.
|
||||
success: bool,
|
||||
},
|
||||
/// A new work item was successfully created and added to the backlog.
|
||||
/// Triggers a creation notification to configured chat rooms.
|
||||
NewItemCreated {
|
||||
/// Work item ID (e.g. `"1075_refactor_split_stage_enum"`).
|
||||
item_id: String,
|
||||
/// Human-readable item type (`"story"`, `"bug"`, `"refactor"`, `"spike"`).
|
||||
item_type: String,
|
||||
/// Human-readable item name.
|
||||
name: String,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ mod sweep;
|
||||
|
||||
pub use events::WatcherEvent;
|
||||
pub(crate) use sweep::spawn_done_to_archived_subscriber;
|
||||
#[cfg(test)]
|
||||
pub(crate) use sweep::sweep_done_to_archived;
|
||||
|
||||
use crate::slog;
|
||||
|
||||
@@ -29,13 +29,20 @@ use std::time::Duration;
|
||||
///
|
||||
/// Replaces the periodic `sweep_done_to_archived` call from the tick loop.
|
||||
pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
use crate::pipeline_state::{PipelineEvent, Stage, apply_transition, subscribe_transitions};
|
||||
use crate::pipeline_state::{
|
||||
PipelineEvent, Stage, Status, apply_transition, subscribe_transitions,
|
||||
};
|
||||
|
||||
let mut rx = subscribe_transitions();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
// Story 1086: gate on the typed `Status::Done` projection;
|
||||
// the variant pattern is still required to read `merged_at`.
|
||||
if fired.after.status() != Status::Done {
|
||||
continue;
|
||||
}
|
||||
if let Stage::Done { merged_at, .. } = fired.after {
|
||||
let story_id = fired.story_id.0.clone();
|
||||
let retention = done_retention;
|
||||
@@ -70,7 +77,7 @@ pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
});
|
||||
}
|
||||
|
||||
/// Sweep items in `Stage::Done` whose `merged_at` timestamp exceeds the
|
||||
/// Reconcile: sweep items in `Stage::Done` whose `merged_at` timestamp exceeds the
|
||||
/// retention duration to `Stage::Archived` via the typed transition table.
|
||||
///
|
||||
/// Routes through [`crate::pipeline_state::apply_transition`] so the
|
||||
@@ -78,14 +85,22 @@ pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
/// `TransitionFired` event is emitted to subscribers (worktree pruning,
|
||||
/// matrix notifier, etc.).
|
||||
///
|
||||
/// Used in tests for direct one-shot sweeps; production code uses
|
||||
/// Called at startup and by the periodic reconciler to archive Done stories
|
||||
/// whose retention has elapsed, even when the `TransitionFired` subscriber
|
||||
/// lagged and missed their Done event. Production reactive archiving uses
|
||||
/// [`spawn_done_to_archived_subscriber`] instead.
|
||||
#[cfg(test)]
|
||||
///
|
||||
/// Logs a summary INFO line on every call: candidates evaluated and items
|
||||
/// archived, or "no items past retention" when nothing was swept.
|
||||
pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
use crate::pipeline_state::{PipelineEvent, Stage, apply_transition, read_all_typed};
|
||||
|
||||
let mut candidates: usize = 0;
|
||||
let mut archived: usize = 0;
|
||||
|
||||
for item in read_all_typed() {
|
||||
if let Stage::Done { merged_at, .. } = &item.stage {
|
||||
candidates += 1;
|
||||
let age = chrono::Utc::now()
|
||||
.signed_duration_since(*merged_at)
|
||||
.to_std()
|
||||
@@ -93,7 +108,10 @@ pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
if age >= done_retention {
|
||||
let story_id = item.story_id.0.clone();
|
||||
match apply_transition(&story_id, PipelineEvent::Accepted, None) {
|
||||
Ok(_) => slog!("[watcher] sweep: promoted {story_id} → archived"),
|
||||
Ok(_) => {
|
||||
archived += 1;
|
||||
slog!("[watcher] sweep: promoted {story_id} → archived")
|
||||
}
|
||||
Err(e) => {
|
||||
slog!("[watcher] sweep: transition error for {story_id}: {e}")
|
||||
}
|
||||
@@ -101,4 +119,10 @@ pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if archived > 0 {
|
||||
slog!("[watcher] sweep: {candidates} candidate(s) evaluated, {archived} archived");
|
||||
} else {
|
||||
slog!("[watcher] sweep: {candidates} candidate(s) evaluated, no items past retention");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -301,6 +301,48 @@ async fn done_to_archived_subscriber_archives_on_transition() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: simulates a server restart occurring between move-to-done and
|
||||
/// the configured retention window expiry.
|
||||
///
|
||||
/// Before the fix the archive-deadline was held only in the reactive
|
||||
/// subscriber's volatile sleep task; a restart would lose that task and the
|
||||
/// item would never be archived. The fix is that `sweep_done_to_archived`
|
||||
/// reads `merged_at` from the CRDT (durable across restarts) and archives any
|
||||
/// item whose age exceeds the retention, so the next periodic reconcile tick
|
||||
/// after restart picks it up regardless of whether a sleep task existed.
|
||||
#[test]
|
||||
fn restart_scenario_sweep_archives_past_retention_after_sweep_tick() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9885_sweep_restart_regression";
|
||||
|
||||
// Simulate: item moved to Done 10 seconds before the restart.
|
||||
// The reactive subscriber would have had a sleep task for the remaining
|
||||
// retention time; that task is now gone (process restarted).
|
||||
let ten_seconds_ago = (chrono::Utc::now() - chrono::Duration::seconds(10)).timestamp() as f64;
|
||||
crate::crdt_state::write_item_str(
|
||||
story_id,
|
||||
"5_done",
|
||||
Some("Restart regression test"),
|
||||
None,
|
||||
None,
|
||||
Some(ten_seconds_ago),
|
||||
);
|
||||
|
||||
// The next periodic reconcile tick after restart calls sweep_done_to_archived
|
||||
// directly. With 5-second retention and merged_at 10s ago, the item must
|
||||
// be archived even though no reactive subscriber sleep task exists.
|
||||
sweep_done_to_archived(Duration::from_secs(5));
|
||||
|
||||
let items = crate::pipeline_state::read_all_typed();
|
||||
let item = items.iter().find(|i| i.story_id.0 == story_id);
|
||||
assert!(
|
||||
item.is_some_and(|i| matches!(i.stage, crate::pipeline_state::Stage::Archived { .. })),
|
||||
"item past retention must be archived on the next sweep tick after a server restart"
|
||||
);
|
||||
}
|
||||
|
||||
/// Prove that an item with merged_at NEWER than done_retention is NOT swept.
|
||||
#[test]
|
||||
fn sweep_keeps_item_newer_than_retention() {
|
||||
|
||||
+4
-4
@@ -33,6 +33,8 @@ pub mod mesh;
|
||||
/// Node identity — Ed25519 keypair generation and stable node ID management.
|
||||
pub mod node_identity;
|
||||
pub(crate) mod pipeline_state;
|
||||
/// Reliable process-termination primitives shared across the server.
|
||||
pub mod process_kill;
|
||||
/// Rebuild — process restart and shutdown coordination.
|
||||
pub mod rebuild;
|
||||
mod service;
|
||||
@@ -82,12 +84,10 @@ async fn main() -> Result<(), std::io::Error> {
|
||||
});
|
||||
|
||||
// Log version and build hash so we can verify what's running.
|
||||
let build_hash =
|
||||
std::fs::read_to_string(".huskies/build_hash").unwrap_or_else(|_| "unknown".to_string());
|
||||
slog!(
|
||||
"[startup] huskies v{} (build {})",
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
build_hash.trim()
|
||||
option_env!("BUILD_GIT_HASH").unwrap_or("unknown")
|
||||
);
|
||||
|
||||
let app_state = Arc::new(SessionState::default());
|
||||
@@ -151,7 +151,7 @@ async fn main() -> Result<(), std::io::Error> {
|
||||
startup::project::open_project_root(is_init, explicit_path, &cwd, &app_state, &store, port)
|
||||
.await;
|
||||
|
||||
startup::project::init_subsystems(&app_state, &cwd).await;
|
||||
startup::project::init_subsystems(&app_state, &cwd, is_agent).await;
|
||||
|
||||
let crdt_join_token = cli
|
||||
.join_token
|
||||
|
||||
@@ -36,32 +36,6 @@ pub(super) fn try_broadcast(fired: &TransitionFired) {
|
||||
let _ = get_or_init_tx().send(fired.clone());
|
||||
}
|
||||
|
||||
/// Replay the current CRDT pipeline state as a burst of synthetic
|
||||
/// [`TransitionFired`] events at server startup.
|
||||
///
|
||||
/// Reads every item from the CRDT and broadcasts a self-transition
|
||||
/// (`before == after`) for each one so that all existing subscribers
|
||||
/// (worktree lifecycle, merge-failure auto-spawn, auto-assign) react
|
||||
/// identically to a live event. This replaces the legacy scan-based
|
||||
/// `reconcile_on_startup` path.
|
||||
///
|
||||
/// Idempotent: a second call produces another burst of events, but every
|
||||
/// subscriber already guards against duplicate work (e.g.
|
||||
/// `is_story_assigned_for_stage` returns true once an agent is running,
|
||||
/// and worktree creation is a no-op when the worktree already exists).
|
||||
pub fn replay_current_pipeline_state() {
|
||||
for item in super::read_all_typed() {
|
||||
let fired = TransitionFired {
|
||||
story_id: item.story_id.clone(),
|
||||
before: item.stage.clone(),
|
||||
after: item.stage,
|
||||
event: super::PipelineEvent::DepsMet,
|
||||
at: chrono::Utc::now(),
|
||||
};
|
||||
try_broadcast(&fired);
|
||||
}
|
||||
}
|
||||
|
||||
/// Fired when a pipeline stage transition completes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TransitionFired {
|
||||
@@ -183,58 +157,4 @@ mod tests {
|
||||
}
|
||||
|
||||
// ── TransitionError Display ─────────────────────────────────────────
|
||||
|
||||
// ── replay_current_pipeline_state ──────────────────────────────────
|
||||
|
||||
/// AC1: replay broadcasts a synthetic event for every item in the CRDT.
|
||||
#[test]
|
||||
fn replay_broadcasts_event_for_crdt_item_in_coding_stage() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9901_replay_coding";
|
||||
crate::db::write_item_with_content(
|
||||
story_id,
|
||||
"2_current",
|
||||
"---\nname: Replay Coding\n---\n",
|
||||
crate::db::ItemMeta::named("Replay Coding"),
|
||||
);
|
||||
|
||||
let mut rx = subscribe_transitions();
|
||||
replay_current_pipeline_state();
|
||||
|
||||
let mut found = false;
|
||||
while let Ok(fired) = rx.try_recv() {
|
||||
if fired.story_id.0 == story_id && matches!(fired.after, Stage::Coding { .. }) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
found,
|
||||
"replay must broadcast a Coding event for a story in 2_current"
|
||||
);
|
||||
}
|
||||
|
||||
/// AC3: calling replay_current_pipeline_state twice fires events both times.
|
||||
///
|
||||
/// Pool-state idempotency (no duplicate agents) is enforced by subscribers,
|
||||
/// not by the replay function itself. This test verifies that replay is safe
|
||||
/// to call multiple times without panicking.
|
||||
#[test]
|
||||
fn replay_twice_does_not_panic() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9902_replay_idem";
|
||||
crate::db::write_item_with_content(
|
||||
story_id,
|
||||
"3_qa",
|
||||
"---\nname: Replay QA\n---\n",
|
||||
crate::db::ItemMeta::named("Replay QA"),
|
||||
);
|
||||
|
||||
// Two successive replays must not panic.
|
||||
replay_current_pipeline_state();
|
||||
replay_current_pipeline_state();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,8 +41,8 @@ mod tests;
|
||||
#[allow(unused_imports)]
|
||||
pub use types::{
|
||||
AgentClaim, AgentName, ArchiveReason, BranchName, ExecutionState, GitSha, MergeFailureKind,
|
||||
NodePubkey, PipelineItem, PlanState, Stage, StoryId, TransitionError, stage_dir_name,
|
||||
stage_label,
|
||||
NodePubkey, Pipeline, PipelineItem, PlanState, Stage, Status, StoryId, TransitionError,
|
||||
stage_dir_name, stage_label,
|
||||
};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
@@ -51,10 +51,7 @@ pub use transition::{
|
||||
};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use events::{
|
||||
EventBus, TransitionFired, TransitionSubscriber, replay_current_pipeline_state,
|
||||
subscribe_transitions,
|
||||
};
|
||||
pub use events::{EventBus, TransitionFired, TransitionSubscriber, subscribe_transitions};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use projection::ProjectionError;
|
||||
@@ -66,6 +63,7 @@ pub use apply::{
|
||||
transition_to_unfrozen,
|
||||
};
|
||||
|
||||
pub(crate) use subscribers::reconcile_audit_log;
|
||||
pub use subscribers::spawn_audit_log_subscriber;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
|
||||
@@ -35,6 +35,14 @@ impl TransitionSubscriber for AuditLogSubscriber {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reconcile: no-op for the audit log subscriber.
///
/// The audit log records live transitions only. Replaying historical CRDT state at
/// reconcile time would produce misleading entries (wrong timestamps, duplicate lines).
/// Eventual consistency of the audit log is not required — missed events are simply
/// absent from the log, which is acceptable.
///
/// Kept as an explicit (empty) function so the pipeline module can re-export a
/// reconcile hook for every subscriber uniformly.
pub(crate) fn reconcile_audit_log() {}
|
||||
|
||||
/// Spawn a background task that writes a structured audit log entry for every pipeline transition.
|
||||
///
|
||||
/// Subscribes to the transition broadcast channel. Every `TransitionFired` event produces
|
||||
|
||||
@@ -429,6 +429,144 @@ impl Stage {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Display split (story 1085): Pipeline column + Status badge ─────────────

/// Column placement for a work item in the UI/chat status display.
///
/// Derived from [`Stage`] via [`Stage::pipeline`]. Display callers route items
/// to columns by this enum instead of pattern-matching `Stage` variants, so
/// new badges (e.g. `Frozen`, `Blocked`) do not produce new columns.
///
/// Serialized in kebab-case; the wire names match [`Pipeline::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Pipeline {
    /// Items in `Upcoming` or `Backlog` stages.
    Backlog,
    /// Items being coded (or blocked while in the coding lane).
    Coding,
    /// Items in QA or `ReviewHold`.
    Qa,
    /// Items in `Merge`, `MergeFailure`, or `MergeFailureFinal`.
    Merge,
    /// Items in `Done`.
    Done,
    /// Abandoned, superseded, or rejected items.
    Closed,
    /// Items swept into `Archived`.
    Archived,
}
|
||||
|
||||
impl Pipeline {
|
||||
/// Stable wire-format identifier (kebab-case).
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Pipeline::Backlog => "backlog",
|
||||
Pipeline::Coding => "coding",
|
||||
Pipeline::Qa => "qa",
|
||||
Pipeline::Merge => "merge",
|
||||
Pipeline::Done => "done",
|
||||
Pipeline::Closed => "closed",
|
||||
Pipeline::Archived => "archived",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Badge/indicator for a work item, orthogonal to its [`Pipeline`] column.
///
/// Derived from [`Stage`] via [`Stage::status`]. A `Frozen` story stays in
/// its underlying `Pipeline` column (e.g. `Coding`) and is decorated with
/// `Status::Frozen` for the display. `Status::Done` is reserved for items in
/// the `Done` column and is never produced for items still in flight, so a
/// done item never carries a `MergeFailure*` badge (story 1052).
///
/// Serialized internally tagged (`"kind"` field) in kebab-case; the wire
/// names match [`Status::as_str`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", tag = "kind")]
pub enum Status {
    /// No special badge — normal in-progress item.
    Active,
    /// Item is paused (`Stage::Frozen`).
    Frozen,
    /// Item is held for human review (`Stage::ReviewHold`).
    ReviewHold,
    /// Item is blocked (`Stage::Blocked` or legacy `Archived(Blocked)`).
    Blocked,
    /// Merge failed; mergemaster may still be recovering.
    MergeFailure,
    /// Merge failed beyond automatic recovery.
    MergeFailureFinal,
    /// User abandoned the item.
    Abandoned,
    /// Item was superseded by another work item.
    Superseded,
    /// Item was permanently rejected.
    Rejected,
    /// Item completed successfully.
    Done,
}
|
||||
|
||||
impl Status {
|
||||
/// Stable wire-format identifier (kebab-case).
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Status::Active => "active",
|
||||
Status::Frozen => "frozen",
|
||||
Status::ReviewHold => "review-hold",
|
||||
Status::Blocked => "blocked",
|
||||
Status::MergeFailure => "merge-failure",
|
||||
Status::MergeFailureFinal => "merge-failure-final",
|
||||
Status::Abandoned => "abandoned",
|
||||
Status::Superseded => "superseded",
|
||||
Status::Rejected => "rejected",
|
||||
Status::Done => "done",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stage {
|
||||
/// Display column for this stage. `Frozen { resume_to }` recurses so a
|
||||
/// paused story keeps its underlying column.
|
||||
pub fn pipeline(&self) -> Pipeline {
|
||||
match self {
|
||||
Stage::Upcoming | Stage::Backlog => Pipeline::Backlog,
|
||||
Stage::Coding { .. } | Stage::Blocked { .. } => Pipeline::Coding,
|
||||
Stage::Qa | Stage::ReviewHold { .. } => Pipeline::Qa,
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. } => {
|
||||
Pipeline::Merge
|
||||
}
|
||||
Stage::Frozen { resume_to } => resume_to.pipeline(),
|
||||
Stage::Done { .. } => Pipeline::Done,
|
||||
Stage::Abandoned { .. } | Stage::Superseded { .. } | Stage::Rejected { .. } => {
|
||||
Pipeline::Closed
|
||||
}
|
||||
Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Pipeline::Coding,
|
||||
Stage::Archived { .. } => Pipeline::Archived,
|
||||
}
|
||||
}
|
||||
|
||||
/// Display badge for this stage. `Frozen { resume_to }` returns
|
||||
/// `Status::Frozen` regardless of the inner stage; callers wanting the
|
||||
/// underlying badge inspect `resume_to` directly.
|
||||
pub fn status(&self) -> Status {
|
||||
match self {
|
||||
Stage::Frozen { .. } => Status::Frozen,
|
||||
Stage::ReviewHold { .. } => Status::ReviewHold,
|
||||
Stage::Blocked { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Status::Blocked,
|
||||
Stage::MergeFailure { .. } => Status::MergeFailure,
|
||||
Stage::MergeFailureFinal { .. } => Status::MergeFailureFinal,
|
||||
Stage::Abandoned { .. } => Status::Abandoned,
|
||||
Stage::Superseded { .. } => Status::Superseded,
|
||||
Stage::Rejected { .. } => Status::Rejected,
|
||||
Stage::Done { .. } => Status::Done,
|
||||
_ => Status::Active,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Per-node execution state ────────────────────────────────────────────────
|
||||
|
||||
/// Per-node execution tracking, stored in the CRDT under each node's pubkey.
|
||||
|
||||
@@ -0,0 +1,322 @@
|
||||
//! Reliable process-termination primitives.
|
||||
//!
|
||||
//! The huskies server kills child processes in several distinct places:
|
||||
//! the watchdog terminates agents that have exceeded turn/budget limits,
|
||||
//! `stop_agent` terminates on operator request, `kill_all_children` runs at
|
||||
//! server shutdown, the merge-gate completion path kills stale `cargo`
|
||||
//! processes, and `script/local-release` tears down the gateway during a
|
||||
//! redeploy. Every one of these used to send a signal that the target was
|
||||
//! free to ignore (most commonly `portable_pty`'s `SIGHUP`), with no
|
||||
//! verification that the process actually exited. Agents and bots that
|
||||
//! ignore `SIGHUP` survived the "kill", which produced concurrent claude
|
||||
//! processes on the same story — directly the duplicate-spawn bug we hit on
|
||||
//! 2026-05-15.
|
||||
//!
|
||||
//! This module provides one trustworthy way to kill processes: SIGKILL with
|
||||
//! verification. Build a pid set with the helpers in this module (or your
|
||||
//! own), then hand it to [`sigkill_pids_and_verify`].
|
||||
//!
|
||||
//! All functions on this module are deliberately Unix-only — huskies runs in
|
||||
//! Linux containers and macOS dev hosts, both POSIX.
|
||||
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Maximum time we'll wait for SIGKILL'd processes to disappear before
/// declaring failure. SIGKILL is uncatchable, so the kernel normally
/// reaps within tens of milliseconds; anything past 2 s indicates the
/// process is wedged in uninterruptible IO (e.g. waiting on a frozen NFS
/// mount). Caller can decide whether to proceed despite survivors.
/// Used by the verification loop in [`sigkill_pids_and_verify`].
const KILL_VERIFY_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(2);

/// Polling interval while waiting for processes to disappear. 100 ms is
/// fine-grained enough that the typical few-ms reap latency is barely
/// observable, but coarse enough that we don't burn CPU spinning.
const KILL_VERIFY_POLL: std::time::Duration = std::time::Duration::from_millis(100);
|
||||
|
||||
/// SIGKILL every pid in `pids`, then poll until all of them are gone.
|
||||
///
|
||||
/// Returns `Ok(n)` where `n == pids.len()` when every pid is verified
|
||||
/// reaped within [`KILL_VERIFY_TIMEOUT`]. Returns `Err(survivors)` with the
|
||||
/// pids still alive after the timeout — extremely rare for SIGKILL but
|
||||
/// possible if a process is wedged in uninterruptible IO. An empty `pids`
|
||||
/// slice returns `Ok(0)` immediately.
|
||||
///
|
||||
/// **Why SIGKILL and not SIGTERM-first:** several huskies-internal targets
|
||||
/// (claude-code, the bot itself) either ignore the polite signals or take
|
||||
/// arbitrarily long to honour them. The watchdog only kills agents that
|
||||
/// have already misbehaved by definition (exceeded budget/turn limits), so
|
||||
/// there is no reason to give them a graceful-shutdown grace period.
|
||||
pub fn sigkill_pids_and_verify(pids: &[u32]) -> Result<usize, Vec<u32>> {
|
||||
if pids.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
for &pid in pids {
|
||||
// libc::kill returns -1 on failure (with errno). We deliberately
|
||||
// ignore the result: the process may already be gone (errno ESRCH),
|
||||
// and trying again wouldn't help. The verification loop below is
|
||||
// the source of truth for "did this work".
|
||||
unsafe { libc::kill(pid as i32, libc::SIGKILL) };
|
||||
}
|
||||
|
||||
let deadline = std::time::Instant::now() + KILL_VERIFY_TIMEOUT;
|
||||
while std::time::Instant::now() < deadline {
|
||||
if pids.iter().copied().all(|pid| !pid_is_alive(pid)) {
|
||||
return Ok(pids.len());
|
||||
}
|
||||
std::thread::sleep(KILL_VERIFY_POLL);
|
||||
}
|
||||
|
||||
let survivors: Vec<u32> = pids
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|&pid| pid_is_alive(pid))
|
||||
.collect();
|
||||
if survivors.is_empty() {
|
||||
Ok(pids.len())
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[process_kill] SIGKILL did not reap pids within {:?}: {survivors:?}. \
|
||||
They may be wedged in uninterruptible IO.",
|
||||
KILL_VERIFY_TIMEOUT
|
||||
);
|
||||
Err(survivors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return every pid whose command line matches `pattern` (passed to
/// `pgrep -f`). Empty when nothing matches or when `pgrep` is unavailable.
///
/// Useful for collecting processes by a path or argument substring — e.g.
/// "every process running in `<worktree>/`" or "every cargo invocation
/// against this `Cargo.toml`".
pub fn pids_matching(pattern: &str) -> Vec<u32> {
    // A missing or failing pgrep binary degrades to "found nothing" rather
    // than an error — callers treat the result as best-effort.
    match std::process::Command::new("pgrep")
        .args(["-f", pattern])
        .output()
    {
        Ok(output) => String::from_utf8_lossy(&output.stdout)
            .lines()
            .filter_map(|line| line.trim().parse::<u32>().ok())
            .collect(),
        Err(_) => Vec::new(),
    }
}
|
||||
|
||||
/// Return every descendant pid of `root_pid`, deepest-first, **excluding**
/// `root_pid` itself. Walks the parent→child relation via `pgrep -P`.
///
/// Deepest-first ordering lets callers signal leaves before their parents
/// when that matters; for SIGKILL it makes no difference.
pub fn descendant_pids(root_pid: u32) -> Vec<u32> {
    let mut collected = Vec::new();
    walk_descendants(root_pid, &mut collected);
    collected
}

/// Recursive helper: append `pid`'s subtree to `out` in post-order — each
/// child's own descendants first, then the child. `pid` itself is never
/// pushed, which gives `descendant_pids` its root-exclusion guarantee.
fn walk_descendants(pid: u32, out: &mut Vec<u32>) {
    // No pgrep (or exec failure) is treated as "no children".
    let output = match std::process::Command::new("pgrep")
        .args(["-P", &pid.to_string()])
        .output()
    {
        Ok(output) => output,
        Err(_) => return,
    };
    let children: Vec<u32> = String::from_utf8_lossy(&output.stdout)
        .lines()
        .filter_map(|line| line.trim().parse::<u32>().ok())
        .collect();
    for child in children {
        walk_descendants(child, out);
        out.push(child);
    }
}
|
||||
|
||||
/// Check whether `pid` currently exists. Implemented via `kill(pid, 0)` —
|
||||
/// no signal is sent, only existence is probed.
|
||||
fn pid_is_alive(pid: u32) -> bool {
|
||||
// signal 0: "is this process around?" Returns 0 if the process exists
|
||||
// and we have permission to signal it, -1 with errno otherwise.
|
||||
unsafe { libc::kill(pid as i32, 0) == 0 }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::{Child, Command, Stdio};
    use std::thread::JoinHandle;

    /// Spawn a sleeper for kill testing, and spawn a background reaper that
    /// calls `wait()` as soon as the child exits. Returns the pid plus the
    /// reaper join handle so the test can confirm reaping after the kill.
    ///
    /// The reaper is essential because the production code's verify loop
    /// uses `kill(pid, 0)` to test existence — which returns 0 for zombies.
    /// If no one reaps the test's sleeper, its pid stays occupied (as a
    /// zombie) and `sigkill_pids_and_verify` mistakenly reports survivors.
    /// In production the PTY blocking thread is always reaping on behalf of
    /// portable_pty, so this isn't a concern there.
    fn spawn_sleeper_with_reaper(secs: u64) -> (u32, JoinHandle<()>) {
        let child: Child = Command::new("sleep")
            .arg(secs.to_string())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("failed to spawn sleep");
        let pid = child.id();
        // Reap on a background thread so the pid is released promptly.
        let reaper = std::thread::spawn(move || {
            let mut c = child;
            let _ = c.wait();
        });
        (pid, reaper)
    }

    /// Empty slice: nothing to kill, must return `Ok(0)` without polling.
    #[test]
    fn sigkill_empty_slice_is_ok() {
        let result = sigkill_pids_and_verify(&[]);
        assert!(matches!(result, Ok(0)));
    }

    /// Happy path: a live process is killed and verified gone.
    #[test]
    fn sigkill_real_process_is_verified_gone() {
        let (pid, reaper) = spawn_sleeper_with_reaper(60);
        assert!(pid_is_alive(pid), "sleeper should be alive before kill");

        let result = sigkill_pids_and_verify(&[pid]);
        assert!(
            matches!(result, Ok(1)),
            "sigkill must verify the process is gone: {result:?}"
        );
        let _ = reaper.join();
        assert!(!pid_is_alive(pid), "sleeper must be dead after kill");
    }

    /// Killing a pid that already exited must still count as success.
    #[test]
    fn sigkill_already_dead_pid_is_ok() {
        let (pid, reaper) = spawn_sleeper_with_reaper(0);
        let _ = reaper.join();
        // Wait briefly for the kernel to recycle the pid.
        for _ in 0..20 {
            if !pid_is_alive(pid) {
                break;
            }
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
        // Now SIGKILL a pid that no longer exists. Result must still be Ok.
        let result = sigkill_pids_and_verify(&[pid]);
        assert!(
            result.is_ok(),
            "sigkill of already-dead pid must succeed: {result:?}"
        );
    }

    /// Batch kill: every pid in the slice is killed and verified gone.
    #[test]
    fn sigkill_multiple_real_processes() {
        let mut handles: Vec<(u32, JoinHandle<()>)> =
            (0..3).map(|_| spawn_sleeper_with_reaper(60)).collect();
        let pids: Vec<u32> = handles.iter().map(|(p, _)| *p).collect();
        for &pid in &pids {
            assert!(pid_is_alive(pid));
        }
        let result = sigkill_pids_and_verify(&pids);
        assert!(
            matches!(result, Ok(3)),
            "all 3 sleepers must die: {result:?}"
        );
        for (_, reaper) in handles.drain(..) {
            let _ = reaper.join();
        }
        for &pid in &pids {
            assert!(!pid_is_alive(pid), "pid {pid} survived sigkill");
        }
    }

    /// A process whose argv carries a unique marker is found by pattern.
    #[test]
    fn pids_matching_finds_a_running_process() {
        // pgrep -f matches the FULL command line, so the marker has to be
        // in argv somewhere. Putting it in a shell comment doesn't work —
        // sh strips it. Override argv[0] so the marker is durably visible.
        use std::os::unix::process::CommandExt;
        let marker = format!("kill-test-marker-{}-{}", std::process::id(), rand_u64());
        let argv0 = format!("test-marker-{marker}");
        let child: Child = Command::new("sleep")
            .arg0(argv0)
            .arg("60")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("spawn");
        let child_pid = child.id();
        let reaper = std::thread::spawn(move || {
            let mut c = child;
            let _ = c.wait();
        });

        // pgrep needs a moment to see the new process.
        std::thread::sleep(std::time::Duration::from_millis(100));

        let found = pids_matching(&marker);
        assert!(
            found.contains(&child_pid),
            "pids_matching should find pid {child_pid} for marker '{marker}'; got {found:?}"
        );

        // Cleanup so the test doesn't leak a sleeper.
        let _ = sigkill_pids_and_verify(&[child_pid]);
        let _ = reaper.join();
    }

    /// A pattern that matches nothing yields an empty vec, not an error.
    #[test]
    fn pids_matching_returns_empty_when_no_match() {
        let pattern = format!("nonexistent-pattern-{}-{}", std::process::id(), rand_u64());
        let found = pids_matching(&pattern);
        assert!(found.is_empty(), "expected empty result, got {found:?}");
    }

    /// Cheap unique-ish u64 for distinguishing test invocations without a
    /// dependency on a randomness crate.
    fn rand_u64() -> u64 {
        use std::time::{SystemTime, UNIX_EPOCH};
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_nanos() as u64)
            .unwrap_or(0)
    }

    /// descendant_pids never includes the root and tolerates either shell
    /// fork behavior (fork vs exec-replace).
    #[test]
    fn descendant_pids_of_real_process_tree() {
        // Build a parent sh that spawns a child sleep. The descendants of
        // the parent should include the sleep.
        let parent: Child = Command::new("sh")
            .args(["-c", "sleep 60"])
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("spawn parent");
        let parent_pid = parent.id();
        let reaper = std::thread::spawn(move || {
            let mut c = parent;
            let _ = c.wait();
        });

        // Let the shell get around to fork+execing its child.
        std::thread::sleep(std::time::Duration::from_millis(200));

        let descendants = descendant_pids(parent_pid);
        // On some shells `sh -c "sleep N"` exec-replaces sh with sleep, leaving
        // zero descendants. On others it forks. We don't care which; we only
        // care that the function doesn't panic and returns a sensible vec.
        assert!(
            descendants.iter().all(|&pid| pid != parent_pid),
            "descendant_pids must not include the root itself: {descendants:?}"
        );

        // Cleanup: kill the parent and any descendants.
        let mut all = descendants;
        all.push(parent_pid);
        let _ = sigkill_pids_and_verify(&all);
        let _ = reaper.join();
    }
}
|
||||
@@ -62,6 +62,9 @@ pub struct WorkItemContent {
|
||||
pub stage: crate::pipeline_state::Stage,
|
||||
pub name: String,
|
||||
pub agent: Option<crate::config::AgentName>,
|
||||
/// Origin of the work item (story 1088). `None` for items that pre-date
|
||||
/// the origin register; the web UI renders these as `"unknown"`.
|
||||
pub origin: Option<String>,
|
||||
}
|
||||
|
||||
/// A single entry in the project's configured agent roster.
|
||||
@@ -176,6 +179,9 @@ pub fn get_work_item_content(
|
||||
.map(|v| v.name().to_string())
|
||||
.unwrap_or_default();
|
||||
let crdt_agent = crdt_view.as_ref().and_then(|v| v.agent());
|
||||
let crdt_origin = crdt_view
|
||||
.as_ref()
|
||||
.and_then(|v| v.origin().map(str::to_string));
|
||||
|
||||
for (stage_dir, stage) in &stages {
|
||||
if let Some(content) = io::read_work_item_from_stage(&work_dir, stage_dir, &filename)? {
|
||||
@@ -184,6 +190,7 @@ pub fn get_work_item_content(
|
||||
stage: stage.clone(),
|
||||
name: crdt_name.clone(),
|
||||
agent: crdt_agent,
|
||||
origin: crdt_origin.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -201,6 +208,7 @@ pub fn get_work_item_content(
|
||||
stage,
|
||||
name: crdt_name,
|
||||
agent: crdt_agent,
|
||||
origin: crdt_origin,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@ pub enum EventAction {
|
||||
/// `true` if acceptance gates passed.
|
||||
success: bool,
|
||||
},
|
||||
/// Post a new-item-created notification.
|
||||
NewItemCreated,
|
||||
/// Log server-side only; do not post to chat (e.g. hard rate-limit blocks).
|
||||
LogOnly,
|
||||
/// Reload the project configuration.
|
||||
@@ -51,6 +53,7 @@ pub fn classify(event: &WatcherEvent) -> EventAction {
|
||||
WatcherEvent::AgentCompleted { success, .. } => {
|
||||
EventAction::AgentCompleted { success: *success }
|
||||
}
|
||||
WatcherEvent::NewItemCreated { .. } => EventAction::NewItemCreated,
|
||||
_ => EventAction::Skip,
|
||||
}
|
||||
}
|
||||
@@ -178,4 +181,14 @@ mod tests {
|
||||
EventAction::AgentCompleted { success: false }
|
||||
);
|
||||
}
|
||||
|
||||
/// A `NewItemCreated` watcher event maps to the `NewItemCreated` action.
#[test]
fn new_item_created_is_classified_correctly() {
    let event = WatcherEvent::NewItemCreated {
        item_id: "1075_refactor_split_stage".to_string(),
        item_type: "refactor".to_string(),
        name: "Split Stage enum".to_string(),
    };
    assert_eq!(classify(&event), EventAction::NewItemCreated);
}
|
||||
}
|
||||
|
||||
@@ -220,21 +220,48 @@ pub fn format_agent_completed_notification(
|
||||
(plain, html)
|
||||
}
|
||||
|
||||
/// Extract the first non-empty line from a merge failure reason, truncated to `max_len` chars.
|
||||
/// Format a new-work-item creation notification.
|
||||
///
|
||||
/// Used to produce a compact snippet for chat notifications.
|
||||
pub fn merge_failure_snippet(reason: &str, max_len: usize) -> String {
|
||||
let line = reason
|
||||
.lines()
|
||||
.find(|l| !l.trim().is_empty())
|
||||
.unwrap_or(reason);
|
||||
let mut chars = line.chars();
|
||||
let truncated: String = chars.by_ref().take(max_len).collect();
|
||||
if chars.next().is_some() {
|
||||
format!("{truncated}\u{2026}") // append …
|
||||
} else {
|
||||
truncated
|
||||
/// Returns `(plain_text, html)` suitable for `ChatTransport::send_message`.
|
||||
pub fn format_new_item_notification(
|
||||
item_id: &str,
|
||||
item_type: &str,
|
||||
name: &str,
|
||||
) -> (String, String) {
|
||||
let number = extract_item_number(item_id).unwrap_or(item_id);
|
||||
let emoji = match item_type {
|
||||
"bug" => "\u{1f41b}", // 🐛
|
||||
"refactor" => "\u{1f4dd}", // 📝
|
||||
"spike" => "\u{1f52c}", // 🔬
|
||||
_ => "\u{1f4d6}", // 📖 (story and unknown)
|
||||
};
|
||||
let plain = format!("{emoji} New {item_type} #{number} \u{2014} {name}");
|
||||
let html = format!("{emoji} New {item_type} <strong>#{number}</strong> \u{2014} {name}");
|
||||
(plain, html)
|
||||
}
|
||||
|
||||
/// Maximum number of trailing gate-output lines included in a merge-failure
/// chat notification.
///
/// Gate output can be hundreds of lines; only the tail (where errors appear)
/// is useful at a glance. Full output remains available via `get_merge_status`
/// or the web UI — this limit is chat-display-only. Passed as `max_lines` to
/// `truncate_gate_output` by the notification listener.
pub const MERGE_FAILURE_TAIL_LINES: usize = 30;
|
||||
|
||||
/// Truncate `gate_output` to its last `max_lines` lines for chat notifications.
///
/// When the output has more than `max_lines` lines, the result is the tail
/// prefixed by a marker line `[...output truncated, last N lines shown...]`
/// so readers know output was cut. Output that already fits within the limit
/// is returned unchanged (no marker added).
pub fn truncate_gate_output(gate_output: &str, max_lines: usize) -> String {
    let all: Vec<&str> = gate_output.lines().collect();
    match all.len().checked_sub(max_lines) {
        // More lines than the limit: keep only the tail, flagged by a marker.
        Some(dropped) if dropped > 0 => {
            let marker = format!("[...output truncated, last {max_lines} lines shown...]");
            format!("{marker}\n{}", all[dropped..].join("\n"))
        }
        // At or under the limit: pass through untouched.
        _ => gate_output.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -568,6 +595,64 @@ mod tests {
|
||||
assert_eq!(plain, "\u{1F916} #42 \u{2014} coder-1 started");
|
||||
}
|
||||
|
||||
// ── truncate_gate_output ──────────────────────────────────────────────────
|
||||
|
||||
/// Output within the limit passes through byte-for-byte, no marker added.
#[test]
fn truncate_gate_output_short_output_returned_unchanged() {
    let output = "line1\nline2\nline3";
    assert_eq!(truncate_gate_output(output, 30), output);
}
|
||||
|
||||
/// Exactly `max_lines` lines is the boundary: still returned unchanged.
#[test]
fn truncate_gate_output_exact_limit_returned_unchanged() {
    let lines: Vec<String> = (1..=30).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    assert_eq!(truncate_gate_output(&output, 30), output);
}
|
||||
|
||||
/// Over-limit output starts with the truncation marker line.
#[test]
fn truncate_gate_output_over_limit_prepends_marker() {
    let lines: Vec<String> = (1..=35).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 30);
    assert!(
        result.starts_with("[...output truncated, last 30 lines shown...]"),
        "must start with truncation marker; got: {result}"
    );
}
|
||||
|
||||
/// Over-limit output keeps exactly the tail window (lines 6..=35 here).
#[test]
fn truncate_gate_output_over_limit_contains_tail_lines() {
    let lines: Vec<String> = (1..=35).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 30);
    // Last 30 lines are line6..line35.
    assert!(result.contains("line35"), "must contain last line");
    assert!(result.contains("line6"), "must contain first tail line");
    assert!(!result.contains("line5"), "must not contain dropped line");
}
|
||||
|
||||
/// Empty input is the trivial under-limit case: returned unchanged.
#[test]
fn truncate_gate_output_empty_input_returned_unchanged() {
    assert_eq!(truncate_gate_output("", 30), "");
}
|
||||
|
||||
/// A single line is well under the limit: returned unchanged.
#[test]
fn truncate_gate_output_single_line_returned_unchanged() {
    assert_eq!(truncate_gate_output("only one line", 30), "only one line");
}
|
||||
|
||||
/// The marker line reports the `max_lines` value actually used (5 here).
#[test]
fn truncate_gate_output_marker_contains_configured_limit() {
    let lines: Vec<String> = (1..=10).map(|i| format!("x{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 5);
    assert!(
        result.contains("last 5 lines shown"),
        "marker must state configured limit; got: {result}"
    );
}
|
||||
|
||||
// ── format_agent_completed_notification ───────────────────────────────────
|
||||
|
||||
#[test]
|
||||
@@ -599,6 +684,67 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── format_new_item_notification ──────────────────────────────────────────
|
||||
|
||||
/// Story items get the 📖 emoji and the numeric id prefix.
#[test]
fn format_new_item_notification_story() {
    let (plain, html) =
        format_new_item_notification("42_story_my_feature", "story", "My Feature");
    assert_eq!(plain, "\u{1f4d6} New story #42 \u{2014} My Feature");
    assert_eq!(
        html,
        "\u{1f4d6} New story <strong>#42</strong> \u{2014} My Feature"
    );
}
|
||||
|
||||
/// Bug items get the 🐛 emoji.
#[test]
fn format_new_item_notification_bug() {
    let (plain, html) =
        format_new_item_notification("99_bug_login_crash", "bug", "Login Crash");
    assert_eq!(plain, "\u{1f41b} New bug #99 \u{2014} Login Crash");
    assert_eq!(
        html,
        "\u{1f41b} New bug <strong>#99</strong> \u{2014} Login Crash"
    );
}
|
||||
|
||||
/// Refactor items get the 📝 emoji; multi-digit numbers are extracted.
#[test]
fn format_new_item_notification_refactor() {
    let (plain, html) = format_new_item_notification(
        "1075_refactor_split_stage",
        "refactor",
        "Split Stage enum into Pipeline + Status",
    );
    assert_eq!(
        plain,
        "\u{1f4dd} New refactor #1075 \u{2014} Split Stage enum into Pipeline + Status"
    );
    assert_eq!(
        html,
        "\u{1f4dd} New refactor <strong>#1075</strong> \u{2014} Split Stage enum into Pipeline + Status"
    );
}
|
||||
|
||||
/// Spike items get the 🔬 emoji.
#[test]
fn format_new_item_notification_spike() {
    let (plain, html) =
        format_new_item_notification("7_spike_encoder_comparison", "spike", "Compare Encoders");
    assert_eq!(plain, "\u{1f52c} New spike #7 \u{2014} Compare Encoders");
    assert_eq!(
        html,
        "\u{1f52c} New spike <strong>#7</strong> \u{2014} Compare Encoders"
    );
}
|
||||
|
||||
/// When no leading number can be extracted, the full id is shown verbatim.
#[test]
fn format_new_item_notification_non_numeric_id_uses_full_id() {
    let (plain, _html) = format_new_item_notification("abc_story_thing", "story", "Some Story");
    assert_eq!(
        plain,
        "\u{1f4d6} New story #abc_story_thing \u{2014} Some Story"
    );
}
|
||||
|
||||
#[test]
|
||||
fn format_agent_completed_notification_empty_name_falls_back_to_number() {
|
||||
let (plain, _html) =
|
||||
|
||||
@@ -14,9 +14,10 @@ use tokio::sync::broadcast;
|
||||
use super::super::events::classify;
|
||||
use super::super::filter::{AGENT_EVENT_DEBOUNCE, should_send_rate_limit};
|
||||
use super::super::format::{
|
||||
format_agent_completed_notification, format_agent_started_notification,
|
||||
format_blocked_notification, format_error_notification, format_oauth_account_swapped,
|
||||
format_oauth_accounts_exhausted, format_rate_limit_notification, merge_failure_snippet,
|
||||
MERGE_FAILURE_TAIL_LINES, format_agent_completed_notification,
|
||||
format_agent_started_notification, format_blocked_notification, format_error_notification,
|
||||
format_new_item_notification, format_oauth_account_swapped, format_oauth_accounts_exhausted,
|
||||
format_rate_limit_notification, truncate_gate_output,
|
||||
};
|
||||
use super::super::route::rooms_for_notification;
|
||||
use super::{find_story_name_any_stage, read_story_name};
|
||||
@@ -119,9 +120,7 @@ pub fn spawn_notification_listener(
|
||||
continue;
|
||||
};
|
||||
let story_name = read_story_name(&project_root, "4_merge", story_id);
|
||||
// AC3: include only the first non-empty line of the failure,
|
||||
// truncated to ~120 chars.
|
||||
let snippet = merge_failure_snippet(reason, 120);
|
||||
let snippet = truncate_gate_output(reason, MERGE_FAILURE_TAIL_LINES);
|
||||
let (plain, html) = format_error_notification(story_id, &story_name, &snippet);
|
||||
slog!("[bot] Sending error notification: {plain}");
|
||||
for room_id in &rooms_for_notification(&get_room_ids) {
|
||||
@@ -276,6 +275,26 @@ pub fn spawn_notification_listener(
|
||||
pending_agent_events.insert(key, (plain, html));
|
||||
agent_flush_deadline = Some(tokio::time::Instant::now() + AGENT_EVENT_DEBOUNCE);
|
||||
}
|
||||
EventAction::NewItemCreated => {
|
||||
if !config.status_push_enabled {
|
||||
continue;
|
||||
}
|
||||
let WatcherEvent::NewItemCreated {
|
||||
ref item_id,
|
||||
ref item_type,
|
||||
ref name,
|
||||
} = event
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let (plain, html) = format_new_item_notification(item_id, item_type, name);
|
||||
slog!("[bot] Sending new-item notification: {plain}");
|
||||
for room_id in &rooms_for_notification(&get_room_ids) {
|
||||
if let Err(e) = transport.send_message(room_id, &plain, &html).await {
|
||||
slog!("[bot] Failed to send new-item notification to {room_id}: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
EventAction::LogOnly => {
|
||||
// Hard-block: log server-side for debugging; do NOT post to chat.
|
||||
// Hard-block auto-resume is normal operation — the status command
|
||||
|
||||
@@ -5,6 +5,89 @@ use super::spawn_notification_listener;
|
||||
use crate::io::watcher::WatcherEvent;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
// ── spawn_notification_listener: MergeFailure ────────────────────────────────
|
||||
|
||||
/// Long gate output is truncated to the tail and includes the marker line.
|
||||
#[tokio::test]
|
||||
async fn merge_failure_long_output_is_truncated_to_tail() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
|
||||
let (transport, calls) = MockTransport::new();
|
||||
|
||||
spawn_notification_listener(
|
||||
transport,
|
||||
|| vec!["!room1:example.org".to_string()],
|
||||
watcher_rx,
|
||||
tmp.path().to_path_buf(),
|
||||
);
|
||||
|
||||
// Build a reason with 50 lines (more than MERGE_FAILURE_TAIL_LINES = 30).
|
||||
let long_reason: String = (1..=50).map(|i| format!("gate-line-{i}\n")).collect();
|
||||
|
||||
watcher_tx
|
||||
.send(WatcherEvent::MergeFailure {
|
||||
story_id: "1077_story_trunc".to_string(),
|
||||
reason: long_reason,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
let calls = calls.lock().unwrap();
|
||||
assert_eq!(calls.len(), 1, "Expected exactly one notification");
|
||||
let (_, plain, _) = &calls[0];
|
||||
assert!(
|
||||
plain.contains("truncated"),
|
||||
"notification must contain the truncation marker; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
plain.contains("gate-line-50"),
|
||||
"notification must contain the last line; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
!plain.contains("gate-line-1\n"),
|
||||
"notification must not contain the first (dropped) line; got: {plain}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Short gate output (within limit) passes through unchanged, no marker added.
|
||||
#[tokio::test]
|
||||
async fn merge_failure_short_output_passes_through_unchanged() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
|
||||
let (transport, calls) = MockTransport::new();
|
||||
|
||||
spawn_notification_listener(
|
||||
transport,
|
||||
|| vec!["!room1:example.org".to_string()],
|
||||
watcher_rx,
|
||||
tmp.path().to_path_buf(),
|
||||
);
|
||||
|
||||
let short_reason = "error: type mismatch on line 42\nexpected i32, found &str".to_string();
|
||||
|
||||
watcher_tx
|
||||
.send(WatcherEvent::MergeFailure {
|
||||
story_id: "1077_story_short".to_string(),
|
||||
reason: short_reason.clone(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
let calls = calls.lock().unwrap();
|
||||
assert_eq!(calls.len(), 1, "Expected exactly one notification");
|
||||
let (_, plain, _) = &calls[0];
|
||||
assert!(
|
||||
!plain.contains("truncated"),
|
||||
"short output must not have a truncation marker; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
plain.contains("type mismatch"),
|
||||
"short output must be included verbatim; got: {plain}"
|
||||
);
|
||||
}
|
||||
|
||||
// ── spawn_notification_listener: RateLimitWarning ────────────────────────────
|
||||
|
||||
/// AC2 + AC3: when a RateLimitWarning event arrives, send_message is called
|
||||
|
||||
@@ -191,6 +191,7 @@ mod tests {
|
||||
watcher: crate::config::WatcherConfig {
|
||||
sweep_interval_secs: 30,
|
||||
done_retention_secs: 7200,
|
||||
reconcile_interval_secs: 30,
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -37,6 +37,8 @@ pub fn watcher_event_to_response(e: WatcherEvent) -> Option<WsResponse> {
|
||||
// Agent lifecycle events are forwarded to chat transports only; no WebSocket message.
|
||||
WatcherEvent::AgentStarted { .. } => None,
|
||||
WatcherEvent::AgentCompleted { .. } => None,
|
||||
// Creation notifications are forwarded to chat transports only; no WebSocket message.
|
||||
WatcherEvent::NewItemCreated { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -210,6 +212,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Backlog,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -224,6 +228,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Coding,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -240,6 +246,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Done,
|
||||
status: crate::pipeline_state::Status::Done,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -301,6 +309,8 @@ mod tests {
|
||||
model: Some(crate::agents::AgentModel::Sonnet),
|
||||
status: crate::agents::AgentStatus::Running,
|
||||
}),
|
||||
pipeline: crate::pipeline_state::Pipeline::Coding,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
|
||||
@@ -205,6 +205,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Backlog,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
|
||||
@@ -217,7 +217,13 @@ async fn migrate_json_stores_to_sqlite(huskies_dir: &Path) {
|
||||
}
|
||||
|
||||
/// Set up the server log file, node identity keypair, pipeline DB, and CRDT state.
|
||||
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
||||
///
|
||||
/// When `is_agent` is `true` the pipeline database is opened at an isolated
|
||||
/// temporary path (or at `HUSKIES_DB_PATH` if that env-var is set) so that the
|
||||
/// headless build agent never touches the production `.huskies/pipeline.db`.
|
||||
/// This prevents feature-branch migrations from being applied to the shared
|
||||
/// database and bricking the next server restart.
|
||||
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path, is_agent: bool) {
|
||||
// Enable persistent server log file now that the project root is known.
|
||||
if let Some(ref root) = *app_state.project_root.lock().unwrap() {
|
||||
let log_dir = root.join(".huskies").join("logs");
|
||||
@@ -242,20 +248,91 @@ pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise the SQLite pipeline shadow-write database and CRDT state layer.
|
||||
// Clone the path out before the await so we don't hold the MutexGuard across
|
||||
// an await point.
|
||||
let pipeline_db_path = app_state
|
||||
// Resolve the pipeline DB path.
|
||||
//
|
||||
// Priority order:
|
||||
// 1. HUSKIES_DB_PATH env var (operator override, any mode)
|
||||
// 2. Agent mode: process-local temp file so the production DB is never touched
|
||||
// 3. Default: {project_root}/.huskies/pipeline.db
|
||||
let pipeline_db_path: Option<PathBuf> = if let Ok(env_path) = std::env::var("HUSKIES_DB_PATH") {
|
||||
let p = PathBuf::from(&env_path);
|
||||
crate::slog!("[db] HUSKIES_DB_PATH override: {}", p.display());
|
||||
Some(p)
|
||||
} else if is_agent {
|
||||
// Headless agent: use an isolated temp DB so that any migrations compiled
|
||||
// into this binary (e.g. from a feature branch) are never applied to the
|
||||
// production database. The temp file is process-unique and harmless to
|
||||
// leave behind after the agent exits.
|
||||
let pid = std::process::id();
|
||||
let temp_path = std::env::temp_dir().join(format!("huskies-agent-{pid}.db"));
|
||||
crate::slog!(
|
||||
"[db] Agent mode: using isolated DB at {} (not touching production pipeline.db)",
|
||||
temp_path.display()
|
||||
);
|
||||
Some(temp_path)
|
||||
} else {
|
||||
// Server mode: use the project-local production database.
|
||||
app_state
|
||||
.project_root
|
||||
.lock()
|
||||
.unwrap()
|
||||
.as_ref()
|
||||
.map(|root| root.join(".huskies").join("pipeline.db"));
|
||||
.map(|root| root.join(".huskies").join("pipeline.db"))
|
||||
};
|
||||
|
||||
if let Some(ref db_path) = pipeline_db_path {
|
||||
if let Err(e) = db::init(db_path).await {
|
||||
crate::slog!("[db] Failed to initialise pipeline.db: {e}");
|
||||
} else {
|
||||
// ── Migration drift self-check (server mode only) ─────────────────────
|
||||
//
|
||||
// In server mode, detect whether the live database contains migrations
|
||||
// that were applied by a newer binary (e.g. a feature-branch agent that
|
||||
// ran before the feature was merged). If so, log each unknown migration
|
||||
// and exit with a clear actionable message. This is the root cause of
|
||||
// the 2026-05-14 21:07 production outage where the server came up but
|
||||
// the CRDT never initialised.
|
||||
if !is_agent && let Some(pool) = db::get_shared_pool() {
|
||||
let drift = db::check_schema_drift(pool).await;
|
||||
if !drift.is_empty() {
|
||||
for m in &drift {
|
||||
crate::slog!(
|
||||
"[db] UNKNOWN migration {} ('{}') applied at {} \
|
||||
is not in the compiled-in set",
|
||||
m.version,
|
||||
m.description,
|
||||
m.installed_on,
|
||||
);
|
||||
}
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
"error: pipeline.db contains {} migration(s) that are not \
|
||||
recognised by this binary:",
|
||||
drift.len()
|
||||
);
|
||||
for m in &drift {
|
||||
eprintln!(
|
||||
" \u{2022} migration {} ('{}') applied at {}",
|
||||
m.version, m.description, m.installed_on
|
||||
);
|
||||
}
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
"This means the database was previously opened by a newer \
|
||||
version of huskies."
|
||||
);
|
||||
eprintln!(
|
||||
"To fix: rebuild huskies from the latest source (the branch \
|
||||
that added these migrations) and restart."
|
||||
);
|
||||
eprintln!(
|
||||
"Do NOT start the old binary against this database — it will \
|
||||
behave incorrectly."
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// One-shot migration: move any existing JSON store files into SQLite.
|
||||
let huskies_dir = db_path.parent().unwrap_or(db_path);
|
||||
migrate_json_stores_to_sqlite(huskies_dir).await;
|
||||
|
||||
+240
-17
@@ -156,6 +156,17 @@ pub(crate) fn spawn_tick_loop(
|
||||
{scheduled_count} scheduled timer(s)"
|
||||
);
|
||||
|
||||
let (reconcile_interval, done_retention) = root
|
||||
.as_ref()
|
||||
.and_then(|r| config::ProjectConfig::load(r).ok())
|
||||
.map(|c| {
|
||||
(
|
||||
c.watcher.reconcile_interval_secs,
|
||||
std::time::Duration::from_secs(c.watcher.done_retention_secs),
|
||||
)
|
||||
})
|
||||
.unwrap_or((30, std::time::Duration::from_secs(4 * 3600)));
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_secs(1));
|
||||
let mut tick_count: u64 = 0;
|
||||
@@ -190,6 +201,15 @@ pub(crate) fn spawn_tick_loop(
|
||||
}
|
||||
agents.reap_stale_merge_jobs();
|
||||
}
|
||||
|
||||
// Periodic reconciler: converge subscriber side effects so that
|
||||
// Lagged broadcast events never leave state permanently diverged.
|
||||
if tick_count.is_multiple_of(reconcile_interval)
|
||||
&& let Some(ref r) = root
|
||||
{
|
||||
crate::slog!("[reconcile] Running periodic reconcile pass.");
|
||||
run_reconcile_pass(r, &agents, done_retention).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -450,16 +470,50 @@ async fn execute_prompt_action(
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the startup reconstruction task: replay the current pipeline state
|
||||
/// through the [`TransitionFired`][crate::pipeline_state::TransitionFired]
|
||||
/// broadcast channel so that all existing subscribers (worktree lifecycle,
|
||||
/// merge-failure auto-spawn, auto-assign) react identically to a live
|
||||
/// transition, then trigger a full auto-assign pass.
|
||||
/// Run one full reconcile pass: call each subscriber's idempotent `reconcile()`
|
||||
/// entry point so that side effects converge regardless of whether the
|
||||
/// broadcast channel lagged during startup or at runtime.
|
||||
///
|
||||
/// Replaces the legacy scan-based `reconcile_on_startup` approach. The CRDT
|
||||
/// is the durable source of truth; replaying it as synthetic self-transitions
|
||||
/// is cheaper, simpler, and idempotent: a second replay produces another burst
|
||||
/// of events that subscribers safely ignore for already-assigned stories.
|
||||
/// Safe to call any number of times — every reconcile function is idempotent.
|
||||
pub(crate) async fn run_reconcile_pass(
|
||||
root: &std::path::Path,
|
||||
agents: &Arc<AgentPool>,
|
||||
done_retention: std::time::Duration,
|
||||
) {
|
||||
// Content-GC: purge content-store entries for terminal/tombstoned stories.
|
||||
crate::db::gc::sweep_zombie_content_on_startup();
|
||||
|
||||
// Worktree create: ensure every Coding story has a worktree.
|
||||
crate::agents::pool::worktree_lifecycle::reconcile_worktree_create(root, agents.port()).await;
|
||||
|
||||
// Worktree cleanup: remove worktrees for terminal stories.
|
||||
crate::agents::pool::worktree_lifecycle::reconcile_worktree_cleanup(root).await;
|
||||
|
||||
// Done-archive: archive Done stories whose retention period has elapsed.
|
||||
crate::io::watcher::sweep_done_to_archived(done_retention);
|
||||
|
||||
// Cost-rollup: re-populate the in-memory register from disk.
|
||||
crate::agents::pool::cost_rollup_subscriber::reconcile_cost_rollup(root);
|
||||
|
||||
// Merge-failure: spawn mergemaster for ConflictDetected stories with no active agent.
|
||||
crate::agents::pool::auto_assign::reconcile_merge_failure(agents, root).await;
|
||||
|
||||
// Merge-block: no-op (in-memory counter cannot be reconstructed from CRDT).
|
||||
crate::agents::pool::auto_assign::reconcile_merge_failure_block();
|
||||
|
||||
// Audit-log: no-op (historical replay would produce misleading entries).
|
||||
crate::pipeline_state::reconcile_audit_log();
|
||||
}
|
||||
|
||||
/// Spawn the startup reconciliation task: run a full reconcile pass so that all
|
||||
/// side-effect subscribers converge on the current CRDT state without flooding
|
||||
/// the broadcast channel, then trigger a full auto-assign pass.
|
||||
///
|
||||
/// Replaces the former `replay_current_pipeline_state()` approach, which
|
||||
/// sent one synthetic `TransitionFired` per CRDT item through the broadcast
|
||||
/// channel. With >256 items that caused `Subscriber lagged` warnings and
|
||||
/// left subscribers with diverged state. Direct reconcile calls bypass the
|
||||
/// channel entirely and scale to any CRDT size.
|
||||
pub(crate) fn spawn_startup_reconciliation(
|
||||
startup_root: Option<PathBuf>,
|
||||
startup_agents: Arc<AgentPool>,
|
||||
@@ -467,20 +521,189 @@ pub(crate) fn spawn_startup_reconciliation(
|
||||
) {
|
||||
if let Some(root) = startup_root {
|
||||
tokio::spawn(async move {
|
||||
// Purge content-store entries for stories that reached terminal
|
||||
// stages in a previous session (before the GC subscriber was active).
|
||||
crate::db::gc::sweep_zombie_content_on_startup();
|
||||
crate::slog!(
|
||||
"[startup] Replaying current pipeline state through TransitionFired channel."
|
||||
);
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
let done_retention = crate::config::ProjectConfig::load(&root)
|
||||
.map(|c| std::time::Duration::from_secs(c.watcher.done_retention_secs))
|
||||
.unwrap_or_else(|_| std::time::Duration::from_secs(4 * 3600));
|
||||
crate::slog!("[startup] Running per-subscriber reconcile pass.");
|
||||
run_reconcile_pass(&root, &startup_agents, done_retention).await;
|
||||
crate::slog!("[auto-assign] Scanning pipeline stages for unassigned work.");
|
||||
startup_agents.auto_assign_available_work(&root).await;
|
||||
let _ = startup_reconciliation_tx.send(ReconciliationEvent {
|
||||
story_id: String::new(),
|
||||
status: "done".to_string(),
|
||||
message: "Startup event replay complete.".to_string(),
|
||||
message: "Startup reconcile pass complete.".to_string(),
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::db::{
|
||||
ContentKey, ItemMeta, ensure_content_store, write_content, write_item_with_content,
|
||||
};
|
||||
use crate::io::watcher::WatcherEvent;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
fn make_pool() -> Arc<AgentPool> {
|
||||
let (tx, _) = broadcast::channel::<WatcherEvent>(16);
|
||||
Arc::new(AgentPool::new(3099, tx))
|
||||
}
|
||||
|
||||
fn setup_huskies_dir(tmp: &tempfile::TempDir) -> std::path::PathBuf {
|
||||
let root = tmp.path().to_path_buf();
|
||||
std::fs::create_dir_all(root.join(".huskies")).unwrap();
|
||||
std::fs::write(root.join(".huskies/project.toml"), "").unwrap();
|
||||
root
|
||||
}
|
||||
|
||||
/// AC4 + AC6: seeding >256 CRDT items and running the reconcile pass must not
|
||||
/// produce any "Subscriber lagged" warnings (structural guarantee — the new
|
||||
/// path never broadcasts through the channel) and must purge zombie content
|
||||
/// for all terminal stories after one reconcile tick.
|
||||
///
|
||||
/// Distribution: 300 Backlog + 200 Coding + 200 Abandoned (terminal) + 300 QA
|
||||
/// = 1000 items. Each of the 200 Abandoned stories gets a content-store entry
|
||||
/// seeded before the reconcile so we can assert it is cleaned up.
|
||||
#[tokio::test]
|
||||
async fn reconcile_pass_scales_to_1000_items_without_lagged_divergence() {
|
||||
crate::crdt_state::init_for_test();
|
||||
ensure_content_store();
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = setup_huskies_dir(&tmp);
|
||||
let pool = make_pool();
|
||||
|
||||
// ── Seed 1000 items across several stages ──────────────────────────
|
||||
for i in 0..300u32 {
|
||||
let id = format!("1066_backlog_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"1_backlog",
|
||||
"---\nname: Backlog\n---\n",
|
||||
ItemMeta::named("Backlog"),
|
||||
);
|
||||
}
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_coding_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"2_current",
|
||||
"---\nname: Coding\n---\n",
|
||||
ItemMeta::named("Coding"),
|
||||
);
|
||||
}
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_abandoned_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"2_current",
|
||||
"---\nname: Abandoned\n---\n",
|
||||
ItemMeta::named("Abandoned"),
|
||||
);
|
||||
// Move to terminal stage (Abandoned).
|
||||
crate::agents::lifecycle::abandon_story(&id).expect("abandon must succeed");
|
||||
// Seed a content-store entry to verify GC cleans it up.
|
||||
write_content(ContentKey::Story(&id), "zombie content");
|
||||
}
|
||||
for i in 0..300u32 {
|
||||
let id = format!("1066_qa_{i:04}");
|
||||
write_item_with_content(&id, "3_qa", "---\nname: QA\n---\n", ItemMeta::named("QA"));
|
||||
}
|
||||
|
||||
// ── Subscribe BEFORE the reconcile to catch any Lagged events ──────
|
||||
let mut transition_rx = crate::pipeline_state::subscribe_transitions();
|
||||
|
||||
// ── Run one reconcile pass ─────────────────────────────────────────
|
||||
// Use zero retention so any Done items (none here, but defensive) archive immediately.
|
||||
run_reconcile_pass(&root, &pool, std::time::Duration::ZERO).await;
|
||||
|
||||
// ── Drain the transition channel; must contain zero Lagged events ──
|
||||
// The reconcile path never broadcasts through TRANSITION_TX, so any
|
||||
// events here are from the abandon_story calls above (all pre-reconcile).
|
||||
let mut lagged_count = 0u64;
|
||||
loop {
|
||||
match transition_rx.try_recv() {
|
||||
Ok(_) => {}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(n)) => {
|
||||
lagged_count += n;
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Empty)
|
||||
| Err(tokio::sync::broadcast::error::TryRecvError::Closed) => break,
|
||||
}
|
||||
}
|
||||
|
||||
// The reconcile pass itself must not have sent anything through the channel.
|
||||
// (abandon_story above may have sent some events, but those are pre-reconcile
|
||||
// lifecycle transitions, not the reconcile itself.)
|
||||
assert_eq!(
|
||||
lagged_count, 0,
|
||||
"run_reconcile_pass must not broadcast through the transition channel (no Lagged)"
|
||||
);
|
||||
|
||||
// ── Assert: zombie content purged for all 200 Abandoned stories ────
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_abandoned_{i:04}");
|
||||
assert!(
|
||||
crate::db::read_content(ContentKey::Story(&id)).is_none(),
|
||||
"zombie content must be purged for abandoned story {id}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// AC4 regression: the subscriber channel (capacity 256) must not lag when
|
||||
/// 1000 items are seeded — the reconcile path bypasses the channel entirely.
|
||||
#[tokio::test]
|
||||
async fn reconcile_never_floods_broadcast_channel() {
|
||||
crate::crdt_state::init_for_test();
|
||||
ensure_content_store();
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = setup_huskies_dir(&tmp);
|
||||
let pool = make_pool();
|
||||
|
||||
// Seed 1000 Backlog items (no lifecycle transitions — clean slate).
|
||||
for i in 0..1000u32 {
|
||||
let id = format!("1066_flood_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"1_backlog",
|
||||
"---\nname: Flood\n---\n",
|
||||
ItemMeta::named("Flood"),
|
||||
);
|
||||
}
|
||||
|
||||
// Subscribe and drain pre-existing channel noise. Note: `TRANSITION_TX`
|
||||
// is a single process-global broadcast channel shared by every test in
|
||||
// this binary, so other tests running on parallel threads may write to
|
||||
// it during our window. We can't assert `msg_count == 0` — that's
|
||||
// racy by construction. The real "never floods" invariant is captured
|
||||
// by the Lagged check: 1000 seeded items must not overflow the
|
||||
// 256-slot channel, which is only possible if the reconcile path
|
||||
// bypasses the broadcast (which is what AC4 requires).
|
||||
let mut rx = crate::pipeline_state::subscribe_transitions();
|
||||
while let Ok(_) | Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) =
|
||||
rx.try_recv()
|
||||
{}
|
||||
|
||||
run_reconcile_pass(&root, &pool, std::time::Duration::ZERO).await;
|
||||
|
||||
let mut lagged = false;
|
||||
loop {
|
||||
match rx.try_recv() {
|
||||
Ok(_) => {}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => {
|
||||
lagged = true;
|
||||
break;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
!lagged,
|
||||
"run_reconcile_pass must never cause Lagged on the broadcast channel"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,26 @@ use super::{WorktreeInfo, worktree_path, write_mcp_json};
|
||||
/// - Creates the worktree at `{project_root}/.huskies/worktrees/{story_id}`
|
||||
/// on branch `feature/story-{story_id}`.
|
||||
/// - Writes `.mcp.json` in the worktree pointing to the MCP server at `port`.
|
||||
/// - Runs setup commands from the config for each component.
|
||||
/// - Runs setup commands from the config for each component **only on fresh
|
||||
/// creation** — see below.
|
||||
/// - If the worktree/branch already exists, reuses rather than errors.
|
||||
///
|
||||
/// **Idempotency on reuse:** when `wt_path` already exists, this function does
|
||||
/// **not** re-run [`run_setup_commands`]. Setup commands typically include
|
||||
/// destructive operations like `npm ci` (`rm -rf node_modules` then reinstall)
|
||||
/// that, if run concurrently with another reuse from a different caller, leave
|
||||
/// `node_modules` in a half-populated state (broken `.bin/*` symlinks pointing
|
||||
/// at empty package directories). This used to be rare and tolerable, but
|
||||
/// after story 1066 added a 30-second periodic reconciler that calls
|
||||
/// `reconcile_worktree_create` → `create_worktree`, every Coding story got a
|
||||
/// destructive `npm ci` every 30s — racing the merge-gate's own frontend
|
||||
/// build and producing the `sh: 1: tsc: not found` failure that bricked
|
||||
/// story 1086 retries on 2026-05-15.
|
||||
///
|
||||
/// The reuse path now matches the documented contract of
|
||||
/// `reconcile_worktree_create`: "no-op for stories whose worktree already
|
||||
/// exists." If a worktree is in a bad state and needs re-setup, the caller
|
||||
/// must explicitly delete it and call `create_worktree` again.
|
||||
pub async fn create_worktree(
|
||||
project_root: &Path,
|
||||
story_id: &str,
|
||||
@@ -30,14 +48,15 @@ pub async fn create_worktree(
|
||||
.unwrap_or_else(|| detect_base_branch(project_root));
|
||||
let root = project_root.to_path_buf();
|
||||
|
||||
// Already exists — reuse (ensure sparse checkout is configured)
|
||||
// Already exists — reuse without re-running destructive setup commands.
|
||||
// Sparse checkout is reconfigured (cheap, idempotent) and `.mcp.json` is
|
||||
// rewritten in case the server port changed across restarts.
|
||||
if wt_path.exists() {
|
||||
let wt_clone = wt_path.clone();
|
||||
tokio::task::spawn_blocking(move || configure_sparse_checkout(&wt_clone))
|
||||
.await
|
||||
.map_err(|e| format!("spawn_blocking: {e}"))??;
|
||||
write_mcp_json(&wt_path, port)?;
|
||||
run_setup_commands(&wt_path, config).await;
|
||||
return Ok(WorktreeInfo {
|
||||
path: wt_path,
|
||||
branch,
|
||||
@@ -374,32 +393,80 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_worktree_reuse_succeeds_despite_setup_failure() {
|
||||
async fn create_worktree_reuse_does_not_rerun_setup_commands() {
|
||||
// Regression for the 2026-05-15 1086 outage: the reuse path used to
|
||||
// re-run setup commands (including destructive `npm ci`). Combined
|
||||
// with story 1066's 30-second periodic reconciler, this fired
|
||||
// `npm ci` against every Coding story every 30s and caused
|
||||
// `tsc: not found` gate failures. The reuse path must now be a
|
||||
// no-op for setup commands.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let project_root = tmp.path().join("my-project");
|
||||
fs::create_dir_all(&project_root).unwrap();
|
||||
init_git_repo(&project_root);
|
||||
|
||||
// First creation — no setup commands, should succeed
|
||||
create_worktree(&project_root, "173_reuse_fail", &empty_config(), 3001)
|
||||
create_worktree(&project_root, "173_reuse_no_setup", &empty_config(), 3001)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second call — worktree exists, setup commands fail, must still succeed
|
||||
// Second call — worktree exists. Setup commands are configured to
|
||||
// FAIL (`exit 1`); if the reuse path were still running them, the
|
||||
// failure log would surface — but more importantly, this test
|
||||
// documents that the reuse path is expected to NEVER reach
|
||||
// `run_setup_commands` and therefore can never produce a setup
|
||||
// failure regardless of how broken the setup config is.
|
||||
let result = create_worktree(
|
||||
&project_root,
|
||||
"173_reuse_fail",
|
||||
"173_reuse_no_setup",
|
||||
&failing_setup_config(),
|
||||
3002,
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"create_worktree reuse must succeed even if setup commands fail: {:?}",
|
||||
"reuse must succeed and must not run setup commands: {:?}",
|
||||
result.err()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_worktree_reuse_does_not_create_setup_marker_file() {
|
||||
// Stronger version of the above: assert that on reuse, a setup
|
||||
// command that would have created a marker file does NOT run.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let project_root = tmp.path().join("my-project");
|
||||
fs::create_dir_all(&project_root).unwrap();
|
||||
init_git_repo(&project_root);
|
||||
|
||||
// First creation — no setup, so no marker yet.
|
||||
let info = create_worktree(&project_root, "174_reuse_marker", &empty_config(), 3001)
|
||||
.await
|
||||
.unwrap();
|
||||
let marker = info.path.join("__setup_ran__");
|
||||
assert!(!marker.exists(), "no marker after empty-setup creation");
|
||||
|
||||
// Second call with a setup command that WOULD create the marker if
|
||||
// run. The reuse path must not run it.
|
||||
let cfg = ProjectConfig {
|
||||
component: vec![ComponentConfig {
|
||||
name: "marker".to_string(),
|
||||
path: ".".to_string(),
|
||||
setup: vec!["touch __setup_ran__".to_string()],
|
||||
teardown: vec![],
|
||||
}],
|
||||
..empty_config()
|
||||
};
|
||||
create_worktree(&project_root, "174_reuse_marker", &cfg, 3002)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(
|
||||
!marker.exists(),
|
||||
"reuse path must not run setup commands; marker file was created"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn install_pre_commit_hook_creates_executable_hook_and_sets_hookspath() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user