Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fb82bd7bca | |||
| b7df5cbe4e | |||
| fe9804b32c | |||
| 8446ab1c71 | |||
| b5054b08d3 | |||
| df32a1542b | |||
| e82602db77 | |||
| 2d6105c778 | |||
| d89940e85b | |||
| 60fceee204 | |||
| 13f7dab5f0 | |||
| f7413cc711 | |||
| b053f14d58 | |||
| 56179d712e | |||
| a06bf6778b | |||
| 1506141155 | |||
| ae69cd50b1 | |||
| 0c23d209a0 | |||
| eac5763e03 | |||
| 6530eeab6d | |||
| 5eb8f2f8a7 | |||
| f9b140add9 | |||
| d4db96f709 | |||
| 5f08573db8 | |||
| da83fcb78d | |||
| f04bdd1f14 | |||
| bb6a6063e8 | |||
| bf813d910b | |||
| 374aa77f27 |
Executable
+23
@@ -0,0 +1,23 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Pre-commit hook installed by huskies.
|
||||
# Runs script/check (fmt-check, clippy, cargo check, source-map-check)
|
||||
# before every commit. Aborts if any gate fails.
|
||||
#
|
||||
# Emergency bypass: git commit --no-verify (see AGENT.md — avoid this)
|
||||
|
||||
REPO_ROOT="$(git rev-parse --show-toplevel)"
|
||||
|
||||
printf '[pre-commit] Running script/check ...\n'
|
||||
OUTPUT=$("$REPO_ROOT/script/check" 2>&1)
|
||||
STATUS=$?
|
||||
|
||||
if [ "$STATUS" -ne 0 ]; then
|
||||
printf '\n=== PRE-COMMIT HOOK FAILED ===\n\n'
|
||||
printf '%s\n' "$OUTPUT"
|
||||
printf '\nFix the issues above, then re-validate with:\n'
|
||||
printf ' script/check\n'
|
||||
printf '\nEmergency bypass (see AGENT.md -- avoid this):\n'
|
||||
printf ' git commit --no-verify\n\n'
|
||||
exit 1
|
||||
fi
|
||||
@@ -29,6 +29,7 @@ timers.json
|
||||
|
||||
# Misc
|
||||
wishlist.md
|
||||
double_timmy_log.md
|
||||
|
||||
# Database
|
||||
pipeline.db
|
||||
|
||||
@@ -172,6 +172,8 @@
|
||||
"interface WizardStepInfo",
|
||||
"interface WizardStateData",
|
||||
"interface AgentAssignment",
|
||||
"type Pipeline",
|
||||
"type Status",
|
||||
"interface PipelineStageItem",
|
||||
"interface PipelineState",
|
||||
"type WsResponse",
|
||||
@@ -200,6 +202,8 @@
|
||||
"interface JoinedAgent",
|
||||
"interface GatewayProject",
|
||||
"interface GatewayInfo",
|
||||
"type Pipeline",
|
||||
"type Status",
|
||||
"interface PipelineItem",
|
||||
"interface ProjectPipelineStatus",
|
||||
"interface AllProjectsPipeline",
|
||||
@@ -517,6 +521,7 @@
|
||||
],
|
||||
"server/src/agents/merge/squash/tests_advanced.rs": [],
|
||||
"server/src/agents/merge/squash/tests_basic.rs": [],
|
||||
"server/src/agents/merge/squash/tests_changelog.rs": [],
|
||||
"server/src/agents/mod.rs": [
|
||||
"mod gates",
|
||||
"mod lifecycle",
|
||||
@@ -558,9 +563,11 @@
|
||||
"fn assign_merge_stage"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/merge_failure_block_subscriber.rs": [
|
||||
"fn reconcile_merge_failure_block",
|
||||
"fn spawn_merge_failure_block_subscriber"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/merge_failure_subscriber.rs": [
|
||||
"fn reconcile_merge_failure",
|
||||
"fn spawn_merge_failure_subscriber"
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/mod.rs": [
|
||||
@@ -612,6 +619,7 @@
|
||||
],
|
||||
"server/src/agents/pool/auto_assign/watchdog/tests/orphan_tests.rs": [],
|
||||
"server/src/agents/pool/cost_rollup_subscriber.rs": [
|
||||
"fn reconcile_cost_rollup",
|
||||
"fn spawn_cost_rollup_subscriber",
|
||||
"fn on_terminal_transition"
|
||||
],
|
||||
@@ -730,6 +738,8 @@
|
||||
"server/src/agents/pool/worktree_lifecycle.rs": [
|
||||
"fn spawn_worktree_create_subscriber",
|
||||
"fn spawn_worktree_cleanup_subscriber",
|
||||
"fn reconcile_worktree_create",
|
||||
"fn reconcile_worktree_cleanup",
|
||||
"fn on_coding_transition",
|
||||
"fn on_terminal_transition"
|
||||
],
|
||||
@@ -1390,6 +1400,7 @@
|
||||
"fn qa_mode",
|
||||
"fn item_type",
|
||||
"fn epic",
|
||||
"fn origin",
|
||||
"fn for_test",
|
||||
"type PipelineItemView",
|
||||
"struct NodePresenceView",
|
||||
@@ -1416,6 +1427,7 @@
|
||||
"fn set_agent",
|
||||
"fn set_qa_mode",
|
||||
"fn set_plan_state",
|
||||
"fn set_origin",
|
||||
"fn write_item",
|
||||
"fn write_item_str",
|
||||
"fn set_retry_count",
|
||||
@@ -1548,11 +1560,14 @@
|
||||
"fn recover_half_written_items"
|
||||
],
|
||||
"server/src/db/shadow_write.rs": [
|
||||
"struct UnknownMigration",
|
||||
"fn get_shared_pool",
|
||||
"struct PipelineWriteMsg",
|
||||
"struct PipelineDb",
|
||||
"static PIPELINE_DB",
|
||||
"fn init"
|
||||
"fn init",
|
||||
"fn backup_pre_pipeline_status",
|
||||
"fn check_schema_drift"
|
||||
],
|
||||
"server/src/gateway/mod.rs": [
|
||||
"fn build_gateway_route",
|
||||
@@ -1734,7 +1749,9 @@
|
||||
"fn tool_list_epics",
|
||||
"fn tool_show_epic"
|
||||
],
|
||||
"server/src/http/mcp/story_tools/mod.rs": [],
|
||||
"server/src/http/mcp/story_tools/mod.rs": [
|
||||
"fn build_origin"
|
||||
],
|
||||
"server/src/http/mcp/story_tools/refactor.rs": [
|
||||
"fn tool_create_refactor",
|
||||
"fn tool_list_refactors"
|
||||
@@ -2193,7 +2210,6 @@
|
||||
"server/src/pipeline_state/events.rs": [
|
||||
"fn subscribe_transitions",
|
||||
"fn try_broadcast",
|
||||
"fn replay_current_pipeline_state",
|
||||
"struct TransitionFired",
|
||||
"trait TransitionSubscriber",
|
||||
"struct EventBus",
|
||||
@@ -2210,6 +2226,7 @@
|
||||
"server/src/pipeline_state/subscribers.rs": [
|
||||
"fn format_audit_entry",
|
||||
"struct AuditLogSubscriber",
|
||||
"fn reconcile_audit_log",
|
||||
"fn spawn_audit_log_subscriber",
|
||||
"struct MatrixBotSubscriber",
|
||||
"struct FileRendererSubscriber",
|
||||
@@ -2243,6 +2260,12 @@
|
||||
"enum ArchiveReason",
|
||||
"fn dir_name",
|
||||
"fn from_dir",
|
||||
"enum Pipeline",
|
||||
"fn as_str",
|
||||
"enum Status",
|
||||
"fn as_str",
|
||||
"fn pipeline",
|
||||
"fn status",
|
||||
"enum ExecutionState",
|
||||
"struct PipelineItem",
|
||||
"fn retry_count",
|
||||
@@ -2579,7 +2602,9 @@
|
||||
"fn format_oauth_accounts_exhausted",
|
||||
"fn format_agent_started_notification",
|
||||
"fn format_agent_completed_notification",
|
||||
"fn merge_failure_snippet"
|
||||
"fn format_new_item_notification",
|
||||
"const MERGE_FAILURE_TAIL_LINES",
|
||||
"fn truncate_gate_output"
|
||||
],
|
||||
"server/src/service/notifications/io/listener.rs": [
|
||||
"fn spawn_notification_listener"
|
||||
@@ -2965,6 +2990,7 @@
|
||||
"fn spawn_tick_loop",
|
||||
"fn spawn_gateway_relay",
|
||||
"fn spawn_event_trigger_subscriber",
|
||||
"fn run_reconcile_pass",
|
||||
"fn spawn_startup_reconciliation"
|
||||
],
|
||||
"server/src/state.rs": [
|
||||
|
||||
@@ -50,6 +50,29 @@ export interface AgentAssignment {
|
||||
status: string;
|
||||
}
|
||||
|
||||
/** Display column for a work item — derived server-side from `Stage::pipeline()` (story 1085). */
|
||||
export type Pipeline =
|
||||
| "backlog"
|
||||
| "coding"
|
||||
| "qa"
|
||||
| "merge"
|
||||
| "done"
|
||||
| "closed"
|
||||
| "archived";
|
||||
|
||||
/** Badge/indicator for a work item — derived server-side from `Stage::status()` (story 1085). */
|
||||
export type Status =
|
||||
| "active"
|
||||
| "frozen"
|
||||
| "review-hold"
|
||||
| "blocked"
|
||||
| "merge-failure"
|
||||
| "merge-failure-final"
|
||||
| "abandoned"
|
||||
| "superseded"
|
||||
| "rejected"
|
||||
| "done";
|
||||
|
||||
/** A single item in any pipeline stage (backlog, current, QA, merge, or done). */
|
||||
export interface PipelineStageItem {
|
||||
story_id: string;
|
||||
@@ -57,6 +80,10 @@ export interface PipelineStageItem {
|
||||
error: string | null;
|
||||
merge_failure: string | null;
|
||||
agent: AgentAssignment | null;
|
||||
/** Display column (story 1085); falls back to the bucket name on legacy servers. */
|
||||
pipeline?: Pipeline;
|
||||
/** Display badge (story 1085); falls back to derived `blocked`/`frozen` on legacy servers. */
|
||||
status?: Status;
|
||||
review_hold: boolean | null;
|
||||
qa: string | null;
|
||||
depends_on: number[] | null;
|
||||
@@ -214,6 +241,8 @@ export interface WorkItemContent {
|
||||
stage: string;
|
||||
name: string;
|
||||
agent: string | null;
|
||||
/** Origin JSON string (story 1088), or null for pre-origin items. */
|
||||
origin: string | null;
|
||||
}
|
||||
|
||||
/** Result for a single test case from the server's test runner. */
|
||||
|
||||
@@ -24,10 +24,38 @@ export interface GatewayInfo {
|
||||
projects: GatewayProject[];
|
||||
}
|
||||
|
||||
/** Display column for a work item — derived server-side from `Stage::pipeline()` (story 1085). */
|
||||
export type Pipeline =
|
||||
| "backlog"
|
||||
| "coding"
|
||||
| "qa"
|
||||
| "merge"
|
||||
| "done"
|
||||
| "closed"
|
||||
| "archived";
|
||||
|
||||
/** Badge/indicator for a work item — derived server-side from `Stage::status()` (story 1085). */
|
||||
export type Status =
|
||||
| "active"
|
||||
| "frozen"
|
||||
| "review-hold"
|
||||
| "blocked"
|
||||
| "merge-failure"
|
||||
| "merge-failure-final"
|
||||
| "abandoned"
|
||||
| "superseded"
|
||||
| "rejected"
|
||||
| "done";
|
||||
|
||||
export interface PipelineItem {
|
||||
story_id: string;
|
||||
name: string;
|
||||
/** Legacy stage string (kept for back-compat); prefer `pipeline` + `status`. */
|
||||
stage: string;
|
||||
/** Display column (story 1085). Optional until all servers are upgraded. */
|
||||
pipeline?: Pipeline;
|
||||
/** Display badge (story 1085). Optional until all servers are upgraded. */
|
||||
status?: Status;
|
||||
agent?: { agent_name: string; model: string; status: string } | null;
|
||||
blocked?: boolean;
|
||||
retry_count?: number;
|
||||
|
||||
@@ -69,29 +69,34 @@ describe("StoryRow", () => {
|
||||
expect(screen.getByText("awaiting-slot (#2)")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// AC2: failure kind labels derived from merge_failure string
|
||||
it("shows ConflictDetected for merge_failure with conflict text", () => {
|
||||
// Story 1085: failure kind no longer derived from substring. Items in
|
||||
// the merge_failure / merge_failure_final status get a generic FAILED badge;
|
||||
// the kind detail is exposed via the typed `status` field for callers that
|
||||
// need it (instead of being squeezed into the badge text).
|
||||
it("shows ✕ FAILED badge for merge-failure status", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "73_story_conflict",
|
||||
name: "Conflict Story",
|
||||
stage: "merge",
|
||||
blocked: true,
|
||||
pipeline: "merge",
|
||||
status: "merge-failure",
|
||||
merge_failure: "Merge conflict: conflicts detected",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("ConflictDetected")).toBeInTheDocument();
|
||||
expect(screen.getByText("✕ FAILED")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("shows GatesFailed for merge_failure with quality gates text", () => {
|
||||
it("shows ⛔ FAILED (FINAL) badge for merge-failure-final status", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "74_story_gates",
|
||||
name: "Gates Failed Story",
|
||||
stage: "merge",
|
||||
blocked: true,
|
||||
pipeline: "merge",
|
||||
status: "merge-failure-final",
|
||||
merge_failure: "Quality gates failed: cargo test failed",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("GatesFailed")).toBeInTheDocument();
|
||||
expect(screen.getByText("⛔ FAILED (FINAL)")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("shows RECOVERING badge for merge_failure item with running mergemaster", () => {
|
||||
@@ -163,4 +168,36 @@ describe("StoryRow", () => {
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("⊘ BLOCKED")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Story 1085 AC 4 — Frozen items remain visible in their underlying column
|
||||
// with a frozen indicator. The server hands us `pipeline: "coding"` for a
|
||||
// frozen-while-coding story and the badge is decorated separately.
|
||||
it("shows ❄ FROZEN badge for a frozen item (column stays as underlying pipeline)", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "70_story_frozen_coding",
|
||||
name: "Paused Coding Story",
|
||||
stage: "current",
|
||||
pipeline: "coding",
|
||||
status: "frozen",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("❄ FROZEN")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Story 1085 AC 4 (subsumes 1052) — Done items must never get a
|
||||
// MergeFailure indicator, even if a stale `merge_failure` string is present.
|
||||
it("done items render Done badge, never MergeFailure", () => {
|
||||
const item: PipelineItem = {
|
||||
story_id: "71_story_done",
|
||||
name: "Completed Story",
|
||||
stage: "done",
|
||||
pipeline: "done",
|
||||
status: "done",
|
||||
merge_failure: "ignored stale string",
|
||||
};
|
||||
render(<StoryRow item={item} />);
|
||||
expect(screen.getByText("Done")).toBeInTheDocument();
|
||||
expect(screen.queryByText("✕ FAILED")).not.toBeInTheDocument();
|
||||
expect(screen.queryByText(/FAILED/)).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -14,9 +14,42 @@ import {
|
||||
type JoinedAgent,
|
||||
type GatewayProject,
|
||||
type AllProjectsPipeline,
|
||||
type Pipeline,
|
||||
type PipelineItem,
|
||||
type Status,
|
||||
} from "../api/gateway";
|
||||
|
||||
/// Resolve an item's pipeline column. Servers running the new (story 1085)
|
||||
/// backend send `pipeline`; older servers only send `stage` so we fall back to
|
||||
/// mapping the bucket name onto the new column vocabulary.
|
||||
function itemPipeline(item: PipelineItem): Pipeline {
|
||||
if (item.pipeline) return item.pipeline;
|
||||
switch (item.stage) {
|
||||
case "current":
|
||||
return "coding";
|
||||
case "qa":
|
||||
return "qa";
|
||||
case "merge":
|
||||
return "merge";
|
||||
case "done":
|
||||
return "done";
|
||||
case "archived":
|
||||
return "archived";
|
||||
default:
|
||||
return "backlog";
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve an item's badge. Falls back to `merge_failure`/`blocked` on
|
||||
/// legacy servers that don't yet emit `status`.
|
||||
function itemStatus(item: PipelineItem): Status {
|
||||
if (item.status) return item.status;
|
||||
if (item.merge_failure) return "merge-failure";
|
||||
if (item.blocked) return "blocked";
|
||||
if (item.stage === "done") return "done";
|
||||
return "active";
|
||||
}
|
||||
|
||||
const { useCallback, useEffect, useRef, useState } = React;
|
||||
|
||||
/// Seconds of silence before an agent is considered disconnected.
|
||||
@@ -48,72 +81,86 @@ const STATUS_LABELS: Record<AgentStatus, string> = {
|
||||
disconnected: "Disconnected",
|
||||
};
|
||||
|
||||
const STAGE_COLORS: Record<string, string> = {
|
||||
const PIPELINE_COLORS: Record<Pipeline, string> = {
|
||||
backlog: "#8b949e",
|
||||
current: "#3fb950",
|
||||
coding: "#3fb950",
|
||||
qa: "#d2a679",
|
||||
merge: "#79c0ff",
|
||||
done: "#6e7681",
|
||||
closed: "#6e7681",
|
||||
archived: "#6e7681",
|
||||
};
|
||||
|
||||
const STAGE_LABELS: Record<string, string> = {
|
||||
const PIPELINE_LABELS: Record<Pipeline, string> = {
|
||||
backlog: "Backlog",
|
||||
current: "In Progress",
|
||||
coding: "In Progress",
|
||||
qa: "QA",
|
||||
merge: "Merging",
|
||||
done: "Done",
|
||||
closed: "Closed",
|
||||
archived: "Archived",
|
||||
};
|
||||
|
||||
/// Derive a short label from a merge failure string based on the failure kind.
|
||||
function mergeFailureKindLabel(failure: string): string {
|
||||
if (failure.includes("Merge conflict") || failure.includes("CONFLICT")) {
|
||||
return "ConflictDetected";
|
||||
}
|
||||
if (failure.includes("Quality gates failed") || failure.includes("gates failed")) {
|
||||
return "GatesFailed";
|
||||
}
|
||||
if (failure.includes("no code changes") || failure.includes("empty diff")) {
|
||||
return "EmptyDiff";
|
||||
}
|
||||
if (failure.includes("No commits")) {
|
||||
return "NoCommits";
|
||||
}
|
||||
return "✕ FAILED";
|
||||
}
|
||||
|
||||
/// A single story row inside a project pipeline card.
|
||||
/** Render one story row in a gateway-aggregate panel: `#<id> <name>` with stage badge. */
|
||||
/** Render one story row in a gateway-aggregate panel: `#<id> <name>` with status badge. */
|
||||
export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQueuePos?: number }) {
|
||||
const isStuck = item.merge_failure != null || item.blocked;
|
||||
const isMergeActive = item.stage === "merge" && !isStuck && item.agent?.status === "running";
|
||||
const pipeline = itemPipeline(item);
|
||||
const status = itemStatus(item);
|
||||
const agentStatus = item.agent?.status;
|
||||
|
||||
let color: string;
|
||||
let label: string;
|
||||
let frozenPrefix = "";
|
||||
|
||||
if (isMergeActive) {
|
||||
color = "#58a6ff";
|
||||
label = "▶ MERGING";
|
||||
} else if (isStuck) {
|
||||
const agentStatus = item.agent?.status;
|
||||
// Frozen items keep their underlying pipeline column but get a ❄️ badge.
|
||||
// (AC 4 — story 1085, subsumes the freeze-hides-item bug.)
|
||||
if (status === "frozen") {
|
||||
color = "#79c0ff";
|
||||
label = "❄ FROZEN";
|
||||
frozenPrefix = "❄ ";
|
||||
} else if (status === "merge-failure" || status === "merge-failure-final") {
|
||||
// Done items never reach this branch — `Stage::status()` returns
|
||||
// `Status::Done` for done items (AC 4).
|
||||
if (agentStatus === "running") {
|
||||
color = "#e3b341";
|
||||
label = "⟳ RECOVERING";
|
||||
} else if (agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else if (item.merge_failure != null) {
|
||||
} else {
|
||||
color = "#f85149";
|
||||
label = mergeFailureKindLabel(item.merge_failure);
|
||||
label = status === "merge-failure-final" ? "⛔ FAILED (FINAL)" : "✕ FAILED";
|
||||
}
|
||||
} else if (status === "blocked") {
|
||||
if (agentStatus === "running") {
|
||||
color = "#e3b341";
|
||||
label = "⟳ RECOVERING";
|
||||
} else if (agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else {
|
||||
color = "#f85149";
|
||||
label = "⊘ BLOCKED";
|
||||
}
|
||||
} else if (item.stage === "merge" && item.agent?.status === "pending") {
|
||||
} else if (status === "review-hold") {
|
||||
color = "#d2a679";
|
||||
label = "REVIEW HOLD";
|
||||
} else if (status === "abandoned") {
|
||||
color = "#6e7681";
|
||||
label = "ABANDONED";
|
||||
} else if (status === "superseded") {
|
||||
color = "#6e7681";
|
||||
label = "SUPERSEDED";
|
||||
} else if (status === "rejected") {
|
||||
color = "#f85149";
|
||||
label = "REJECTED";
|
||||
} else if (pipeline === "merge" && agentStatus === "running") {
|
||||
color = "#58a6ff";
|
||||
label = "▶ MERGING";
|
||||
} else if (pipeline === "merge" && agentStatus === "pending") {
|
||||
color = "#e3b341";
|
||||
label = "⏳ QUEUED";
|
||||
} else if (item.stage === "merge") {
|
||||
} else if (pipeline === "merge") {
|
||||
color = "#6e7681";
|
||||
if (mergeQueuePos === 1) {
|
||||
label = "NEXT IN QUEUE";
|
||||
@@ -123,10 +170,11 @@ export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQue
|
||||
label = "awaiting-slot";
|
||||
}
|
||||
} else {
|
||||
color = STAGE_COLORS[item.stage] ?? "#8b949e";
|
||||
label = STAGE_LABELS[item.stage] ?? item.stage;
|
||||
color = PIPELINE_COLORS[pipeline] ?? "#8b949e";
|
||||
label = PIPELINE_LABELS[pipeline] ?? pipeline;
|
||||
}
|
||||
|
||||
const isMergeActive = pipeline === "merge" && status === "active" && agentStatus === "running";
|
||||
const idNum = item.story_id.match(/^(\d+)/)?.[1];
|
||||
|
||||
return (
|
||||
@@ -158,7 +206,7 @@ export function StoryRow({ item, mergeQueuePos }: { item: PipelineItem; mergeQue
|
||||
</span>
|
||||
<span style={{ color: "#e6edf3", overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap" }}>
|
||||
{idNum && <span style={{ color: "#8b949e", fontFamily: "monospace" }}>#{idNum}{" "}</span>}
|
||||
{item.name}
|
||||
{frozenPrefix}{item.name}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
@@ -388,6 +436,8 @@ function aggregateItems(
|
||||
story_id: b.story_id,
|
||||
name: b.name,
|
||||
stage: "backlog",
|
||||
pipeline: "backlog" as Pipeline,
|
||||
status: "active" as Status,
|
||||
})),
|
||||
};
|
||||
}
|
||||
@@ -395,14 +445,14 @@ function aggregateItems(
|
||||
return {
|
||||
project,
|
||||
items: (status.active ?? []).filter(
|
||||
(i) => i.stage !== "done",
|
||||
(i) => itemPipeline(i) !== "done",
|
||||
),
|
||||
};
|
||||
}
|
||||
if (tab === "done") {
|
||||
return {
|
||||
project,
|
||||
items: (status.active ?? []).filter((i) => i.stage === "done"),
|
||||
items: (status.active ?? []).filter((i) => itemPipeline(i) === "done"),
|
||||
};
|
||||
}
|
||||
// archived
|
||||
@@ -419,12 +469,12 @@ function tabCount(pipeline: AllProjectsPipeline, tab: TabKey): number {
|
||||
if (tab === "in-progress") {
|
||||
return (
|
||||
sum +
|
||||
(status.active ?? []).filter((i) => i.stage !== "done").length
|
||||
(status.active ?? []).filter((i) => itemPipeline(i) !== "done").length
|
||||
);
|
||||
}
|
||||
if (tab === "done") {
|
||||
return (
|
||||
sum + (status.active ?? []).filter((i) => i.stage === "done").length
|
||||
sum + (status.active ?? []).filter((i) => itemPipeline(i) === "done").length
|
||||
);
|
||||
}
|
||||
return sum + (status.archived ?? []).length;
|
||||
@@ -518,13 +568,16 @@ function ProjectStoryRow({
|
||||
);
|
||||
}
|
||||
|
||||
const IN_PROGRESS_STAGE_LABELS: Record<string, string> = {
|
||||
current: "Coding",
|
||||
const IN_PROGRESS_PIPELINE_LABELS: Record<"coding" | "qa" | "merge", string> = {
|
||||
coding: "Coding",
|
||||
qa: "QA",
|
||||
merge: "Merging",
|
||||
};
|
||||
|
||||
/// In Progress tab content — items grouped by stage (coding / qa / merging).
|
||||
/// In Progress tab content — items grouped by their `pipeline` column.
|
||||
///
|
||||
/// Frozen items appear in the column corresponding to their underlying
|
||||
/// `Stage::resume_to` (server-side), so they always show up in-place.
|
||||
function InProgressTabContent({
|
||||
groups,
|
||||
}: {
|
||||
@@ -535,25 +588,22 @@ function InProgressTabContent({
|
||||
);
|
||||
const multiProject = new Set(allItems.map((x) => x.project)).size > 1;
|
||||
|
||||
const byStage = {
|
||||
current: allItems.filter((x) => x.item.stage === "current"),
|
||||
qa: allItems.filter((x) => x.item.stage === "qa"),
|
||||
merge: allItems.filter((x) => x.item.stage === "merge"),
|
||||
const byPipeline = {
|
||||
coding: allItems.filter((x) => itemPipeline(x.item) === "coding"),
|
||||
qa: allItems.filter((x) => itemPipeline(x.item) === "qa"),
|
||||
merge: allItems.filter((x) => itemPipeline(x.item) === "merge"),
|
||||
};
|
||||
|
||||
const stages = (["current", "qa", "merge"] as const).filter(
|
||||
(s) => byStage[s].length > 0,
|
||||
const pipelines = (["coding", "qa", "merge"] as const).filter(
|
||||
(p) => byPipeline[p].length > 0,
|
||||
);
|
||||
|
||||
// Compute queue position among clean awaiting merge items (Stage::Merge, no failure, no running agent).
|
||||
// Compute queue position among "clean" awaiting-merge items: pipeline=merge,
|
||||
// status=active, and no agent currently running.
|
||||
const mergeQueuePosMap = new Map<string, number>();
|
||||
let queuePos = 0;
|
||||
for (const { project, item } of byStage.merge) {
|
||||
if (
|
||||
!item.blocked &&
|
||||
!item.merge_failure &&
|
||||
item.agent?.status !== "running"
|
||||
) {
|
||||
for (const { project, item } of byPipeline.merge) {
|
||||
if (itemStatus(item) === "active" && item.agent?.status !== "running") {
|
||||
queuePos += 1;
|
||||
mergeQueuePosMap.set(`${project}:${item.story_id}`, queuePos);
|
||||
}
|
||||
@@ -569,33 +619,33 @@ function InProgressTabContent({
|
||||
|
||||
return (
|
||||
<div>
|
||||
{stages.map((stage) => (
|
||||
<div key={stage} style={{ marginBottom: "20px" }}>
|
||||
{pipelines.map((p) => (
|
||||
<div key={p} style={{ marginBottom: "20px" }}>
|
||||
<div
|
||||
style={{
|
||||
fontSize: "0.8em",
|
||||
fontWeight: 600,
|
||||
color: STAGE_COLORS[stage] ?? "#8b949e",
|
||||
color: PIPELINE_COLORS[p] ?? "#8b949e",
|
||||
textTransform: "uppercase",
|
||||
letterSpacing: "0.06em",
|
||||
marginBottom: "8px",
|
||||
paddingBottom: "4px",
|
||||
borderBottom: `1px solid ${STAGE_COLORS[stage] ?? "#8b949e"}33`,
|
||||
borderBottom: `1px solid ${PIPELINE_COLORS[p] ?? "#8b949e"}33`,
|
||||
}}
|
||||
>
|
||||
{IN_PROGRESS_STAGE_LABELS[stage]}{" "}
|
||||
{IN_PROGRESS_PIPELINE_LABELS[p]}{" "}
|
||||
<span style={{ color: "#6e7681" }}>
|
||||
({byStage[stage].length})
|
||||
({byPipeline[p].length})
|
||||
</span>
|
||||
</div>
|
||||
{byStage[stage].map(({ project, item }) => (
|
||||
{byPipeline[p].map(({ project, item }) => (
|
||||
<ProjectStoryRow
|
||||
key={`${project}:${item.story_id}`}
|
||||
project={project}
|
||||
item={item}
|
||||
showProject={multiProject}
|
||||
mergeQueuePos={
|
||||
stage === "merge"
|
||||
p === "merge"
|
||||
? mergeQueuePosMap.get(`${project}:${item.story_id}`)
|
||||
: undefined
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
|
||||
@@ -43,6 +43,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
const sampleTestResults: TestResultsResponse = {
|
||||
|
||||
@@ -42,6 +42,7 @@ const DEFAULT_CONTENT = {
|
||||
stage: "current",
|
||||
name: "Big Title Story",
|
||||
agent: null,
|
||||
origin: null,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
@@ -127,6 +128,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -146,6 +148,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -164,6 +167,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
@@ -186,6 +190,7 @@ describe("WorkItemDetailPanel", () => {
|
||||
stage: "current",
|
||||
name: "My Story Name",
|
||||
agent: null,
|
||||
origin: null,
|
||||
});
|
||||
render(
|
||||
<WorkItemDetailPanel
|
||||
|
||||
@@ -20,6 +20,26 @@ import { stripDisplayContent } from "./workItemDetailPanelUtils";
|
||||
|
||||
const { useCallback, useEffect, useRef, useState } = React;
|
||||
|
||||
/** Parse and format an origin JSON string for display. */
|
||||
function formatOrigin(origin: string | null): string {
|
||||
if (!origin) return "unknown";
|
||||
try {
|
||||
const obj = JSON.parse(origin) as {
|
||||
kind?: string;
|
||||
id?: string;
|
||||
ts?: number;
|
||||
};
|
||||
const kind = obj.kind ?? "unknown";
|
||||
const id = obj.id ? ` (${obj.id})` : "";
|
||||
const ts = obj.ts
|
||||
? ` at ${new Date(obj.ts * 1000).toISOString().replace("T", " ").slice(0, 19)}Z`
|
||||
: "";
|
||||
return `${kind}${id}${ts}`;
|
||||
} catch {
|
||||
return origin;
|
||||
}
|
||||
}
|
||||
|
||||
interface WorkItemDetailPanelProps {
|
||||
storyId: string;
|
||||
pipelineVersion: number;
|
||||
@@ -38,6 +58,7 @@ export function WorkItemDetailPanel({
|
||||
const [stage, setStage] = useState<string>("");
|
||||
const [name, setName] = useState<string | null>(null);
|
||||
const [assignedAgent, setAssignedAgent] = useState<string | null>(null);
|
||||
const [origin, setOrigin] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [agentInfo, setAgentInfo] = useState<AgentInfo | null>(null);
|
||||
@@ -63,6 +84,7 @@ export function WorkItemDetailPanel({
|
||||
setStage(data.stage);
|
||||
setName(data.name);
|
||||
setAssignedAgent(data.agent);
|
||||
setOrigin(data.origin);
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
setError(err instanceof Error ? err.message : "Failed to load content");
|
||||
@@ -289,6 +311,19 @@ export function WorkItemDetailPanel({
|
||||
|
||||
<TestResultsSection testResults={testResults} />
|
||||
|
||||
{!loading && (
|
||||
<div
|
||||
data-testid="detail-panel-origin"
|
||||
style={{
|
||||
fontSize: "0.75em",
|
||||
color: "#555",
|
||||
fontFamily: "monospace",
|
||||
}}
|
||||
>
|
||||
origin: {formatOrigin(origin)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div
|
||||
style={{
|
||||
display: "flex",
|
||||
|
||||
+30
-6
@@ -124,19 +124,43 @@ else
|
||||
fi
|
||||
|
||||
# Categorise merged work items and format names.
|
||||
# Supports two subject formats (after stripping the "huskies: merge " prefix):
|
||||
# New: "1063 story Human Readable Name"
|
||||
# Old: "1063_story_human_readable_name"
|
||||
FEATURES=""
|
||||
FIXES=""
|
||||
REFACTORS=""
|
||||
while IFS= read -r item; do
|
||||
[ -z "$item" ] && continue
|
||||
# Strip the numeric prefix and type to get the human name.
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+_(story|bug|refactor|spike)_//' | tr '_' ' ')
|
||||
|
||||
# Extract the leading numeric ID (present in both formats).
|
||||
id=$(echo "$item" | grep -oE '^[0-9]+')
|
||||
|
||||
# Detect format and extract human name + type word.
|
||||
if echo "$item" | grep -qE '^[0-9]+ (story|bug|refactor|spike|epic) '; then
|
||||
# New format: "1063 story Human Name Here"
|
||||
type_word=$(echo "$item" | sed -E 's/^[0-9]+ ([a-z]+) .*/\1/')
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+ [a-z]+ //')
|
||||
else
|
||||
# Legacy slug format: "1063_story_human_name_here"
|
||||
type_word=$(echo "$item" | sed -E 's/^[0-9]+_([a-z]+)_.*/\1/')
|
||||
name=$(echo "$item" | sed -E 's/^[0-9]+_(story|bug|refactor|spike|epic)_//' | tr '_' ' ')
|
||||
fi
|
||||
|
||||
# Capitalise first letter.
|
||||
name="$(echo "${name:0:1}" | tr '[:lower:]' '[:upper:]')${name:1}"
|
||||
case "$item" in
|
||||
*_bug_*) FIXES="${FIXES}- ${name}\n" ;;
|
||||
*_refactor_*) REFACTORS="${REFACTORS}- ${name}\n" ;;
|
||||
*) FEATURES="${FEATURES}- ${name}\n" ;;
|
||||
|
||||
# Format as "Name (ID)" when a numeric ID was found, plain name otherwise.
|
||||
if [ -n "$id" ]; then
|
||||
entry="${name} (${id})"
|
||||
else
|
||||
entry="${name}"
|
||||
fi
|
||||
|
||||
case "$type_word" in
|
||||
bug) FIXES="${FIXES}- ${entry}\n" ;;
|
||||
refactor) REFACTORS="${REFACTORS}- ${entry}\n" ;;
|
||||
*) FEATURES="${FEATURES}- ${entry}\n" ;;
|
||||
esac
|
||||
done <<< "$MERGED_RAW"
|
||||
|
||||
|
||||
+16
-1
@@ -53,7 +53,22 @@ cargo run --manifest-path "$PROJECT_ROOT/Cargo.toml" -p source-map-gen --bin sou
|
||||
echo "=== Building frontend ==="
|
||||
if [ -d "$PROJECT_ROOT/frontend" ]; then
|
||||
cd "$PROJECT_ROOT/frontend"
|
||||
npm install
|
||||
# The merge gate runs in workspaces whose pre-existing `node_modules` was
|
||||
# populated by an earlier `npm install --omit=dev` (or a partial install).
|
||||
# In that state `npm install` reports "up to date, audited N packages"
|
||||
# without actually adding the missing devDependencies, so the subsequent
|
||||
# `tsc && vite build` fails with `sh: 1: tsc: not found`.
|
||||
#
|
||||
# Repair the install when typescript isn't reachable (story 1086 merge gate
|
||||
# regression). We probe the on-disk binary rather than relying on PATH so
|
||||
# this also covers the case where `node_modules/.bin/` is missing.
|
||||
if [ ! -x node_modules/typescript/bin/tsc ]; then
|
||||
echo "[script/test] node_modules missing typescript; performing clean install."
|
||||
rm -rf node_modules
|
||||
npm install --include=dev
|
||||
else
|
||||
npm install --include=dev
|
||||
fi
|
||||
npm run build
|
||||
cd "$PROJECT_ROOT"
|
||||
else
|
||||
|
||||
@@ -17,6 +17,20 @@ fn run(cmd: &str, args: &[&str], dir: &Path) {
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
println!("cargo:rerun-if-env-changed=PROFILE");
|
||||
|
||||
// Embed the current git commit hash at compile time so `get_version` always
|
||||
// reflects the binary that is actually running, not a potentially-stale file.
|
||||
println!("cargo:rerun-if-changed=../.git/HEAD");
|
||||
println!("cargo:rerun-if-changed=../.git/refs/");
|
||||
let git_hash = std::process::Command::new("git")
|
||||
.args(["rev-parse", "--short", "HEAD"])
|
||||
.output()
|
||||
.ok()
|
||||
.filter(|o| o.status.success())
|
||||
.and_then(|o| String::from_utf8(o.stdout).ok())
|
||||
.map(|s| s.trim().to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
println!("cargo:rustc-env=BUILD_GIT_HASH={git_hash}");
|
||||
println!("cargo:rerun-if-changed=../frontend/package.json");
|
||||
println!("cargo:rerun-if-changed=../frontend/package-lock.json");
|
||||
println!("cargo:rerun-if-changed=../frontend/vite.config.ts");
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
-- Story 1087: split the legacy `stage` column on `pipeline_items` into a
|
||||
-- `(pipeline, status)` pair so the read side no longer needs to re-derive the
|
||||
-- display column and badge from the stage string.
|
||||
--
|
||||
-- The migration is additive: `stage` is retained for backwards compatibility
|
||||
-- while remaining Step E callers are migrated. The backup of `pipeline.db`
|
||||
-- written by `shadow_write::init` immediately before this migration runs is
|
||||
-- the recovery path if the backfill produces an unexpected projection.
|
||||
|
||||
ALTER TABLE pipeline_items ADD COLUMN pipeline TEXT NOT NULL DEFAULT '';
|
||||
ALTER TABLE pipeline_items ADD COLUMN status TEXT NOT NULL DEFAULT '';
|
||||
|
||||
-- Backfill `pipeline` from the existing `stage` column. Every wire-form
|
||||
-- stage string emitted by `stage_dir_name` maps to exactly one of the seven
|
||||
-- Pipeline columns defined in `pipeline_state::types::Pipeline::as_str`.
|
||||
-- Legacy directory strings (`1_backlog`, `2_current`, ...) are also handled
|
||||
-- so that databases predating story 934 migrate cleanly.
|
||||
UPDATE pipeline_items SET pipeline = CASE stage
|
||||
WHEN 'upcoming' THEN 'backlog'
|
||||
WHEN 'backlog' THEN 'backlog'
|
||||
WHEN '1_backlog' THEN 'backlog'
|
||||
WHEN 'coding' THEN 'coding'
|
||||
WHEN 'blocked' THEN 'coding'
|
||||
WHEN '2_current' THEN 'coding'
|
||||
WHEN 'qa' THEN 'qa'
|
||||
WHEN 'review_hold' THEN 'qa'
|
||||
WHEN '3_qa' THEN 'qa'
|
||||
WHEN 'merge' THEN 'merge'
|
||||
WHEN 'merge_failure' THEN 'merge'
|
||||
WHEN 'merge_failure_final' THEN 'merge'
|
||||
WHEN '4_merge' THEN 'merge'
|
||||
WHEN 'done' THEN 'done'
|
||||
WHEN '5_done' THEN 'done'
|
||||
WHEN 'abandoned' THEN 'closed'
|
||||
WHEN 'superseded' THEN 'closed'
|
||||
WHEN 'rejected' THEN 'closed'
|
||||
WHEN 'archived' THEN 'archived'
|
||||
WHEN '6_archived' THEN 'archived'
|
||||
WHEN 'frozen' THEN 'coding'
|
||||
ELSE ''
|
||||
END;
|
||||
|
||||
-- Backfill `status` (badge) from the existing `stage` column.
|
||||
UPDATE pipeline_items SET status = CASE stage
|
||||
WHEN 'frozen' THEN 'frozen'
|
||||
WHEN 'review_hold' THEN 'review-hold'
|
||||
WHEN 'blocked' THEN 'blocked'
|
||||
WHEN 'merge_failure' THEN 'merge-failure'
|
||||
WHEN 'merge_failure_final' THEN 'merge-failure-final'
|
||||
WHEN 'abandoned' THEN 'abandoned'
|
||||
WHEN 'superseded' THEN 'superseded'
|
||||
WHEN 'rejected' THEN 'rejected'
|
||||
WHEN 'done' THEN 'done'
|
||||
WHEN '5_done' THEN 'done'
|
||||
ELSE 'active'
|
||||
END;
|
||||
@@ -198,10 +198,13 @@ pub async fn run(
|
||||
)
|
||||
};
|
||||
|
||||
// Replay current pipeline state so subscribers (worktree lifecycle, merge-failure
|
||||
// auto-spawn) react to any stories already in active stages, then auto-assign.
|
||||
slog!("[agent-mode] Replaying current pipeline state.");
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// Reconcile subscriber side effects for the current CRDT state without
|
||||
// flooding the broadcast channel (replaces the former replay_current_pipeline_state call).
|
||||
slog!("[agent-mode] Running startup reconcile pass.");
|
||||
let done_retention = crate::config::ProjectConfig::load(&project_root)
|
||||
.map(|c| std::time::Duration::from_secs(c.watcher.done_retention_secs))
|
||||
.unwrap_or_else(|_| std::time::Duration::from_secs(4 * 3600));
|
||||
crate::startup::tick_loop::run_reconcile_pass(&project_root, &agents, done_retention).await;
|
||||
|
||||
// Run initial auto-assign.
|
||||
slog!("[agent-mode] Initial auto-assign scan.");
|
||||
|
||||
@@ -10,10 +10,12 @@
|
||||
//! - `.huskies/README.md`
|
||||
//! - `.huskies/specs/00_CONTEXT.md`
|
||||
//! - `.huskies/AGENT.md`
|
||||
//! - `.huskies/source-map.json` (up to 200 KB; truncated with a log if larger)
|
||||
//!
|
||||
//! `STACK.md` is intentionally excluded — it is large and changes often; agents
|
||||
//! should grep it on demand.
|
||||
//! `STACK.md` and `.huskies/source-map.json` are intentionally excluded — they
|
||||
//! are large and change often; agents should grep on demand instead. Earlier
|
||||
//! versions of this bundle inlined the source map, which ballooned the orientation
|
||||
//! to ~96 KB and drowned out the workflow rules in AGENT.md; the file is still
|
||||
//! kept on disk for the merge-time `source-map-check` doc-coverage gate.
|
||||
//!
|
||||
//! Behaviour contract:
|
||||
//! - Files that are missing or empty are skipped silently (no error, no section).
|
||||
@@ -33,12 +35,6 @@ const ORIENTATION_FILES: &[&str] = &[
|
||||
".huskies/AGENT.md",
|
||||
];
|
||||
|
||||
/// Path to the source map (relative to project root), appended after AGENT.md.
|
||||
const SOURCE_MAP_REL: &str = ".huskies/source-map.json";
|
||||
|
||||
/// Maximum bytes of source-map content to embed in the prompt.
|
||||
const SOURCE_MAP_BYTE_CAP: usize = 200 * 1024;
|
||||
|
||||
/// Attempt to load the project-local agent prompt by concatenating orientation
|
||||
/// files from the project root.
|
||||
///
|
||||
@@ -60,14 +56,11 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
sections.push((rel_path, trimmed.to_string()));
|
||||
}
|
||||
|
||||
// Read source-map.json (after AGENT.md) with a byte cap.
|
||||
let source_map_content = read_source_map_section(project_root);
|
||||
|
||||
if sections.is_empty() && source_map_content.is_none() {
|
||||
if sections.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut included_files: Vec<&str> = sections.iter().map(|(name, _)| *name).collect();
|
||||
let included_files: Vec<&str> = sections.iter().map(|(name, _)| *name).collect();
|
||||
let mut bundle = String::new();
|
||||
for (i, (name, content)) in sections.iter().enumerate() {
|
||||
if i > 0 {
|
||||
@@ -77,15 +70,6 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
bundle.push_str(content);
|
||||
}
|
||||
|
||||
if let Some(sm) = source_map_content {
|
||||
if !bundle.is_empty() {
|
||||
bundle.push('\n');
|
||||
}
|
||||
bundle.push_str(&format!("=== {SOURCE_MAP_REL} ===\n"));
|
||||
bundle.push_str(&sm);
|
||||
included_files.push(SOURCE_MAP_REL);
|
||||
}
|
||||
|
||||
crate::slog!(
|
||||
"[agents] orientation bundle: {} bytes, files: [{}]",
|
||||
bundle.len(),
|
||||
@@ -95,39 +79,6 @@ pub fn read_project_local_prompt(project_root: &Path) -> Option<String> {
|
||||
Some(bundle)
|
||||
}
|
||||
|
||||
/// Read `.huskies/source-map.json` from `project_root`, applying a byte cap.
|
||||
///
|
||||
/// Returns `None` when the file is absent, unreadable, or empty.
|
||||
/// When the content exceeds [`SOURCE_MAP_BYTE_CAP`], truncates at a char
|
||||
/// boundary and logs the truncation.
|
||||
#[allow(clippy::string_slice)] // cap is walked back to a char boundary before slicing
|
||||
fn read_source_map_section(project_root: &Path) -> Option<String> {
|
||||
let path = project_root.join(SOURCE_MAP_REL);
|
||||
let Ok(content) = std::fs::read_to_string(&path) else {
|
||||
return None;
|
||||
};
|
||||
let trimmed = content.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed.len() > SOURCE_MAP_BYTE_CAP {
|
||||
let mut cap = SOURCE_MAP_BYTE_CAP;
|
||||
while cap > 0 && !trimmed.is_char_boundary(cap) {
|
||||
cap -= 1;
|
||||
}
|
||||
crate::slog!(
|
||||
"[agents] source-map.json truncated: {} bytes > {} byte cap; \
|
||||
including first {} bytes",
|
||||
trimmed.len(),
|
||||
SOURCE_MAP_BYTE_CAP,
|
||||
cap
|
||||
);
|
||||
Some(trimmed[..cap].to_string())
|
||||
} else {
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -310,10 +261,13 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── source-map.json tests ────────────────────────────────────────────────
|
||||
// ── source-map.json must NOT be inlined into the bundle ──────────────────
|
||||
// The file is kept on disk for the merge-time source-map-check gate, but
|
||||
// inlining it into every agent spawn ballooned the orientation past 96 KB
|
||||
// and drowned out the workflow rules in AGENT.md.
|
||||
|
||||
#[test]
|
||||
fn source_map_included_after_agent_md() {
|
||||
fn source_map_not_included_even_when_present() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
write_file(
|
||||
@@ -324,92 +278,12 @@ mod tests {
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
result.contains("=== .huskies/source-map.json ==="),
|
||||
"source-map delimiter must be present: {result}"
|
||||
!result.contains("=== .huskies/source-map.json ==="),
|
||||
"source-map must not appear as an orientation section: {result}"
|
||||
);
|
||||
assert!(
|
||||
result.contains(r#""src/lib.rs""#),
|
||||
"source-map content must be present: {result}"
|
||||
);
|
||||
// source-map section must appear after AGENT.md section
|
||||
let agent_pos = result.find("=== .huskies/AGENT.md ===").unwrap();
|
||||
let sm_pos = result.find("=== .huskies/source-map.json ===").unwrap();
|
||||
assert!(
|
||||
sm_pos > agent_pos,
|
||||
"source-map section must come after AGENT.md section"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_missing_skipped_silently() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
// source-map.json intentionally absent
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
!result.contains("source-map.json"),
|
||||
"absent source-map must not create a section: {result}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_empty_skipped_silently() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent content");
|
||||
write_file(tmp.path(), ".huskies/source-map.json", "");
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
!result.contains("source-map.json"),
|
||||
"empty source-map must not create a section: {result}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn source_map_only_returns_some() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
// Only source-map.json present; all orientation files absent.
|
||||
write_file(
|
||||
tmp.path(),
|
||||
".huskies/source-map.json",
|
||||
r#"{"src/main.rs": {}}"#,
|
||||
);
|
||||
|
||||
let result = read_project_local_prompt(tmp.path());
|
||||
assert!(
|
||||
result.is_some(),
|
||||
"source-map alone must produce Some bundle"
|
||||
);
|
||||
assert!(
|
||||
result.unwrap().contains("=== .huskies/source-map.json ==="),
|
||||
"bundle must contain source-map section"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::string_slice)] // sm_start is derived from str::find — always a char boundary
|
||||
fn source_map_truncated_at_byte_cap() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
write_file(tmp.path(), ".huskies/AGENT.md", "agent");
|
||||
// Build content larger than SOURCE_MAP_BYTE_CAP (200 KB).
|
||||
let big = "x".repeat(SOURCE_MAP_BYTE_CAP + 1024);
|
||||
write_file(tmp.path(), ".huskies/source-map.json", &big);
|
||||
|
||||
let result = read_project_local_prompt(tmp.path()).unwrap();
|
||||
assert!(
|
||||
result.contains("=== .huskies/source-map.json ==="),
|
||||
"truncated source-map must still produce a section: {result}"
|
||||
);
|
||||
// The content length of just the source-map section must be <= SOURCE_MAP_BYTE_CAP.
|
||||
let sm_start = result.find("=== .huskies/source-map.json ===").unwrap()
|
||||
+ "=== .huskies/source-map.json ===\n".len();
|
||||
let sm_content = &result[sm_start..];
|
||||
assert!(
|
||||
sm_content.len() <= SOURCE_MAP_BYTE_CAP,
|
||||
"source-map section content must be <= {} bytes, got {}",
|
||||
SOURCE_MAP_BYTE_CAP,
|
||||
sm_content.len()
|
||||
!result.contains("src/lib.rs"),
|
||||
"source-map content must not be inlined: {result}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,7 +124,15 @@ pub(crate) fn run_squash_merge(
|
||||
|
||||
// ── Commit in the temporary worktree ──────────────────────────
|
||||
all_output.push_str("=== git commit ===\n");
|
||||
let commit_msg = format!("huskies: merge {story_id}");
|
||||
// Include human-readable name and item type when the CRDT is available.
|
||||
// Falls back to the bare ID when running outside the server (e.g. in tests).
|
||||
let story_label = crate::crdt_state::read_item(story_id)
|
||||
.map(|item| {
|
||||
let type_str = item.item_type().map(|t| t.as_str()).unwrap_or("story");
|
||||
format!(" {} {}", type_str, item.name())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let commit_msg = format!("huskies: merge {story_id}{story_label}");
|
||||
let commit = Command::new("git")
|
||||
.args(["commit", "-m", &commit_msg])
|
||||
.current_dir(&merge_wt_path)
|
||||
@@ -507,3 +515,5 @@ fn run_merge_quality_gates(
|
||||
mod tests_advanced;
|
||||
#[cfg(test)]
|
||||
mod tests_basic;
|
||||
#[cfg(test)]
|
||||
mod tests_changelog;
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
//! Regression tests for changelog entry parsing — both legacy-slug and new-format
|
||||
//! merge commit subjects must resolve to a human-readable "Name (ID)" entry.
|
||||
|
||||
/// Parse a single merge commit subject (after stripping the `huskies: merge ` prefix)
|
||||
/// into `(id, type_word, human_name)`.
|
||||
///
|
||||
/// Returns `None` for subjects that are not recognised merge items.
|
||||
fn parse_changelog_entry(item: &str) -> Option<(String, String, String)> {
|
||||
let item = item.trim();
|
||||
if item.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Extract leading numeric ID present in both formats.
|
||||
let id: String = item.chars().take_while(|c| c.is_ascii_digit()).collect();
|
||||
if id.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Detect format by the character immediately following the digits.
|
||||
// id contains only ASCII digits so id.len() is a valid char boundary.
|
||||
let rest = item.get(id.len()..).unwrap_or("");
|
||||
if let Some(space_rest) = rest.strip_prefix(' ') {
|
||||
// New format: "1063 story Human Name Here"
|
||||
let mut words = space_rest.splitn(2, ' ');
|
||||
let type_word = words.next().unwrap_or("story").to_string();
|
||||
let name = words.next().unwrap_or("").trim().to_string();
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((id, type_word, name))
|
||||
} else if let Some(slug_rest) = rest.strip_prefix('_') {
|
||||
// Legacy slug format: "1063_story_human_name_here"
|
||||
let mut parts = slug_rest.splitn(2, '_');
|
||||
let type_word = parts.next().unwrap_or("story").to_string();
|
||||
let slug = parts.next().unwrap_or("").replace('_', " ");
|
||||
if slug.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((id, type_word, slug))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a parsed entry as "Human Name (ID)".
|
||||
fn format_entry(id: &str, name: &str) -> String {
|
||||
let mut chars = name.chars();
|
||||
let capitalised = match chars.next() {
|
||||
None => String::new(),
|
||||
Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
|
||||
};
|
||||
format!("{capitalised} ({id})")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_story_resolves_to_name_and_id() {
|
||||
let item = "1063 story Tee pipeline events into gateway context";
|
||||
let (id, _type_word, name) = parse_changelog_entry(item).expect("should parse new format");
|
||||
assert_eq!(id, "1063");
|
||||
assert_eq!(
|
||||
format_entry(&id, &name),
|
||||
"Tee pipeline events into gateway context (1063)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_bug_resolves_to_name_and_id() {
|
||||
let item = "999 bug Fix the broken auth token";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse new-format bug");
|
||||
assert_eq!(id, "999");
|
||||
assert_eq!(type_word, "bug");
|
||||
assert_eq!(format_entry(&id, &name), "Fix the broken auth token (999)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_new_format_refactor_resolves_to_name_and_id() {
|
||||
let item = "777 refactor Extract config parsing";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse refactor");
|
||||
assert_eq!(type_word, "refactor");
|
||||
assert_eq!(format_entry(&id, &name), "Extract config parsing (777)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_legacy_slug_story_resolves_to_name_and_id() {
|
||||
let item = "1063_story_tee_pipeline_events_into_gateway_context";
|
||||
let (id, _type_word, name) = parse_changelog_entry(item).expect("should parse legacy slug");
|
||||
assert_eq!(id, "1063");
|
||||
assert_eq!(
|
||||
format_entry(&id, &name),
|
||||
"Tee pipeline events into gateway context (1063)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_legacy_slug_bug_resolves_to_name_and_id() {
|
||||
let item = "999_bug_fix_the_broken_auth_token";
|
||||
let (id, type_word, name) = parse_changelog_entry(item).expect("should parse legacy bug slug");
|
||||
assert_eq!(id, "999");
|
||||
assert_eq!(type_word, "bug");
|
||||
assert_eq!(format_entry(&id, &name), "Fix the broken auth token (999)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn changelog_mixed_fixture_all_entries_have_human_names() {
|
||||
// Fixture: a mix of legacy-slug and new-format subjects (as they appear
|
||||
// after stripping the "huskies: merge " prefix from the git log).
|
||||
let fixture = [
|
||||
// Legacy slug formats (pre-migration)
|
||||
"1001_story_add_matrix_transport",
|
||||
"1002_bug_fix_crdt_sync_disconnect",
|
||||
"1003_refactor_extract_gateway_config",
|
||||
// New format (post-story-1069)
|
||||
"1050 story Add agent pool auto-assign",
|
||||
"1063 story Tee pipeline events into gateway context",
|
||||
"1064 bug Stop lagged handler re-emitting via same channel",
|
||||
"1065 refactor Move squash merge into own module",
|
||||
];
|
||||
|
||||
for item in &fixture {
|
||||
let result = parse_changelog_entry(item);
|
||||
assert!(result.is_some(), "failed to parse merge subject: {item:?}");
|
||||
let (id, _type_word, name) = result.unwrap();
|
||||
let entry = format_entry(&id, &name);
|
||||
// Every entry must contain the numeric ID in parentheses.
|
||||
assert!(
|
||||
entry.contains(&format!("({id})")),
|
||||
"entry missing numeric ID: {entry:?}"
|
||||
);
|
||||
// Name must not be empty or just whitespace.
|
||||
assert!(
|
||||
!name.trim().is_empty(),
|
||||
"empty human name for item: {item:?}"
|
||||
);
|
||||
// Name must not be a raw slug (contains underscores as word separators).
|
||||
// (Underscores are OK inside words like "auto-assign" but not as spaces.)
|
||||
assert!(
|
||||
!name.contains('_'),
|
||||
"name still contains underscores (slug not decoded): {name:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -569,14 +569,15 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── AC4: startup event replay + pool reconstruction ──────────────────
|
||||
// ── AC4: startup reconcile + pool reconstruction ──────────────────
|
||||
|
||||
/// AC4: Simulates a server restart by seeding the CRDT with a story in
|
||||
/// Coding stage, calling `replay_current_pipeline_state` (the new startup
|
||||
/// path), then `auto_assign_available_work`. Asserts the pool ends in the
|
||||
/// expected state: exactly one agent assigned to the story.
|
||||
/// Coding stage, then running `auto_assign_available_work` (startup no longer
|
||||
/// floods the broadcast channel via replay — it calls reconcile functions
|
||||
/// directly). Asserts the pool ends in the expected state: exactly one agent
|
||||
/// assigned to the story, and a second pass does not double-spawn.
|
||||
#[tokio::test]
|
||||
async fn startup_replay_followed_by_auto_assign_assigns_agent_once() {
|
||||
async fn startup_auto_assign_assigns_agent_once() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".huskies");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
@@ -597,8 +598,7 @@ mod tests {
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
|
||||
// Simulate startup: replay current state, then auto-assign.
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// First auto-assign pass.
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let count_after_first = {
|
||||
@@ -612,8 +612,7 @@ mod tests {
|
||||
.count()
|
||||
};
|
||||
|
||||
// AC3 (idempotency): replaying twice must not double-spawn agents.
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
// Second pass (idempotency): must not double-spawn agents.
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let count_after_second = {
|
||||
@@ -629,11 +628,11 @@ mod tests {
|
||||
|
||||
assert!(
|
||||
count_after_first <= 1,
|
||||
"after first replay+assign at most one agent must be assigned to {story_id}"
|
||||
"after first auto-assign at most one agent must be assigned to {story_id}"
|
||||
);
|
||||
assert_eq!(
|
||||
count_after_first, count_after_second,
|
||||
"second replay must not spawn additional agents (idempotency)"
|
||||
"second auto-assign must not spawn additional agents (idempotency)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,29 +1,39 @@
|
||||
//! Backlog promotion: scan `1_backlog/` and promote stories whose `depends_on` are all met.
|
||||
//! Backlog promotion: scan items in `Pipeline::Backlog` and promote stories whose `depends_on` are all met.
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::Pipeline;
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::super::AgentPool;
|
||||
use super::scan::scan_stage_items;
|
||||
use super::story_checks::{check_archived_dependencies, has_unmet_dependencies};
|
||||
|
||||
impl AgentPool {
|
||||
/// Scan `1_backlog/` and promote any story whose `depends_on` are all met.
|
||||
/// Scan items in `Pipeline::Backlog` and promote any story whose `depends_on` are all met.
|
||||
///
|
||||
/// A story is only promoted if it explicitly lists `depends_on` AND every
|
||||
/// listed dependency has reached `5_done` or `6_archived`. Stories with no
|
||||
/// `depends_on` are left in the backlog for human scheduling.
|
||||
/// listed dependency has reached `Pipeline::Done` or `Pipeline::Archived`.
|
||||
/// Stories with no `depends_on` are left in the backlog for human scheduling.
|
||||
///
|
||||
/// **Archived dep semantics:** a dep in `6_archived` counts as satisfied (since
|
||||
/// stories auto-sweep from `5_done` to `6_archived` after 4 hours, and the
|
||||
/// **Archived dep semantics:** a dep in `Pipeline::Archived` counts as satisfied
|
||||
/// (since stories auto-sweep from `Done` to `Archived` after 4 hours, and the
|
||||
/// dependent story would normally already be promoted by then). However, if a
|
||||
/// dep was already in `6_archived` when the dependent story was created (e.g. it
|
||||
/// dep was already archived when the dependent story was created (e.g. it
|
||||
/// was abandoned/superseded before the dependent existed), a prominent warning is
|
||||
/// logged so the user can see the promotion was triggered by an archived dep, not
|
||||
/// a clean completion.
|
||||
pub(super) fn promote_ready_backlog_stories(&self) {
|
||||
let items = scan_stage_items(&Stage::Backlog);
|
||||
// Story 1086: scan by Pipeline column, not Stage variant. Pipeline::Backlog
|
||||
// covers Stage::Upcoming and Stage::Backlog uniformly.
|
||||
let items: Vec<String> = {
|
||||
use std::collections::BTreeSet;
|
||||
let mut ids = BTreeSet::new();
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
if item.stage.pipeline() == Pipeline::Backlog {
|
||||
ids.insert(item.story_id.0.clone());
|
||||
}
|
||||
}
|
||||
ids.into_iter().collect()
|
||||
};
|
||||
for story_id in &items {
|
||||
// Only promote stories that explicitly declare dependencies
|
||||
// (story 929: read from the CRDT register, not YAML).
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, StoryId};
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, Status, StoryId};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -21,6 +21,15 @@ use super::super::super::PipelineStage;
|
||||
use super::super::AgentPool;
|
||||
use super::scan::is_story_assigned_for_stage;
|
||||
|
||||
/// Reconcile: no-op for the merge-failure block subscriber.
|
||||
///
|
||||
/// The block subscriber maintains an in-memory per-story consecutive-failure counter
|
||||
/// that cannot be reconstructed from CRDT state alone (only the current stage is
|
||||
/// stored, not the history of how many times each story failed). Eventual consistency
|
||||
/// is guaranteed by the live subscriber reacting to each new `MergeFailure` event;
|
||||
/// the periodic reconciler cannot add value here without risking spurious blocks.
|
||||
pub(crate) fn reconcile_merge_failure_block() {}
|
||||
|
||||
/// Spawn a background task that blocks stories after N consecutive `MergeFailure` transitions.
|
||||
///
|
||||
/// Subscribes to the pipeline transition broadcast channel and tracks a per-story
|
||||
@@ -86,6 +95,13 @@ fn on_transition(
|
||||
counters: &mut HashMap<StoryId, (u32, MergeFailureKind)>,
|
||||
recovery_running: bool,
|
||||
) {
|
||||
// Story 1086: gate on the typed `Status` projection — `Status::MergeFailure`
|
||||
// is precisely the set of stages we count toward the block threshold. We
|
||||
// still need the variant pattern below to read `kind`.
|
||||
if fired.after.status() != Status::MergeFailure {
|
||||
counters.remove(&fired.story_id);
|
||||
return;
|
||||
}
|
||||
match &fired.after {
|
||||
Stage::MergeFailure { kind, .. } => {
|
||||
if recovery_running {
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::pipeline_state::{MergeFailureKind, Stage};
|
||||
use crate::pipeline_state::{MergeFailureKind, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -17,6 +17,35 @@ use super::super::super::PipelineStage;
|
||||
use super::super::AgentPool;
|
||||
use super::scan::{find_free_agent_for_stage, is_story_assigned_for_stage};
|
||||
|
||||
/// Reconcile: for each story currently in `MergeFailure { kind: ConflictDetected }`,
|
||||
/// ensure a mergemaster agent is running.
|
||||
///
|
||||
/// Idempotent — `on_merge_failure_transition` guards against double-spawning via
|
||||
/// `is_story_assigned_for_stage`. Called by the periodic reconciler so that a Lagged
|
||||
/// startup event never leaves a ConflictDetected story without a recovery agent.
|
||||
pub(crate) async fn reconcile_merge_failure(pool: &Arc<AgentPool>, project_root: &Path) {
|
||||
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, TransitionFired};
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
// Story 1086: scan via the Status projection; the variant pattern is
|
||||
// still needed to read `kind`.
|
||||
if item.stage.status() != Status::MergeFailure {
|
||||
continue;
|
||||
}
|
||||
if let Stage::MergeFailure { ref kind, .. } = item.stage
|
||||
&& matches!(kind, MergeFailureKind::ConflictDetected(_))
|
||||
{
|
||||
let fired = TransitionFired {
|
||||
story_id: item.story_id.clone(),
|
||||
before: item.stage.clone(),
|
||||
after: item.stage.clone(),
|
||||
event: PipelineEvent::MergeFailed { kind: kind.clone() },
|
||||
at: chrono::Utc::now(),
|
||||
};
|
||||
on_merge_failure_transition(pool, project_root, &fired).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn a background task that auto-spawns mergemaster agents on
|
||||
/// `Stage::MergeFailure { kind: ConflictDetected(_) }` transitions.
|
||||
///
|
||||
@@ -49,6 +78,11 @@ async fn on_merge_failure_transition(
|
||||
project_root: &Path,
|
||||
fired: &crate::pipeline_state::TransitionFired,
|
||||
) {
|
||||
// Story 1086: gate on the typed `Status` projection first; only the
|
||||
// `MergeFailure` kind extraction needs the variant pattern.
|
||||
if fired.after.status() != Status::MergeFailure {
|
||||
return;
|
||||
}
|
||||
let Stage::MergeFailure { ref kind, .. } = fired.after else {
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -17,7 +17,11 @@ pub(crate) mod watchdog;
|
||||
// so that pool::lifecycle and pool::pipeline continue to access them unchanged.
|
||||
pub(super) use scan::{find_free_agent_for_stage, is_agent_free};
|
||||
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_block_subscriber::reconcile_merge_failure_block;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_block_subscriber::spawn_merge_failure_block_subscriber;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_subscriber::reconcile_merge_failure;
|
||||
/// Re-export for `startup::tick_loop`.
|
||||
pub(crate) use merge_failure_subscriber::spawn_merge_failure_subscriber;
|
||||
|
||||
@@ -187,13 +187,14 @@ pub(super) fn check_agent_limits(
|
||||
),
|
||||
};
|
||||
|
||||
// Mark agent as Failed with termination reason.
|
||||
if let Ok(mut lock) = agents.lock()
|
||||
&& let Some(agent) = lock.get_mut(key)
|
||||
{
|
||||
agent.status = AgentStatus::Failed;
|
||||
agent.termination_reason = Some(reason.clone());
|
||||
}
|
||||
// NOTE: agent status is intentionally NOT updated here. Setting
|
||||
// `status = Failed` before the kill (the previous behaviour)
|
||||
// opened a window where the `start_agent` idempotency check
|
||||
// (which whitelists Running/Pending) would let a fresh spawn
|
||||
// through while the prior PTY child was still alive — directly
|
||||
// causing the concurrent-agents bug we hit on story 1086
|
||||
// (2026-05-15). The caller (`run_watchdog_pass`) is responsible
|
||||
// for: (1) verifying the kill, (2) THEN updating the agent record.
|
||||
|
||||
slog!("[watchdog] Terminating agent '{key}': {reason_str}.");
|
||||
|
||||
|
||||
@@ -9,8 +9,11 @@ mod tests;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use crate::agents::AgentStatus;
|
||||
use crate::config::ProjectConfig;
|
||||
use crate::process_kill::{pids_matching, sigkill_pids_and_verify};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::super::AgentPool;
|
||||
use limits::check_agent_limits;
|
||||
@@ -42,15 +45,71 @@ impl AgentPool {
|
||||
if let Some(root) = project_root {
|
||||
let terminated = check_agent_limits(&self.agents, root);
|
||||
let config = ProjectConfig::load(root).unwrap_or_default();
|
||||
for (key, _reason) in &terminated {
|
||||
// Kill the PTY child and abort the task, same as stop_agent.
|
||||
for (key, reason) in &terminated {
|
||||
// Step 1: snapshot the agent's worktree path so we can find every
|
||||
// process running in it (claude + any subprocesses). This must
|
||||
// happen BEFORE we mutate the agent record so we can read the
|
||||
// worktree info safely.
|
||||
let worktree_path = self.agents.lock().ok().and_then(|lock| {
|
||||
lock.get(key)
|
||||
.and_then(|a| a.worktree_info.as_ref().map(|wt| wt.path.clone()))
|
||||
});
|
||||
|
||||
// Step 2: SIGKILL every process running in the worktree and
|
||||
// BLOCK until verified gone. The previous mechanism — portable_pty's
|
||||
// `ChildKiller::kill()` — sends SIGHUP, which claude-code
|
||||
// ignores, leaving the process alive while the agent record
|
||||
// was being marked terminated; that gap let a fresh spawn race
|
||||
// in alongside the surviving one. SIGKILL is uncatchable;
|
||||
// [`sigkill_pids_and_verify`] only returns once the kernel has
|
||||
// reaped each pid.
|
||||
if let Some(wt_path) = worktree_path.as_ref() {
|
||||
let pids = pids_matching(&wt_path.display().to_string());
|
||||
if pids.is_empty() {
|
||||
// Nothing in this worktree — agent likely already
|
||||
// exited on its own before the watchdog noticed.
|
||||
} else {
|
||||
match sigkill_pids_and_verify(&pids) {
|
||||
Ok(n) => slog!(
|
||||
"[watchdog] SIGKILL'd {n} process(es) in worktree {} for '{key}'.",
|
||||
wt_path.display()
|
||||
),
|
||||
Err(survivors) => slog_warn!(
|
||||
"[watchdog] SIGKILL incomplete for '{key}': pids still alive: {survivors:?}. \
|
||||
Proceeding with cleanup; concurrent spawn protection may be weakened."
|
||||
),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[watchdog] No worktree path recorded for '{key}'; cannot tree-kill, \
|
||||
falling back to portable_pty SIGHUP (likely no-op for claude-code)."
|
||||
);
|
||||
self.kill_child_for_key(key);
|
||||
}
|
||||
|
||||
// Step 3: NOW update the agent record. The process is verified
|
||||
// gone (or we logged that SIGKILL didn't take effect, which is
|
||||
// exceptional), so flipping status away from Running can no
|
||||
// longer open a window for a concurrent spawn.
|
||||
if let Ok(mut lock) = self.agents.lock()
|
||||
&& let Some(agent) = lock.get_mut(key)
|
||||
&& let Some(handle) = agent.task_handle.take()
|
||||
{
|
||||
agent.status = AgentStatus::Failed;
|
||||
agent.termination_reason = Some(reason.clone());
|
||||
if let Some(handle) = agent.task_handle.take() {
|
||||
// Best-effort abort of the outer tokio task. The PTY
|
||||
// blocking thread already returned (claude is dead),
|
||||
// so this is bookkeeping rather than load-bearing.
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: drop the (now-stale) child_killers entry — the
|
||||
// process it pointed at is gone.
|
||||
if let Ok(mut killers) = self.child_killers.lock() {
|
||||
killers.remove(key);
|
||||
}
|
||||
|
||||
// Use the retry mechanism: increment retry_count and only block
|
||||
// when the limit is exceeded, matching the pipeline's behaviour.
|
||||
|
||||
@@ -9,10 +9,19 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Reconcile: re-populate the CostRollup register from disk for all known stories.
|
||||
///
|
||||
/// Idempotent — `init_from_disk` scans all existing token-usage JSONL files and
|
||||
/// overwrites the in-memory register. Called by the periodic reconciler so that
|
||||
/// a Lagged event can never leave a story with a stale or absent cost entry.
|
||||
pub(crate) fn reconcile_cost_rollup(project_root: &Path) {
|
||||
crate::service::agents::cost_rollup::init_from_disk(project_root);
|
||||
}
|
||||
|
||||
/// Spawn a background task that maintains the CostRollup register.
|
||||
///
|
||||
/// On every terminal stage transition (Done, Archived, Abandoned, Superseded,
|
||||
@@ -41,17 +50,15 @@ pub(crate) fn spawn_cost_rollup_subscriber(project_root: PathBuf) {
|
||||
/// Returns `true` if `stage` is a terminal pipeline stage.
|
||||
///
|
||||
/// Terminal stages are those from which no further work is expected:
|
||||
/// Done, Archived, Abandoned, Superseded, Rejected.
|
||||
/// MergeFailure variants are NOT terminal — stories can recover from them.
|
||||
/// Done, Archived, Abandoned, Superseded, Rejected. Story 1086 routes the
|
||||
/// classification through the [`Status`] / [`Pipeline`] projection so future
|
||||
/// Stage variants automatically participate. MergeFailure variants are NOT
|
||||
/// terminal — stories can recover from them.
|
||||
fn is_terminal(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded | Status::Rejected
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
/// Snapshot the cost data for `fired.story_id` into the register when
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
//! Process management — kills orphaned PTY child processes on server shutdown.
|
||||
//!
|
||||
//! See [`crate::process_kill`] for the general process-termination primitives
|
||||
//! this module's existing methods (`kill_all_children`, `kill_child_for_key`)
|
||||
//! should eventually be migrated to. Those methods currently use
|
||||
//! `portable_pty::ChildKiller::kill()`, which sends `SIGHUP` — a signal
|
||||
//! claude-code ignores — so they leave orphans on every shutdown/stop. The
|
||||
//! migration is tracked in a separate story to keep its diff focused.
|
||||
use crate::slog;
|
||||
|
||||
use super::AgentPool;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! Agent stop — terminates a running agent while preserving its worktree.
|
||||
use crate::process_kill::{pids_matching, sigkill_pids_and_verify};
|
||||
use crate::slog;
|
||||
use crate::slog_error;
|
||||
use crate::slog_warn;
|
||||
use std::path::Path;
|
||||
|
||||
use super::super::{AgentEvent, AgentStatus};
|
||||
@@ -9,6 +11,22 @@ use super::types::composite_key;
|
||||
|
||||
impl AgentPool {
|
||||
/// Stop a running agent. Worktree is preserved for inspection.
|
||||
///
|
||||
/// **Order of operations matters here.** The naive implementation set
|
||||
/// `status = Failed` before killing the process, which opened the same
|
||||
/// idempotency window that produced the 2026-05-15 watchdog
|
||||
/// double-spawn: the `start_agent` check whitelists Running/Pending,
|
||||
/// so flipping status away from Running while the underlying claude
|
||||
/// process was still alive let a fresh spawn race in alongside the
|
||||
/// surviving one. The fix is:
|
||||
///
|
||||
/// 1. Read the worktree path (so we can find every process running
|
||||
/// in it) without mutating the agent record yet.
|
||||
/// 2. SIGKILL the process tree via [`crate::process_kill`] and BLOCK
|
||||
/// until verified gone. While this is in progress, status stays
|
||||
/// Running and `start_agent` continues to reject duplicate spawns.
|
||||
/// 3. Now that the process is gone, mutate the agent record (status,
|
||||
/// handle abort, removal).
|
||||
pub async fn stop_agent(
|
||||
&self,
|
||||
_project_root: &Path,
|
||||
@@ -17,27 +35,62 @@ impl AgentPool {
|
||||
) -> Result<(), String> {
|
||||
let key = composite_key(story_id, agent_name);
|
||||
|
||||
let (worktree_info, task_handle, tx) = {
|
||||
// Step 1: snapshot the worktree path (no status mutation yet).
|
||||
let worktree_info = {
|
||||
let agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
let agent = agents
|
||||
.get(&key)
|
||||
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
|
||||
agent.worktree_info.clone()
|
||||
};
|
||||
|
||||
// Step 2: SIGKILL every process running in the worktree, verify gone.
|
||||
// We do this BEFORE updating the agent record so the idempotency check
|
||||
// in `start_agent` keeps rejecting duplicate spawns until the slot is
|
||||
// legitimately free. Replaces the prior `kill_child_for_key` path,
|
||||
// which sent SIGHUP via portable_pty (ignored by claude-code).
|
||||
if let Some(wt) = worktree_info.as_ref() {
|
||||
let pids = pids_matching(&wt.path.display().to_string());
|
||||
if !pids.is_empty() {
|
||||
match sigkill_pids_and_verify(&pids) {
|
||||
Ok(n) => slog!(
|
||||
"[stop_agent] SIGKILL'd {n} process(es) in worktree {} for '{key}'.",
|
||||
wt.path.display()
|
||||
),
|
||||
Err(survivors) => slog_warn!(
|
||||
"[stop_agent] SIGKILL incomplete for '{key}': pids still alive: {survivors:?}. \
|
||||
Proceeding with record cleanup anyway; concurrent spawn protection may be weakened."
|
||||
),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[stop_agent] No worktree path recorded for '{key}'; cannot tree-kill, \
|
||||
falling back to portable_pty SIGHUP (likely no-op for claude-code)."
|
||||
);
|
||||
self.kill_child_for_key(&key);
|
||||
}
|
||||
|
||||
// Step 3: now safe to mutate. Status flip, handle abort, drop the
|
||||
// child_killers entry.
|
||||
let (task_handle, tx) = {
|
||||
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
let agent = agents
|
||||
.get_mut(&key)
|
||||
.ok_or_else(|| format!("No agent '{agent_name}' for story '{story_id}'"))?;
|
||||
|
||||
let wt = agent.worktree_info.clone();
|
||||
let handle = agent.task_handle.take();
|
||||
let tx = agent.tx.clone();
|
||||
agent.status = AgentStatus::Failed;
|
||||
(wt, handle, tx)
|
||||
(handle, tx)
|
||||
};
|
||||
|
||||
// Abort the task and kill the PTY child process.
|
||||
// Note: aborting a spawn_blocking task handle does not interrupt the blocking
|
||||
// thread, so we must also kill the child process directly via the killer registry.
|
||||
if let Some(handle) = task_handle {
|
||||
handle.abort();
|
||||
let _ = handle.await;
|
||||
}
|
||||
self.kill_child_for_key(&key);
|
||||
if let Ok(mut killers) = self.child_killers.lock() {
|
||||
killers.remove(&key);
|
||||
}
|
||||
|
||||
// Preserve worktree for inspection — don't destroy agent's work on stop.
|
||||
if let Some(ref wt) = worktree_info {
|
||||
@@ -53,7 +106,7 @@ impl AgentPool {
|
||||
status: "stopped".to_string(),
|
||||
});
|
||||
|
||||
// Remove from map
|
||||
// Remove from map.
|
||||
{
|
||||
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
agents.remove(&key);
|
||||
|
||||
@@ -6,10 +6,20 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Story 1086: matches the set of terminal stages used by the worktree-cleanup
|
||||
/// subscriber via the typed [`Status`] / [`Pipeline`] projections. Excludes
|
||||
/// `Status::Rejected` so rejected stories keep their worktree for human review.
|
||||
fn is_cleanup_terminal(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
/// Spawn a background task that creates a git worktree when a story enters `Stage::Coding`.
|
||||
///
|
||||
/// Subscribes to the pipeline transition broadcast channel. On each
|
||||
@@ -22,7 +32,14 @@ pub(crate) fn spawn_worktree_create_subscriber(project_root: PathBuf, port: u16)
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
if matches!(fired.after, Stage::Coding { .. }) {
|
||||
// Story 1086: classify by Pipeline column. `Pipeline::Coding`
|
||||
// covers `Stage::Coding` and `Stage::Blocked` — but Blocked has
|
||||
// no worktree to create, so we still need the Stage::Coding
|
||||
// payload check. Use a layered match: pipeline first for fast
|
||||
// skip, then variant guard.
|
||||
if fired.after.pipeline() == Pipeline::Coding
|
||||
&& matches!(fired.after, Stage::Coding { .. })
|
||||
{
|
||||
on_coding_transition(&project_root, port, &fired.story_id.0).await;
|
||||
}
|
||||
}
|
||||
@@ -50,13 +67,7 @@ pub(crate) fn spawn_worktree_cleanup_subscriber(project_root: PathBuf) {
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
if matches!(
|
||||
fired.after,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
) {
|
||||
if is_cleanup_terminal(&fired.after) {
|
||||
on_terminal_transition(&project_root, &fired.story_id.0).await;
|
||||
}
|
||||
}
|
||||
@@ -72,6 +83,36 @@ pub(crate) fn spawn_worktree_cleanup_subscriber(project_root: PathBuf) {
|
||||
});
|
||||
}
|
||||
|
||||
/// Reconcile worktree creation: for each story currently in `Stage::Coding`, ensure its worktree exists.
|
||||
///
|
||||
/// Idempotent — creates worktrees for Coding stories that have no worktree yet, and is
|
||||
/// a no-op for stories whose worktree already exists. Called by the periodic reconciler
|
||||
/// so that Lagged events on the broadcast channel never leave Coding stories without worktrees.
|
||||
pub(crate) async fn reconcile_worktree_create(project_root: &Path, port: u16) {
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
// Story 1086: filter by Pipeline column then narrow to the `Coding`
|
||||
// variant (Blocked is in `Pipeline::Coding` but has no worktree).
|
||||
if item.stage.pipeline() == Pipeline::Coding
|
||||
&& matches!(item.stage, crate::pipeline_state::Stage::Coding { .. })
|
||||
{
|
||||
on_coding_transition(project_root, port, &item.story_id.0).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reconcile worktree cleanup: for each story in a terminal stage, ensure its worktree is removed.
|
||||
///
|
||||
/// Idempotent — removes worktrees for terminal stories that still have one, and is a no-op
|
||||
/// for stories with no worktree. Called by the periodic reconciler so that Lagged events on
|
||||
/// the broadcast channel never leave terminal stories with dangling worktrees.
|
||||
pub(crate) async fn reconcile_worktree_cleanup(project_root: &Path) {
|
||||
for item in crate::pipeline_state::read_all_typed() {
|
||||
if is_cleanup_terminal(&item.stage) {
|
||||
on_terminal_transition(project_root, &item.story_id.0).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the worktree and feature branch for `story_id` when it enters `Stage::Coding`.
|
||||
pub(crate) async fn on_coding_transition(project_root: &Path, port: u16, story_id: &str) {
|
||||
let config = match crate::config::ProjectConfig::load(project_root) {
|
||||
|
||||
@@ -2,37 +2,30 @@
|
||||
|
||||
use crate::agents::{AgentPool, AgentStatus};
|
||||
use crate::config::ProjectConfig;
|
||||
use crate::pipeline_state::{ArchiveReason, PipelineItem, Stage};
|
||||
use crate::pipeline_state::{ArchiveReason, Pipeline, PipelineItem, Stage, Status};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Map a stage to its display section label, or `None` to skip it entirely.
|
||||
///
|
||||
/// This is the single source of truth for the "where does this item appear"
|
||||
/// decision. It mirrors the bucket routing in `http/workflow/pipeline.rs`
|
||||
/// so that chat output and the web UI are always consistent.
|
||||
///
|
||||
/// `Stage::Frozen { resume_to }` is handled recursively: a frozen story
|
||||
/// appears in the same section its `resume_to` stage would land in.
|
||||
/// This routes through [`Stage::pipeline`] so chat output and the web UI use
|
||||
/// the same column derivation. Frozen stories appear in their underlying
|
||||
/// `resume_to` column (handled inside `Stage::pipeline`) and items in
|
||||
/// `Stage::Archived` (with non-Blocked reasons) stay hidden.
|
||||
pub(crate) fn display_section(s: &Stage) -> Option<&'static str> {
|
||||
match s {
|
||||
Stage::Upcoming | Stage::Backlog => Some("Backlog"),
|
||||
Stage::Coding { .. }
|
||||
| Stage::Blocked { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Some("In Progress"),
|
||||
Stage::Qa | Stage::ReviewHold { .. } => Some("QA"),
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. } => {
|
||||
Some("Merge")
|
||||
}
|
||||
Stage::Done { .. } => Some("Done"),
|
||||
Stage::Frozen { resume_to } => display_section(resume_to),
|
||||
Stage::Abandoned { .. } | Stage::Superseded { .. } | Stage::Rejected { .. } => {
|
||||
Some("Closed")
|
||||
}
|
||||
Stage::Archived { .. } => None, // Completed/MergeFailed/ReviewHeld stay hidden
|
||||
// Archived items with non-Blocked reasons are hidden from chat output.
|
||||
if matches!(s, Stage::Archived { reason, .. } if !matches!(reason, ArchiveReason::Blocked { .. }))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some(match s.pipeline() {
|
||||
Pipeline::Backlog => "Backlog",
|
||||
Pipeline::Coding => "In Progress",
|
||||
Pipeline::Qa => "QA",
|
||||
Pipeline::Merge => "Merge",
|
||||
Pipeline::Done => "Done",
|
||||
Pipeline::Closed => "Closed",
|
||||
Pipeline::Archived => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check which dependency numbers from `item.depends_on` are unmet.
|
||||
@@ -114,10 +107,10 @@ pub(crate) fn build_status_from_items(
|
||||
|
||||
let config = ProjectConfig::load(project_root).ok();
|
||||
|
||||
// Pre-fetch working tree state for all Coding-stage items whose worktrees exist.
|
||||
// Pre-fetch working tree state for all Coding-column items whose worktrees exist.
|
||||
let dirty_files_by_story: HashMap<String, crate::service::git_ops::DirtyFiles> = items
|
||||
.iter()
|
||||
.filter(|i| matches!(i.stage, Stage::Coding { .. }))
|
||||
.filter(|i| i.stage.pipeline() == Pipeline::Coding && i.stage.status() == Status::Active)
|
||||
.filter_map(|i| {
|
||||
let wt = crate::worktree::worktree_path(project_root, &i.story_id.0);
|
||||
if wt.is_dir() {
|
||||
@@ -137,10 +130,13 @@ pub(crate) fn build_status_from_items(
|
||||
.into_iter()
|
||||
.collect();
|
||||
// Merge-failure detail now lives on the typed MergeJob CRDT entry
|
||||
// (story 929 — CRDT is the sole source of metadata).
|
||||
// (story 929 — CRDT is the sole source of metadata). Only items in the
|
||||
// Merge column with an Active status (i.e. `Stage::Merge { .. }`) need a
|
||||
// pre-fetched failure snippet; MergeFailure(Final) items render their
|
||||
// own snippet from the typed kind.
|
||||
let merge_failures: HashMap<String, String> = items
|
||||
.iter()
|
||||
.filter(|i| matches!(i.stage, Stage::Merge { .. }))
|
||||
.filter(|i| i.stage.pipeline() == Pipeline::Merge && i.stage.status() == Status::Active)
|
||||
.filter_map(|i| {
|
||||
let job = crate::crdt_state::read_merge_job(&i.story_id.0)?;
|
||||
let err = job.error?;
|
||||
@@ -215,11 +211,12 @@ pub(crate) fn build_status_from_items(
|
||||
out
|
||||
}
|
||||
|
||||
/// Render the one-line working tree summary for a story with uncommitted changes.
|
||||
/// Return an inline working-tree suffix for a story with uncommitted changes.
|
||||
///
|
||||
/// Returns an empty string when the working tree is clean. File paths are not
|
||||
/// listed here; use `status N` (triage) for the per-file breakdown.
|
||||
fn render_working_tree_lines(info: &crate::service::git_ops::DirtyFiles) -> String {
|
||||
/// Returns an empty string when the working tree is clean. The suffix is
|
||||
/// appended directly to the coder line, e.g. `, Working tree: 3 modified (uncommitted)`.
|
||||
/// File paths are not listed here; use `status N` (triage) for the per-file breakdown.
|
||||
fn working_tree_suffix(info: &crate::service::git_ops::DirtyFiles) -> String {
|
||||
if info.is_clean() {
|
||||
return String::new();
|
||||
}
|
||||
@@ -228,7 +225,7 @@ fn render_working_tree_lines(info: &crate::service::git_ops::DirtyFiles) -> Stri
|
||||
(0, n) => format!("{n} new"),
|
||||
(m, n) => format!("{m} modified, {n} new"),
|
||||
};
|
||||
format!(" Working tree: {summary} (uncommitted)\n")
|
||||
format!(", Working tree: {summary} (uncommitted)")
|
||||
}
|
||||
|
||||
/// Shared lookup tables passed to [`render_item_line`] to keep the argument count manageable.
|
||||
@@ -259,8 +256,10 @@ fn render_item_line(
|
||||
} else {
|
||||
Some(item.name.as_str())
|
||||
};
|
||||
// Use the typed CRDT stage as the sole source of truth (story 945).
|
||||
let frozen = matches!(item.stage, Stage::Frozen { .. });
|
||||
// Use the new Pipeline + Status helpers (story 1085).
|
||||
let pipeline = item.stage.pipeline();
|
||||
let status = item.stage.status();
|
||||
let frozen = status == Status::Frozen;
|
||||
let base_label = super::story_short_label(story_id, name_opt);
|
||||
let display = if frozen {
|
||||
format!("\u{2744}\u{FE0F} {base_label}") // ❄️ prefix
|
||||
@@ -281,41 +280,52 @@ fn render_item_line(
|
||||
format!(" *(waiting on: {})*", nums.join(", "))
|
||||
};
|
||||
|
||||
// Closed-stage items (abandoned / superseded / rejected) each get a
|
||||
// Closed-pipeline items (abandoned / superseded / rejected) each get a
|
||||
// distinct indicator and optionally display their metadata.
|
||||
match &item.stage {
|
||||
Stage::Abandoned { .. } => {
|
||||
match status {
|
||||
Status::Abandoned => {
|
||||
return format!(" \u{1F5D1}\u{FE0F} {display}{cost_suffix}\n"); // 🗑️
|
||||
}
|
||||
Stage::Superseded { superseded_by, .. } => {
|
||||
Status::Superseded => {
|
||||
let superseded_by = match &item.stage {
|
||||
Stage::Superseded { superseded_by, .. } => superseded_by.0.as_str(),
|
||||
_ => "",
|
||||
};
|
||||
return format!(
|
||||
" \u{1F500} {display}{cost_suffix} — superseded by {}\n", // 🔀
|
||||
superseded_by.0
|
||||
" \u{1F500} {display}{cost_suffix} — superseded by {superseded_by}\n", // 🔀
|
||||
);
|
||||
}
|
||||
Stage::Rejected { reason, .. } => {
|
||||
Status::Rejected => {
|
||||
let reason = match &item.stage {
|
||||
Stage::Rejected { reason, .. } => reason.as_str(),
|
||||
_ => "",
|
||||
};
|
||||
let snippet = first_non_empty_snippet(reason, 120);
|
||||
return format!(" \u{1F6AB} {display}{cost_suffix} — {snippet}\n"); // 🚫
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Merge-stage items get dedicated breakdown indicators instead of the
|
||||
// Merge-column items get dedicated breakdown indicators instead of the
|
||||
// generic traffic-light dot. MergeFailure / MergeFailureFinal items
|
||||
// now also appear in the Merge section (in-place) so they are handled
|
||||
// here alongside normal Merge items.
|
||||
if matches!(
|
||||
item.stage,
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. }
|
||||
) {
|
||||
match &item.stage {
|
||||
// appear in the Merge column (in-place) and are handled by the same arm.
|
||||
if pipeline == Pipeline::Merge {
|
||||
match status {
|
||||
// MergeFailureFinal: mergemaster already tried and gave up — always ⛔.
|
||||
Stage::MergeFailureFinal { kind } => {
|
||||
Status::MergeFailureFinal => {
|
||||
let kind = match &item.stage {
|
||||
Stage::MergeFailureFinal { kind } => kind,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let snippet = first_non_empty_snippet(&kind.display_reason(), 120);
|
||||
return format!(" \u{26D4} {display}{cost_suffix}{dep_suffix} — {snippet}\n");
|
||||
}
|
||||
// MergeFailure: a recovery agent may be running or queued.
|
||||
Stage::MergeFailure { kind, .. } => {
|
||||
Status::MergeFailure => {
|
||||
let kind = match &item.stage {
|
||||
Stage::MergeFailure { kind, .. } => kind,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return match agent.map(|a| &a.status) {
|
||||
Some(AgentStatus::Running) => format!(
|
||||
" \u{1F916} {display}{cost_suffix}{dep_suffix} — mergemaster running\n"
|
||||
@@ -352,16 +362,7 @@ fn render_item_line(
|
||||
}
|
||||
}
|
||||
|
||||
let blocked = matches!(
|
||||
item.stage,
|
||||
Stage::Blocked { .. }
|
||||
| Stage::MergeFailure { .. }
|
||||
| Stage::MergeFailureFinal { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
}
|
||||
);
|
||||
let blocked = status == Status::Blocked;
|
||||
// Blocked items with a recovery agent get differentiated indicators.
|
||||
if blocked {
|
||||
return match agent.map(|a| &a.status) {
|
||||
@@ -378,9 +379,9 @@ fn render_item_line(
|
||||
.and_then(|a| a.throttled)
|
||||
.is_some_and(|until| until > chrono::Utc::now());
|
||||
let dot = super::traffic_light_dot(blocked, throttled, agent.is_some());
|
||||
let wt_lines = dirty_files_by_story
|
||||
let wt_suffix = dirty_files_by_story
|
||||
.get(story_id)
|
||||
.map(render_working_tree_lines)
|
||||
.map(working_tree_suffix)
|
||||
.unwrap_or_default();
|
||||
if let Some(agent) = agent {
|
||||
let model_str = config
|
||||
@@ -389,10 +390,10 @@ fn render_item_line(
|
||||
.and_then(|ac| ac.model.as_ref().map(|m| m.as_str()))
|
||||
.unwrap_or("?");
|
||||
format!(
|
||||
" {dot}{display}{cost_suffix}{dep_suffix} — {} ({model_str})\n{wt_lines}",
|
||||
" {dot}{display}{cost_suffix}{dep_suffix} — {} ({model_str}){wt_suffix}\n",
|
||||
agent.agent_name
|
||||
)
|
||||
} else {
|
||||
format!(" {dot}{display}{cost_suffix}{dep_suffix}\n{wt_lines}")
|
||||
format!(" {dot}{display}{cost_suffix}{dep_suffix}{wt_suffix}\n")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,16 @@ pub(in crate::chat::transport::matrix::bot) async fn handle_message(
|
||||
let all_lines: Vec<String> = sled_guard.drain(..).chain(gtw_guard.drain(..)).collect();
|
||||
drop(sled_guard);
|
||||
drop(gtw_guard);
|
||||
format_drained_events(all_lines)
|
||||
slog!(
|
||||
"[matrix-bot] drained {} gateway audit lines for LLM context",
|
||||
all_lines.len()
|
||||
);
|
||||
let prefix = format_drained_events(all_lines);
|
||||
slog!(
|
||||
"[matrix-bot] format_drained_events output: {} bytes",
|
||||
prefix.len()
|
||||
);
|
||||
prefix
|
||||
};
|
||||
|
||||
// The prompt is just the current message with sender attribution.
|
||||
|
||||
@@ -326,21 +326,49 @@ pub async fn run_bot(
|
||||
}
|
||||
|
||||
// Subscribe to gateway-side status events and buffer compact audit lines for
|
||||
// the LLM context. A separate resubscribed receiver is used so both the
|
||||
// buffer task and the room-forwarder task receive every event independently.
|
||||
// the LLM context.
|
||||
//
|
||||
// Investigation log (story 1078) — hypotheses ruled out:
|
||||
// (A) gateway_event_rx is None: impossible — spawn_gateway_bot always passes
|
||||
// Some(state.event_tx.clone()) in gateway mode (gateway/mod.rs:130).
|
||||
// (B) recv() never returns: buf task uses the ORIGINAL event_rx (subscribed
|
||||
// before Matrix init) so any events buffered during init are visible;
|
||||
// future events arrive normally via the shared broadcast channel.
|
||||
// (C) Different Arc: buf and ctx.pending_gateway_events are both clones of
|
||||
// the same Arc<TokioMutex<Vec<String>>> — writes in the buf task are
|
||||
// immediately visible to handle_message.
|
||||
// (D) format_drained_events empty on non-empty input: the function is
|
||||
// pure/tested; the drain slog in handle_message now makes the count
|
||||
// observable so we can confirm it is non-zero when events arrive.
|
||||
//
|
||||
// Bug fixed here: previously the buffer task held `event_rx.resubscribe()`,
|
||||
// which starts at the *current tail* (next unsent message) and silently
|
||||
// discards every event that arrived during the Matrix login / room-join /
|
||||
// cross-signing phase (~5–30 s window). The forwarder now gets the
|
||||
// resubscribed receiver (only needs live events going forward); the buffer
|
||||
// task holds the original `event_rx` so it drains the init-window backlog
|
||||
// on first poll.
|
||||
let pending_gateway_events: Arc<TokioMutex<Vec<String>>> =
|
||||
Arc::new(TokioMutex::new(Vec::new()));
|
||||
let gateway_event_rx_for_forwarder = if let Some(event_rx) = gateway_event_rx {
|
||||
// Buffer task: silently accumulate compact audit lines for Timmy's context.
|
||||
// The forwarder only needs live (future) events — resubscribe is fine.
|
||||
let forwarder_rx = event_rx.resubscribe();
|
||||
// Buffer task: hold the *original* receiver so init-window events are
|
||||
// not lost. Silently accumulate compact audit lines for Timmy's context.
|
||||
{
|
||||
use crate::service::gateway::polling::format_gateway_audit_line;
|
||||
let buf_rx = event_rx.resubscribe();
|
||||
let buf = Arc::clone(&pending_gateway_events);
|
||||
slog!("[matrix-bot] subscribed to gateway events; buffer task starting");
|
||||
tokio::spawn(async move {
|
||||
let mut rx = buf_rx;
|
||||
let mut rx = event_rx;
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(event) => {
|
||||
slog!(
|
||||
"[matrix-bot] buffered audit line for project={} id={}",
|
||||
event.project,
|
||||
event.event.timestamp_ms()
|
||||
);
|
||||
let line = format_gateway_audit_line(&event.project, &event.event);
|
||||
buf.lock().await.push(line);
|
||||
}
|
||||
@@ -352,7 +380,7 @@ pub async fn run_bot(
|
||||
}
|
||||
});
|
||||
}
|
||||
Some(event_rx)
|
||||
Some(forwarder_rx)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -592,4 +620,89 @@ mod tests {
|
||||
assert_eq!(steps[2], 20);
|
||||
assert_eq!(steps[3], 40);
|
||||
}
|
||||
|
||||
/// Regression test (story 1078): gateway broadcast events must reach
|
||||
/// `pending_gateway_events` and produce an `audit ts=…` line in the
|
||||
/// `format_drained_events` output that is prepended to Timmy's prompt.
|
||||
///
|
||||
/// The test spins up a mock `event_tx` broadcaster, sends one
|
||||
/// `StageTransition` event, lets the buffer task process it, drains the
|
||||
/// buffer, and asserts the result contains the expected audit prefix.
|
||||
#[tokio::test]
|
||||
async fn gateway_buffer_task_injects_audit_line_into_context() {
|
||||
use super::super::messages::format_drained_events;
|
||||
use crate::service::events::StoredEvent;
|
||||
use crate::service::gateway::GatewayStatusEvent;
|
||||
use crate::service::gateway::polling::format_gateway_audit_line;
|
||||
|
||||
let (event_tx, event_rx) = tokio::sync::broadcast::channel::<GatewayStatusEvent>(16);
|
||||
|
||||
// pending_gateway_events shared between buffer task and drain site.
|
||||
let pending: Arc<TokioMutex<Vec<String>>> = Arc::new(TokioMutex::new(Vec::new()));
|
||||
|
||||
// Spawn a minimal buffer task — same logic as run_bot uses.
|
||||
{
|
||||
let buf = Arc::clone(&pending);
|
||||
tokio::spawn(async move {
|
||||
let mut rx = event_rx;
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(event) => {
|
||||
let line = format_gateway_audit_line(&event.project, &event.event);
|
||||
buf.lock().await.push(line);
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Send one stage-transition event, as a project node would.
|
||||
let evt = GatewayStatusEvent {
|
||||
project: "huskies".to_string(),
|
||||
event: StoredEvent::StageTransition {
|
||||
story_id: "42_story_feat".to_string(),
|
||||
story_name: String::new(),
|
||||
from_stage: "2_current".to_string(),
|
||||
to_stage: "3_qa".to_string(),
|
||||
timestamp_ms: 1_000_000,
|
||||
},
|
||||
};
|
||||
let receivers = event_tx.send(evt).unwrap_or(0);
|
||||
assert!(
|
||||
receivers > 0,
|
||||
"event must have at least one active receiver"
|
||||
);
|
||||
|
||||
// Wait for the buffer task to process the event.
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2);
|
||||
loop {
|
||||
if !pending.lock().await.is_empty() {
|
||||
break;
|
||||
}
|
||||
assert!(
|
||||
std::time::Instant::now() < deadline,
|
||||
"buffer task did not receive the event within 2 s"
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
|
||||
// Drain and format — mirrors what handle_message does.
|
||||
let lines: Vec<String> = pending.lock().await.drain(..).collect();
|
||||
let prefix = format_drained_events(lines);
|
||||
|
||||
assert!(
|
||||
prefix.contains("audit ts="),
|
||||
"prompt prefix must contain 'audit ts='; got: {prefix}"
|
||||
);
|
||||
assert!(
|
||||
prefix.contains("project=huskies"),
|
||||
"prompt prefix must name the project; got: {prefix}"
|
||||
);
|
||||
assert!(
|
||||
prefix.starts_with("<system-reminder>\n"),
|
||||
"prefix must open with <system-reminder>; got: {prefix}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,6 +161,12 @@ pub struct WatcherConfig {
|
||||
/// moved to `6_archived/`. Default: 14400 (4 hours).
|
||||
#[serde(default = "default_done_retention_secs")]
|
||||
pub done_retention_secs: u64,
|
||||
/// How often (in seconds) the periodic reconciler runs to converge
|
||||
/// subscriber side effects. The reconciler calls each subscriber's
|
||||
/// `reconcile()` entry point so that Lagged events never leave persistent
|
||||
/// state diverged. Default: 30 seconds.
|
||||
#[serde(default = "default_reconcile_interval_secs")]
|
||||
pub reconcile_interval_secs: u64,
|
||||
}
|
||||
|
||||
impl Default for WatcherConfig {
|
||||
@@ -168,6 +174,7 @@ impl Default for WatcherConfig {
|
||||
Self {
|
||||
sweep_interval_secs: default_sweep_interval_secs(),
|
||||
done_retention_secs: default_done_retention_secs(),
|
||||
reconcile_interval_secs: default_reconcile_interval_secs(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -180,6 +187,10 @@ fn default_done_retention_secs() -> u64 {
|
||||
4 * 60 * 60 // 4 hours
|
||||
}
|
||||
|
||||
fn default_reconcile_interval_secs() -> u64 {
|
||||
30
|
||||
}
|
||||
|
||||
fn default_qa() -> String {
|
||||
"server".to_string()
|
||||
}
|
||||
|
||||
@@ -56,7 +56,8 @@ pub use write::{
|
||||
bump_retry_count, migrate_legacy_stage_strings, migrate_merge_job, migrate_names_from_slugs,
|
||||
migrate_node_claims_to_agent_claims, migrate_story_ids_to_numeric, name_from_story_id,
|
||||
purge_done_stage_merge_jobs, set_agent, set_depends_on, set_epic, set_item_type, set_name,
|
||||
set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
set_origin, set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count,
|
||||
write_item,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -29,6 +29,8 @@ pub struct CrdtItemDump {
|
||||
/// Hex-encoded OpId of the list insert op — cross-reference with `crdt_ops`.
|
||||
pub content_index: String,
|
||||
pub is_deleted: bool,
|
||||
/// Origin JSON string, or `None` for items that pre-date story 1088.
|
||||
pub origin: Option<String>,
|
||||
}
|
||||
|
||||
/// Top-level debug dump of the in-memory CRDT state.
|
||||
@@ -149,6 +151,10 @@ pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
|
||||
JsonValue::Number(n) if n > 0.0 => Some(n),
|
||||
_ => None,
|
||||
};
|
||||
let origin = match item_crdt.origin.view() {
|
||||
JsonValue::String(s) if !s.is_empty() => Some(s),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let content_index = op.id.iter().map(|b| format!("{b:02x}")).collect::<String>();
|
||||
|
||||
@@ -163,6 +169,7 @@ pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
|
||||
claim_ts,
|
||||
content_index,
|
||||
is_deleted: op.is_deleted,
|
||||
origin,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -408,6 +415,11 @@ pub(super) fn extract_item_view(item: &PipelineItemCrdt) -> Option<PipelineItemV
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let origin = match item.origin.view() {
|
||||
JsonValue::String(s) if !s.is_empty() => Some(s),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let stage = project_stage_for_view(
|
||||
&stage_str,
|
||||
&story_id,
|
||||
@@ -429,6 +441,7 @@ pub(super) fn extract_item_view(item: &PipelineItemCrdt) -> Option<PipelineItemV
|
||||
qa_mode,
|
||||
item_type,
|
||||
epic,
|
||||
origin,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -585,56 +598,48 @@ fn project_stage_for_view(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether a dependency (by numeric ID prefix) is in `5_done` or `6_archived`
|
||||
/// according to CRDT state.
|
||||
/// Check whether a dependency (by numeric ID prefix) is in `Pipeline::Done` or
|
||||
/// `Pipeline::Archived` according to CRDT state.
|
||||
///
|
||||
/// Returns `true` if the dependency is satisfied (item found in a done stage).
|
||||
/// Matches both legacy slug-form IDs (`"664_story_foo"`) and numeric-only IDs
|
||||
/// (`"664"`) so the check remains correct after the slug→numeric migration.
|
||||
/// See `dep_is_archived_crdt` to distinguish archive-satisfied from cleanly-done.
|
||||
/// Returns `true` if the dependency is satisfied (item found in a Done or
|
||||
/// Archived pipeline column). Matches both legacy slug-form IDs
|
||||
/// (`"664_story_foo"`) and numeric-only IDs (`"664"`) so the check remains
|
||||
/// correct after the slug→numeric migration. Story 1086 routes the check
|
||||
/// through the `Pipeline` projection so that future Stage variants automatically
|
||||
/// participate via [`crate::pipeline_state::Stage::pipeline`]. See
|
||||
/// `dep_is_archived_crdt` to distinguish archive-satisfied from cleanly-done.
|
||||
pub fn dep_is_done_crdt(dep_number: u32) -> bool {
|
||||
use crate::pipeline_state::{Stage, read_all_typed};
|
||||
use crate::pipeline_state::{Pipeline, read_all_typed};
|
||||
let exact = dep_number.to_string();
|
||||
let prefix = format!("{dep_number}_");
|
||||
read_all_typed().into_iter().any(|item| {
|
||||
(item.story_id.0 == exact || item.story_id.0.starts_with(&prefix))
|
||||
&& matches!(
|
||||
item.stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
&& matches!(item.stage.pipeline(), Pipeline::Done | Pipeline::Archived)
|
||||
})
|
||||
}
|
||||
|
||||
/// Check whether a dependency (by numeric ID prefix) is specifically in `6_archived`
|
||||
/// according to CRDT state.
|
||||
/// Check whether a dependency (by numeric ID prefix) is specifically in
|
||||
/// `Pipeline::Archived` according to CRDT state.
|
||||
///
|
||||
/// Used to detect when a dependency is satisfied via archive rather than via a clean
|
||||
/// completion through `5_done`. Returns `false` when the CRDT layer is not initialised.
|
||||
/// Matches both legacy slug-form IDs (`"664_story_foo"`) and numeric-only IDs (`"664"`).
|
||||
/// completion through `Pipeline::Done`. Returns `false` when the CRDT layer is not
|
||||
/// initialised. Matches both legacy slug-form IDs (`"664_story_foo"`) and
|
||||
/// numeric-only IDs (`"664"`).
|
||||
pub fn dep_is_archived_crdt(dep_number: u32) -> bool {
|
||||
use crate::pipeline_state::{Stage, read_all_typed};
|
||||
use crate::pipeline_state::{Pipeline, read_all_typed};
|
||||
let exact = dep_number.to_string();
|
||||
let prefix = format!("{dep_number}_");
|
||||
read_all_typed().into_iter().any(|item| {
|
||||
(item.story_id.0 == exact || item.story_id.0.starts_with(&prefix))
|
||||
&& matches!(
|
||||
item.stage,
|
||||
Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
&& item.stage.pipeline() == Pipeline::Archived
|
||||
})
|
||||
}
|
||||
|
||||
/// Check unmet dependencies for a story by reading its `depends_on` from the
|
||||
/// CRDT document and checking each dependency against CRDT state.
|
||||
///
|
||||
/// Returns the list of dependency numbers that are NOT in `5_done` or `6_archived`.
|
||||
/// Returns the list of dependency numbers whose stage is NOT in `Pipeline::Done`
|
||||
/// or `Pipeline::Archived`.
|
||||
pub fn check_unmet_deps_crdt(story_id: &str) -> Vec<u32> {
|
||||
let item = match read_item(story_id) {
|
||||
Some(i) => i,
|
||||
|
||||
@@ -105,6 +105,26 @@ pub struct PipelineItemCrdt {
|
||||
/// means no merge task is in flight. Projected into `Stage::Merge {
|
||||
/// server_start_time }` so callers never read this register directly.
|
||||
pub merge_server_start: LwwRegisterCrdt<f64>,
|
||||
/// Story 1086: kebab-case wire form of the [`crate::pipeline_state::Pipeline`]
|
||||
/// projection of the current `stage`. Written by `write_item` alongside
|
||||
/// `stage` so display/scan code on remote peers can route by pipeline column
|
||||
/// without re-deriving from the stage string. Empty string means "use the
|
||||
/// value derived from `stage`" (legacy items predating 1086).
|
||||
pub pipeline: LwwRegisterCrdt<String>,
|
||||
/// Story 1086: kebab-case wire form of the [`crate::pipeline_state::Status`]
|
||||
/// projection of the current `stage`. Written alongside `stage` so badge
|
||||
/// renderers can read the status directly without re-projecting from the
|
||||
/// stage string. Empty string means "use the value derived from `stage`"
|
||||
/// (legacy items predating 1086).
|
||||
pub status: LwwRegisterCrdt<String>,
|
||||
/// Story 1088: origin of the work item — who or what created it.
|
||||
///
|
||||
/// Stored as a compact JSON string, e.g.
|
||||
/// `{"kind":"user","id":"","ts":1716768000.0}` or
|
||||
/// `{"kind":"agent","id":"coder-1","ts":1716768000.0}`.
|
||||
/// Empty string on older items that pre-date this register; the typed
|
||||
/// read path surfaces those as `None`, which the UI renders as `"unknown"`.
|
||||
pub origin: LwwRegisterCrdt<String>,
|
||||
}
|
||||
|
||||
/// CRDT node that holds a single peer's presence entry.
|
||||
@@ -203,6 +223,9 @@ pub struct WorkItem {
|
||||
pub(super) item_type: Option<crate::io::story_metadata::ItemType>,
|
||||
/// Epic this item belongs to. `None` when the item has no parent epic.
|
||||
pub(super) epic: Option<EpicId>,
|
||||
/// Origin of the work item (story 1088). `None` for items created before
|
||||
/// the origin register was introduced; those display as `"unknown"`.
|
||||
pub(super) origin: Option<String>,
|
||||
}
|
||||
|
||||
impl WorkItem {
|
||||
@@ -261,6 +284,12 @@ impl WorkItem {
|
||||
self.epic
|
||||
}
|
||||
|
||||
/// Origin of the work item (story 1088), or `None` for items created before
|
||||
/// the origin register was introduced.
|
||||
pub fn origin(&self) -> Option<&str> {
|
||||
self.origin.as_deref()
|
||||
}
|
||||
|
||||
/// Construct a `WorkItem` for use in tests outside `crdt_state::*`.
|
||||
///
|
||||
/// Within `crdt_state` use a struct literal directly (fields are `pub(super)`).
|
||||
@@ -286,6 +315,7 @@ impl WorkItem {
|
||||
qa_mode,
|
||||
item_type,
|
||||
epic,
|
||||
origin: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,6 +235,31 @@ pub fn set_plan_state(story_id: &str, state: crate::pipeline_state::PlanState) -
|
||||
true
|
||||
}
|
||||
|
||||
/// Set the `origin` CRDT register for a pipeline item (story 1088).
|
||||
///
|
||||
/// Writes a compact JSON string describing who or what created the item, e.g.
|
||||
/// `{"kind":"user","id":"","ts":1716768000.0}` or
|
||||
/// `{"kind":"agent","id":"coder-1","ts":1716768000.0}`.
|
||||
///
|
||||
/// Passing an empty string is treated as "no origin set" (equivalent to the
|
||||
/// pre-1088 state for older items). Returns `true` if the item was found and
|
||||
/// the op was applied, `false` otherwise.
|
||||
pub fn set_origin(story_id: &str, origin: &str) -> bool {
|
||||
let Some(state_mutex) = get_crdt() else {
|
||||
return false;
|
||||
};
|
||||
let Ok(mut state) = state_mutex.lock() else {
|
||||
return false;
|
||||
};
|
||||
let Some(&idx) = state.index.get(story_id) else {
|
||||
return false;
|
||||
};
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].origin.set(origin.to_string())
|
||||
});
|
||||
true
|
||||
}
|
||||
|
||||
/// Write a pipeline item state through CRDT operations.
|
||||
///
|
||||
/// If the item exists, updates its registers. If not, inserts a new item
|
||||
@@ -256,6 +281,11 @@ pub fn write_item(
|
||||
merged_at: Option<f64>,
|
||||
) {
|
||||
let stage_str = stage_dir_name(stage);
|
||||
// Story 1086: persist the typed Pipeline + Status projections alongside
|
||||
// the stage register so subscribers/display code on remote peers can route
|
||||
// by them without re-deriving from the stage string.
|
||||
let pipeline_str = stage.pipeline().as_str();
|
||||
let status_str = stage.status().as_str();
|
||||
let claim: Option<&AgentClaim> = match stage {
|
||||
Stage::Coding { claim, .. } => claim.as_ref(),
|
||||
Stage::Merge { claim, .. } => claim.as_ref(),
|
||||
@@ -311,6 +341,14 @@ pub fn write_item(
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].stage.set(stage_str.to_string())
|
||||
});
|
||||
// Story 1086: keep `pipeline` and `status` registers in lock-step with
|
||||
// the stage write so subscribers/display can read them directly.
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].pipeline.set(pipeline_str.to_string())
|
||||
});
|
||||
apply_and_persist(&mut state, |s| {
|
||||
s.crdt.doc.items[idx].status.set(status_str.to_string())
|
||||
});
|
||||
|
||||
if let Some(n) = name {
|
||||
apply_and_persist(&mut state, |s| {
|
||||
@@ -394,6 +432,10 @@ pub fn write_item(
|
||||
"resume_to": "",
|
||||
"plan_state": "",
|
||||
"merge_server_start": merge_server_start_val,
|
||||
// Story 1086: typed Pipeline + Status projections written at insert.
|
||||
"pipeline": pipeline_str,
|
||||
"status": status_str,
|
||||
"origin": "",
|
||||
})
|
||||
.into();
|
||||
|
||||
@@ -424,6 +466,10 @@ pub fn write_item(
|
||||
item.resume_to.advance_seq(floor);
|
||||
item.plan_state.advance_seq(floor);
|
||||
item.merge_server_start.advance_seq(floor);
|
||||
// Story 1086.
|
||||
item.pipeline.advance_seq(floor);
|
||||
item.status.advance_seq(floor);
|
||||
item.origin.advance_seq(floor);
|
||||
}
|
||||
|
||||
// Broadcast a CrdtEvent for the new item.
|
||||
|
||||
@@ -10,8 +10,8 @@ mod migrations;
|
||||
mod tests;
|
||||
|
||||
pub use item::{
|
||||
bump_retry_count, set_agent, set_depends_on, set_epic, set_item_type, set_name, set_plan_state,
|
||||
set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
bump_retry_count, set_agent, set_depends_on, set_epic, set_item_type, set_name, set_origin,
|
||||
set_plan_state, set_qa_mode, set_resume_to, set_resume_to_raw, set_retry_count, write_item,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -434,6 +434,7 @@ async fn handle_work_items_get(params: Value) -> Value {
|
||||
"stage": c.stage,
|
||||
"name": c.name,
|
||||
"agent": c.agent,
|
||||
"origin": c.origin,
|
||||
}),
|
||||
Err(e) => serde_json::json!({"error": e.to_string()}),
|
||||
}
|
||||
|
||||
+11
-9
@@ -12,7 +12,7 @@
|
||||
//! zombie entries left over from sessions that predate the subscriber.
|
||||
|
||||
use crate::db::{ContentKey, all_content_ids, delete_content};
|
||||
use crate::pipeline_state::Stage;
|
||||
use crate::pipeline_state::{Pipeline, Stage, Status};
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
@@ -111,16 +111,18 @@ pub(crate) fn sweep_zombie_content_on_startup() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` when `stage` is one of the five terminal pipeline stages.
|
||||
/// Return `true` when `stage` is one of the terminal pipeline classifications.
|
||||
///
|
||||
/// Story 1086: matches via the [`Status`] projection (Done / Abandoned /
|
||||
/// Superseded / Rejected) plus [`Pipeline::Archived`] for plain archived items
|
||||
/// (which carry `Status::Active`). Future Stage variants automatically
|
||||
/// participate by returning the appropriate Status / Pipeline from
|
||||
/// [`Stage::status`] / [`Stage::pipeline`].
|
||||
fn is_terminal_stage(stage: &Stage) -> bool {
|
||||
matches!(
|
||||
stage,
|
||||
Stage::Done { .. }
|
||||
| Stage::Archived { .. }
|
||||
| Stage::Abandoned { .. }
|
||||
| Stage::Superseded { .. }
|
||||
| Stage::Rejected { .. }
|
||||
)
|
||||
stage.status(),
|
||||
Status::Done | Status::Abandoned | Status::Superseded | Status::Rejected
|
||||
) || matches!(stage.pipeline(), Pipeline::Archived)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
+321
-1
@@ -29,7 +29,7 @@ pub mod shadow_write;
|
||||
|
||||
pub use content_store::{ContentKey, all_content_ids, delete_content, read_content, write_content};
|
||||
pub use ops::{ItemMeta, delete_item, move_item_stage, next_item_number, write_item_with_content};
|
||||
pub use shadow_write::{get_shared_pool, init};
|
||||
pub use shadow_write::{check_schema_drift, get_shared_pool, init};
|
||||
|
||||
#[cfg(test)]
|
||||
pub use content_store::ensure_content_store;
|
||||
@@ -395,6 +395,112 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: root cause of the 2026-05-14 21:07 production outage.
|
||||
///
|
||||
/// A headless agent on a feature branch (whose binary includes a new
|
||||
/// sqlx migration) must NEVER apply that migration to the production
|
||||
/// pipeline.db. Verify that opening an agent-local DB and running
|
||||
/// migrations on it leaves the production DB's `_sqlx_migrations` table
|
||||
/// unchanged.
|
||||
///
|
||||
/// The enforcement mechanism is in `init_subsystems(is_agent=true)`, which
|
||||
/// redirects to a temp path. This test validates the SQLite isolation
|
||||
/// property: migrations applied to one file are confined to that file.
|
||||
#[tokio::test]
|
||||
async fn agent_db_isolation_does_not_affect_production_db() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let prod_db_path = tmp.path().join("production.db");
|
||||
let agent_db_path = tmp.path().join("agent_temp.db");
|
||||
|
||||
// Set up the production DB — apply the current compiled-in migrations.
|
||||
let prod_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&prod_db_path)
|
||||
.create_if_missing(true);
|
||||
let prod_pool = sqlx::SqlitePool::connect_with(prod_opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations")
|
||||
.run(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Record the migration versions present in the production DB.
|
||||
let before: Vec<(i64,)> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||
.fetch_all(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Simulate the agent opening its own isolated DB and running migrations.
|
||||
let agent_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&agent_db_path)
|
||||
.create_if_missing(true);
|
||||
let agent_pool = sqlx::SqlitePool::connect_with(agent_opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations")
|
||||
.run(&agent_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Production DB must be completely unaffected by the agent's migration run.
|
||||
let after: Vec<(i64,)> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||
.fetch_all(&prod_pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
before, after,
|
||||
"agent opening its own DB must not alter the production DB migration table"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `check_schema_drift` returns an empty list when all
|
||||
/// migrations in the database are recognised by this binary.
|
||||
#[tokio::test]
|
||||
async fn check_schema_drift_empty_when_all_known() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("drift_test.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||
assert!(
|
||||
drift.is_empty(),
|
||||
"no drift expected when DB matches the compiled-in migration set"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `check_schema_drift` identifies a manually-inserted
|
||||
/// migration row that is not part of the compiled-in set.
|
||||
#[tokio::test]
|
||||
async fn check_schema_drift_detects_unknown_migration() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("drift_future.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
// Inject a fake "future" migration that no binary compiled today would know.
|
||||
let fake_checksum: Vec<u8> = vec![0u8; 20];
|
||||
sqlx::query(
|
||||
"INSERT INTO _sqlx_migrations \
|
||||
(version, description, installed_on, success, checksum, execution_time) \
|
||||
VALUES (99999999999999, 'future_migration', '2099-01-01T00:00:00Z', 1, ?1, 0)",
|
||||
)
|
||||
.bind(&fake_checksum)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||
assert_eq!(drift.len(), 1, "exactly one unknown migration expected");
|
||||
assert_eq!(drift[0].version, 99999999999999_i64);
|
||||
assert_eq!(drift[0].description, "future_migration");
|
||||
}
|
||||
|
||||
/// Story 864: passing `ItemMeta::default()` against a content blob that
|
||||
/// LOOKS like front-matter must NOT silently extract metadata into the
|
||||
/// CRDT. The whole point of removing the implicit YAML round-trip is
|
||||
@@ -482,4 +588,218 @@ mod tests {
|
||||
"retry_count must reset to 0 on stage transition"
|
||||
);
|
||||
}
|
||||
|
||||
/// Story 1087, AC2: the split-stage migration projects every supported
|
||||
/// wire-form `stage` string into the canonical `(pipeline, status)` pair.
|
||||
/// The fixture covers each Stage variant (and the legacy numeric-prefix
|
||||
/// directory names retained for back-compat).
|
||||
#[tokio::test]
|
||||
async fn split_stage_migration_backfills_pipeline_and_status_for_every_variant() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("pipeline.db");
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
|
||||
// (stage written by older code, expected pipeline, expected status)
|
||||
let fixture: &[(&str, &str, &str)] = &[
|
||||
("upcoming", "backlog", "active"),
|
||||
("backlog", "backlog", "active"),
|
||||
("coding", "coding", "active"),
|
||||
("blocked", "coding", "blocked"),
|
||||
("qa", "qa", "active"),
|
||||
("review_hold", "qa", "review-hold"),
|
||||
("merge", "merge", "active"),
|
||||
("merge_failure", "merge", "merge-failure"),
|
||||
("merge_failure_final", "merge", "merge-failure-final"),
|
||||
("done", "done", "done"),
|
||||
("abandoned", "closed", "abandoned"),
|
||||
("superseded", "closed", "superseded"),
|
||||
("rejected", "closed", "rejected"),
|
||||
("archived", "archived", "active"),
|
||||
("frozen", "coding", "frozen"),
|
||||
// Legacy numeric-prefix directory names.
|
||||
("1_backlog", "backlog", "active"),
|
||||
("2_current", "coding", "active"),
|
||||
("3_qa", "qa", "active"),
|
||||
("4_merge", "merge", "active"),
|
||||
("5_done", "done", "done"),
|
||||
("6_archived", "archived", "active"),
|
||||
];
|
||||
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
for (idx, (stage, _, _)) in fixture.iter().enumerate() {
|
||||
let id = format!("1087_fixture_{idx}");
|
||||
sqlx::query(
|
||||
"INSERT INTO pipeline_items \
|
||||
(id, name, stage, agent, retry_count, depends_on, content, created_at, updated_at) \
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?8)",
|
||||
)
|
||||
.bind(&id)
|
||||
.bind("fixture")
|
||||
.bind(*stage)
|
||||
.bind(Option::<String>::None)
|
||||
.bind(Option::<i64>::None)
|
||||
.bind(Option::<String>::None)
|
||||
.bind("---\nname: fixture\n---\n")
|
||||
.bind(&now)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Force the split-stage backfill to run against the rows we just
|
||||
// inserted. In production this is `sqlx::migrate!`'s job, but the
|
||||
// sqlx migrator only runs migrations once per DB and they were already
|
||||
// applied at the top of the test before any rows existed. Reissuing
|
||||
// the backfill statements is the migration logic under test.
|
||||
sqlx::query(
|
||||
"UPDATE pipeline_items SET pipeline = CASE stage \
|
||||
WHEN 'upcoming' THEN 'backlog' \
|
||||
WHEN 'backlog' THEN 'backlog' \
|
||||
WHEN '1_backlog' THEN 'backlog' \
|
||||
WHEN 'coding' THEN 'coding' \
|
||||
WHEN 'blocked' THEN 'coding' \
|
||||
WHEN '2_current' THEN 'coding' \
|
||||
WHEN 'qa' THEN 'qa' \
|
||||
WHEN 'review_hold' THEN 'qa' \
|
||||
WHEN '3_qa' THEN 'qa' \
|
||||
WHEN 'merge' THEN 'merge' \
|
||||
WHEN 'merge_failure' THEN 'merge' \
|
||||
WHEN 'merge_failure_final' THEN 'merge' \
|
||||
WHEN '4_merge' THEN 'merge' \
|
||||
WHEN 'done' THEN 'done' \
|
||||
WHEN '5_done' THEN 'done' \
|
||||
WHEN 'abandoned' THEN 'closed' \
|
||||
WHEN 'superseded' THEN 'closed' \
|
||||
WHEN 'rejected' THEN 'closed' \
|
||||
WHEN 'archived' THEN 'archived' \
|
||||
WHEN '6_archived' THEN 'archived' \
|
||||
WHEN 'frozen' THEN 'coding' \
|
||||
ELSE '' END",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"UPDATE pipeline_items SET status = CASE stage \
|
||||
WHEN 'frozen' THEN 'frozen' \
|
||||
WHEN 'review_hold' THEN 'review-hold' \
|
||||
WHEN 'blocked' THEN 'blocked' \
|
||||
WHEN 'merge_failure' THEN 'merge-failure' \
|
||||
WHEN 'merge_failure_final' THEN 'merge-failure-final' \
|
||||
WHEN 'abandoned' THEN 'abandoned' \
|
||||
WHEN 'superseded' THEN 'superseded' \
|
||||
WHEN 'rejected' THEN 'rejected' \
|
||||
WHEN 'done' THEN 'done' \
|
||||
WHEN '5_done' THEN 'done' \
|
||||
ELSE 'active' END",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for (idx, (stage_input, expect_pipeline, expect_status)) in fixture.iter().enumerate() {
|
||||
let id = format!("1087_fixture_{idx}");
|
||||
let row: (String, String) =
|
||||
sqlx::query_as("SELECT pipeline, status FROM pipeline_items WHERE id = ?1")
|
||||
.bind(&id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
row.0, *expect_pipeline,
|
||||
"stage {stage_input:?} should backfill pipeline to {expect_pipeline:?}, got {:?}",
|
||||
row.0
|
||||
);
|
||||
assert_eq!(
|
||||
row.1, *expect_status,
|
||||
"stage {stage_input:?} should backfill status to {expect_status:?}, got {:?}",
|
||||
row.1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Story 1087, AC1: `shadow_write::init` writes a timestamped backup of
|
||||
/// pipeline.db before the split-stage migration applies, and skips the
|
||||
/// backup on subsequent restarts (after the migration is recorded).
|
||||
#[tokio::test]
|
||||
async fn pre_pipeline_status_backup_only_runs_once() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let db_path = tmp.path().join("pipeline.db");
|
||||
|
||||
// Seed a "pre-1087" DB: open without applying the split-stage migration.
|
||||
// We do this by opening with `create_if_missing` and running only the
|
||||
// legacy migrations — but the simplest way to simulate that here is to
|
||||
// hand-craft a DB containing an `_sqlx_migrations` table that lists
|
||||
// every migration EXCEPT the split-stage one.
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(true);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
// Apply migrations the normal way, then delete the split-stage row so
|
||||
// the backup branch fires on the next `init`.
|
||||
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||
sqlx::query("DELETE FROM _sqlx_migrations WHERE version = 20260515000000")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
pool.close().await;
|
||||
|
||||
// First call: backup branch fires, side-car file appears.
|
||||
super::shadow_write::backup_pre_pipeline_status(&db_path).await;
|
||||
let backups: Vec<_> = std::fs::read_dir(tmp.path())
|
||||
.unwrap()
|
||||
.filter_map(Result::ok)
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_string_lossy()
|
||||
.contains(".pre-pipeline-status.")
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
backups.len(),
|
||||
1,
|
||||
"expected exactly one .pre-pipeline-status backup, got {}",
|
||||
backups.len()
|
||||
);
|
||||
|
||||
// Re-apply the migration so the marker row is back, simulating a
|
||||
// post-migration server restart.
|
||||
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||
.filename(&db_path)
|
||||
.create_if_missing(false);
|
||||
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||
let fake_checksum: Vec<u8> = vec![0u8; 20];
|
||||
sqlx::query(
|
||||
"INSERT INTO _sqlx_migrations \
|
||||
(version, description, installed_on, success, checksum, execution_time) \
|
||||
VALUES (20260515000000, 'split_stage_into_pipeline_status', '2026-05-15T00:00:00Z', 1, ?1, 0)",
|
||||
)
|
||||
.bind(&fake_checksum)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
pool.close().await;
|
||||
|
||||
// Second call: no new backup written.
|
||||
super::shadow_write::backup_pre_pipeline_status(&db_path).await;
|
||||
let backups_after: Vec<_> = std::fs::read_dir(tmp.path())
|
||||
.unwrap()
|
||||
.filter_map(Result::ok)
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_string_lossy()
|
||||
.contains(".pre-pipeline-status.")
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
backups_after.len(),
|
||||
1,
|
||||
"post-migration init must not create another backup; got {} backups",
|
||||
backups_after.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,10 +11,23 @@ use crate::slog;
|
||||
use sqlx::SqlitePool;
|
||||
use sqlx::sqlite::SqliteConnectOptions;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// One migration row in the live database that is not in the compiled-in set.
|
||||
///
|
||||
/// Returned by [`check_schema_drift`] for each unknown migration.
|
||||
pub struct UnknownMigration {
|
||||
/// sqlx migration version number (derived from the filename timestamp).
|
||||
pub version: i64,
|
||||
/// Human-readable description from the migration filename.
|
||||
pub description: String,
|
||||
/// When the migration was applied, as stored in `_sqlx_migrations.installed_on`.
|
||||
pub installed_on: String,
|
||||
}
|
||||
|
||||
/// The process-global SQLite pool, set once by [`init`].
|
||||
///
|
||||
/// Other modules call [`get_shared_pool`] to access the pool without needing
|
||||
@@ -56,6 +69,13 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Story 1087: before running the migration that splits `stage` into
|
||||
// (`pipeline`, `status`), take a timestamped side-car copy of the live DB
|
||||
// so the pre-split state is recoverable. Skip the copy when the file does
|
||||
// not yet exist (fresh installs) or when the split-stage migration has
|
||||
// already been applied (subsequent restarts).
|
||||
backup_pre_pipeline_status(db_path).await;
|
||||
|
||||
let options = SqliteConnectOptions::new()
|
||||
.filename(db_path)
|
||||
.create_if_missing(true);
|
||||
@@ -133,3 +153,88 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
|
||||
let _ = PIPELINE_DB.set(PipelineDb { tx });
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Story 1087: file name of the split-stage migration. The version prefix is
|
||||
/// the same `i64` sqlx assigns to that migration on `installed_on` rows in
|
||||
/// `_sqlx_migrations`.
|
||||
const SPLIT_STAGE_MIGRATION_VERSION: i64 = 20260515000000;
|
||||
|
||||
/// Story 1087: take a timestamped side-car copy of `pipeline.db` if and only if
|
||||
/// the split-stage migration has not yet been applied. This is the AC1 backup
|
||||
/// — `pipeline.db.pre-pipeline-status.<unix-ts>.bak` next to the live file.
|
||||
///
|
||||
/// Failures are logged but never propagated: a missing backup must not block
|
||||
/// the server from starting (a corrupt source file or a read-only directory
|
||||
/// will be surfaced by the migration step itself).
|
||||
pub(crate) async fn backup_pre_pipeline_status(db_path: &Path) {
|
||||
if !db_path.exists() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cheap pre-check: open the DB read-only and see whether the split-stage
|
||||
// migration version is recorded in `_sqlx_migrations`. If it is, the
|
||||
// backup has already been taken on a previous start and there is nothing
|
||||
// to do.
|
||||
let options = SqliteConnectOptions::new()
|
||||
.filename(db_path)
|
||||
.read_only(true)
|
||||
.create_if_missing(false);
|
||||
|
||||
let probe = SqlitePool::connect_with(options).await;
|
||||
if let Ok(pool) = probe {
|
||||
let already_split: Result<Option<(i64,)>, _> =
|
||||
sqlx::query_as("SELECT version FROM _sqlx_migrations WHERE version = ?1 LIMIT 1")
|
||||
.bind(SPLIT_STAGE_MIGRATION_VERSION)
|
||||
.fetch_optional(&pool)
|
||||
.await;
|
||||
pool.close().await;
|
||||
if let Ok(Some(_)) = already_split {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let ts = chrono::Utc::now().timestamp();
|
||||
let mut backup = db_path.as_os_str().to_owned();
|
||||
backup.push(format!(".pre-pipeline-status.{ts}.bak"));
|
||||
let backup_path = std::path::PathBuf::from(backup);
|
||||
|
||||
match tokio::fs::copy(db_path, &backup_path).await {
|
||||
Ok(_) => slog!(
|
||||
"[db] Wrote pre-pipeline-status backup of {} to {}",
|
||||
db_path.display(),
|
||||
backup_path.display(),
|
||||
),
|
||||
Err(e) => slog!(
|
||||
"[db] Failed to write pre-pipeline-status backup of {}: {e}",
|
||||
db_path.display(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare the live `_sqlx_migrations` table against the compiled-in migration
|
||||
/// set and return any rows whose version is not known to this binary.
|
||||
///
|
||||
/// A non-empty result means the database was previously opened by a newer
|
||||
/// binary that applied additional migrations. The server must refuse to start
|
||||
/// in that state because the schema may contain tables or columns that this
|
||||
/// binary does not understand.
|
||||
pub async fn check_schema_drift(pool: &SqlitePool) -> Vec<UnknownMigration> {
|
||||
let migrator = sqlx::migrate!("./migrations");
|
||||
let known: HashSet<i64> = migrator.migrations.iter().map(|m| m.version).collect();
|
||||
|
||||
let rows: Vec<(i64, String, String)> = sqlx::query_as(
|
||||
"SELECT version, description, installed_on FROM _sqlx_migrations ORDER BY version",
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
rows.into_iter()
|
||||
.filter(|(v, _, _)| !known.contains(v))
|
||||
.map(|(version, description, installed_on)| UnknownMigration {
|
||||
version,
|
||||
description,
|
||||
installed_on,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -92,9 +92,20 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| {
|
||||
// Story 1087: emit `pipeline` and `status` alongside `stage` so
|
||||
// crdt-dump consumers can route by column/badge without re-deriving
|
||||
// the projection from the stage string.
|
||||
let (pipeline, status) = item
|
||||
.stage
|
||||
.as_deref()
|
||||
.and_then(crate::pipeline_state::Stage::from_dir)
|
||||
.map(|s| (s.pipeline().as_str(), s.status().as_str()))
|
||||
.unwrap_or(("", ""));
|
||||
json!({
|
||||
"story_id": item.story_id,
|
||||
"stage": item.stage,
|
||||
"pipeline": pipeline,
|
||||
"status": status,
|
||||
"name": item.name,
|
||||
"agent": item.agent,
|
||||
"retry_count": item.retry_count,
|
||||
@@ -103,6 +114,7 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
"claimed_at": item.claim_ts,
|
||||
"content_index": item.content_index,
|
||||
"is_deleted": item.is_deleted,
|
||||
"origin": item.origin,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
@@ -123,11 +135,10 @@ pub(crate) fn tool_dump_crdt(args: &Value) -> Result<String, String> {
|
||||
|
||||
/// MCP tool: return the server version, build hash, and running port.
|
||||
pub(crate) fn tool_get_version(ctx: &AppContext) -> Result<String, String> {
|
||||
let build_hash =
|
||||
std::fs::read_to_string(".huskies/build_hash").unwrap_or_else(|_| "unknown".to_string());
|
||||
let build_hash = option_env!("BUILD_GIT_HASH").unwrap_or("unknown");
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"version": env!("CARGO_PKG_VERSION"),
|
||||
"build_hash": build_hash.trim(),
|
||||
"build_hash": build_hash,
|
||||
"port": ctx.services.agents.port(),
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
@@ -312,4 +323,33 @@ mod tests {
|
||||
let result = tool_get_server_logs(&json!({"lines": 9999})).unwrap();
|
||||
let _ = result;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_get_version_ignores_build_hash_file_and_reports_compile_time_value() {
|
||||
// Regression: get_version must NOT read .huskies/build_hash at runtime.
|
||||
// Write a deliberately wrong value to the file and assert get_version
|
||||
// returns the compile-time hash, not the file content.
|
||||
let dir = tempfile::tempdir().expect("tempdir");
|
||||
let huskies_dir = dir.path().join(".huskies");
|
||||
std::fs::create_dir_all(&huskies_dir).unwrap();
|
||||
std::fs::write(huskies_dir.join("build_hash"), "wrong_hash_sentinel_xyz").unwrap();
|
||||
|
||||
let ctx = crate::http::test_helpers::test_ctx(dir.path());
|
||||
let result = tool_get_version(&ctx).expect("tool_get_version must not fail");
|
||||
let parsed: serde_json::Value = serde_json::from_str(&result).expect("must be valid JSON");
|
||||
|
||||
let returned_hash = parsed["build_hash"]
|
||||
.as_str()
|
||||
.expect("build_hash must be a string");
|
||||
assert_ne!(
|
||||
returned_hash, "wrong_hash_sentinel_xyz",
|
||||
"get_version must not read .huskies/build_hash; got '{returned_hash}'"
|
||||
);
|
||||
// The returned hash must equal the compile-time value.
|
||||
let compile_time_hash = option_env!("BUILD_GIT_HASH").unwrap_or("unknown");
|
||||
assert_eq!(
|
||||
returned_hash, compile_time_hash,
|
||||
"get_version must return compile-time BUILD_GIT_HASH"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,6 +195,9 @@ pub(super) async fn tool_status(args: &Value, ctx: &AppContext) -> Result<String
|
||||
if !deps.is_empty() {
|
||||
front_matter.insert("depends_on".to_string(), json!(deps));
|
||||
}
|
||||
// Story 1088: origin tracking.
|
||||
let origin_str = view.origin().unwrap_or("unknown");
|
||||
front_matter.insert("origin".to_string(), json!(origin_str));
|
||||
let stage_claim = match &typed_item.stage {
|
||||
crate::pipeline_state::Stage::Coding { claim, .. } => claim.as_ref(),
|
||||
crate::pipeline_state::Stage::Merge { claim, .. } => claim.as_ref(),
|
||||
|
||||
@@ -38,6 +38,16 @@ pub(crate) fn tool_create_bug(args: &Value, ctx: &AppContext) -> Result<String,
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&bug_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: bug_id.clone(),
|
||||
item_type: "bug".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created bug: {bug_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ pub(crate) fn tool_create_epic(args: &Value, ctx: &AppContext) -> Result<String,
|
||||
},
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&epic_id, &super::build_origin(args));
|
||||
|
||||
Ok(format!("Created epic: {epic_id}"))
|
||||
}
|
||||
|
||||
@@ -127,10 +129,14 @@ pub(crate) fn tool_show_epic(args: &Value, _ctx: &AppContext) -> Result<String,
|
||||
if matches!(item.stage, Stage::Done { .. }) {
|
||||
done += 1;
|
||||
}
|
||||
// Story 1087: expose pipeline + status alongside the legacy
|
||||
// stage name so epic-show callers can route by column/badge.
|
||||
member_items.push(json!({
|
||||
"story_id": sid,
|
||||
"name": item.name,
|
||||
"stage": stage_name,
|
||||
"pipeline": item.stage.pipeline().as_str(),
|
||||
"status": item.stage.status().as_str(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,33 @@ mod refactor;
|
||||
mod spike;
|
||||
mod story;
|
||||
|
||||
/// Build a compact origin JSON string for a newly-created work item (story 1088).
|
||||
///
|
||||
/// `args` may contain an `"origin"` object with `kind`, `id`, and `ts` fields
|
||||
/// supplied by the caller (e.g. a coder agent passing its own identity). When
|
||||
/// absent the default is `{"kind":"user","id":"","ts":<now>}`.
|
||||
///
|
||||
/// Callers that create items on behalf of system automation (e.g. gate-failure
|
||||
/// auto-filing) should pass `kind = "system"` and `id = "<automation-name>"`.
|
||||
pub(super) fn build_origin(args: &serde_json::Value) -> String {
|
||||
let ts = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs_f64();
|
||||
|
||||
if let Some(origin_obj) = args.get("origin").and_then(|v| v.as_object()) {
|
||||
let kind = origin_obj
|
||||
.get("kind")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("user");
|
||||
let id = origin_obj.get("id").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let ts_val = origin_obj.get("ts").and_then(|v| v.as_f64()).unwrap_or(ts);
|
||||
serde_json::json!({"kind": kind, "id": id, "ts": ts_val}).to_string()
|
||||
} else {
|
||||
serde_json::json!({"kind": "user", "id": "", "ts": ts}).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) use bug::{tool_close_bug, tool_create_bug, tool_list_bugs};
|
||||
pub(crate) use criteria::{
|
||||
tool_add_criterion, tool_check_criterion, tool_edit_criterion, tool_ensure_acceptance,
|
||||
|
||||
@@ -36,6 +36,16 @@ pub(crate) fn tool_create_refactor(args: &Value, ctx: &AppContext) -> Result<Str
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&refactor_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: refactor_id.clone(),
|
||||
item_type: "refactor".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created refactor: {refactor_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,16 @@ pub(crate) fn tool_create_spike(args: &Value, ctx: &AppContext) -> Result<String
|
||||
depends_on.as_deref(),
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&spike_id, &super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: spike_id.clone(),
|
||||
item_type: "spike".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
Ok(format!("Created spike: {spike_id}"))
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,16 @@ pub(crate) fn tool_create_story(args: &Value, ctx: &AppContext) -> Result<String
|
||||
false,
|
||||
)?;
|
||||
|
||||
crate::crdt_state::set_origin(&story_id, &super::super::build_origin(args));
|
||||
|
||||
let _ = ctx
|
||||
.watcher_tx
|
||||
.send(crate::io::watcher::WatcherEvent::NewItemCreated {
|
||||
item_id: story_id.clone(),
|
||||
item_type: "story".to_string(),
|
||||
name: req.name.as_ref().to_string(),
|
||||
});
|
||||
|
||||
// Bug 503: warn at creation time if any depends_on points at an already-archived story.
|
||||
let archived_deps: Vec<u32> = depends_on_ids
|
||||
.as_deref()
|
||||
|
||||
@@ -39,34 +39,32 @@ pub(crate) fn tool_get_pipeline_status(ctx: &AppContext) -> Result<String, Strin
|
||||
let state = load_pipeline_state(ctx)?;
|
||||
let running_merges = ctx.services.agents.list_running_merges()?;
|
||||
|
||||
fn slim_name(name: &str) -> &str {
|
||||
crate::chat::util::truncate_at_char_boundary(name, 120)
|
||||
}
|
||||
|
||||
fn map_items(items: &[crate::http::workflow::UpcomingStory], stage: &str) -> Vec<Value> {
|
||||
items
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let mut item = json!({
|
||||
"story_id": s.story_id,
|
||||
"name": s.name,
|
||||
"name": slim_name(&s.name),
|
||||
"stage": stage,
|
||||
"pipeline": s.pipeline.as_str(),
|
||||
"status": s.status.as_str(),
|
||||
"agent": s.agent.as_ref().map(|a| json!({
|
||||
"agent_name": a.agent_name,
|
||||
"model": a.model,
|
||||
"status": a.status,
|
||||
})),
|
||||
});
|
||||
// Include blocked/retry_count when present so callers can
|
||||
// identify stories stuck in the pipeline.
|
||||
if let Some(true) = s.blocked {
|
||||
item["blocked"] = json!(true);
|
||||
}
|
||||
if let Some(rc) = s.retry_count {
|
||||
item["retry_count"] = json!(rc);
|
||||
}
|
||||
if let Some(ref mf) = s.merge_failure {
|
||||
item["merge_failure"] = json!(mf);
|
||||
}
|
||||
if let Some(ref epic_id) = s.epic_id {
|
||||
item["epic_id"] = json!(epic_id);
|
||||
}
|
||||
item
|
||||
})
|
||||
.collect()
|
||||
@@ -81,19 +79,21 @@ pub(crate) fn tool_get_pipeline_status(ctx: &AppContext) -> Result<String, Strin
|
||||
let backlog: Vec<Value> = state
|
||||
.backlog
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let mut item = json!({ "story_id": s.story_id, "name": s.name });
|
||||
if let Some(ref epic_id) = s.epic_id {
|
||||
item["epic_id"] = json!(epic_id);
|
||||
}
|
||||
item
|
||||
})
|
||||
.map(|s| json!({ "story_id": s.story_id, "name": slim_name(&s.name) }))
|
||||
.collect();
|
||||
|
||||
let archived: Vec<Value> = state
|
||||
.archived
|
||||
.iter()
|
||||
.map(|s| json!({ "story_id": s.story_id, "name": s.name, "stage": "archived" }))
|
||||
.map(|s| {
|
||||
json!({
|
||||
"story_id": s.story_id,
|
||||
"name": slim_name(&s.name),
|
||||
"stage": "archived",
|
||||
"pipeline": s.pipeline.as_str(),
|
||||
"status": s.status.as_str(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::to_string_pretty(&json!({
|
||||
@@ -248,6 +248,82 @@ mod tests {
|
||||
assert_eq!(item["valid"], true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pipeline_status_50_items_under_10kb() {
|
||||
crate::db::ensure_content_store();
|
||||
let stages = [
|
||||
("1_backlog", "backlog"),
|
||||
("2_current", "current"),
|
||||
("3_qa", "qa"),
|
||||
("4_merge", "merge"),
|
||||
("5_done", "done"),
|
||||
];
|
||||
for (i, (dir, _)) in stages.iter().enumerate() {
|
||||
for j in 0..10 {
|
||||
let id = format!("99{i}{j}0_story_size_test");
|
||||
let name = format!("Pipeline Size Test Story {i}-{j}");
|
||||
crate::db::write_item_with_content(
|
||||
&id,
|
||||
dir,
|
||||
&format!("---\nname: \"{name}\"\n---\n"),
|
||||
crate::db::ItemMeta {
|
||||
name: Some(name),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_get_pipeline_status(&ctx).unwrap();
|
||||
assert!(
|
||||
result.len() < 10 * 1024,
|
||||
"50-item response must be under 10 KB; got {} bytes",
|
||||
result.len()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pipeline_status_per_item_under_500_bytes() {
|
||||
crate::db::ensure_content_store();
|
||||
// Insert one item per active stage with a moderately long name.
|
||||
let stages = [
|
||||
("2_current", "9995_story_peritem_current"),
|
||||
("3_qa", "9996_story_peritem_qa"),
|
||||
("4_merge", "9997_story_peritem_merge"),
|
||||
("5_done", "9998_story_peritem_done"),
|
||||
];
|
||||
for (dir, id) in &stages {
|
||||
let name = "A Reasonably Named Story For Size Testing";
|
||||
crate::db::write_item_with_content(
|
||||
id,
|
||||
dir,
|
||||
&format!("---\nname: \"{name}\"\n---\n"),
|
||||
crate::db::ItemMeta {
|
||||
name: Some(name.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_get_pipeline_status(&ctx).unwrap();
|
||||
let parsed: Value = serde_json::from_str(&result).unwrap();
|
||||
let active = parsed["active"].as_array().unwrap();
|
||||
for item in active {
|
||||
if stages.iter().any(|(_, id)| item["story_id"] == *id) {
|
||||
let item_json = serde_json::to_string(item).unwrap();
|
||||
assert!(
|
||||
item_json.len() < 500,
|
||||
"per-item payload must be under 500 bytes; story_id={} got {} bytes: {}",
|
||||
item["story_id"],
|
||||
item_json.len(),
|
||||
item_json
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_validate_stories_with_invalid_front_matter() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
@@ -574,7 +574,7 @@ pub(super) fn story_tools() -> Vec<Value> {
|
||||
}),
|
||||
json!({
|
||||
"name": "get_pipeline_status",
|
||||
"description": "Return a structured snapshot of the full work item pipeline. Includes all active stages (current, qa, merge, done) with each item's stage, name, and assigned agent. Also includes upcoming backlog items.",
|
||||
"description": "Return a structured snapshot of the full work item pipeline. Each item includes only slim fields: story_id, name (capped at 120 chars), stage, agent (with agent_name/model/status), and optional boolean flags blocked and retry_count. Active stages (current, qa, merge, done) appear in the 'active' array; backlog items in 'backlog'. For full story details, use status(story_id) or dump_crdt.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {}
|
||||
|
||||
@@ -24,6 +24,10 @@ pub struct UpcomingStory {
|
||||
pub merge_failure: Option<String>,
|
||||
/// Active agent working on this item, if any.
|
||||
pub agent: Option<AgentAssignment>,
|
||||
/// Display column (story 1085) — derived from `Stage::pipeline()`.
|
||||
pub pipeline: crate::pipeline_state::Pipeline,
|
||||
/// Display badge/indicator (story 1085) — derived from `Stage::status()`.
|
||||
pub status: crate::pipeline_state::Status,
|
||||
/// True when the item is held in QA for human review.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub review_hold: Option<bool>,
|
||||
@@ -142,6 +146,8 @@ pub fn load_pipeline_state(ctx: &AppContext) -> Result<PipelineState, String> {
|
||||
error: None,
|
||||
merge_failure,
|
||||
agent,
|
||||
pipeline: item.stage.pipeline(),
|
||||
status: item.stage.status(),
|
||||
review_hold,
|
||||
qa,
|
||||
retry_count: if item.retry_count() > 0 {
|
||||
@@ -278,6 +284,8 @@ pub fn load_upcoming_stories(_ctx: &AppContext) -> Result<Vec<UpcomingStory>, St
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: item.stage.pipeline(),
|
||||
status: item.stage.status(),
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: if item_retry_count > 0 {
|
||||
|
||||
@@ -90,4 +90,14 @@ pub enum WatcherEvent {
|
||||
/// `true` if acceptance gates passed; `false` if they failed.
|
||||
success: bool,
|
||||
},
|
||||
/// A new work item was successfully created and added to the backlog.
|
||||
/// Triggers a creation notification to configured chat rooms.
|
||||
NewItemCreated {
|
||||
/// Work item ID (e.g. `"1075_refactor_split_stage_enum"`).
|
||||
item_id: String,
|
||||
/// Human-readable item type (`"story"`, `"bug"`, `"refactor"`, `"spike"`).
|
||||
item_type: String,
|
||||
/// Human-readable item name.
|
||||
name: String,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ mod sweep;
|
||||
|
||||
pub use events::WatcherEvent;
|
||||
pub(crate) use sweep::spawn_done_to_archived_subscriber;
|
||||
#[cfg(test)]
|
||||
pub(crate) use sweep::sweep_done_to_archived;
|
||||
|
||||
use crate::slog;
|
||||
|
||||
@@ -29,13 +29,20 @@ use std::time::Duration;
|
||||
///
|
||||
/// Replaces the periodic `sweep_done_to_archived` call from the tick loop.
|
||||
pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
use crate::pipeline_state::{PipelineEvent, Stage, apply_transition, subscribe_transitions};
|
||||
use crate::pipeline_state::{
|
||||
PipelineEvent, Stage, Status, apply_transition, subscribe_transitions,
|
||||
};
|
||||
|
||||
let mut rx = subscribe_transitions();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => {
|
||||
// Story 1086: gate on the typed `Status::Done` projection;
|
||||
// the variant pattern is still required to read `merged_at`.
|
||||
if fired.after.status() != Status::Done {
|
||||
continue;
|
||||
}
|
||||
if let Stage::Done { merged_at, .. } = fired.after {
|
||||
let story_id = fired.story_id.0.clone();
|
||||
let retention = done_retention;
|
||||
@@ -70,7 +77,7 @@ pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
});
|
||||
}
|
||||
|
||||
/// Sweep items in `Stage::Done` whose `merged_at` timestamp exceeds the
|
||||
/// Reconcile: sweep items in `Stage::Done` whose `merged_at` timestamp exceeds the
|
||||
/// retention duration to `Stage::Archived` via the typed transition table.
|
||||
///
|
||||
/// Routes through [`crate::pipeline_state::apply_transition`] so the
|
||||
@@ -78,14 +85,22 @@ pub(crate) fn spawn_done_to_archived_subscriber(done_retention: Duration) {
|
||||
/// `TransitionFired` event is emitted to subscribers (worktree pruning,
|
||||
/// matrix notifier, etc.).
|
||||
///
|
||||
/// Used in tests for direct one-shot sweeps; production code uses
|
||||
/// Called at startup and by the periodic reconciler to archive Done stories
|
||||
/// whose retention has elapsed, even when the `TransitionFired` subscriber
|
||||
/// lagged and missed their Done event. Production reactive archiving uses
|
||||
/// [`spawn_done_to_archived_subscriber`] instead.
|
||||
#[cfg(test)]
|
||||
///
|
||||
/// Logs a summary INFO line on every call: candidates evaluated and items
|
||||
/// archived, or "no items past retention" when nothing was swept.
|
||||
pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
use crate::pipeline_state::{PipelineEvent, Stage, apply_transition, read_all_typed};
|
||||
|
||||
let mut candidates: usize = 0;
|
||||
let mut archived: usize = 0;
|
||||
|
||||
for item in read_all_typed() {
|
||||
if let Stage::Done { merged_at, .. } = &item.stage {
|
||||
candidates += 1;
|
||||
let age = chrono::Utc::now()
|
||||
.signed_duration_since(*merged_at)
|
||||
.to_std()
|
||||
@@ -93,7 +108,10 @@ pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
if age >= done_retention {
|
||||
let story_id = item.story_id.0.clone();
|
||||
match apply_transition(&story_id, PipelineEvent::Accepted, None) {
|
||||
Ok(_) => slog!("[watcher] sweep: promoted {story_id} → archived"),
|
||||
Ok(_) => {
|
||||
archived += 1;
|
||||
slog!("[watcher] sweep: promoted {story_id} → archived")
|
||||
}
|
||||
Err(e) => {
|
||||
slog!("[watcher] sweep: transition error for {story_id}: {e}")
|
||||
}
|
||||
@@ -101,4 +119,10 @@ pub(crate) fn sweep_done_to_archived(done_retention: Duration) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if archived > 0 {
|
||||
slog!("[watcher] sweep: {candidates} candidate(s) evaluated, {archived} archived");
|
||||
} else {
|
||||
slog!("[watcher] sweep: {candidates} candidate(s) evaluated, no items past retention");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -301,6 +301,48 @@ async fn done_to_archived_subscriber_archives_on_transition() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: simulates a server restart occurring between move-to-done and
|
||||
/// the configured retention window expiry.
|
||||
///
|
||||
/// Before the fix the archive-deadline was held only in the reactive
|
||||
/// subscriber's volatile sleep task; a restart would lose that task and the
|
||||
/// item would never be archived. The fix is that `sweep_done_to_archived`
|
||||
/// reads `merged_at` from the CRDT (durable across restarts) and archives any
|
||||
/// item whose age exceeds the retention, so the next periodic reconcile tick
|
||||
/// after restart picks it up regardless of whether a sleep task existed.
|
||||
#[test]
|
||||
fn restart_scenario_sweep_archives_past_retention_after_sweep_tick() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9885_sweep_restart_regression";
|
||||
|
||||
// Simulate: item moved to Done 10 seconds before the restart.
|
||||
// The reactive subscriber would have had a sleep task for the remaining
|
||||
// retention time; that task is now gone (process restarted).
|
||||
let ten_seconds_ago = (chrono::Utc::now() - chrono::Duration::seconds(10)).timestamp() as f64;
|
||||
crate::crdt_state::write_item_str(
|
||||
story_id,
|
||||
"5_done",
|
||||
Some("Restart regression test"),
|
||||
None,
|
||||
None,
|
||||
Some(ten_seconds_ago),
|
||||
);
|
||||
|
||||
// The next periodic reconcile tick after restart calls sweep_done_to_archived
|
||||
// directly. With 5-second retention and merged_at 10s ago, the item must
|
||||
// be archived even though no reactive subscriber sleep task exists.
|
||||
sweep_done_to_archived(Duration::from_secs(5));
|
||||
|
||||
let items = crate::pipeline_state::read_all_typed();
|
||||
let item = items.iter().find(|i| i.story_id.0 == story_id);
|
||||
assert!(
|
||||
item.is_some_and(|i| matches!(i.stage, crate::pipeline_state::Stage::Archived { .. })),
|
||||
"item past retention must be archived on the next sweep tick after a server restart"
|
||||
);
|
||||
}
|
||||
|
||||
/// Prove that an item with merged_at NEWER than done_retention is NOT swept.
|
||||
#[test]
|
||||
fn sweep_keeps_item_newer_than_retention() {
|
||||
|
||||
+4
-4
@@ -33,6 +33,8 @@ pub mod mesh;
|
||||
/// Node identity — Ed25519 keypair generation and stable node ID management.
|
||||
pub mod node_identity;
|
||||
pub(crate) mod pipeline_state;
|
||||
/// Reliable process-termination primitives shared across the server.
|
||||
pub mod process_kill;
|
||||
/// Rebuild — process restart and shutdown coordination.
|
||||
pub mod rebuild;
|
||||
mod service;
|
||||
@@ -82,12 +84,10 @@ async fn main() -> Result<(), std::io::Error> {
|
||||
});
|
||||
|
||||
// Log version and build hash so we can verify what's running.
|
||||
let build_hash =
|
||||
std::fs::read_to_string(".huskies/build_hash").unwrap_or_else(|_| "unknown".to_string());
|
||||
slog!(
|
||||
"[startup] huskies v{} (build {})",
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
build_hash.trim()
|
||||
option_env!("BUILD_GIT_HASH").unwrap_or("unknown")
|
||||
);
|
||||
|
||||
let app_state = Arc::new(SessionState::default());
|
||||
@@ -151,7 +151,7 @@ async fn main() -> Result<(), std::io::Error> {
|
||||
startup::project::open_project_root(is_init, explicit_path, &cwd, &app_state, &store, port)
|
||||
.await;
|
||||
|
||||
startup::project::init_subsystems(&app_state, &cwd).await;
|
||||
startup::project::init_subsystems(&app_state, &cwd, is_agent).await;
|
||||
|
||||
let crdt_join_token = cli
|
||||
.join_token
|
||||
|
||||
@@ -36,32 +36,6 @@ pub(super) fn try_broadcast(fired: &TransitionFired) {
|
||||
let _ = get_or_init_tx().send(fired.clone());
|
||||
}
|
||||
|
||||
/// Replay the current CRDT pipeline state as a burst of synthetic
|
||||
/// [`TransitionFired`] events at server startup.
|
||||
///
|
||||
/// Reads every item from the CRDT and broadcasts a self-transition
|
||||
/// (`before == after`) for each one so that all existing subscribers
|
||||
/// (worktree lifecycle, merge-failure auto-spawn, auto-assign) react
|
||||
/// identically to a live event. This replaces the legacy scan-based
|
||||
/// `reconcile_on_startup` path.
|
||||
///
|
||||
/// Idempotent: a second call produces another burst of events, but every
|
||||
/// subscriber already guards against duplicate work (e.g.
|
||||
/// `is_story_assigned_for_stage` returns true once an agent is running,
|
||||
/// and worktree creation is a no-op when the worktree already exists).
|
||||
pub fn replay_current_pipeline_state() {
|
||||
for item in super::read_all_typed() {
|
||||
let fired = TransitionFired {
|
||||
story_id: item.story_id.clone(),
|
||||
before: item.stage.clone(),
|
||||
after: item.stage,
|
||||
event: super::PipelineEvent::DepsMet,
|
||||
at: chrono::Utc::now(),
|
||||
};
|
||||
try_broadcast(&fired);
|
||||
}
|
||||
}
|
||||
|
||||
/// Fired when a pipeline stage transition completes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TransitionFired {
|
||||
@@ -183,58 +157,4 @@ mod tests {
|
||||
}
|
||||
|
||||
// ── TransitionError Display ─────────────────────────────────────────
|
||||
|
||||
// ── replay_current_pipeline_state ──────────────────────────────────
|
||||
|
||||
/// AC1: replay broadcasts a synthetic event for every item in the CRDT.
|
||||
#[test]
|
||||
fn replay_broadcasts_event_for_crdt_item_in_coding_stage() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9901_replay_coding";
|
||||
crate::db::write_item_with_content(
|
||||
story_id,
|
||||
"2_current",
|
||||
"---\nname: Replay Coding\n---\n",
|
||||
crate::db::ItemMeta::named("Replay Coding"),
|
||||
);
|
||||
|
||||
let mut rx = subscribe_transitions();
|
||||
replay_current_pipeline_state();
|
||||
|
||||
let mut found = false;
|
||||
while let Ok(fired) = rx.try_recv() {
|
||||
if fired.story_id.0 == story_id && matches!(fired.after, Stage::Coding { .. }) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
found,
|
||||
"replay must broadcast a Coding event for a story in 2_current"
|
||||
);
|
||||
}
|
||||
|
||||
/// AC3: calling replay_current_pipeline_state twice fires events both times.
|
||||
///
|
||||
/// Pool-state idempotency (no duplicate agents) is enforced by subscribers,
|
||||
/// not by the replay function itself. This test verifies that replay is safe
|
||||
/// to call multiple times without panicking.
|
||||
#[test]
|
||||
fn replay_twice_does_not_panic() {
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
|
||||
let story_id = "9902_replay_idem";
|
||||
crate::db::write_item_with_content(
|
||||
story_id,
|
||||
"3_qa",
|
||||
"---\nname: Replay QA\n---\n",
|
||||
crate::db::ItemMeta::named("Replay QA"),
|
||||
);
|
||||
|
||||
// Two successive replays must not panic.
|
||||
replay_current_pipeline_state();
|
||||
replay_current_pipeline_state();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,8 +41,8 @@ mod tests;
|
||||
#[allow(unused_imports)]
|
||||
pub use types::{
|
||||
AgentClaim, AgentName, ArchiveReason, BranchName, ExecutionState, GitSha, MergeFailureKind,
|
||||
NodePubkey, PipelineItem, PlanState, Stage, StoryId, TransitionError, stage_dir_name,
|
||||
stage_label,
|
||||
NodePubkey, Pipeline, PipelineItem, PlanState, Stage, Status, StoryId, TransitionError,
|
||||
stage_dir_name, stage_label,
|
||||
};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
@@ -51,10 +51,7 @@ pub use transition::{
|
||||
};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use events::{
|
||||
EventBus, TransitionFired, TransitionSubscriber, replay_current_pipeline_state,
|
||||
subscribe_transitions,
|
||||
};
|
||||
pub use events::{EventBus, TransitionFired, TransitionSubscriber, subscribe_transitions};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use projection::ProjectionError;
|
||||
@@ -66,6 +63,7 @@ pub use apply::{
|
||||
transition_to_unfrozen,
|
||||
};
|
||||
|
||||
pub(crate) use subscribers::reconcile_audit_log;
|
||||
pub use subscribers::spawn_audit_log_subscriber;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
|
||||
@@ -35,6 +35,14 @@ impl TransitionSubscriber for AuditLogSubscriber {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reconcile: no-op for the audit log subscriber.
///
/// The audit log records live transitions only. Replaying historical CRDT state at
/// reconcile time would produce misleading entries (wrong timestamps, duplicate lines).
/// Eventual consistency of the audit log is not required — missed events are simply
/// absent from the log, which is acceptable.
///
/// Kept as an explicit (empty) function so the pipeline module can re-export a
/// reconcile hook for every subscriber uniformly.
pub(crate) fn reconcile_audit_log() {}
|
||||
|
||||
/// Spawn a background task that writes a structured audit log entry for every pipeline transition.
|
||||
///
|
||||
/// Subscribes to the transition broadcast channel. Every `TransitionFired` event produces
|
||||
|
||||
@@ -429,6 +429,144 @@ impl Stage {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Display split (story 1085): Pipeline column + Status badge ─────────────

/// Column placement for a work item in the UI/chat status display.
///
/// Derived from [`Stage`] via [`Stage::pipeline`]. Display callers route items
/// to columns by this enum instead of pattern-matching `Stage` variants, so
/// new badges (e.g. `Frozen`, `Blocked`) do not produce new columns.
///
/// Serialized in kebab-case; the wire names match [`Pipeline::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Pipeline {
    /// Items in `Upcoming` or `Backlog` stages.
    Backlog,
    /// Items being coded (or blocked while in the coding lane).
    Coding,
    /// Items in QA or `ReviewHold`.
    Qa,
    /// Items in `Merge`, `MergeFailure`, or `MergeFailureFinal`.
    Merge,
    /// Items in `Done`.
    Done,
    /// Abandoned, superseded, or rejected items.
    Closed,
    /// Items swept into `Archived`.
    Archived,
}
|
||||
|
||||
impl Pipeline {
|
||||
/// Stable wire-format identifier (kebab-case).
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Pipeline::Backlog => "backlog",
|
||||
Pipeline::Coding => "coding",
|
||||
Pipeline::Qa => "qa",
|
||||
Pipeline::Merge => "merge",
|
||||
Pipeline::Done => "done",
|
||||
Pipeline::Closed => "closed",
|
||||
Pipeline::Archived => "archived",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Badge/indicator for a work item, orthogonal to its [`Pipeline`] column.
///
/// Derived from [`Stage`] via [`Stage::status`]. A `Frozen` story stays in
/// its underlying `Pipeline` column (e.g. `Coding`) and is decorated with
/// `Status::Frozen` for the display. `Status::Done` is reserved for items in
/// the `Done` column and is never produced for items still in flight, so a
/// done item never carries a `MergeFailure*` badge (story 1052).
///
/// Serialized internally tagged (`"kind"` field) in kebab-case; the wire
/// names match [`Status::as_str`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", tag = "kind")]
pub enum Status {
    /// No special badge — normal in-progress item.
    Active,
    /// Item is paused (`Stage::Frozen`).
    Frozen,
    /// Item is held for human review (`Stage::ReviewHold`).
    ReviewHold,
    /// Item is blocked (`Stage::Blocked` or legacy `Archived(Blocked)`).
    Blocked,
    /// Merge failed; mergemaster may still be recovering.
    MergeFailure,
    /// Merge failed beyond automatic recovery.
    MergeFailureFinal,
    /// User abandoned the item.
    Abandoned,
    /// Item was superseded by another work item.
    Superseded,
    /// Item was permanently rejected.
    Rejected,
    /// Item completed successfully.
    Done,
}
|
||||
|
||||
impl Status {
|
||||
/// Stable wire-format identifier (kebab-case).
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Status::Active => "active",
|
||||
Status::Frozen => "frozen",
|
||||
Status::ReviewHold => "review-hold",
|
||||
Status::Blocked => "blocked",
|
||||
Status::MergeFailure => "merge-failure",
|
||||
Status::MergeFailureFinal => "merge-failure-final",
|
||||
Status::Abandoned => "abandoned",
|
||||
Status::Superseded => "superseded",
|
||||
Status::Rejected => "rejected",
|
||||
Status::Done => "done",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stage {
|
||||
/// Display column for this stage. `Frozen { resume_to }` recurses so a
|
||||
/// paused story keeps its underlying column.
|
||||
pub fn pipeline(&self) -> Pipeline {
|
||||
match self {
|
||||
Stage::Upcoming | Stage::Backlog => Pipeline::Backlog,
|
||||
Stage::Coding { .. } | Stage::Blocked { .. } => Pipeline::Coding,
|
||||
Stage::Qa | Stage::ReviewHold { .. } => Pipeline::Qa,
|
||||
Stage::Merge { .. } | Stage::MergeFailure { .. } | Stage::MergeFailureFinal { .. } => {
|
||||
Pipeline::Merge
|
||||
}
|
||||
Stage::Frozen { resume_to } => resume_to.pipeline(),
|
||||
Stage::Done { .. } => Pipeline::Done,
|
||||
Stage::Abandoned { .. } | Stage::Superseded { .. } | Stage::Rejected { .. } => {
|
||||
Pipeline::Closed
|
||||
}
|
||||
Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Pipeline::Coding,
|
||||
Stage::Archived { .. } => Pipeline::Archived,
|
||||
}
|
||||
}
|
||||
|
||||
/// Display badge for this stage. `Frozen { resume_to }` returns
|
||||
/// `Status::Frozen` regardless of the inner stage; callers wanting the
|
||||
/// underlying badge inspect `resume_to` directly.
|
||||
pub fn status(&self) -> Status {
|
||||
match self {
|
||||
Stage::Frozen { .. } => Status::Frozen,
|
||||
Stage::ReviewHold { .. } => Status::ReviewHold,
|
||||
Stage::Blocked { .. }
|
||||
| Stage::Archived {
|
||||
reason: ArchiveReason::Blocked { .. },
|
||||
..
|
||||
} => Status::Blocked,
|
||||
Stage::MergeFailure { .. } => Status::MergeFailure,
|
||||
Stage::MergeFailureFinal { .. } => Status::MergeFailureFinal,
|
||||
Stage::Abandoned { .. } => Status::Abandoned,
|
||||
Stage::Superseded { .. } => Status::Superseded,
|
||||
Stage::Rejected { .. } => Status::Rejected,
|
||||
Stage::Done { .. } => Status::Done,
|
||||
_ => Status::Active,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Per-node execution state ────────────────────────────────────────────────
|
||||
|
||||
/// Per-node execution tracking, stored in the CRDT under each node's pubkey.
|
||||
|
||||
@@ -0,0 +1,322 @@
|
||||
//! Reliable process-termination primitives.
|
||||
//!
|
||||
//! The huskies server kills child processes in several distinct places:
|
||||
//! the watchdog terminates agents that have exceeded turn/budget limits,
|
||||
//! `stop_agent` terminates on operator request, `kill_all_children` runs at
|
||||
//! server shutdown, the merge-gate completion path kills stale `cargo`
|
||||
//! processes, and `script/local-release` tears down the gateway during a
|
||||
//! redeploy. Every one of these used to send a signal that the target was
|
||||
//! free to ignore (most commonly `portable_pty`'s `SIGHUP`), with no
|
||||
//! verification that the process actually exited. Agents and bots that
|
||||
//! ignore `SIGHUP` survived the "kill", which produced concurrent claude
|
||||
//! processes on the same story — directly the duplicate-spawn bug we hit on
|
||||
//! 2026-05-15.
|
||||
//!
|
||||
//! This module provides one trustworthy way to kill processes: SIGKILL with
|
||||
//! verification. Build a pid set with the helpers in this module (or your
|
||||
//! own), then hand it to [`sigkill_pids_and_verify`].
|
||||
//!
|
||||
//! All functions on this module are deliberately Unix-only — huskies runs in
|
||||
//! Linux containers and macOS dev hosts, both POSIX.
|
||||
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Maximum time we'll wait for SIGKILL'd processes to disappear before
/// declaring failure. SIGKILL is uncatchable, so the kernel normally
/// reaps within tens of milliseconds; anything past 2 s indicates the
/// process is wedged in uninterruptible IO (e.g. waiting on a frozen NFS
/// mount). Caller can decide whether to proceed despite survivors.
/// Used by the verification loop in [`sigkill_pids_and_verify`].
const KILL_VERIFY_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(2);

/// Polling interval while waiting for processes to disappear. 100 ms is
/// fine-grained enough that the typical few-ms reap latency is barely
/// observable, but coarse enough that we don't burn CPU spinning.
const KILL_VERIFY_POLL: std::time::Duration = std::time::Duration::from_millis(100);
|
||||
|
||||
/// SIGKILL every pid in `pids`, then poll until all of them are gone.
|
||||
///
|
||||
/// Returns `Ok(n)` where `n == pids.len()` when every pid is verified
|
||||
/// reaped within [`KILL_VERIFY_TIMEOUT`]. Returns `Err(survivors)` with the
|
||||
/// pids still alive after the timeout — extremely rare for SIGKILL but
|
||||
/// possible if a process is wedged in uninterruptible IO. An empty `pids`
|
||||
/// slice returns `Ok(0)` immediately.
|
||||
///
|
||||
/// **Why SIGKILL and not SIGTERM-first:** several huskies-internal targets
|
||||
/// (claude-code, the bot itself) either ignore the polite signals or take
|
||||
/// arbitrarily long to honour them. The watchdog only kills agents that
|
||||
/// have already misbehaved by definition (exceeded budget/turn limits), so
|
||||
/// there is no reason to give them a graceful-shutdown grace period.
|
||||
pub fn sigkill_pids_and_verify(pids: &[u32]) -> Result<usize, Vec<u32>> {
|
||||
if pids.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
for &pid in pids {
|
||||
// libc::kill returns -1 on failure (with errno). We deliberately
|
||||
// ignore the result: the process may already be gone (errno ESRCH),
|
||||
// and trying again wouldn't help. The verification loop below is
|
||||
// the source of truth for "did this work".
|
||||
unsafe { libc::kill(pid as i32, libc::SIGKILL) };
|
||||
}
|
||||
|
||||
let deadline = std::time::Instant::now() + KILL_VERIFY_TIMEOUT;
|
||||
while std::time::Instant::now() < deadline {
|
||||
if pids.iter().copied().all(|pid| !pid_is_alive(pid)) {
|
||||
return Ok(pids.len());
|
||||
}
|
||||
std::thread::sleep(KILL_VERIFY_POLL);
|
||||
}
|
||||
|
||||
let survivors: Vec<u32> = pids
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|&pid| pid_is_alive(pid))
|
||||
.collect();
|
||||
if survivors.is_empty() {
|
||||
Ok(pids.len())
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[process_kill] SIGKILL did not reap pids within {:?}: {survivors:?}. \
|
||||
They may be wedged in uninterruptible IO.",
|
||||
KILL_VERIFY_TIMEOUT
|
||||
);
|
||||
Err(survivors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return every pid whose command line matches `pattern` (passed to
/// `pgrep -f`). Empty when nothing matches or when `pgrep` is unavailable.
///
/// Useful for collecting processes by a path or argument substring — e.g.
/// "every process running in `<worktree>/`" or "every cargo invocation
/// against this `Cargo.toml`".
pub fn pids_matching(pattern: &str) -> Vec<u32> {
    // A missing or failing pgrep binary degrades to "found nothing" rather
    // than an error — callers treat the result as best-effort.
    match std::process::Command::new("pgrep")
        .args(["-f", pattern])
        .output()
    {
        Ok(output) => String::from_utf8_lossy(&output.stdout)
            .lines()
            .filter_map(|line| line.trim().parse::<u32>().ok())
            .collect(),
        Err(_) => Vec::new(),
    }
}
|
||||
|
||||
/// Return every descendant pid of `root_pid`, deepest-first, **excluding**
/// `root_pid` itself. Walks the parent→child relation via `pgrep -P`.
///
/// Deepest-first ordering lets callers signal leaves before their parents
/// when that matters; for SIGKILL it makes no difference.
pub fn descendant_pids(root_pid: u32) -> Vec<u32> {
    let mut collected = Vec::new();
    walk_descendants(root_pid, &mut collected);
    collected
}

/// Recursive helper: append `pid`'s subtree to `out` in post-order — each
/// child's own descendants first, then the child. `pid` itself is never
/// pushed, which gives `descendant_pids` its root-exclusion guarantee.
fn walk_descendants(pid: u32, out: &mut Vec<u32>) {
    // No pgrep (or exec failure) is treated as "no children".
    let output = match std::process::Command::new("pgrep")
        .args(["-P", &pid.to_string()])
        .output()
    {
        Ok(output) => output,
        Err(_) => return,
    };
    let children: Vec<u32> = String::from_utf8_lossy(&output.stdout)
        .lines()
        .filter_map(|line| line.trim().parse::<u32>().ok())
        .collect();
    for child in children {
        walk_descendants(child, out);
        out.push(child);
    }
}
|
||||
|
||||
/// Check whether `pid` currently exists. Implemented via `kill(pid, 0)` —
|
||||
/// no signal is sent, only existence is probed.
|
||||
fn pid_is_alive(pid: u32) -> bool {
|
||||
// signal 0: "is this process around?" Returns 0 if the process exists
|
||||
// and we have permission to signal it, -1 with errno otherwise.
|
||||
unsafe { libc::kill(pid as i32, 0) == 0 }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::{Child, Command, Stdio};
    use std::thread::JoinHandle;

    /// Spawn a sleeper for kill testing, and spawn a background reaper that
    /// calls `wait()` as soon as the child exits. Returns the pid plus the
    /// reaper join handle so the test can confirm reaping after the kill.
    ///
    /// The reaper is essential because the production code's verify loop
    /// uses `kill(pid, 0)` to test existence — which returns 0 for zombies.
    /// If no one reaps the test's sleeper, its pid stays occupied (as a
    /// zombie) and `sigkill_pids_and_verify` mistakenly reports survivors.
    /// In production the PTY blocking thread is always reaping on behalf of
    /// portable_pty, so this isn't a concern there.
    fn spawn_sleeper_with_reaper(secs: u64) -> (u32, JoinHandle<()>) {
        let child: Child = Command::new("sleep")
            .arg(secs.to_string())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("failed to spawn sleep");
        let pid = child.id();
        // Reap on a background thread so the pid is released promptly.
        let reaper = std::thread::spawn(move || {
            let mut c = child;
            let _ = c.wait();
        });
        (pid, reaper)
    }

    /// Empty slice: nothing to kill, must return `Ok(0)` without polling.
    #[test]
    fn sigkill_empty_slice_is_ok() {
        let result = sigkill_pids_and_verify(&[]);
        assert!(matches!(result, Ok(0)));
    }

    /// Happy path: a live process is killed and verified gone.
    #[test]
    fn sigkill_real_process_is_verified_gone() {
        let (pid, reaper) = spawn_sleeper_with_reaper(60);
        assert!(pid_is_alive(pid), "sleeper should be alive before kill");

        let result = sigkill_pids_and_verify(&[pid]);
        assert!(
            matches!(result, Ok(1)),
            "sigkill must verify the process is gone: {result:?}"
        );
        let _ = reaper.join();
        assert!(!pid_is_alive(pid), "sleeper must be dead after kill");
    }

    /// Killing a pid that already exited must still count as success.
    #[test]
    fn sigkill_already_dead_pid_is_ok() {
        let (pid, reaper) = spawn_sleeper_with_reaper(0);
        let _ = reaper.join();
        // Wait briefly for the kernel to recycle the pid.
        for _ in 0..20 {
            if !pid_is_alive(pid) {
                break;
            }
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
        // Now SIGKILL a pid that no longer exists. Result must still be Ok.
        let result = sigkill_pids_and_verify(&[pid]);
        assert!(
            result.is_ok(),
            "sigkill of already-dead pid must succeed: {result:?}"
        );
    }

    /// Batch kill: every pid in the slice is killed and verified gone.
    #[test]
    fn sigkill_multiple_real_processes() {
        let mut handles: Vec<(u32, JoinHandle<()>)> =
            (0..3).map(|_| spawn_sleeper_with_reaper(60)).collect();
        let pids: Vec<u32> = handles.iter().map(|(p, _)| *p).collect();
        for &pid in &pids {
            assert!(pid_is_alive(pid));
        }
        let result = sigkill_pids_and_verify(&pids);
        assert!(
            matches!(result, Ok(3)),
            "all 3 sleepers must die: {result:?}"
        );
        for (_, reaper) in handles.drain(..) {
            let _ = reaper.join();
        }
        for &pid in &pids {
            assert!(!pid_is_alive(pid), "pid {pid} survived sigkill");
        }
    }

    /// A process whose argv carries a unique marker is found by pattern.
    #[test]
    fn pids_matching_finds_a_running_process() {
        // pgrep -f matches the FULL command line, so the marker has to be
        // in argv somewhere. Putting it in a shell comment doesn't work —
        // sh strips it. Override argv[0] so the marker is durably visible.
        use std::os::unix::process::CommandExt;
        let marker = format!("kill-test-marker-{}-{}", std::process::id(), rand_u64());
        let argv0 = format!("test-marker-{marker}");
        let child: Child = Command::new("sleep")
            .arg0(argv0)
            .arg("60")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("spawn");
        let child_pid = child.id();
        let reaper = std::thread::spawn(move || {
            let mut c = child;
            let _ = c.wait();
        });

        // pgrep needs a moment to see the new process.
        std::thread::sleep(std::time::Duration::from_millis(100));

        let found = pids_matching(&marker);
        assert!(
            found.contains(&child_pid),
            "pids_matching should find pid {child_pid} for marker '{marker}'; got {found:?}"
        );

        // Cleanup so the test doesn't leak a sleeper.
        let _ = sigkill_pids_and_verify(&[child_pid]);
        let _ = reaper.join();
    }

    /// A pattern that matches nothing yields an empty vec, not an error.
    #[test]
    fn pids_matching_returns_empty_when_no_match() {
        let pattern = format!("nonexistent-pattern-{}-{}", std::process::id(), rand_u64());
        let found = pids_matching(&pattern);
        assert!(found.is_empty(), "expected empty result, got {found:?}");
    }

    /// Cheap unique-ish u64 for distinguishing test invocations without a
    /// dependency on a randomness crate.
    fn rand_u64() -> u64 {
        use std::time::{SystemTime, UNIX_EPOCH};
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_nanos() as u64)
            .unwrap_or(0)
    }

    /// descendant_pids never includes the root and tolerates either shell
    /// fork behavior (fork vs exec-replace).
    #[test]
    fn descendant_pids_of_real_process_tree() {
        // Build a parent sh that spawns a child sleep. The descendants of
        // the parent should include the sleep.
        let parent: Child = Command::new("sh")
            .args(["-c", "sleep 60"])
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .stdin(Stdio::null())
            .spawn()
            .expect("spawn parent");
        let parent_pid = parent.id();
        let reaper = std::thread::spawn(move || {
            let mut c = parent;
            let _ = c.wait();
        });

        // Let the shell get around to fork+execing its child.
        std::thread::sleep(std::time::Duration::from_millis(200));

        let descendants = descendant_pids(parent_pid);
        // On some shells `sh -c "sleep N"` exec-replaces sh with sleep, leaving
        // zero descendants. On others it forks. We don't care which; we only
        // care that the function doesn't panic and returns a sensible vec.
        assert!(
            descendants.iter().all(|&pid| pid != parent_pid),
            "descendant_pids must not include the root itself: {descendants:?}"
        );

        // Cleanup: kill the parent and any descendants.
        let mut all = descendants;
        all.push(parent_pid);
        let _ = sigkill_pids_and_verify(&all);
        let _ = reaper.join();
    }
}
|
||||
@@ -62,6 +62,9 @@ pub struct WorkItemContent {
|
||||
pub stage: crate::pipeline_state::Stage,
|
||||
pub name: String,
|
||||
pub agent: Option<crate::config::AgentName>,
|
||||
/// Origin of the work item (story 1088). `None` for items that pre-date
|
||||
/// the origin register; the web UI renders these as `"unknown"`.
|
||||
pub origin: Option<String>,
|
||||
}
|
||||
|
||||
/// A single entry in the project's configured agent roster.
|
||||
@@ -176,6 +179,9 @@ pub fn get_work_item_content(
|
||||
.map(|v| v.name().to_string())
|
||||
.unwrap_or_default();
|
||||
let crdt_agent = crdt_view.as_ref().and_then(|v| v.agent());
|
||||
let crdt_origin = crdt_view
|
||||
.as_ref()
|
||||
.and_then(|v| v.origin().map(str::to_string));
|
||||
|
||||
for (stage_dir, stage) in &stages {
|
||||
if let Some(content) = io::read_work_item_from_stage(&work_dir, stage_dir, &filename)? {
|
||||
@@ -184,6 +190,7 @@ pub fn get_work_item_content(
|
||||
stage: stage.clone(),
|
||||
name: crdt_name.clone(),
|
||||
agent: crdt_agent,
|
||||
origin: crdt_origin.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -201,6 +208,7 @@ pub fn get_work_item_content(
|
||||
stage,
|
||||
name: crdt_name,
|
||||
agent: crdt_agent,
|
||||
origin: crdt_origin,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@ pub enum EventAction {
|
||||
/// `true` if acceptance gates passed.
|
||||
success: bool,
|
||||
},
|
||||
/// Post a new-item-created notification.
|
||||
NewItemCreated,
|
||||
/// Log server-side only; do not post to chat (e.g. hard rate-limit blocks).
|
||||
LogOnly,
|
||||
/// Reload the project configuration.
|
||||
@@ -51,6 +53,7 @@ pub fn classify(event: &WatcherEvent) -> EventAction {
|
||||
WatcherEvent::AgentCompleted { success, .. } => {
|
||||
EventAction::AgentCompleted { success: *success }
|
||||
}
|
||||
WatcherEvent::NewItemCreated { .. } => EventAction::NewItemCreated,
|
||||
_ => EventAction::Skip,
|
||||
}
|
||||
}
|
||||
@@ -178,4 +181,14 @@ mod tests {
|
||||
EventAction::AgentCompleted { success: false }
|
||||
);
|
||||
}
|
||||
|
||||
/// A `NewItemCreated` watcher event maps to the `NewItemCreated` action.
#[test]
fn new_item_created_is_classified_correctly() {
    let event = WatcherEvent::NewItemCreated {
        item_id: "1075_refactor_split_stage".to_string(),
        item_type: "refactor".to_string(),
        name: "Split Stage enum".to_string(),
    };
    assert_eq!(classify(&event), EventAction::NewItemCreated);
}
|
||||
}
|
||||
|
||||
@@ -220,21 +220,48 @@ pub fn format_agent_completed_notification(
|
||||
(plain, html)
|
||||
}
|
||||
|
||||
/// Extract the first non-empty line from a merge failure reason, truncated to `max_len` chars.
|
||||
/// Format a new-work-item creation notification.
|
||||
///
|
||||
/// Used to produce a compact snippet for chat notifications.
|
||||
pub fn merge_failure_snippet(reason: &str, max_len: usize) -> String {
|
||||
let line = reason
|
||||
.lines()
|
||||
.find(|l| !l.trim().is_empty())
|
||||
.unwrap_or(reason);
|
||||
let mut chars = line.chars();
|
||||
let truncated: String = chars.by_ref().take(max_len).collect();
|
||||
if chars.next().is_some() {
|
||||
format!("{truncated}\u{2026}") // append …
|
||||
} else {
|
||||
truncated
|
||||
/// Returns `(plain_text, html)` suitable for `ChatTransport::send_message`.
|
||||
pub fn format_new_item_notification(
|
||||
item_id: &str,
|
||||
item_type: &str,
|
||||
name: &str,
|
||||
) -> (String, String) {
|
||||
let number = extract_item_number(item_id).unwrap_or(item_id);
|
||||
let emoji = match item_type {
|
||||
"bug" => "\u{1f41b}", // 🐛
|
||||
"refactor" => "\u{1f4dd}", // 📝
|
||||
"spike" => "\u{1f52c}", // 🔬
|
||||
_ => "\u{1f4d6}", // 📖 (story and unknown)
|
||||
};
|
||||
let plain = format!("{emoji} New {item_type} #{number} \u{2014} {name}");
|
||||
let html = format!("{emoji} New {item_type} <strong>#{number}</strong> \u{2014} {name}");
|
||||
(plain, html)
|
||||
}
|
||||
|
||||
/// Maximum number of trailing gate-output lines included in a merge-failure
/// chat notification.
///
/// Gate output can be hundreds of lines; only the tail (where errors appear)
/// is useful at a glance. Full output remains available via `get_merge_status`
/// or the web UI — this limit is chat-display-only. Passed as `max_lines` to
/// `truncate_gate_output` by the notification listener.
pub const MERGE_FAILURE_TAIL_LINES: usize = 30;
|
||||
|
||||
/// Truncate `gate_output` to its last `max_lines` lines for chat notifications.
///
/// When the output has more than `max_lines` lines, the result is the tail
/// prefixed by a marker line `[...output truncated, last N lines shown...]`
/// so readers know output was cut. Output that already fits within the limit
/// is returned unchanged (no marker added).
pub fn truncate_gate_output(gate_output: &str, max_lines: usize) -> String {
    let all: Vec<&str> = gate_output.lines().collect();
    match all.len().checked_sub(max_lines) {
        // More lines than the limit: keep only the tail, flagged by a marker.
        Some(dropped) if dropped > 0 => {
            let marker = format!("[...output truncated, last {max_lines} lines shown...]");
            format!("{marker}\n{}", all[dropped..].join("\n"))
        }
        // At or under the limit: pass through untouched.
        _ => gate_output.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -568,6 +595,64 @@ mod tests {
|
||||
assert_eq!(plain, "\u{1F916} #42 \u{2014} coder-1 started");
|
||||
}
|
||||
|
||||
// ── truncate_gate_output ──────────────────────────────────────────────────
|
||||
|
||||
/// Output within the limit passes through byte-for-byte, no marker added.
#[test]
fn truncate_gate_output_short_output_returned_unchanged() {
    let output = "line1\nline2\nline3";
    assert_eq!(truncate_gate_output(output, 30), output);
}
|
||||
|
||||
/// Exactly `max_lines` lines is the boundary: still returned unchanged.
#[test]
fn truncate_gate_output_exact_limit_returned_unchanged() {
    let lines: Vec<String> = (1..=30).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    assert_eq!(truncate_gate_output(&output, 30), output);
}
|
||||
|
||||
/// Over-limit output starts with the truncation marker line.
#[test]
fn truncate_gate_output_over_limit_prepends_marker() {
    let lines: Vec<String> = (1..=35).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 30);
    assert!(
        result.starts_with("[...output truncated, last 30 lines shown...]"),
        "must start with truncation marker; got: {result}"
    );
}
|
||||
|
||||
/// Over-limit output keeps exactly the tail window (lines 6..=35 here).
#[test]
fn truncate_gate_output_over_limit_contains_tail_lines() {
    let lines: Vec<String> = (1..=35).map(|i| format!("line{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 30);
    // Last 30 lines are line6..line35.
    assert!(result.contains("line35"), "must contain last line");
    assert!(result.contains("line6"), "must contain first tail line");
    assert!(!result.contains("line5"), "must not contain dropped line");
}
|
||||
|
||||
/// Empty input is the trivial under-limit case: returned unchanged.
#[test]
fn truncate_gate_output_empty_input_returned_unchanged() {
    assert_eq!(truncate_gate_output("", 30), "");
}
|
||||
|
||||
/// A single line is well under the limit: returned unchanged.
#[test]
fn truncate_gate_output_single_line_returned_unchanged() {
    assert_eq!(truncate_gate_output("only one line", 30), "only one line");
}
|
||||
|
||||
/// The marker line reports the `max_lines` value actually used (5 here).
#[test]
fn truncate_gate_output_marker_contains_configured_limit() {
    let lines: Vec<String> = (1..=10).map(|i| format!("x{i}")).collect();
    let output = lines.join("\n");
    let result = truncate_gate_output(&output, 5);
    assert!(
        result.contains("last 5 lines shown"),
        "marker must state configured limit; got: {result}"
    );
}
|
||||
|
||||
// ── format_agent_completed_notification ───────────────────────────────────
|
||||
|
||||
#[test]
|
||||
@@ -599,6 +684,67 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── format_new_item_notification ──────────────────────────────────────────
|
||||
|
||||
/// Story items get the 📖 emoji and the numeric id prefix.
#[test]
fn format_new_item_notification_story() {
    let (plain, html) =
        format_new_item_notification("42_story_my_feature", "story", "My Feature");
    assert_eq!(plain, "\u{1f4d6} New story #42 \u{2014} My Feature");
    assert_eq!(
        html,
        "\u{1f4d6} New story <strong>#42</strong> \u{2014} My Feature"
    );
}
|
||||
|
||||
/// Bug items get the 🐛 emoji.
#[test]
fn format_new_item_notification_bug() {
    let (plain, html) =
        format_new_item_notification("99_bug_login_crash", "bug", "Login Crash");
    assert_eq!(plain, "\u{1f41b} New bug #99 \u{2014} Login Crash");
    assert_eq!(
        html,
        "\u{1f41b} New bug <strong>#99</strong> \u{2014} Login Crash"
    );
}
|
||||
|
||||
/// Refactor items get the 📝 emoji; multi-digit numbers are extracted.
#[test]
fn format_new_item_notification_refactor() {
    let (plain, html) = format_new_item_notification(
        "1075_refactor_split_stage",
        "refactor",
        "Split Stage enum into Pipeline + Status",
    );
    assert_eq!(
        plain,
        "\u{1f4dd} New refactor #1075 \u{2014} Split Stage enum into Pipeline + Status"
    );
    assert_eq!(
        html,
        "\u{1f4dd} New refactor <strong>#1075</strong> \u{2014} Split Stage enum into Pipeline + Status"
    );
}
|
||||
|
||||
/// Spike items get the 🔬 emoji.
#[test]
fn format_new_item_notification_spike() {
    let (plain, html) =
        format_new_item_notification("7_spike_encoder_comparison", "spike", "Compare Encoders");
    assert_eq!(plain, "\u{1f52c} New spike #7 \u{2014} Compare Encoders");
    assert_eq!(
        html,
        "\u{1f52c} New spike <strong>#7</strong> \u{2014} Compare Encoders"
    );
}
|
||||
|
||||
/// When no leading number can be extracted, the full id is shown verbatim.
#[test]
fn format_new_item_notification_non_numeric_id_uses_full_id() {
    let (plain, _html) = format_new_item_notification("abc_story_thing", "story", "Some Story");
    assert_eq!(
        plain,
        "\u{1f4d6} New story #abc_story_thing \u{2014} Some Story"
    );
}
|
||||
|
||||
#[test]
|
||||
fn format_agent_completed_notification_empty_name_falls_back_to_number() {
|
||||
let (plain, _html) =
|
||||
|
||||
@@ -14,9 +14,10 @@ use tokio::sync::broadcast;
|
||||
use super::super::events::classify;
|
||||
use super::super::filter::{AGENT_EVENT_DEBOUNCE, should_send_rate_limit};
|
||||
use super::super::format::{
|
||||
format_agent_completed_notification, format_agent_started_notification,
|
||||
format_blocked_notification, format_error_notification, format_oauth_account_swapped,
|
||||
format_oauth_accounts_exhausted, format_rate_limit_notification, merge_failure_snippet,
|
||||
MERGE_FAILURE_TAIL_LINES, format_agent_completed_notification,
|
||||
format_agent_started_notification, format_blocked_notification, format_error_notification,
|
||||
format_new_item_notification, format_oauth_account_swapped, format_oauth_accounts_exhausted,
|
||||
format_rate_limit_notification, truncate_gate_output,
|
||||
};
|
||||
use super::super::route::rooms_for_notification;
|
||||
use super::{find_story_name_any_stage, read_story_name};
|
||||
@@ -119,9 +120,7 @@ pub fn spawn_notification_listener(
|
||||
continue;
|
||||
};
|
||||
let story_name = read_story_name(&project_root, "4_merge", story_id);
|
||||
// AC3: include only the first non-empty line of the failure,
|
||||
// truncated to ~120 chars.
|
||||
let snippet = merge_failure_snippet(reason, 120);
|
||||
let snippet = truncate_gate_output(reason, MERGE_FAILURE_TAIL_LINES);
|
||||
let (plain, html) = format_error_notification(story_id, &story_name, &snippet);
|
||||
slog!("[bot] Sending error notification: {plain}");
|
||||
for room_id in &rooms_for_notification(&get_room_ids) {
|
||||
@@ -276,6 +275,26 @@ pub fn spawn_notification_listener(
|
||||
pending_agent_events.insert(key, (plain, html));
|
||||
agent_flush_deadline = Some(tokio::time::Instant::now() + AGENT_EVENT_DEBOUNCE);
|
||||
}
|
||||
EventAction::NewItemCreated => {
|
||||
if !config.status_push_enabled {
|
||||
continue;
|
||||
}
|
||||
let WatcherEvent::NewItemCreated {
|
||||
ref item_id,
|
||||
ref item_type,
|
||||
ref name,
|
||||
} = event
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let (plain, html) = format_new_item_notification(item_id, item_type, name);
|
||||
slog!("[bot] Sending new-item notification: {plain}");
|
||||
for room_id in &rooms_for_notification(&get_room_ids) {
|
||||
if let Err(e) = transport.send_message(room_id, &plain, &html).await {
|
||||
slog!("[bot] Failed to send new-item notification to {room_id}: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
EventAction::LogOnly => {
|
||||
// Hard-block: log server-side for debugging; do NOT post to chat.
|
||||
// Hard-block auto-resume is normal operation — the status command
|
||||
|
||||
@@ -5,6 +5,89 @@ use super::spawn_notification_listener;
|
||||
use crate::io::watcher::WatcherEvent;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
// ── spawn_notification_listener: MergeFailure ────────────────────────────────
|
||||
|
||||
/// Long gate output is truncated to the tail and includes the marker line.
|
||||
#[tokio::test]
|
||||
async fn merge_failure_long_output_is_truncated_to_tail() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
|
||||
let (transport, calls) = MockTransport::new();
|
||||
|
||||
spawn_notification_listener(
|
||||
transport,
|
||||
|| vec!["!room1:example.org".to_string()],
|
||||
watcher_rx,
|
||||
tmp.path().to_path_buf(),
|
||||
);
|
||||
|
||||
// Build a reason with 50 lines (more than MERGE_FAILURE_TAIL_LINES = 30).
|
||||
let long_reason: String = (1..=50).map(|i| format!("gate-line-{i}\n")).collect();
|
||||
|
||||
watcher_tx
|
||||
.send(WatcherEvent::MergeFailure {
|
||||
story_id: "1077_story_trunc".to_string(),
|
||||
reason: long_reason,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
let calls = calls.lock().unwrap();
|
||||
assert_eq!(calls.len(), 1, "Expected exactly one notification");
|
||||
let (_, plain, _) = &calls[0];
|
||||
assert!(
|
||||
plain.contains("truncated"),
|
||||
"notification must contain the truncation marker; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
plain.contains("gate-line-50"),
|
||||
"notification must contain the last line; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
!plain.contains("gate-line-1\n"),
|
||||
"notification must not contain the first (dropped) line; got: {plain}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Short gate output (within limit) passes through unchanged, no marker added.
|
||||
#[tokio::test]
|
||||
async fn merge_failure_short_output_passes_through_unchanged() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let (watcher_tx, watcher_rx) = broadcast::channel::<WatcherEvent>(16);
|
||||
let (transport, calls) = MockTransport::new();
|
||||
|
||||
spawn_notification_listener(
|
||||
transport,
|
||||
|| vec!["!room1:example.org".to_string()],
|
||||
watcher_rx,
|
||||
tmp.path().to_path_buf(),
|
||||
);
|
||||
|
||||
let short_reason = "error: type mismatch on line 42\nexpected i32, found &str".to_string();
|
||||
|
||||
watcher_tx
|
||||
.send(WatcherEvent::MergeFailure {
|
||||
story_id: "1077_story_short".to_string(),
|
||||
reason: short_reason.clone(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
let calls = calls.lock().unwrap();
|
||||
assert_eq!(calls.len(), 1, "Expected exactly one notification");
|
||||
let (_, plain, _) = &calls[0];
|
||||
assert!(
|
||||
!plain.contains("truncated"),
|
||||
"short output must not have a truncation marker; got: {plain}"
|
||||
);
|
||||
assert!(
|
||||
plain.contains("type mismatch"),
|
||||
"short output must be included verbatim; got: {plain}"
|
||||
);
|
||||
}
|
||||
|
||||
// ── spawn_notification_listener: RateLimitWarning ────────────────────────────
|
||||
|
||||
/// AC2 + AC3: when a RateLimitWarning event arrives, send_message is called
|
||||
|
||||
@@ -191,6 +191,7 @@ mod tests {
|
||||
watcher: crate::config::WatcherConfig {
|
||||
sweep_interval_secs: 30,
|
||||
done_retention_secs: 7200,
|
||||
reconcile_interval_secs: 30,
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -37,6 +37,8 @@ pub fn watcher_event_to_response(e: WatcherEvent) -> Option<WsResponse> {
|
||||
// Agent lifecycle events are forwarded to chat transports only; no WebSocket message.
|
||||
WatcherEvent::AgentStarted { .. } => None,
|
||||
WatcherEvent::AgentCompleted { .. } => None,
|
||||
// Creation notifications are forwarded to chat transports only; no WebSocket message.
|
||||
WatcherEvent::NewItemCreated { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -210,6 +212,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Backlog,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -224,6 +228,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Coding,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -240,6 +246,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Done,
|
||||
status: crate::pipeline_state::Status::Done,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
@@ -301,6 +309,8 @@ mod tests {
|
||||
model: Some(crate::agents::AgentModel::Sonnet),
|
||||
status: crate::agents::AgentStatus::Running,
|
||||
}),
|
||||
pipeline: crate::pipeline_state::Pipeline::Coding,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
|
||||
@@ -205,6 +205,8 @@ mod tests {
|
||||
error: None,
|
||||
merge_failure: None,
|
||||
agent: None,
|
||||
pipeline: crate::pipeline_state::Pipeline::Backlog,
|
||||
status: crate::pipeline_state::Status::Active,
|
||||
review_hold: None,
|
||||
qa: None,
|
||||
retry_count: None,
|
||||
|
||||
@@ -217,7 +217,13 @@ async fn migrate_json_stores_to_sqlite(huskies_dir: &Path) {
|
||||
}
|
||||
|
||||
/// Set up the server log file, node identity keypair, pipeline DB, and CRDT state.
|
||||
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
||||
///
|
||||
/// When `is_agent` is `true` the pipeline database is opened at an isolated
|
||||
/// temporary path (or at `HUSKIES_DB_PATH` if that env-var is set) so that the
|
||||
/// headless build agent never touches the production `.huskies/pipeline.db`.
|
||||
/// This prevents feature-branch migrations from being applied to the shared
|
||||
/// database and bricking the next server restart.
|
||||
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path, is_agent: bool) {
|
||||
// Enable persistent server log file now that the project root is known.
|
||||
if let Some(ref root) = *app_state.project_root.lock().unwrap() {
|
||||
let log_dir = root.join(".huskies").join("logs");
|
||||
@@ -242,20 +248,91 @@ pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise the SQLite pipeline shadow-write database and CRDT state layer.
|
||||
// Clone the path out before the await so we don't hold the MutexGuard across
|
||||
// an await point.
|
||||
let pipeline_db_path = app_state
|
||||
// Resolve the pipeline DB path.
|
||||
//
|
||||
// Priority order:
|
||||
// 1. HUSKIES_DB_PATH env var (operator override, any mode)
|
||||
// 2. Agent mode: process-local temp file so the production DB is never touched
|
||||
// 3. Default: {project_root}/.huskies/pipeline.db
|
||||
let pipeline_db_path: Option<PathBuf> = if let Ok(env_path) = std::env::var("HUSKIES_DB_PATH") {
|
||||
let p = PathBuf::from(&env_path);
|
||||
crate::slog!("[db] HUSKIES_DB_PATH override: {}", p.display());
|
||||
Some(p)
|
||||
} else if is_agent {
|
||||
// Headless agent: use an isolated temp DB so that any migrations compiled
|
||||
// into this binary (e.g. from a feature branch) are never applied to the
|
||||
// production database. The temp file is process-unique and harmless to
|
||||
// leave behind after the agent exits.
|
||||
let pid = std::process::id();
|
||||
let temp_path = std::env::temp_dir().join(format!("huskies-agent-{pid}.db"));
|
||||
crate::slog!(
|
||||
"[db] Agent mode: using isolated DB at {} (not touching production pipeline.db)",
|
||||
temp_path.display()
|
||||
);
|
||||
Some(temp_path)
|
||||
} else {
|
||||
// Server mode: use the project-local production database.
|
||||
app_state
|
||||
.project_root
|
||||
.lock()
|
||||
.unwrap()
|
||||
.as_ref()
|
||||
.map(|root| root.join(".huskies").join("pipeline.db"));
|
||||
.map(|root| root.join(".huskies").join("pipeline.db"))
|
||||
};
|
||||
|
||||
if let Some(ref db_path) = pipeline_db_path {
|
||||
if let Err(e) = db::init(db_path).await {
|
||||
crate::slog!("[db] Failed to initialise pipeline.db: {e}");
|
||||
} else {
|
||||
// ── Migration drift self-check (server mode only) ─────────────────────
|
||||
//
|
||||
// In server mode, detect whether the live database contains migrations
|
||||
// that were applied by a newer binary (e.g. a feature-branch agent that
|
||||
// ran before the feature was merged). If so, log each unknown migration
|
||||
// and exit with a clear actionable message. This is the root cause of
|
||||
// the 2026-05-14 21:07 production outage where the server came up but
|
||||
// the CRDT never initialised.
|
||||
if !is_agent && let Some(pool) = db::get_shared_pool() {
|
||||
let drift = db::check_schema_drift(pool).await;
|
||||
if !drift.is_empty() {
|
||||
for m in &drift {
|
||||
crate::slog!(
|
||||
"[db] UNKNOWN migration {} ('{}') applied at {} \
|
||||
is not in the compiled-in set",
|
||||
m.version,
|
||||
m.description,
|
||||
m.installed_on,
|
||||
);
|
||||
}
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
"error: pipeline.db contains {} migration(s) that are not \
|
||||
recognised by this binary:",
|
||||
drift.len()
|
||||
);
|
||||
for m in &drift {
|
||||
eprintln!(
|
||||
" \u{2022} migration {} ('{}') applied at {}",
|
||||
m.version, m.description, m.installed_on
|
||||
);
|
||||
}
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
"This means the database was previously opened by a newer \
|
||||
version of huskies."
|
||||
);
|
||||
eprintln!(
|
||||
"To fix: rebuild huskies from the latest source (the branch \
|
||||
that added these migrations) and restart."
|
||||
);
|
||||
eprintln!(
|
||||
"Do NOT start the old binary against this database — it will \
|
||||
behave incorrectly."
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// One-shot migration: move any existing JSON store files into SQLite.
|
||||
let huskies_dir = db_path.parent().unwrap_or(db_path);
|
||||
migrate_json_stores_to_sqlite(huskies_dir).await;
|
||||
|
||||
+240
-17
@@ -156,6 +156,17 @@ pub(crate) fn spawn_tick_loop(
|
||||
{scheduled_count} scheduled timer(s)"
|
||||
);
|
||||
|
||||
let (reconcile_interval, done_retention) = root
|
||||
.as_ref()
|
||||
.and_then(|r| config::ProjectConfig::load(r).ok())
|
||||
.map(|c| {
|
||||
(
|
||||
c.watcher.reconcile_interval_secs,
|
||||
std::time::Duration::from_secs(c.watcher.done_retention_secs),
|
||||
)
|
||||
})
|
||||
.unwrap_or((30, std::time::Duration::from_secs(4 * 3600)));
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_secs(1));
|
||||
let mut tick_count: u64 = 0;
|
||||
@@ -190,6 +201,15 @@ pub(crate) fn spawn_tick_loop(
|
||||
}
|
||||
agents.reap_stale_merge_jobs();
|
||||
}
|
||||
|
||||
// Periodic reconciler: converge subscriber side effects so that
|
||||
// Lagged broadcast events never leave state permanently diverged.
|
||||
if tick_count.is_multiple_of(reconcile_interval)
|
||||
&& let Some(ref r) = root
|
||||
{
|
||||
crate::slog!("[reconcile] Running periodic reconcile pass.");
|
||||
run_reconcile_pass(r, &agents, done_retention).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -450,16 +470,50 @@ async fn execute_prompt_action(
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the startup reconstruction task: replay the current pipeline state
|
||||
/// through the [`TransitionFired`][crate::pipeline_state::TransitionFired]
|
||||
/// broadcast channel so that all existing subscribers (worktree lifecycle,
|
||||
/// merge-failure auto-spawn, auto-assign) react identically to a live
|
||||
/// transition, then trigger a full auto-assign pass.
|
||||
/// Run one full reconcile pass: call each subscriber's idempotent `reconcile()`
|
||||
/// entry point so that side effects converge regardless of whether the
|
||||
/// broadcast channel lagged during startup or at runtime.
|
||||
///
|
||||
/// Replaces the legacy scan-based `reconcile_on_startup` approach. The CRDT
|
||||
/// is the durable source of truth; replaying it as synthetic self-transitions
|
||||
/// is cheaper, simpler, and idempotent: a second replay produces another burst
|
||||
/// of events that subscribers safely ignore for already-assigned stories.
|
||||
/// Safe to call any number of times — every reconcile function is idempotent.
|
||||
pub(crate) async fn run_reconcile_pass(
|
||||
root: &std::path::Path,
|
||||
agents: &Arc<AgentPool>,
|
||||
done_retention: std::time::Duration,
|
||||
) {
|
||||
// Content-GC: purge content-store entries for terminal/tombstoned stories.
|
||||
crate::db::gc::sweep_zombie_content_on_startup();
|
||||
|
||||
// Worktree create: ensure every Coding story has a worktree.
|
||||
crate::agents::pool::worktree_lifecycle::reconcile_worktree_create(root, agents.port()).await;
|
||||
|
||||
// Worktree cleanup: remove worktrees for terminal stories.
|
||||
crate::agents::pool::worktree_lifecycle::reconcile_worktree_cleanup(root).await;
|
||||
|
||||
// Done-archive: archive Done stories whose retention period has elapsed.
|
||||
crate::io::watcher::sweep_done_to_archived(done_retention);
|
||||
|
||||
// Cost-rollup: re-populate the in-memory register from disk.
|
||||
crate::agents::pool::cost_rollup_subscriber::reconcile_cost_rollup(root);
|
||||
|
||||
// Merge-failure: spawn mergemaster for ConflictDetected stories with no active agent.
|
||||
crate::agents::pool::auto_assign::reconcile_merge_failure(agents, root).await;
|
||||
|
||||
// Merge-block: no-op (in-memory counter cannot be reconstructed from CRDT).
|
||||
crate::agents::pool::auto_assign::reconcile_merge_failure_block();
|
||||
|
||||
// Audit-log: no-op (historical replay would produce misleading entries).
|
||||
crate::pipeline_state::reconcile_audit_log();
|
||||
}
|
||||
|
||||
/// Spawn the startup reconciliation task: run a full reconcile pass so that all
|
||||
/// side-effect subscribers converge on the current CRDT state without flooding
|
||||
/// the broadcast channel, then trigger a full auto-assign pass.
|
||||
///
|
||||
/// Replaces the former `replay_current_pipeline_state()` approach, which
|
||||
/// sent one synthetic `TransitionFired` per CRDT item through the broadcast
|
||||
/// channel. With >256 items that caused `Subscriber lagged` warnings and
|
||||
/// left subscribers with diverged state. Direct reconcile calls bypass the
|
||||
/// channel entirely and scale to any CRDT size.
|
||||
pub(crate) fn spawn_startup_reconciliation(
|
||||
startup_root: Option<PathBuf>,
|
||||
startup_agents: Arc<AgentPool>,
|
||||
@@ -467,20 +521,189 @@ pub(crate) fn spawn_startup_reconciliation(
|
||||
) {
|
||||
if let Some(root) = startup_root {
|
||||
tokio::spawn(async move {
|
||||
// Purge content-store entries for stories that reached terminal
|
||||
// stages in a previous session (before the GC subscriber was active).
|
||||
crate::db::gc::sweep_zombie_content_on_startup();
|
||||
crate::slog!(
|
||||
"[startup] Replaying current pipeline state through TransitionFired channel."
|
||||
);
|
||||
crate::pipeline_state::replay_current_pipeline_state();
|
||||
let done_retention = crate::config::ProjectConfig::load(&root)
|
||||
.map(|c| std::time::Duration::from_secs(c.watcher.done_retention_secs))
|
||||
.unwrap_or_else(|_| std::time::Duration::from_secs(4 * 3600));
|
||||
crate::slog!("[startup] Running per-subscriber reconcile pass.");
|
||||
run_reconcile_pass(&root, &startup_agents, done_retention).await;
|
||||
crate::slog!("[auto-assign] Scanning pipeline stages for unassigned work.");
|
||||
startup_agents.auto_assign_available_work(&root).await;
|
||||
let _ = startup_reconciliation_tx.send(ReconciliationEvent {
|
||||
story_id: String::new(),
|
||||
status: "done".to_string(),
|
||||
message: "Startup event replay complete.".to_string(),
|
||||
message: "Startup reconcile pass complete.".to_string(),
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::db::{
|
||||
ContentKey, ItemMeta, ensure_content_store, write_content, write_item_with_content,
|
||||
};
|
||||
use crate::io::watcher::WatcherEvent;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
fn make_pool() -> Arc<AgentPool> {
|
||||
let (tx, _) = broadcast::channel::<WatcherEvent>(16);
|
||||
Arc::new(AgentPool::new(3099, tx))
|
||||
}
|
||||
|
||||
fn setup_huskies_dir(tmp: &tempfile::TempDir) -> std::path::PathBuf {
|
||||
let root = tmp.path().to_path_buf();
|
||||
std::fs::create_dir_all(root.join(".huskies")).unwrap();
|
||||
std::fs::write(root.join(".huskies/project.toml"), "").unwrap();
|
||||
root
|
||||
}
|
||||
|
||||
/// AC4 + AC6: seeding >256 CRDT items and running the reconcile pass must not
|
||||
/// produce any "Subscriber lagged" warnings (structural guarantee — the new
|
||||
/// path never broadcasts through the channel) and must purge zombie content
|
||||
/// for all terminal stories after one reconcile tick.
|
||||
///
|
||||
/// Distribution: 300 Backlog + 200 Coding + 200 Abandoned (terminal) + 300 QA
|
||||
/// = 1000 items. Each of the 200 Abandoned stories gets a content-store entry
|
||||
/// seeded before the reconcile so we can assert it is cleaned up.
|
||||
#[tokio::test]
|
||||
async fn reconcile_pass_scales_to_1000_items_without_lagged_divergence() {
|
||||
crate::crdt_state::init_for_test();
|
||||
ensure_content_store();
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = setup_huskies_dir(&tmp);
|
||||
let pool = make_pool();
|
||||
|
||||
// ── Seed 1000 items across several stages ──────────────────────────
|
||||
for i in 0..300u32 {
|
||||
let id = format!("1066_backlog_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"1_backlog",
|
||||
"---\nname: Backlog\n---\n",
|
||||
ItemMeta::named("Backlog"),
|
||||
);
|
||||
}
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_coding_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"2_current",
|
||||
"---\nname: Coding\n---\n",
|
||||
ItemMeta::named("Coding"),
|
||||
);
|
||||
}
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_abandoned_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"2_current",
|
||||
"---\nname: Abandoned\n---\n",
|
||||
ItemMeta::named("Abandoned"),
|
||||
);
|
||||
// Move to terminal stage (Abandoned).
|
||||
crate::agents::lifecycle::abandon_story(&id).expect("abandon must succeed");
|
||||
// Seed a content-store entry to verify GC cleans it up.
|
||||
write_content(ContentKey::Story(&id), "zombie content");
|
||||
}
|
||||
for i in 0..300u32 {
|
||||
let id = format!("1066_qa_{i:04}");
|
||||
write_item_with_content(&id, "3_qa", "---\nname: QA\n---\n", ItemMeta::named("QA"));
|
||||
}
|
||||
|
||||
// ── Subscribe BEFORE the reconcile to catch any Lagged events ──────
|
||||
let mut transition_rx = crate::pipeline_state::subscribe_transitions();
|
||||
|
||||
// ── Run one reconcile pass ─────────────────────────────────────────
|
||||
// Use zero retention so any Done items (none here, but defensive) archive immediately.
|
||||
run_reconcile_pass(&root, &pool, std::time::Duration::ZERO).await;
|
||||
|
||||
// ── Drain the transition channel; must contain zero Lagged events ──
|
||||
// The reconcile path never broadcasts through TRANSITION_TX, so any
|
||||
// events here are from the abandon_story calls above (all pre-reconcile).
|
||||
let mut lagged_count = 0u64;
|
||||
loop {
|
||||
match transition_rx.try_recv() {
|
||||
Ok(_) => {}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(n)) => {
|
||||
lagged_count += n;
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Empty)
|
||||
| Err(tokio::sync::broadcast::error::TryRecvError::Closed) => break,
|
||||
}
|
||||
}
|
||||
|
||||
// The reconcile pass itself must not have sent anything through the channel.
|
||||
// (abandon_story above may have sent some events, but those are pre-reconcile
|
||||
// lifecycle transitions, not the reconcile itself.)
|
||||
assert_eq!(
|
||||
lagged_count, 0,
|
||||
"run_reconcile_pass must not broadcast through the transition channel (no Lagged)"
|
||||
);
|
||||
|
||||
// ── Assert: zombie content purged for all 200 Abandoned stories ────
|
||||
for i in 0..200u32 {
|
||||
let id = format!("1066_abandoned_{i:04}");
|
||||
assert!(
|
||||
crate::db::read_content(ContentKey::Story(&id)).is_none(),
|
||||
"zombie content must be purged for abandoned story {id}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// AC4 regression: the subscriber channel (capacity 256) must not lag when
|
||||
/// 1000 items are seeded — the reconcile path bypasses the channel entirely.
|
||||
#[tokio::test]
|
||||
async fn reconcile_never_floods_broadcast_channel() {
|
||||
crate::crdt_state::init_for_test();
|
||||
ensure_content_store();
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = setup_huskies_dir(&tmp);
|
||||
let pool = make_pool();
|
||||
|
||||
// Seed 1000 Backlog items (no lifecycle transitions — clean slate).
|
||||
for i in 0..1000u32 {
|
||||
let id = format!("1066_flood_{i:04}");
|
||||
write_item_with_content(
|
||||
&id,
|
||||
"1_backlog",
|
||||
"---\nname: Flood\n---\n",
|
||||
ItemMeta::named("Flood"),
|
||||
);
|
||||
}
|
||||
|
||||
// Subscribe and drain pre-existing channel noise. Note: `TRANSITION_TX`
|
||||
// is a single process-global broadcast channel shared by every test in
|
||||
// this binary, so other tests running on parallel threads may write to
|
||||
// it during our window. We can't assert `msg_count == 0` — that's
|
||||
// racy by construction. The real "never floods" invariant is captured
|
||||
// by the Lagged check: 1000 seeded items must not overflow the
|
||||
// 256-slot channel, which is only possible if the reconcile path
|
||||
// bypasses the broadcast (which is what AC4 requires).
|
||||
let mut rx = crate::pipeline_state::subscribe_transitions();
|
||||
while let Ok(_) | Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) =
|
||||
rx.try_recv()
|
||||
{}
|
||||
|
||||
run_reconcile_pass(&root, &pool, std::time::Duration::ZERO).await;
|
||||
|
||||
let mut lagged = false;
|
||||
loop {
|
||||
match rx.try_recv() {
|
||||
Ok(_) => {}
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => {
|
||||
lagged = true;
|
||||
break;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
!lagged,
|
||||
"run_reconcile_pass must never cause Lagged on the broadcast channel"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,26 @@ use super::{WorktreeInfo, worktree_path, write_mcp_json};
|
||||
/// - Creates the worktree at `{project_root}/.huskies/worktrees/{story_id}`
|
||||
/// on branch `feature/story-{story_id}`.
|
||||
/// - Writes `.mcp.json` in the worktree pointing to the MCP server at `port`.
|
||||
/// - Runs setup commands from the config for each component.
|
||||
/// - Runs setup commands from the config for each component **only on fresh
|
||||
/// creation** — see below.
|
||||
/// - If the worktree/branch already exists, reuses rather than errors.
|
||||
///
|
||||
/// **Idempotency on reuse:** when `wt_path` already exists, this function does
|
||||
/// **not** re-run [`run_setup_commands`]. Setup commands typically include
|
||||
/// destructive operations like `npm ci` (`rm -rf node_modules` then reinstall)
|
||||
/// that, if run concurrently with another reuse from a different caller, leave
|
||||
/// `node_modules` in a half-populated state (broken `.bin/*` symlinks pointing
|
||||
/// at empty package directories). This used to be rare and tolerable, but
|
||||
/// after story 1066 added a 30-second periodic reconciler that calls
|
||||
/// `reconcile_worktree_create` → `create_worktree`, every Coding story got a
|
||||
/// destructive `npm ci` every 30s — racing the merge-gate's own frontend
|
||||
/// build and producing the `sh: 1: tsc: not found` failure that bricked
|
||||
/// story 1086 retries on 2026-05-15.
|
||||
///
|
||||
/// The reuse path now matches the documented contract of
|
||||
/// `reconcile_worktree_create`: "no-op for stories whose worktree already
|
||||
/// exists." If a worktree is in a bad state and needs re-setup, the caller
|
||||
/// must explicitly delete it and call `create_worktree` again.
|
||||
pub async fn create_worktree(
|
||||
project_root: &Path,
|
||||
story_id: &str,
|
||||
@@ -30,14 +48,15 @@ pub async fn create_worktree(
|
||||
.unwrap_or_else(|| detect_base_branch(project_root));
|
||||
let root = project_root.to_path_buf();
|
||||
|
||||
// Already exists — reuse (ensure sparse checkout is configured)
|
||||
// Already exists — reuse without re-running destructive setup commands.
|
||||
// Sparse checkout is reconfigured (cheap, idempotent) and `.mcp.json` is
|
||||
// rewritten in case the server port changed across restarts.
|
||||
if wt_path.exists() {
|
||||
let wt_clone = wt_path.clone();
|
||||
tokio::task::spawn_blocking(move || configure_sparse_checkout(&wt_clone))
|
||||
.await
|
||||
.map_err(|e| format!("spawn_blocking: {e}"))??;
|
||||
write_mcp_json(&wt_path, port)?;
|
||||
run_setup_commands(&wt_path, config).await;
|
||||
return Ok(WorktreeInfo {
|
||||
path: wt_path,
|
||||
branch,
|
||||
@@ -374,32 +393,80 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_worktree_reuse_succeeds_despite_setup_failure() {
|
||||
async fn create_worktree_reuse_does_not_rerun_setup_commands() {
|
||||
// Regression for the 2026-05-15 1086 outage: the reuse path used to
|
||||
// re-run setup commands (including destructive `npm ci`). Combined
|
||||
// with story 1066's 30-second periodic reconciler, this fired
|
||||
// `npm ci` against every Coding story every 30s and caused
|
||||
// `tsc: not found` gate failures. The reuse path must now be a
|
||||
// no-op for setup commands.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let project_root = tmp.path().join("my-project");
|
||||
fs::create_dir_all(&project_root).unwrap();
|
||||
init_git_repo(&project_root);
|
||||
|
||||
// First creation — no setup commands, should succeed
|
||||
create_worktree(&project_root, "173_reuse_fail", &empty_config(), 3001)
|
||||
create_worktree(&project_root, "173_reuse_no_setup", &empty_config(), 3001)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second call — worktree exists, setup commands fail, must still succeed
|
||||
// Second call — worktree exists. Setup commands are configured to
|
||||
// FAIL (`exit 1`); if the reuse path were still running them, the
|
||||
// failure log would surface — but more importantly, this test
|
||||
// documents that the reuse path is expected to NEVER reach
|
||||
// `run_setup_commands` and therefore can never produce a setup
|
||||
// failure regardless of how broken the setup config is.
|
||||
let result = create_worktree(
|
||||
&project_root,
|
||||
"173_reuse_fail",
|
||||
"173_reuse_no_setup",
|
||||
&failing_setup_config(),
|
||||
3002,
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"create_worktree reuse must succeed even if setup commands fail: {:?}",
|
||||
"reuse must succeed and must not run setup commands: {:?}",
|
||||
result.err()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_worktree_reuse_does_not_create_setup_marker_file() {
|
||||
// Stronger version of the above: assert that on reuse, a setup
|
||||
// command that would have created a marker file does NOT run.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let project_root = tmp.path().join("my-project");
|
||||
fs::create_dir_all(&project_root).unwrap();
|
||||
init_git_repo(&project_root);
|
||||
|
||||
// First creation — no setup, so no marker yet.
|
||||
let info = create_worktree(&project_root, "174_reuse_marker", &empty_config(), 3001)
|
||||
.await
|
||||
.unwrap();
|
||||
let marker = info.path.join("__setup_ran__");
|
||||
assert!(!marker.exists(), "no marker after empty-setup creation");
|
||||
|
||||
// Second call with a setup command that WOULD create the marker if
|
||||
// run. The reuse path must not run it.
|
||||
let cfg = ProjectConfig {
|
||||
component: vec![ComponentConfig {
|
||||
name: "marker".to_string(),
|
||||
path: ".".to_string(),
|
||||
setup: vec!["touch __setup_ran__".to_string()],
|
||||
teardown: vec![],
|
||||
}],
|
||||
..empty_config()
|
||||
};
|
||||
create_worktree(&project_root, "174_reuse_marker", &cfg, 3002)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(
|
||||
!marker.exists(),
|
||||
"reuse path must not run setup commands; marker file was created"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn install_pre_commit_hook_creates_executable_hook_and_sets_hookspath() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user