From 761b6934f18ee25aebf91729e2664c41f962807e Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 14 May 2026 08:36:46 +0000 Subject: [PATCH] huskies: merge 1007 --- server/src/db/mod.rs | 6 +- server/src/http/mcp/diagnostics/mod.rs | 34 ++++++++ server/src/http/mcp/dispatch.rs | 3 + .../src/http/mcp/story_tools/story/create.rs | 80 ++++++++++++++++++- server/src/http/mcp/tools_list/mod.rs | 4 +- .../src/http/mcp/tools_list/system_tools.rs | 22 +++++ server/src/http/workflow/bug_ops/tests.rs | 73 +++++++++++++++++ 7 files changed, 215 insertions(+), 7 deletions(-) diff --git a/server/src/db/mod.rs b/server/src/db/mod.rs index 1edf2133..9244dcef 100644 --- a/server/src/db/mod.rs +++ b/server/src/db/mod.rs @@ -21,10 +21,8 @@ pub mod gc; pub mod ops; /// Recovery for half-written pipeline items (bug 1001 backfill). /// -/// No public MCP surface — kept as an in-process library for future incident -/// response. Re-expose via diagnostics::tool_recover_half_written_items if -/// the bug ever resurfaces. -#[allow(dead_code)] +/// Exposed via `diagnostics::tool_find_orphaned_items` and +/// `diagnostics::tool_recover_half_written_items` MCP tools. pub mod recover; /// Background shadow-write task — persists pipeline items to SQLite asynchronously. pub mod shadow_write; diff --git a/server/src/http/mcp/diagnostics/mod.rs b/server/src/http/mcp/diagnostics/mod.rs index f98a0d14..87e0aa7e 100644 --- a/server/src/http/mcp/diagnostics/mod.rs +++ b/server/src/http/mcp/diagnostics/mod.rs @@ -158,6 +158,40 @@ pub(crate) fn tool_mesh_status(_args: &Value) -> Result { .map_err(|e| format!("Serialization error: {e}")) } +/// MCP tool: find half-written (orphaned) pipeline items. +/// +/// Scans the in-memory content store for story IDs that have no corresponding +/// live CRDT entry. These are items that were partially written before bug +/// 1001 was fixed — the content store (and optionally the SQLite shadow) +/// accepted the write, but the CRDT silently rejected it (because the ID was +/// tombstoned). They are invisible to every normal read path. +pub(crate) fn tool_find_orphaned_items(_args: &Value) -> Result { + let orphans = crate::db::recover::find_half_written_items(); + serde_json::to_string_pretty(&serde_json::json!({ + "orphan_count": orphans.len(), + "orphans": orphans, + })) + .map_err(|e| format!("Serialization error: {e}")) +} + +/// MCP tool: recover half-written (orphaned) pipeline items. +/// +/// For each orphaned item (content store row with no live CRDT entry), lifts +/// the content onto a fresh non-tombstoned ID so it becomes addressable again. +/// Pass `only` to restrict recovery to specific orphan IDs; omit to recover all. +/// Returns a mapping of `old_id → new_id` for every successful recovery. +pub(crate) fn tool_recover_half_written_items(args: &Value) -> Result { + let only: Option> = args + .get("only") + .and_then(|v| serde_json::from_value(v.clone()).ok()); + let results = crate::db::recover::recover_half_written_items(only.as_deref()); + serde_json::to_string_pretty(&serde_json::json!({ + "recovered_count": results.len(), + "mappings": results, + })) + .map_err(|e| format!("Serialization error: {e}")) +} + /// MCP tool: count lines in a specific file relative to the project root. pub(crate) fn tool_loc_file(args: &Value, ctx: &AppContext) -> Result { let file_path = args diff --git a/server/src/http/mcp/dispatch.rs b/server/src/http/mcp/dispatch.rs index ad5cecf6..a725b1b1 100644 --- a/server/src/http/mcp/dispatch.rs +++ b/server/src/http/mcp/dispatch.rs @@ -93,6 +93,9 @@ pub async fn dispatch_tool_call( "purge_story" => story_tools::tool_purge_story(&args, ctx), // Debug CRDT dump (story 515) "dump_crdt" => diagnostics::tool_dump_crdt(&args), + // Half-written item recovery (bug 1001) + "find_orphaned_items" => diagnostics::tool_find_orphaned_items(&args), + "recover_half_written_items" => diagnostics::tool_recover_half_written_items(&args), // Read-only peer mesh diagnostics (story 720) "mesh_status" => diagnostics::tool_mesh_status(&args), // Arbitrary pipeline movement diff --git a/server/src/http/mcp/story_tools/story/create.rs b/server/src/http/mcp/story_tools/story/create.rs index c1b82784..f74eb374 100644 --- a/server/src/http/mcp/story_tools/story/create.rs +++ b/server/src/http/mcp/story_tools/story/create.rs @@ -94,17 +94,39 @@ pub(crate) fn tool_create_story(args: &Value, ctx: &AppContext) -> Result Result { let story_id = args .get("story_id") .and_then(|v| v.as_str()) .ok_or("Missing required argument: story_id")?; - crate::crdt_state::evict_item(story_id)?; + if let Err(evict_err) = crate::crdt_state::evict_item(story_id) { + // evict_item failed — either the CRDT is not initialised or the item + // has no live CRDT entry. Handle the half-written-item case: the + // content store has the row but the CRDT doesn't (bug 1001 remnant). + let in_content_store = + crate::db::read_content(crate::db::ContentKey::Story(story_id)).is_some(); + if in_content_store { + crate::db::delete_item(story_id); + return Ok(format!( + "'{story_id}' had no CRDT entry but existed in the content store \ + (half-written item — likely a pre-fix bug 1001 artifact). \ + Removed the orphaned content-store row." + )); + } + return Err(format!( + "'{story_id}' not found in CRDT or content store: {evict_err}" + )); + } Ok(format!( "Evicted '{story_id}' from in-memory CRDT (tombstone op persisted to crdt_ops; CONTENT_STORE entry dropped)." @@ -117,6 +139,60 @@ mod tests { use crate::http::test_helpers::test_ctx; use serde_json::json; + /// Regression for bug 1001: `purge_story` must succeed on a half-written + /// item (content store has a row but the CRDT has no entry). + #[test] + fn tool_purge_story_cleans_half_written_item() { + crate::crdt_state::init_for_test(); + crate::db::ensure_content_store(); + + // Seed a content-store row without a corresponding CRDT entry + // by writing then tombstoning, then re-writing the content store + // directly — this mimics the pre-fix half-write scenario. + let story_id = "9960_story_purge_halfwrite"; + crate::db::write_item_with_content( + story_id, + "1_backlog", + "---\nname: Half Written\n---\n", + crate::db::ItemMeta::named("Half Written"), + ); + crate::crdt_state::evict_item(story_id).expect("evict must succeed for setup"); + // Re-inject only the content row (simulating the bug 1001 half-write). + crate::db::write_content( + crate::db::ContentKey::Story(story_id), + "---\nname: Half Written\n---\n", + ); + // CRDT must have no entry at this point. + assert!( + crate::crdt_state::read_item(story_id).is_none(), + "setup: CRDT must not have the item" + ); + // But content store must have it. + assert!( + crate::db::read_content(crate::db::ContentKey::Story(story_id)).is_some(), + "setup: content store must have the item" + ); + + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_purge_story(&json!({"story_id": story_id}), &ctx); + assert!( + result.is_ok(), + "purge_story must succeed on a half-written item: {result:?}" + ); + let msg = result.unwrap(); + assert!( + msg.contains("half-written") || msg.contains("orphaned"), + "result should mention the half-write: {msg}" + ); + + // Content store must be clean after purge. + assert!( + crate::db::read_content(crate::db::ContentKey::Story(story_id)).is_none(), + "content store must be empty after purge of half-written item" + ); + } + #[test] fn tool_create_story_missing_name() { let tmp = tempfile::tempdir().unwrap(); diff --git a/server/src/http/mcp/tools_list/mod.rs b/server/src/http/mcp/tools_list/mod.rs index 3234c9ab..54836947 100644 --- a/server/src/http/mcp/tools_list/mod.rs +++ b/server/src/http/mcp/tools_list/mod.rs @@ -106,7 +106,9 @@ mod tests { assert!(names.contains(&"show_epic")); assert!(names.contains(&"freeze_story")); assert!(names.contains(&"unfreeze_story")); - assert_eq!(tools.len(), 74); + assert!(names.contains(&"find_orphaned_items")); + assert!(names.contains(&"recover_half_written_items")); + assert_eq!(tools.len(), 76); } #[test] diff --git a/server/src/http/mcp/tools_list/system_tools.rs b/server/src/http/mcp/tools_list/system_tools.rs index 7e0f09d5..2c8a5810 100644 --- a/server/src/http/mcp/tools_list/system_tools.rs +++ b/server/src/http/mcp/tools_list/system_tools.rs @@ -341,5 +341,27 @@ pub(super) fn system_tools() -> Vec { "properties": {} } }), + json!({ + "name": "find_orphaned_items", + "description": "Find half-written (orphaned) pipeline items: story IDs that exist in the content store but have no live CRDT entry. These are invisible to all normal read paths (list_refactors, get_pipeline_status, etc.) and result from the bug 1001 split-brain race. Returns a list of orphaned IDs with their names and tombstone status. Use recover_half_written_items to fix them.", + "inputSchema": { + "type": "object", + "properties": {} + } + }), + json!({ + "name": "recover_half_written_items", + "description": "Recover half-written (orphaned) pipeline items by lifting each onto a fresh non-tombstoned ID. For each orphan, allocates a new ID, copies the content, re-applies item_type and depends_on from front matter, verifies the new entry is live in the CRDT, then removes the orphaned row. Pass 'only' to restrict recovery to specific orphan IDs (safe for live systems); omit to recover all. Returns old_id → new_id mappings for every successful recovery.", + "inputSchema": { + "type": "object", + "properties": { + "only": { + "type": "array", + "items": { "type": "string" }, + "description": "Optional: restrict recovery to these specific orphan IDs (e.g. ['1000', '999']). Omit to recover all orphans." + } + } + } + }), ] } diff --git a/server/src/http/workflow/bug_ops/tests.rs b/server/src/http/workflow/bug_ops/tests.rs index e08f271e..6955b2ec 100644 --- a/server/src/http/workflow/bug_ops/tests.rs +++ b/server/src/http/workflow/bug_ops/tests.rs @@ -507,3 +507,76 @@ fn list_refactor_files_returns_ok() { let result = list_refactor_files(tmp.path()); assert!(result.is_ok()); } + +/// Regression for bug 1001: 8 rapid sequential `create_refactor` calls with +/// `depends_on` chains must all land cleanly in BOTH the content store AND +/// the CRDT. Before the fix, the allocator could return a tombstoned ID on +/// a tight loop, producing a half-written item invisible to `list_refactors` +/// and `update_story`. +#[test] +fn rapid_sequential_creates_all_land_in_crdt() { + crate::crdt_state::init_for_test(); + crate::db::ensure_content_store(); + let tmp = tempfile::tempdir().unwrap(); + + let mut ids: Vec = Vec::new(); + + for i in 0..8u32 { + // Build a depends_on chain: item i depends on item i-1. + let depends_on: Option> = if ids.is_empty() { + None + } else { + let last_num: u32 = ids.last().unwrap().parse().unwrap_or(0); + Some(vec![last_num]) + }; + + let id = create_refactor_file( + tmp.path(), + &format!("Rapid Refactor {i}"), + None, + &[format!("AC for rapid refactor {i}")], + depends_on.as_deref(), + ) + .unwrap_or_else(|e| panic!("create_refactor_file {i} failed: {e}")); + + // Immediately verify it landed in CRDT (the core transactional guarantee). + assert!( + crate::crdt_state::read_item(&id).is_some(), + "refactor {i} (id={id}) must be in CRDT immediately after create" + ); + + ids.push(id); + } + + assert_eq!(ids.len(), 8, "all 8 creates must succeed"); + assert_eq!( + ids.iter().collect::>().len(), + 8, + "all 8 IDs must be distinct" + ); + + // list_refactor_files must return all 8 (reads from CRDT). + let listed = list_refactor_files(tmp.path()).expect("list_refactor_files must succeed"); + let listed_ids: std::collections::HashSet<&str> = + listed.iter().map(|(id, _)| id.as_str()).collect(); + for id in &ids { + assert!( + listed_ids.contains(id.as_str()), + "refactor '{id}' must appear in list_refactor_files: {listed_ids:?}" + ); + } + + // set_name (the update_story --name path) must succeed for all 8. + for (i, id) in ids.iter().enumerate() { + let new_name = format!("Renamed Rapid Refactor {i}"); + let success = crate::crdt_state::set_name(id, Some(&new_name)); + assert!(success, "set_name for refactor {i} (id={id}) must succeed"); + let view = crate::crdt_state::read_item(id) + .unwrap_or_else(|| panic!("item {i} (id={id}) must still be in CRDT after rename")); + assert_eq!( + view.name(), + new_name, + "CRDT must reflect the new name for refactor {i}" + ); + } +}