From 92b1744c3ac62f008576d9d3e83916e06df1c4b8 Mon Sep 17 00:00:00 2001 From: Timmy Date: Wed, 13 May 2026 19:26:07 +0100 Subject: [PATCH] feat(1001): story_ids filter for recover_half_written_items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first dry-run against the live pipeline surfaced 735 orphans (35 tombstoned half-writes, 700 stale content rows with no CRDT entry — mostly artefacts of the pre-numeric-id era). Bulk-recovering would resurrect a lot of stories the user deliberately purged in the past. Add an optional `story_ids` filter that restricts both discovery (in dry-run) and recovery to a named subset, so the operator can target the specific recent half-writes without touching anything else. The new test asserts the filter is honoured. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/db/recover.rs | 50 +++++++++++++++++-- server/src/http/mcp/diagnostics/mod.rs | 26 ++++++++-- .../src/http/mcp/tools_list/system_tools.rs | 7 ++- 3 files changed, 74 insertions(+), 9 deletions(-) diff --git a/server/src/db/recover.rs b/server/src/db/recover.rs index d1b3363b..2a1a3091 100644 --- a/server/src/db/recover.rs +++ b/server/src/db/recover.rs @@ -84,13 +84,23 @@ pub fn find_half_written_items() -> Vec { /// the orphan is left in place. /// 5. Deletes the orphan's content-store + shadow-DB rows via `delete_item`. /// +/// If `only` is provided, recovery operates only on orphans whose `story_id` +/// is in the list — everything else is left alone. This is the safe default +/// for a live system that may have many historic purged ids visible as +/// orphans alongside the genuinely-recent half-writes. +/// /// Returns a (`old_id`, `new_id`, `name`) mapping for every successful /// recovery. Orphans that failed to recover are simply omitted from the /// returned list and logged at WARN level. -pub fn recover_half_written_items() -> Vec { +pub fn recover_half_written_items(only: Option<&[String]>) -> Vec { let orphans = find_half_written_items(); let mut results = Vec::with_capacity(orphans.len()); for orphan in orphans { + if let Some(filter) = only + && !filter.iter().any(|f| f == &orphan.story_id) + { + continue; + } match recover_one(&orphan) { Ok(result) => results.push(result), Err(e) => { @@ -358,7 +368,7 @@ mod tests { assert_eq!(entry.name, "Tombstoned Then Half-Written"); // Run the recovery and inspect the mapping. - let results = recover_half_written_items(); + let results = recover_half_written_items(None); let mapping = results .iter() .find(|r| r.old_id == old_id) @@ -382,10 +392,44 @@ mod tests { ); // Re-running recovery is a no-op (no orphans left). - let again = recover_half_written_items(); + let again = recover_half_written_items(None); assert!( again.iter().all(|r| r.old_id != old_id), "recovery should be idempotent for the same orphan" ); } + + /// The `only` filter restricts recovery to a specific id set; orphans + /// outside the filter are left alone. + #[test] + fn recover_only_filter_restricts_recovery_to_named_ids() { + crdt_state::init_for_test(); + crate::db::ensure_content_store(); + + // Two orphans, only one in the filter. + let recover_id = "9810"; + let keep_id = "9811"; + for id in [recover_id, keep_id] { + let name = format!("Item {id}"); + let content = body(&name, "refactor", None); + write_item_with_content(id, "1_backlog", &content, ItemMeta::named(&name)); + crdt_state::evict_item(id).expect("evict should succeed"); + crate::db::content_store::write_content(ContentKey::Story(id), &content); + } + + let results = recover_half_written_items(Some(&[recover_id.to_string()])); + assert_eq!(results.len(), 1, "exactly one orphan should be recovered"); + assert_eq!(results[0].old_id, recover_id); + + // The filtered-out orphan must still be a half-written item. + let half = find_half_written_items(); + assert!( + half.iter().any(|h| h.story_id == keep_id), + "filtered-out orphan should remain" + ); + assert!( + half.iter().all(|h| h.story_id != recover_id), + "recovered orphan should be gone" + ); + } } diff --git a/server/src/http/mcp/diagnostics/mod.rs b/server/src/http/mcp/diagnostics/mod.rs index 56d16746..cb251eb6 100644 --- a/server/src/http/mcp/diagnostics/mod.rs +++ b/server/src/http/mcp/diagnostics/mod.rs @@ -131,21 +131,37 @@ pub(crate) fn tool_recover_half_written_items(args: &Value) -> Result> = args.get("story_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter() + .filter_map(|x| x.as_str().map(str::to_string)) + .collect() + }) + }); + if dry_run { - let half = crate::db::find_half_written_items(); + let mut half = crate::db::find_half_written_items(); + if let Some(filter) = &only { + half.retain(|h| filter.iter().any(|f| f == &h.story_id)); + } + let count = half.len(); return serde_json::to_string_pretty(&json!({ "dry_run": true, "found": half, - "count": half.len(), + "count": count, "message": format!( - "Discovered {} half-written item(s). Re-run with dry_run=false to recover them.", - half.len() + "Discovered {count} half-written item(s){scope}. Re-run with dry_run=false to recover them.", + scope = if only.is_some() { " matching the filter" } else { "" }, ), })) .map_err(|e| format!("Serialization error: {e}")); } - let results = crate::db::recover_half_written_items(); + let results = crate::db::recover_half_written_items(only.as_deref()); serde_json::to_string_pretty(&json!({ "dry_run": false, "recovered": results, diff --git a/server/src/http/mcp/tools_list/system_tools.rs b/server/src/http/mcp/tools_list/system_tools.rs index ef269aa4..1d02b8dc 100644 --- a/server/src/http/mcp/tools_list/system_tools.rs +++ b/server/src/http/mcp/tools_list/system_tools.rs @@ -290,13 +290,18 @@ pub(super) fn system_tools() -> Vec { }), json!({ "name": "recover_half_written_items", - "description": "Discover (and optionally recover) half-written pipeline items — rows whose content is in the SQLite shadow and content store but whose CRDT entry is absent or tombstoned. These are invisible to every CRDT-driven read path and can't be cleaned up by delete_story / purge_story. Recovery moves each orphan's content onto a fresh non-tombstoned id and reports the old→new mapping. Defaults to dry_run=true so you can see what would change before mutating.", + "description": "Discover (and optionally recover) half-written pipeline items — rows whose content is in the SQLite shadow and content store but whose CRDT entry is absent or tombstoned. These are invisible to every CRDT-driven read path and can't be cleaned up by delete_story / purge_story. Recovery moves each orphan's content onto a fresh non-tombstoned id and reports the old→new mapping. Defaults to dry_run=true so you can see what would change before mutating. Use story_ids to restrict the operation to a specific list of ids — strongly recommended on live systems where the orphan set may include historic purged stories that should stay dead.", "inputSchema": { "type": "object", "properties": { "dry_run": { "type": "boolean", "description": "When true (default), only discover half-written items and report them. When false, perform the recovery." + }, + "story_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Optional: restrict the operation (discovery in dry-run, recovery otherwise) to these story_ids. Anything else in the orphan set is left untouched. Strongly recommended for live recovery so you don't resurrect historic purged items." } }, "required": []