feat(1001): story_ids filter for recover_half_written_items

The first dry-run against the live pipeline surfaced 735 orphans (35
tombstoned half-writes, 700 stale content rows with no CRDT entry —
mostly artefacts of the pre-numeric-id era). Bulk-recovering would
resurrect a lot of stories the user deliberately purged in the past.

Add an optional `story_ids` filter that restricts both discovery (in
dry-run) and recovery to a named subset, so the operator can target
the specific recent half-writes without touching anything else. The
new test asserts the filter is honoured.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Timmy
2026-05-13 19:26:07 +01:00
parent cd411ba443
commit 92b1744c3a
3 changed files with 74 additions and 9 deletions
+47 -3
View File
@@ -84,13 +84,23 @@ pub fn find_half_written_items() -> Vec<HalfWritten> {
/// the orphan is left in place.
/// 5. Deletes the orphan's content-store + shadow-DB rows via `delete_item`.
///
/// If `only` is provided, recovery operates only on orphans whose `story_id`
/// is in the list — everything else is left alone. Passing a filter is the
/// safe choice on a live system that may have many historic purged ids
/// visible as orphans alongside the genuinely-recent half-writes; passing
/// `None` attempts recovery of every orphan found.
///
/// Returns a (`old_id`, `new_id`, `name`) mapping for every successful
/// recovery. Orphans that failed to recover are simply omitted from the
/// returned list and logged at WARN level.
pub fn recover_half_written_items() -> Vec<RecoveryResult> {
pub fn recover_half_written_items(only: Option<&[String]>) -> Vec<RecoveryResult> {
let orphans = find_half_written_items();
let mut results = Vec::with_capacity(orphans.len());
for orphan in orphans {
if let Some(filter) = only
&& !filter.iter().any(|f| f == &orphan.story_id)
{
continue;
}
match recover_one(&orphan) {
Ok(result) => results.push(result),
Err(e) => {
@@ -358,7 +368,7 @@ mod tests {
assert_eq!(entry.name, "Tombstoned Then Half-Written");
// Run the recovery and inspect the mapping.
let results = recover_half_written_items();
let results = recover_half_written_items(None);
let mapping = results
.iter()
.find(|r| r.old_id == old_id)
@@ -382,10 +392,44 @@ mod tests {
);
// Re-running recovery is a no-op (no orphans left).
let again = recover_half_written_items();
let again = recover_half_written_items(None);
assert!(
again.iter().all(|r| r.old_id != old_id),
"recovery should be idempotent for the same orphan"
);
}
/// When `only` names a subset of ids, recovery touches just those orphans;
/// any orphan not named in the subset stays half-written.
#[test]
fn recover_only_filter_restricts_recovery_to_named_ids() {
    crdt_state::init_for_test();
    crate::db::ensure_content_store();

    // Seed two orphans: one that the filter names, one that it omits.
    let recover_id = "9810";
    let keep_id = "9811";
    [recover_id, keep_id].into_iter().for_each(|story_id| {
        let title = format!("Item {story_id}");
        let markdown = body(&title, "refactor", None);
        // Create the item, evict its CRDT entry, then write the content row
        // again so a content row exists after the eviction.
        write_item_with_content(story_id, "1_backlog", &markdown, ItemMeta::named(&title));
        crdt_state::evict_item(story_id).expect("evict should succeed");
        crate::db::content_store::write_content(ContentKey::Story(story_id), &markdown);
    });

    // Recover with a filter that names only `recover_id`.
    let wanted = vec![recover_id.to_string()];
    let results = recover_half_written_items(Some(wanted.as_slice()));
    assert_eq!(results.len(), 1, "exactly one orphan should be recovered");
    assert_eq!(results[0].old_id, recover_id);

    // Re-scan: the omitted orphan must still be listed, the recovered one not.
    let half = find_half_written_items();
    let still_orphaned = |id: &str| half.iter().any(|h| h.story_id == id);
    assert!(still_orphaned(keep_id), "filtered-out orphan should remain");
    assert!(!still_orphaned(recover_id), "recovered orphan should be gone");
}
}