92b1744c3a
The first dry-run against the live pipeline surfaced 735 orphans (35 tombstoned half-writes, 700 stale content rows with no CRDT entry — mostly artefacts of the pre-numeric-id era). Bulk-recovering would resurrect a lot of stories the user deliberately purged in the past. Add an optional `story_ids` filter that restricts both discovery (in dry-run) and recovery to a named subset, so the operator can target the specific recent half-writes without touching anything else. The new test asserts the filter is honoured. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
436 lines
16 KiB
Rust
436 lines
16 KiB
Rust
//! Recovery for half-written pipeline items (bug 1001).
|
|
//!
|
|
//! A half-written item is one whose content row exists in the content store /
|
|
//! SQLite shadow but whose CRDT entry is absent or tombstoned. These items
|
|
//! are invisible to every CRDT-driven read path (`list_refactors`,
|
|
//! `get_pipeline_status`, `read_item`, …), and `update_story` / `delete_story`
|
|
//! / `purge_story` all refuse to operate on them. They're effectively
|
|
//! orphaned.
|
|
//!
|
|
//! This module discovers them and lifts each one onto a fresh non-tombstoned
|
|
//! ID so the content becomes addressable again.
|
|
//!
|
|
//! The Stage 1 commit (`fix(1001):`) closes the door on *new* half-writes;
|
|
//! this module exists to clean up the rows that were created before that
|
|
//! commit landed.
|
|
|
|
use crate::crdt_state;
|
|
use crate::db::content_store::{ContentKey, all_content_ids, read_content};
|
|
use crate::db::ops::{ItemMeta, delete_item, next_item_number, write_item_with_content};
|
|
use crate::io::story_metadata::ItemType;
|
|
|
|
/// A pipeline item whose content row exists but whose CRDT entry is missing
|
|
/// or tombstoned.
|
|
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
|
|
pub struct HalfWritten {
|
|
/// The orphaned numeric story_id (as a string, matching CRDT keys).
|
|
pub story_id: String,
|
|
/// Best-effort human name pulled from the YAML front matter. Empty if
|
|
/// the front matter was unparseable.
|
|
pub name: String,
|
|
/// True if the CRDT has a tombstone for this id; false if the id is
|
|
/// simply absent (which can happen if the CRDT op was lost or never
|
|
/// applied for reasons other than a tombstone).
|
|
pub tombstoned: bool,
|
|
}
|
|
|
|
/// The outcome of recovering a single half-written item.
|
|
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
|
|
pub struct RecoveryResult {
|
|
/// The orphan's numeric id at the time of discovery.
|
|
pub old_id: String,
|
|
/// The fresh id the content was re-anchored to.
|
|
pub new_id: String,
|
|
/// Best-effort human name pulled from the YAML front matter.
|
|
pub name: String,
|
|
}
|
|
|
|
/// Find all half-written items currently present on this node.
|
|
///
|
|
/// Scans every id known to the in-memory content store and returns the ones
|
|
/// for which `crdt_state::read_item` is `None` (i.e. there is no live CRDT
|
|
/// entry). The `tombstoned` flag distinguishes the most common cause
|
|
/// (allocator collided with a tombstone) from rarer split-brain causes.
|
|
pub fn find_half_written_items() -> Vec<HalfWritten> {
|
|
let mut out = Vec::new();
|
|
for id in all_content_ids() {
|
|
if crdt_state::read_item(&id).is_some() {
|
|
continue;
|
|
}
|
|
// No live CRDT entry — orphan.
|
|
let content = read_content(ContentKey::Story(&id)).unwrap_or_default();
|
|
let name = parse_name_from_front_matter(&content).unwrap_or_default();
|
|
out.push(HalfWritten {
|
|
story_id: id.clone(),
|
|
name,
|
|
tombstoned: crdt_state::is_tombstoned(&id),
|
|
});
|
|
}
|
|
out.sort_by(|a, b| a.story_id.cmp(&b.story_id));
|
|
out
|
|
}
|
|
|
|
/// Move every half-written item's content onto a fresh non-tombstoned id.
|
|
///
|
|
/// For each orphan, this:
|
|
/// 1. Allocates a fresh id via `next_item_number` (which now skips
|
|
/// tombstones).
|
|
/// 2. Re-inserts the same content body at the new id with stage = backlog
|
|
/// and the parsed name.
|
|
/// 3. Re-applies `item_type` and `depends_on` from the original YAML front
|
|
/// matter where parseable.
|
|
/// 4. Verifies the new entry is visible via `read_item`. On verification
|
|
/// failure, the new id's content is rolled back (defence-in-depth) and
|
|
/// the orphan is left in place.
|
|
/// 5. Deletes the orphan's content-store + shadow-DB rows via `delete_item`.
|
|
///
|
|
/// If `only` is provided, recovery operates only on orphans whose `story_id`
|
|
/// is in the list — everything else is left alone. This is the safe default
|
|
/// for a live system that may have many historic purged ids visible as
|
|
/// orphans alongside the genuinely-recent half-writes.
|
|
///
|
|
/// Returns a (`old_id`, `new_id`, `name`) mapping for every successful
|
|
/// recovery. Orphans that failed to recover are simply omitted from the
|
|
/// returned list and logged at WARN level.
|
|
pub fn recover_half_written_items(only: Option<&[String]>) -> Vec<RecoveryResult> {
|
|
let orphans = find_half_written_items();
|
|
let mut results = Vec::with_capacity(orphans.len());
|
|
for orphan in orphans {
|
|
if let Some(filter) = only
|
|
&& !filter.iter().any(|f| f == &orphan.story_id)
|
|
{
|
|
continue;
|
|
}
|
|
match recover_one(&orphan) {
|
|
Ok(result) => results.push(result),
|
|
Err(e) => {
|
|
crate::slog_warn!(
|
|
"[db::recover] could not recover half-written id '{id}': {e}",
|
|
id = orphan.story_id
|
|
);
|
|
}
|
|
}
|
|
}
|
|
results
|
|
}
|
|
|
|
fn recover_one(orphan: &HalfWritten) -> Result<RecoveryResult, String> {
|
|
let content = read_content(ContentKey::Story(&orphan.story_id))
|
|
.ok_or_else(|| "content store row vanished between discovery and recovery".to_string())?;
|
|
|
|
// Best-effort metadata extraction from the YAML front matter. These all
|
|
// fall back to safe defaults if the line is missing or malformed.
|
|
let name = parse_name_from_front_matter(&content).unwrap_or_default();
|
|
let item_type = parse_type_from_front_matter(&content);
|
|
let depends_on = parse_depends_on_from_front_matter(&content);
|
|
|
|
let new_number = next_item_number();
|
|
let new_id = new_number.to_string();
|
|
|
|
// Sanity: the allocator should never hand back a tombstone, but bail if
|
|
// it somehow did so we don't reproduce the original bug.
|
|
if crdt_state::is_tombstoned(&new_id) {
|
|
return Err(format!(
|
|
"allocator returned tombstoned id '{new_id}' for recovery — refusing to write"
|
|
));
|
|
}
|
|
|
|
write_item_with_content(
|
|
&new_id,
|
|
"1_backlog",
|
|
&content,
|
|
ItemMeta {
|
|
name: if name.is_empty() {
|
|
None
|
|
} else {
|
|
Some(name.clone())
|
|
},
|
|
..Default::default()
|
|
},
|
|
);
|
|
if let Some(t) = item_type {
|
|
crdt_state::set_item_type(&new_id, Some(t));
|
|
}
|
|
if !depends_on.is_empty() {
|
|
crdt_state::set_depends_on(&new_id, &depends_on);
|
|
}
|
|
|
|
// Verify the new id materialised before we drop the orphan row.
|
|
if crdt_state::read_item(&new_id).is_none() {
|
|
// The new id didn't take — extremely unlikely after the Stage 1
|
|
// fixes, but if it happens, roll back the new write and leave the
|
|
// orphan in place for a human to look at.
|
|
delete_item(&new_id);
|
|
return Err(format!(
|
|
"newly-allocated id '{new_id}' did not register in CRDT"
|
|
));
|
|
}
|
|
|
|
// Drop the orphan content/shadow row.
|
|
delete_item(&orphan.story_id);
|
|
|
|
Ok(RecoveryResult {
|
|
old_id: orphan.story_id.clone(),
|
|
new_id,
|
|
name,
|
|
})
|
|
}
|
|
|
|
/// Pull the YAML front-matter block (everything between the first two `---`
|
|
/// lines, if any) as a slice of source lines.
|
|
fn front_matter_lines(content: &str) -> Vec<&str> {
|
|
let mut iter = content.lines();
|
|
let Some(first) = iter.next() else {
|
|
return Vec::new();
|
|
};
|
|
if first.trim() != "---" {
|
|
return Vec::new();
|
|
}
|
|
let mut out = Vec::new();
|
|
for line in iter {
|
|
if line.trim() == "---" {
|
|
return out;
|
|
}
|
|
out.push(line);
|
|
}
|
|
Vec::new() // no closing `---`
|
|
}
|
|
|
|
/// Parse `name: "..."` (or unquoted) from the YAML front matter.
|
|
fn parse_name_from_front_matter(content: &str) -> Option<String> {
|
|
for line in front_matter_lines(content) {
|
|
let trimmed = line.trim_start();
|
|
if let Some(rest) = trimmed.strip_prefix("name:") {
|
|
let value = rest.trim();
|
|
// Strip optional surrounding quotes.
|
|
let unquoted = value
|
|
.strip_prefix('"')
|
|
.and_then(|s| s.strip_suffix('"'))
|
|
.unwrap_or(value);
|
|
if unquoted.is_empty() {
|
|
return None;
|
|
}
|
|
return Some(unquoted.to_string());
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Parse `type: <story|bug|spike|refactor|epic>` from the YAML front matter.
|
|
fn parse_type_from_front_matter(content: &str) -> Option<ItemType> {
|
|
for line in front_matter_lines(content) {
|
|
let trimmed = line.trim_start();
|
|
if let Some(rest) = trimmed.strip_prefix("type:") {
|
|
let value = rest.trim();
|
|
return ItemType::from_str(value);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Parse `depends_on: [a, b, c]` from the YAML front matter.
|
|
///
|
|
/// Tolerates trailing whitespace and odd spacing; ignores entries that don't
|
|
/// parse as `u32`.
|
|
fn parse_depends_on_from_front_matter(content: &str) -> Vec<u32> {
|
|
for line in front_matter_lines(content) {
|
|
let trimmed = line.trim_start();
|
|
if let Some(rest) = trimmed.strip_prefix("depends_on:") {
|
|
let value = rest.trim();
|
|
// Expect a `[ ... ]` array literal.
|
|
let inner = value
|
|
.strip_prefix('[')
|
|
.and_then(|s| s.strip_suffix(']'))
|
|
.unwrap_or("");
|
|
return inner
|
|
.split(',')
|
|
.filter_map(|s| s.trim().parse::<u32>().ok())
|
|
.collect();
|
|
}
|
|
}
|
|
Vec::new()
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
/// Build a story body that mirrors what create_*_file writes.
|
|
fn body(name: &str, item_type: &str, depends_on: Option<&[u32]>) -> String {
|
|
let mut s = String::from("---\n");
|
|
s.push_str(&format!("type: {item_type}\n"));
|
|
s.push_str(&format!("name: \"{}\"\n", name.replace('"', "\\\"")));
|
|
if let Some(deps) = depends_on
|
|
&& !deps.is_empty()
|
|
{
|
|
let parts: Vec<String> = deps.iter().map(u32::to_string).collect();
|
|
s.push_str(&format!("depends_on: [{}]\n", parts.join(", ")));
|
|
}
|
|
s.push_str("---\n\n# Heading\n\n## Acceptance Criteria\n\n- [ ] Real AC\n");
|
|
s
|
|
}
|
|
|
|
#[test]
|
|
fn front_matter_parsers_extract_name_type_and_depends_on() {
|
|
let content = body("My Refactor: typed Stage", "refactor", Some(&[42, 99]));
|
|
assert_eq!(
|
|
parse_name_from_front_matter(&content).as_deref(),
|
|
Some("My Refactor: typed Stage")
|
|
);
|
|
assert_eq!(
|
|
parse_type_from_front_matter(&content),
|
|
Some(ItemType::Refactor)
|
|
);
|
|
assert_eq!(parse_depends_on_from_front_matter(&content), vec![42, 99]);
|
|
}
|
|
|
|
#[test]
|
|
fn front_matter_parsers_handle_missing_optional_fields() {
|
|
let content = body("Plain Story", "story", None);
|
|
assert_eq!(
|
|
parse_name_from_front_matter(&content).as_deref(),
|
|
Some("Plain Story")
|
|
);
|
|
assert_eq!(
|
|
parse_depends_on_from_front_matter(&content),
|
|
Vec::<u32>::new()
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn front_matter_parsers_handle_no_front_matter() {
|
|
let content = "no front matter here\n";
|
|
assert!(parse_name_from_front_matter(content).is_none());
|
|
assert!(parse_type_from_front_matter(content).is_none());
|
|
assert!(parse_depends_on_from_front_matter(content).is_empty());
|
|
}
|
|
|
|
#[test]
|
|
fn find_returns_empty_when_all_items_have_crdt_entries() {
|
|
crdt_state::init_for_test();
|
|
crate::db::ensure_content_store();
|
|
write_item_with_content(
|
|
"9800",
|
|
"1_backlog",
|
|
&body("Healthy", "refactor", None),
|
|
ItemMeta::named("Healthy"),
|
|
);
|
|
let half = find_half_written_items();
|
|
assert!(
|
|
half.iter().all(|h| h.story_id != "9800"),
|
|
"9800 should not be reported as half-written: {half:?}"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn recover_lifts_half_written_item_to_a_fresh_id() {
|
|
crdt_state::init_for_test();
|
|
crate::db::ensure_content_store();
|
|
|
|
// Seed an item, tombstone it (this leaves a tombstone in the CRDT
|
|
// set), then manually write content at the tombstoned id to
|
|
// simulate a pre-fix half-write.
|
|
let old_id = "9801";
|
|
let content = body("Tombstoned Then Half-Written", "refactor", Some(&[100]));
|
|
write_item_with_content(
|
|
old_id,
|
|
"1_backlog",
|
|
&content,
|
|
ItemMeta::named("Tombstoned Then Half-Written"),
|
|
);
|
|
crdt_state::evict_item(old_id).expect("evict should succeed");
|
|
// Manually re-write content at the now-tombstoned id to mimic the
|
|
// pre-fix half-write (write_item will silently reject the CRDT side).
|
|
crate::db::content_store::write_content(ContentKey::Story(old_id), &content);
|
|
crdt_state::write_item(
|
|
old_id,
|
|
&crate::pipeline_state::Stage::Backlog,
|
|
Some("Tombstoned Then Half-Written"),
|
|
None,
|
|
None,
|
|
None,
|
|
None,
|
|
None,
|
|
None,
|
|
);
|
|
assert!(
|
|
crdt_state::read_item(old_id).is_none(),
|
|
"test setup is wrong — old id should still be invisible after the tombstone-blocked write_item"
|
|
);
|
|
|
|
// Sanity: find_half_written_items picks it up.
|
|
let half = find_half_written_items();
|
|
let entry = half
|
|
.iter()
|
|
.find(|h| h.story_id == old_id)
|
|
.expect("half-written discovery should find the orphan");
|
|
assert!(entry.tombstoned, "orphan should be flagged as tombstoned");
|
|
assert_eq!(entry.name, "Tombstoned Then Half-Written");
|
|
|
|
// Run the recovery and inspect the mapping.
|
|
let results = recover_half_written_items(None);
|
|
let mapping = results
|
|
.iter()
|
|
.find(|r| r.old_id == old_id)
|
|
.expect("recovery should produce a mapping for the orphan");
|
|
assert_ne!(mapping.new_id, mapping.old_id);
|
|
assert_eq!(mapping.name, "Tombstoned Then Half-Written");
|
|
|
|
// The new id should be visible in the CRDT.
|
|
let view = crdt_state::read_item(&mapping.new_id)
|
|
.expect("new id must be live in CRDT after recovery");
|
|
assert_eq!(view.name(), "Tombstoned Then Half-Written");
|
|
// depends_on was carried across.
|
|
assert_eq!(view.depends_on(), &[100]);
|
|
// item_type was carried across.
|
|
assert_eq!(view.item_type(), Some(ItemType::Refactor));
|
|
|
|
// The old orphan row is gone.
|
|
assert!(
|
|
read_content(ContentKey::Story(old_id)).is_none(),
|
|
"orphan content should be cleared after recovery"
|
|
);
|
|
|
|
// Re-running recovery is a no-op (no orphans left).
|
|
let again = recover_half_written_items(None);
|
|
assert!(
|
|
again.iter().all(|r| r.old_id != old_id),
|
|
"recovery should be idempotent for the same orphan"
|
|
);
|
|
}
|
|
|
|
/// The `only` filter restricts recovery to a specific id set; orphans
|
|
/// outside the filter are left alone.
|
|
#[test]
|
|
fn recover_only_filter_restricts_recovery_to_named_ids() {
|
|
crdt_state::init_for_test();
|
|
crate::db::ensure_content_store();
|
|
|
|
// Two orphans, only one in the filter.
|
|
let recover_id = "9810";
|
|
let keep_id = "9811";
|
|
for id in [recover_id, keep_id] {
|
|
let name = format!("Item {id}");
|
|
let content = body(&name, "refactor", None);
|
|
write_item_with_content(id, "1_backlog", &content, ItemMeta::named(&name));
|
|
crdt_state::evict_item(id).expect("evict should succeed");
|
|
crate::db::content_store::write_content(ContentKey::Story(id), &content);
|
|
}
|
|
|
|
let results = recover_half_written_items(Some(&[recover_id.to_string()]));
|
|
assert_eq!(results.len(), 1, "exactly one orphan should be recovered");
|
|
assert_eq!(results[0].old_id, recover_id);
|
|
|
|
// The filtered-out orphan must still be a half-written item.
|
|
let half = find_half_written_items();
|
|
assert!(
|
|
half.iter().any(|h| h.story_id == keep_id),
|
|
"filtered-out orphan should remain"
|
|
);
|
|
assert!(
|
|
half.iter().all(|h| h.story_id != recover_id),
|
|
"recovered orphan should be gone"
|
|
);
|
|
}
|
|
}
|