fix(1001): stop create_* from half-writing onto tombstoned IDs

Root cause: db::next_item_number scanned the visible CRDT index and the
content store but not the tombstone set, so it would hand out a numeric
ID whose CRDT entry had been tombstoned. crdt_state::write_item then
silently no-op'd the insert (tombstone-match guard) while the content
store and SQLite shadow happily accepted the row, producing a split-
brain half-write that was invisible to every CRDT-driven read path and
couldn't be cleaned up by delete_story / purge_story.

This change closes the loop:

- crdt_state::read::{is_tombstoned, tombstoned_ids} expose the
  tombstone set so callers outside crdt_state can consult it.

- db::next_item_number now scans tombstoned_ids() too. The allocator
  skips past tombstoned numeric IDs instead of treating their slots as
  free.

- write_item logs a WARN when it rejects a write for a tombstoned ID
  (was silent). The warn is a tripwire — if the allocator ever lets one
  slip through again we'll see it in the log.

- create_item_in_backlog adds two defence-in-depth checks:
    (a) before any write, reject if the allocator returned a
        tombstoned ID;
    (b) after the writes, call read_item to confirm the CRDT entry
        materialised. If not, roll back the content-store + shadow-DB
        rows via db::delete_item and return Err.

Regression tests cover the allocator skip, the is_tombstoned accessor,
and the create_item_in_backlog rollback path.

Out of scope for this commit:
- Recovery of the already-half-written items currently in the running
  pipeline (989, 1000, 1001) — Stage 2/3 of the plan, handled
  separately.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Timmy
2026-05-13 19:05:48 +01:00
parent caed894db9
commit c61f715878
6 changed files with 222 additions and 12 deletions
+50
View File
@@ -245,6 +245,56 @@ mod tests {
assert!(n >= 1);
}
/// Bug 1001 regression guard: the allocator must never return an ID that has
/// been tombstoned. Before the fix, evicting an item and then creating a new
/// one could reuse the tombstoned numeric slot, yielding a half-written item
/// (accepted by the content store and shadow DB, silently rejected by the
/// CRDT).
#[test]
fn next_item_number_skips_tombstoned_ids() {
    crate::crdt_state::init_for_test();
    ensure_content_store();

    // Create a high-numbered item through the regular write path so that it
    // is registered in the CRDT index.
    let doomed_id = "9990";
    write_item_with_content(
        doomed_id,
        "1_backlog",
        "---\nname: To Be Tombstoned\n---\n",
        ItemMeta::named("To Be Tombstoned"),
    );

    // Evicting puts 9990 into state.tombstones and clears its content-store
    // row. An allocator that only looks at the visible index and the content
    // ids can no longer see it and would hand 9990 out a second time.
    crate::crdt_state::evict_item(doomed_id).expect("evict should succeed");
    assert!(crate::crdt_state::is_tombstoned(doomed_id));

    // The freshly allocated number has to land strictly above the tombstone.
    let next = next_item_number();
    assert!(
        next > 9990,
        "next_item_number must skip past tombstoned id 9990, got {next}"
    );
}
/// `is_tombstoned` reports `false` for a freshly written item and `true`
/// once that item has been evicted.
#[test]
fn is_tombstoned_returns_true_after_evict() {
    use crate::crdt_state;

    crdt_state::init_for_test();
    ensure_content_store();

    let story = "9991";
    write_item_with_content(
        story,
        "1_backlog",
        "---\nname: Soon To Vanish\n---\n",
        ItemMeta::named("Soon To Vanish"),
    );

    // Live item: not in the tombstone set yet.
    assert!(!crdt_state::is_tombstoned(story));

    // After eviction the same ID must be reported as tombstoned.
    crdt_state::evict_item(story).expect("evict should succeed");
    assert!(crdt_state::is_tombstoned(story));
}
/// Regression test for bug 537: `delete_item` must issue a real SQL DELETE
/// rather than upserting stage = "deleted". A "deleted" shadow row that
/// survives a restart would be picked up by `sync_crdt_stages_from_db` and
+29 -11
View File
@@ -212,20 +212,27 @@ pub fn delete_item(story_id: &str) {
}
}
/// Get the next available item number by scanning both the CRDT state
/// and the in-memory content store for the highest existing number.
/// Get the next available item number by scanning the CRDT state, the
/// in-memory content store, AND the tombstone set for the highest existing
/// number.
///
/// Tombstoned IDs are excluded from `read_all_typed` (their CRDT entry is
/// `is_deleted`) and from `all_content_ids` (their content row is cleared by
/// `evict_item`). Without consulting the tombstone set, the allocator can
/// hand out a tombstoned numeric ID; `crdt_state::write_item` would then
/// silently reject the new entry while the content store and SQLite shadow
/// happily accept it, producing a split-brain half-write (bug 1001).
pub fn next_item_number() -> u32 {
let mut max_num: u32 = 0;
let parse_leading_digits = |s: &str| -> Option<u32> {
let num_str: String = s.chars().take_while(|c| c.is_ascii_digit()).collect();
num_str.parse::<u32>().ok()
};
// Scan CRDT items via typed projection.
for item in crate::pipeline_state::read_all_typed() {
let num_str: String = item
.story_id
.0
.chars()
.take_while(|c| c.is_ascii_digit())
.collect();
if let Ok(n) = num_str.parse::<u32>()
if let Some(n) = parse_leading_digits(&item.story_id.0)
&& n > max_num
{
max_num = n;
@@ -234,8 +241,19 @@ pub fn next_item_number() -> u32 {
// Also scan the content store (might have items not yet in CRDT).
for id in all_content_ids() {
let num_str: String = id.chars().take_while(|c| c.is_ascii_digit()).collect();
if let Ok(n) = num_str.parse::<u32>()
if let Some(n) = parse_leading_digits(&id)
&& n > max_num
{
max_num = n;
}
}
// Also scan tombstones — a tombstoned ID still poisons that slot because
// crdt_state::write_item rejects writes for tombstoned IDs. Without this
// pass, the next allocated ID can collide with a tombstone and produce
// a half-write (bug 1001).
for id in crate::crdt_state::tombstoned_ids() {
if let Some(n) = parse_leading_digits(&id)
&& n > max_num
{
max_num = n;