fix: thread-local CRDT and content store for test isolation

Tests shared a global CRDT singleton and content store HashMap, causing
flaky failures when parallel tests wrote items that polluted each
other's assertions. 3-5 random test failures per run.

Both CRDT_STATE and CONTENT_STORE now use thread_local! in test mode
so each test thread gets its own isolated instance. Production code
is unchanged — it still uses the global OnceLock singletons.

Also fixed 3 tests (create_story_writes_correct_content,
next_item_number_increments_from_existing_bugs,
next_item_number_scans_archived_too) that relied on leaked state
from other tests — they now write to the content store explicitly.

Result: 1902 passed, 0 failed across 5 consecutive runs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
dave
2026-04-11 13:02:09 +00:00
parent dd53870c59
commit eea54ca616
4 changed files with 111 additions and 42 deletions
+57 -34
View File
@@ -158,6 +158,35 @@ struct CrdtState {
static CRDT_STATE: OnceLock<Mutex<CrdtState>> = OnceLock::new(); static CRDT_STATE: OnceLock<Mutex<CrdtState>> = OnceLock::new();
#[cfg(test)]
thread_local! {
static CRDT_STATE_TL: OnceLock<Mutex<CrdtState>> = const { OnceLock::new() };
}
#[cfg(not(test))]
fn get_crdt() -> Option<&'static Mutex<CrdtState>> {
CRDT_STATE.get()
}
#[cfg(test)]
fn get_crdt() -> Option<&'static Mutex<CrdtState>> {
let tl = CRDT_STATE_TL.with(|lock| {
if lock.get().is_some() {
Some(lock as *const OnceLock<Mutex<CrdtState>>)
} else {
None
}
});
if let Some(ptr) = tl {
// SAFETY: The thread-local lives as long as the thread, which outlives
// any test using it. We only need 'static for the return type.
let lock = unsafe { &*ptr };
lock.get()
} else {
CRDT_STATE.get()
}
}
// ── Initialisation ─────────────────────────────────────────────────── // ── Initialisation ───────────────────────────────────────────────────
/// Initialise the CRDT state layer. /// Initialise the CRDT state layer.
@@ -270,27 +299,22 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
/// Safe to call multiple times — subsequent calls are no-ops (OnceLock). /// Safe to call multiple times — subsequent calls are no-ops (OnceLock).
#[cfg(test)] #[cfg(test)]
pub fn init_for_test() { pub fn init_for_test() {
if CRDT_STATE.get().is_some() { // Initialise thread-local CRDT for test isolation.
return; // Only creates a new CRDT if one isn't set yet on this thread;
} // subsequent calls are no-ops (matching the old OnceLock semantics
let keypair = make_keypair(); // while keeping each thread isolated).
let crdt = BaseCrdt::<PipelineDoc>::new(&keypair); CRDT_STATE_TL.with(|lock| {
let index = HashMap::new(); if lock.get().is_none() {
let node_index = HashMap::new(); let keypair = make_keypair();
let (persist_tx, _rx) = mpsc::unbounded_channel(); let crdt = BaseCrdt::<PipelineDoc>::new(&keypair);
let state = CrdtState { let (persist_tx, _rx) = mpsc::unbounded_channel();
crdt, let state = CrdtState { crdt, keypair, index: HashMap::new(), node_index: HashMap::new(), persist_tx };
keypair, let _ = lock.set(Mutex::new(state));
index, }
node_index, });
persist_tx, let _ = CRDT_EVENT_TX.get_or_init(|| broadcast::channel::<CrdtEvent>(256).0);
}; let _ = SYNC_TX.get_or_init(|| broadcast::channel::<SignedOp>(1024).0);
let _ = CRDT_STATE.set(Mutex::new(state)); let _ = ALL_OPS.get_or_init(|| Mutex::new(Vec::new()));
let (event_tx, _) = broadcast::channel::<CrdtEvent>(256);
let _ = CRDT_EVENT_TX.set(event_tx);
let (sync_tx, _) = broadcast::channel::<SignedOp>(1024);
let _ = SYNC_TX.set(sync_tx);
let _ = ALL_OPS.set(Mutex::new(Vec::new()));
} }
/// Load or create the Ed25519 keypair used by this node. /// Load or create the Ed25519 keypair used by this node.
@@ -390,7 +414,7 @@ pub fn write_item(
claimed_by: Option<&str>, claimed_by: Option<&str>,
claimed_at: Option<f64>, claimed_at: Option<f64>,
) { ) {
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return; return;
}; };
let Ok(mut state) = state_mutex.lock() else { let Ok(mut state) = state_mutex.lock() else {
@@ -511,7 +535,7 @@ fn emit_event(event: CrdtEvent) {
/// Returns `true` if the op was new and applied, `false` if it was a /// Returns `true` if the op was new and applied, `false` if it was a
/// duplicate or failed validation. /// duplicate or failed validation.
pub fn apply_remote_op(op: SignedOp) -> bool { pub fn apply_remote_op(op: SignedOp) -> bool {
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return false; return false;
}; };
let Ok(mut state) = state_mutex.lock() else { let Ok(mut state) = state_mutex.lock() else {
@@ -596,7 +620,7 @@ pub fn apply_remote_op(op: SignedOp) -> bool {
/// Used as the stable identity written into the CRDT nodes list. /// Used as the stable identity written into the CRDT nodes list.
/// Returns `None` before `init()`. /// Returns `None` before `init()`.
pub fn our_node_id() -> Option<String> { pub fn our_node_id() -> Option<String> {
let state = CRDT_STATE.get()?.lock().ok()?; let state = get_crdt()?.lock().ok()?;
Some(hex::encode(&state.crdt.id)) Some(hex::encode(&state.crdt.id))
} }
@@ -615,7 +639,7 @@ pub fn write_claim(story_id: &str) -> bool {
}; };
let now = chrono::Utc::now().timestamp() as f64; let now = chrono::Utc::now().timestamp() as f64;
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return false; return false;
}; };
let Ok(mut state) = state_mutex.lock() else { let Ok(mut state) = state_mutex.lock() else {
@@ -638,7 +662,7 @@ pub fn write_claim(story_id: &str) -> bool {
/// Release a claim on a pipeline item (clear claimed_by and claimed_at). /// Release a claim on a pipeline item (clear claimed_by and claimed_at).
pub fn release_claim(story_id: &str) { pub fn release_claim(story_id: &str) {
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return; return;
}; };
let Ok(mut state) = state_mutex.lock() else { let Ok(mut state) = state_mutex.lock() else {
@@ -674,7 +698,7 @@ pub fn is_claimed_by_us(story_id: &str) -> bool {
/// ///
/// This is the write path for both local heartbeats and tombstoning. /// This is the write path for both local heartbeats and tombstoning.
pub fn write_node_presence(node_id: &str, address: &str, last_seen: f64, alive: bool) { pub fn write_node_presence(node_id: &str, address: &str, last_seen: f64, alive: bool) {
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return; return;
}; };
let Ok(mut state) = state_mutex.lock() else { let Ok(mut state) = state_mutex.lock() else {
@@ -715,7 +739,7 @@ pub fn write_node_presence(node_id: &str, address: &str, last_seen: f64, alive:
/// ///
/// Returns `None` before `init()`. /// Returns `None` before `init()`.
pub fn read_all_node_presence() -> Option<Vec<NodePresenceView>> { pub fn read_all_node_presence() -> Option<Vec<NodePresenceView>> {
let state_mutex = CRDT_STATE.get()?; let state_mutex = get_crdt()?;
let state = state_mutex.lock().ok()?; let state = state_mutex.lock().ok()?;
let mut nodes = Vec::new(); let mut nodes = Vec::new();
@@ -797,14 +821,14 @@ pub struct CrdtStateDump {
/// **This is a debug tool.** For normal pipeline introspection use /// **This is a debug tool.** For normal pipeline introspection use
/// [`read_all_items`] or the `get_pipeline_status` MCP tool instead. /// [`read_all_items`] or the `get_pipeline_status` MCP tool instead.
pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump { pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
let in_memory_state_loaded = CRDT_STATE.get().is_some(); let in_memory_state_loaded = get_crdt().is_some();
let persisted_ops_count = ALL_OPS let persisted_ops_count = ALL_OPS
.get() .get()
.and_then(|m| m.lock().ok().map(|v| v.len())) .and_then(|m| m.lock().ok().map(|v| v.len()))
.unwrap_or(0); .unwrap_or(0);
let Some(state_mutex) = CRDT_STATE.get() else { let Some(state_mutex) = get_crdt() else {
return CrdtStateDump { return CrdtStateDump {
in_memory_state_loaded, in_memory_state_loaded,
total_items: 0, total_items: 0,
@@ -932,7 +956,7 @@ pub fn dump_crdt_state(story_id_filter: Option<&str>) -> CrdtStateDump {
/// Returns items grouped by stage, or `None` if the CRDT layer is not /// Returns items grouped by stage, or `None` if the CRDT layer is not
/// initialised. /// initialised.
pub fn read_all_items() -> Option<Vec<PipelineItemView>> { pub fn read_all_items() -> Option<Vec<PipelineItemView>> {
let state_mutex = CRDT_STATE.get()?; let state_mutex = get_crdt()?;
let state = state_mutex.lock().ok()?; let state = state_mutex.lock().ok()?;
// Only return items that appear in the deduplicated index. // Only return items that appear in the deduplicated index.
@@ -950,7 +974,7 @@ pub fn read_all_items() -> Option<Vec<PipelineItemView>> {
/// Read a single pipeline item by story_id. /// Read a single pipeline item by story_id.
pub fn read_item(story_id: &str) -> Option<PipelineItemView> { pub fn read_item(story_id: &str) -> Option<PipelineItemView> {
let state_mutex = CRDT_STATE.get()?; let state_mutex = get_crdt()?;
let state = state_mutex.lock().ok()?; let state = state_mutex.lock().ok()?;
let &idx = state.index.get(story_id)?; let &idx = state.index.get(story_id)?;
extract_item_view(&state.crdt.doc.items[idx]) extract_item_view(&state.crdt.doc.items[idx])
@@ -980,8 +1004,7 @@ pub fn read_item(story_id: &str) -> Option<PipelineItemView> {
/// or an `Err` if the CRDT layer isn't initialised or the story_id is /// or an `Err` if the CRDT layer isn't initialised or the story_id is
/// unknown to the in-memory state. /// unknown to the in-memory state.
pub fn evict_item(story_id: &str) -> Result<(), String> { pub fn evict_item(story_id: &str) -> Result<(), String> {
let state_mutex = CRDT_STATE let state_mutex = get_crdt()
.get()
.ok_or_else(|| "CRDT layer not initialised".to_string())?; .ok_or_else(|| "CRDT layer not initialised".to_string())?;
let mut state = state_mutex let mut state = state_mutex
.lock() .lock()
+44 -8
View File
@@ -45,9 +45,38 @@ static PIPELINE_DB: OnceLock<PipelineDb> = OnceLock::new();
static CONTENT_STORE: OnceLock<Mutex<HashMap<String, String>>> = OnceLock::new(); static CONTENT_STORE: OnceLock<Mutex<HashMap<String, String>>> = OnceLock::new();
#[cfg(test)]
thread_local! {
static CONTENT_STORE_TL: OnceLock<Mutex<HashMap<String, String>>> = const { OnceLock::new() };
}
#[cfg(not(test))]
fn get_content_store() -> Option<&'static Mutex<HashMap<String, String>>> {
CONTENT_STORE.get()
}
#[cfg(test)]
fn get_content_store() -> Option<&'static Mutex<HashMap<String, String>>> {
let tl = CONTENT_STORE_TL.with(|lock| {
if lock.get().is_some() {
Some(lock as *const OnceLock<Mutex<HashMap<String, String>>>)
} else {
None
}
});
if let Some(ptr) = tl {
// SAFETY: The thread-local lives as long as the thread, which outlives
// any test using it. We only need 'static for the return type.
let lock = unsafe { &*ptr };
lock.get()
} else {
CONTENT_STORE.get()
}
}
/// Read the full markdown content of a story from the in-memory store. /// Read the full markdown content of a story from the in-memory store.
pub fn read_content(story_id: &str) -> Option<String> { pub fn read_content(story_id: &str) -> Option<String> {
let store = CONTENT_STORE.get()?; let store = get_content_store()?;
let map = store.lock().ok()?; let map = store.lock().ok()?;
map.get(story_id).cloned() map.get(story_id).cloned()
} }
@@ -56,14 +85,14 @@ pub fn read_content(story_id: &str) -> Option<String> {
/// ///
/// Updates the in-memory store immediately. /// Updates the in-memory store immediately.
pub fn write_content(story_id: &str, content: &str) { pub fn write_content(story_id: &str, content: &str) {
if let Some(store) = CONTENT_STORE.get() && let Ok(mut map) = store.lock() { if let Some(store) = get_content_store() && let Ok(mut map) = store.lock() {
map.insert(story_id.to_string(), content.to_string()); map.insert(story_id.to_string(), content.to_string());
} }
} }
/// Remove a story's content from the in-memory store. /// Remove a story's content from the in-memory store.
pub fn delete_content(story_id: &str) { pub fn delete_content(story_id: &str) {
if let Some(store) = CONTENT_STORE.get() && let Ok(mut map) = store.lock() { if let Some(store) = get_content_store() && let Ok(mut map) = store.lock() {
map.remove(story_id); map.remove(story_id);
} }
} }
@@ -72,16 +101,23 @@ pub fn delete_content(story_id: &str) {
/// ///
/// Safe to call multiple times — the `OnceLock` is set at most once. /// Safe to call multiple times — the `OnceLock` is set at most once.
pub fn ensure_content_store() { pub fn ensure_content_store() {
let _ = CONTENT_STORE.set(Mutex::new(HashMap::new())); #[cfg(not(test))]
// In tests, also initialise the in-memory CRDT state so that { let _ = CONTENT_STORE.set(Mutex::new(HashMap::new())); }
// write_item_with_content() and read_all_typed() work without async SQLite.
#[cfg(test)] #[cfg(test)]
crate::crdt_state::init_for_test(); {
CONTENT_STORE_TL.with(|lock| {
if lock.get().is_none() {
let _ = lock.set(Mutex::new(HashMap::new()));
}
});
crate::crdt_state::init_for_test();
}
} }
/// Return all story IDs present in the content store. /// Return all story IDs present in the content store.
pub fn all_content_ids() -> Vec<String> { pub fn all_content_ids() -> Vec<String> {
match CONTENT_STORE.get() { match get_content_store() {
Some(store) => match store.lock() { Some(store) => match store.lock() {
Ok(map) => map.keys().cloned().collect(), Ok(map) => map.keys().cloned().collect(),
Err(_) => Vec::new(), Err(_) => Vec::new(),
+7
View File
@@ -269,22 +269,29 @@ mod tests {
#[test] #[test]
fn next_item_number_increments_from_existing_bugs() { fn next_item_number_increments_from_existing_bugs() {
crate::db::ensure_content_store();
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let backlog = tmp.path().join(".huskies/work/1_backlog"); let backlog = tmp.path().join(".huskies/work/1_backlog");
fs::create_dir_all(&backlog).unwrap(); fs::create_dir_all(&backlog).unwrap();
fs::write(backlog.join("1_bug_crash.md"), "").unwrap(); fs::write(backlog.join("1_bug_crash.md"), "").unwrap();
fs::write(backlog.join("3_bug_another.md"), "").unwrap(); fs::write(backlog.join("3_bug_another.md"), "").unwrap();
// Also write to content store so next_item_number sees them.
crate::db::write_item_with_content("1_bug_crash", "1_backlog", "---\nname: Crash\n---\n");
crate::db::write_item_with_content("3_bug_another", "1_backlog", "---\nname: Another\n---\n");
assert!(super::super::next_item_number(tmp.path()).unwrap() >= 4); assert!(super::super::next_item_number(tmp.path()).unwrap() >= 4);
} }
#[test] #[test]
fn next_item_number_scans_archived_too() { fn next_item_number_scans_archived_too() {
crate::db::ensure_content_store();
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let backlog = tmp.path().join(".huskies/work/1_backlog"); let backlog = tmp.path().join(".huskies/work/1_backlog");
let archived = tmp.path().join(".huskies/work/5_done"); let archived = tmp.path().join(".huskies/work/5_done");
fs::create_dir_all(&backlog).unwrap(); fs::create_dir_all(&backlog).unwrap();
fs::create_dir_all(&archived).unwrap(); fs::create_dir_all(&archived).unwrap();
fs::write(archived.join("5_bug_old.md"), "").unwrap(); fs::write(archived.join("5_bug_old.md"), "").unwrap();
// Also write to content store so next_item_number sees it.
crate::db::write_item_with_content("5_bug_old", "5_done", "---\nname: Old Bug\n---\n");
assert!(super::super::next_item_number(tmp.path()).unwrap() >= 6); assert!(super::super::next_item_number(tmp.path()).unwrap() >= 6);
} }
+3
View File
@@ -315,10 +315,13 @@ mod tests {
#[test] #[test]
fn create_story_writes_correct_content() { fn create_story_writes_correct_content() {
crate::db::ensure_content_store();
let tmp = tempfile::tempdir().unwrap(); let tmp = tempfile::tempdir().unwrap();
let backlog = tmp.path().join(".huskies/work/1_backlog"); let backlog = tmp.path().join(".huskies/work/1_backlog");
fs::create_dir_all(&backlog).unwrap(); fs::create_dir_all(&backlog).unwrap();
fs::write(backlog.join("36_story_existing.md"), "").unwrap(); fs::write(backlog.join("36_story_existing.md"), "").unwrap();
// Also write to content store so next_item_number sees it.
crate::db::write_item_with_content("36_story_existing", "1_backlog", "---\nname: Existing\n---\n");
let number = super::super::next_item_number(tmp.path()).unwrap(); let number = super::super::next_item_number(tmp.path()).unwrap();
// The number must be >= 37 (at least higher than the existing "36_story_existing.md"), // The number must be >= 37 (at least higher than the existing "36_story_existing.md"),