huskies: merge 1128 story Bounded event queues + EventStreamGap sentinel + observability for context assembly
This commit is contained in:
+104
-6
@@ -17,6 +17,15 @@
|
||||
|
||||
use chrono::DateTime;
|
||||
|
||||
/// Logical sequence number of a pipeline event, monotonic within one sled.
///
/// The value is the position an event *would have held* in the contiguous
/// logical event stream, as counted by the event-log subscriber. It is not the
/// CRDT `event_seq`: that counter also counts gap sentinels, because it counts
/// CRDT entries. When a gap is inserted, `EventId` values are what identify the
/// range of events that were dropped.
pub type EventId = u64;
|
||||
|
||||
/// A snapshot of a single persisted pipeline transition event.
|
||||
///
|
||||
/// Constructed by [`read_event_log`] from the raw CRDT entries.
|
||||
@@ -81,22 +90,49 @@ pub fn read_event_log() -> Vec<LoggedEvent> {
|
||||
entries
|
||||
}
|
||||
|
||||
/// Append a gap sentinel to the event log for the local sled.
|
||||
///
|
||||
/// Encodes the logical [`EventId`] range `[from_id, to_id]` of dropped events
|
||||
/// using the `EventStreamGap` pipeline event marker. Should be called whenever
|
||||
/// the event-log subscriber detects a lag in the broadcast channel so that no
|
||||
/// drop is silent.
|
||||
pub fn insert_gap_sentinel(from_id: EventId, to_id: EventId) {
|
||||
let sled_id = crate::crdt_state::our_node_id().unwrap_or_default();
|
||||
crate::crdt_state::append_gap_log_entry(&sled_id, from_id, to_id);
|
||||
log_gap_observability(&sled_id, from_id, to_id);
|
||||
}
|
||||
|
||||
/// Spawn a background task that persists every `TransitionFired` event to the CRDT.
|
||||
///
|
||||
/// Subscribes to the global `TransitionFired` broadcast channel and calls
|
||||
/// [`log_transition_event`] for every received event without filtering.
|
||||
/// Lagged events are warned about but do not cause the subscriber to exit.
|
||||
/// Subscribes to the global `TransitionFired` broadcast channel. Normal events
|
||||
/// are persisted via [`log_transition_event`]. When the subscriber lags (the
|
||||
/// broadcast channel drops the oldest messages), a single
|
||||
/// `EventStreamGap` sentinel is appended to the log covering the dropped range
|
||||
/// so no transition is silently lost.
|
||||
pub fn spawn_event_log_subscriber() {
|
||||
let mut rx = crate::pipeline_state::subscribe_transitions();
|
||||
tokio::spawn(async move {
|
||||
// Tracks the next expected logical sequence number in the subscriber's
|
||||
// view of the event stream. Incremented on every successfully processed
|
||||
// event; advanced by the gap size on each lag so we can identify the
|
||||
// exact logical range of dropped events.
|
||||
let mut next_logical_seq: EventId = 0;
|
||||
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(fired) => log_transition_event(&fired),
|
||||
Ok(fired) => {
|
||||
log_transition_event(&fired);
|
||||
next_logical_seq += 1;
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
|
||||
let from = next_logical_seq;
|
||||
let to = next_logical_seq + n - 1;
|
||||
crate::slog_warn!(
|
||||
"[event-log] Subscriber lagged, skipped {n} event(s); \
|
||||
some transitions may be absent from the persistent event log."
|
||||
"[event-log] Subscriber lagged; {n} event(s) dropped \
|
||||
(logical ids {from}..={to}); gap sentinel appended."
|
||||
);
|
||||
insert_gap_sentinel(from, to);
|
||||
next_logical_seq += n;
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
||||
}
|
||||
@@ -104,6 +140,22 @@ pub fn spawn_event_log_subscriber() {
|
||||
});
|
||||
}
|
||||
|
||||
/// Emit observability log lines after inserting a gap sentinel.
|
||||
fn log_gap_observability(sled_id: &str, from_id: EventId, to_id: EventId) {
|
||||
let entries = crate::crdt_state::read_all_event_log_entries();
|
||||
let sled_total: usize = entries.iter().filter(|e| e.sled_id == sled_id).count();
|
||||
let gap_count: usize = entries
|
||||
.iter()
|
||||
.filter(|e| {
|
||||
e.sled_id == sled_id && e.pipeline_event == crate::crdt_state::GAP_PIPELINE_EVENT
|
||||
})
|
||||
.count();
|
||||
crate::slog!(
|
||||
"[event-log] gap inserted sled={sled_id} from={from_id} to={to_id} \
|
||||
sled_entries={sled_total} gap_count={gap_count}"
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -195,6 +247,52 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// AC4: simulate a feeder-queue overflow by inserting a gap sentinel between
/// real events, then check that (a) the sentinel is returned by
/// `read_event_log` with the dropped range encoded in its stage fields and
/// (b) the assembled prompt context contains the human-readable gap line
/// alongside the real events.
#[test]
fn gap_sentinel_in_log_and_assembled_context() {
    crate::crdt_state::init_for_test();

    // Three ordinary transitions take logical ids 0, 1 and 2.
    (0..3u32).for_each(|i| log_transition_event(&make_fired(i)));

    // Pretend the feeder queue overflowed and logical ids 3..=5 were lost.
    insert_gap_sentinel(3, 5);

    // One more ordinary transition arrives after the gap.
    log_transition_event(&make_fired(99));

    // (a) The sentinel must be visible through read_event_log().
    let gap = read_event_log()
        .into_iter()
        .find(|e| e.pipeline_event == crate::crdt_state::GAP_PIPELINE_EVENT);
    assert!(gap.is_some(), "gap sentinel must be present in event log");
    let gap = gap.unwrap();
    // The dropped range is carried in the stage fields: from_stage holds the
    // first EventId, to_stage the last.
    assert_eq!(gap.from_stage, "3", "gap from_stage must be '3'");
    assert_eq!(gap.to_stage, "5", "gap to_stage must be '5'");

    // (b) The assembled prompt context must render the gap line.
    let ctx = crate::llm_session::assemble_prompt_context("room-gap-e2e");
    assert!(
        ctx.contains("events between 3 and 5 were dropped"),
        "assembled context must contain gap line; got: {ctx}"
    );
    // The real events on either side of the gap must be rendered as well.
    assert!(
        ctx.contains("test_0"),
        "first story must appear; got: {ctx}"
    );
    assert!(
        ctx.contains("test_99"),
        "last story must appear; got: {ctx}"
    );
}
|
||||
|
||||
/// AC2: every `TransitionFired` event is written to the log without filtering.
|
||||
#[test]
|
||||
fn log_transition_event_appends_all_events() {
|
||||
|
||||
Reference in New Issue
Block a user