huskies: merge 507_story_apply_inbound_signedops_with_causal_order_queue_for_partition_recovery
This commit is contained in:
@@ -66,8 +66,23 @@ pub enum OpState {
|
||||
/// We have not received all of the causal dependencies of this operation. It has been queued
|
||||
/// up and will be executed when its causal dependencies have been delivered, unless it is
/// evicted first because the hold queue reached [`CAUSAL_QUEUE_MAX`].
|
||||
MissingCausalDependencies,
|
||||
/// This op has already been applied (identified by its `signed_digest`).
|
||||
/// The CRDT state is unchanged — this is a no-op (idempotent self-loop guard).
|
||||
AlreadySeen,
|
||||
}
|
||||
|
||||
/// Maximum total number of ops that may sit in the causal-order hold queue at any
|
||||
/// one time, summed across all pending dependency buckets.
|
||||
///
|
||||
/// **Overflow policy: drop the oldest op of the largest bucket.**
|
||||
/// When the limit is reached, the oldest pending op in the largest dependency bucket
|
||||
/// is silently evicted before the new op is queued. Rationale: a misbehaving or
|
||||
/// heavily-partitioned peer can send ops whose causal ancestors never arrive, causing
|
||||
/// unbounded memory growth. Dropping the oldest entry preserves the most recent
|
||||
/// information and caps memory use. The peer can reconnect and receive a fresh bulk
|
||||
/// state dump to recover any dropped ops.
|
||||
pub const CAUSAL_QUEUE_MAX: usize = 256;
|
||||
|
||||
/// The following types can be used as a 'terminal' type in CRDTs
|
||||
pub trait MarkPrimitive: Into<JsonValue> + Default {}
|
||||
impl MarkPrimitive for bool {}
|
||||
@@ -112,6 +127,10 @@ pub struct BaseCrdt<T: CrdtNode> {
|
||||
/// of messages we've seen (represented by their [`SignedDigest`]).
|
||||
received: HashSet<SignedDigest>,
|
||||
message_q: HashMap<SignedDigest, Vec<SignedOp>>,
|
||||
|
||||
/// Total count of ops currently held in [`BaseCrdt::message_q`] waiting for their causal
|
||||
/// dependencies to be delivered. Used to enforce [`CAUSAL_QUEUE_MAX`].
|
||||
queue_len: usize,
|
||||
}
|
||||
|
||||
/// An [`Op<Value>`] with a few bits of extra metadata
|
||||
@@ -213,6 +232,7 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
|
||||
doc: T::new(id, vec![]),
|
||||
received: HashSet::new(),
|
||||
message_q: HashMap::new(),
|
||||
queue_len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,11 +248,36 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
|
||||
}
|
||||
|
||||
let op_id = op.signed_digest;
|
||||
|
||||
// Self-loop / dedup guard: if we have already processed this op (identified by
|
||||
// its signed_digest), return immediately without re-applying it. This prevents
|
||||
// echo loops where an op we broadcast to a peer comes back to us.
|
||||
if self.received.contains(&op_id) {
|
||||
return OpState::AlreadySeen;
|
||||
}
|
||||
|
||||
if !op.depends_on.is_empty() {
|
||||
for origin in &op.depends_on {
|
||||
if !self.received.contains(origin) {
|
||||
self.log_missing_causal_dep(origin);
|
||||
|
||||
// Bounded queue overflow: evict the oldest op from the largest
|
||||
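// pending bucket before adding the new one. See CAUSAL_QUEUE_MAX.
// NOTE(review): a bucket emptied by this eviction stays in `message_q` as an
// empty `Vec`, so the number of map keys is not capped by CAUSAL_QUEUE_MAX
// when the missing dependencies never arrive; consider removing the entry
// once it becomes empty.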
|
||||
if self.queue_len >= CAUSAL_QUEUE_MAX {
|
||||
if let Some(bucket) = self
|
||||
.message_q
|
||||
.values_mut()
|
||||
.max_by_key(|v| v.len())
|
||||
{
|
||||
if !bucket.is_empty() {
|
||||
bucket.remove(0);
|
||||
self.queue_len = self.queue_len.saturating_sub(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.message_q.entry(*origin).or_default().push(op);
|
||||
self.queue_len += 1;
|
||||
return OpState::MissingCausalDependencies;
|
||||
}
|
||||
}
|
||||
@@ -247,12 +292,19 @@ impl<T: CrdtNode + DebugView> BaseCrdt<T> {
|
||||
// apply all of its causal dependents if there are any
|
||||
let dependent_queue = self.message_q.remove(&op_id);
|
||||
if let Some(mut q) = dependent_queue {
|
||||
self.queue_len = self.queue_len.saturating_sub(q.len());
|
||||
for dependent in q.drain(..) {
|
||||
self.apply(dependent);
|
||||
}
|
||||
}
|
||||
status
|
||||
}
|
||||
|
||||
/// Number of ops currently held in the causal-order queue waiting for their
|
||||
/// dependencies to be satisfied.
|
||||
pub fn causal_queue_len(&self) -> usize {
|
||||
self.queue_len
|
||||
}
|
||||
}
|
||||
|
||||
/// An enum representing a JSON value
|
||||
|
||||
Reference in New Issue
Block a user