huskies: merge 508_story_configurable_rendezvous_peer_in_project_toml_with_outbound_crdt_sync_connect
+369 -1
@@ -35,6 +35,8 @@ use crate::crdt_state;
use crate::crdt_wire;
use crate::http::context::AppContext;
use crate::slog;
use crate::slog_error;
use crate::slog_warn;

// ── Wire protocol types ─────────────────────────────────────────────

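The wire types themselves sit outside this hunk. From their usage later in this diff (`SyncMessage::Op { op }`, `SyncMessage::Bulk { ops }`, serialised with serde_json), a plausible shape is sketched below; the `#[serde(tag = "type")]` representation and the comments are assumptions, not confirmed by this commit:

// Hypothetical reconstruction of the elided wire enum (tagging assumed).
#[derive(serde::Serialize, serde::Deserialize)]
#[serde(tag = "type")]
enum SyncMessage {
    // A single signed op, pre-serialised to JSON by the sender.
    Op { op: String },
    // A full-state snapshot: every signed op the sender holds.
    Bulk { ops: Vec<String> },
}
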
@@ -168,22 +170,38 @@ fn handle_incoming_binary(bytes: &[u8]) {

// ── Rendezvous client ───────────────────────────────────────────────

/// Number of consecutive connection failures before escalating from WARN to ERROR.
pub const RENDEZVOUS_ERROR_THRESHOLD: u32 = 10;

/// Spawn a background task that connects to the configured rendezvous
/// peer and exchanges CRDT ops bidirectionally.
///
/// The client reconnects with exponential backoff if the connection drops.
/// Individual failures are logged at WARN; after [`RENDEZVOUS_ERROR_THRESHOLD`]
/// consecutive failures the log level escalates to ERROR.
pub fn spawn_rendezvous_client(url: String) {
    tokio::spawn(async move {
        let mut backoff_secs = 1u64;
        let mut consecutive_failures: u32 = 0;
        loop {
            slog!("[crdt-sync] Connecting to rendezvous peer: {url}");
            match connect_and_sync(&url).await {
                Ok(()) => {
                    slog!("[crdt-sync] Rendezvous connection closed cleanly");
                    backoff_secs = 1;
                    consecutive_failures = 0;
                }
                Err(e) => {
                    consecutive_failures += 1;
                    if consecutive_failures >= RENDEZVOUS_ERROR_THRESHOLD {
                        slog_error!(
                            "[crdt-sync] Rendezvous peer unreachable ({consecutive_failures} consecutive failures): {e}"
                        );
                    } else {
                        slog_warn!(
                            "[crdt-sync] Rendezvous connection error (attempt {consecutive_failures}): {e}"
                        );
                    }
                }
            }
            slog!("[crdt-sync] Reconnecting in {backoff_secs}s...");
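The hunk display cuts off here, before the sleep that actually applies the backoff. A minimal sketch of the presumed continuation; the doubling and the cap value are assumptions, not lines from this diff:

            // Presumed continuation: sleep, then grow the backoff.
            tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
            backoff_secs = (backoff_secs * 2).min(60); // cap value assumed
        }
    });
}
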
@@ -559,6 +577,67 @@ name = "test"
    assert!(config.rendezvous.is_none());
}

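The test above only shows the negative case (`config.rendezvous.is_none()` when no table is present), so the field is presumably an `Option`. A hypothetical sketch of the config types; the `url` field name and struct names are assumed from `spawn_rendezvous_client(url: String)`, not from this diff:

// Hypothetical shape of the project.toml table, e.g.
//
//   [rendezvous]
//   url = "ws://peer.example:8080/crdt-sync"
//
#[derive(serde::Deserialize)]
struct RendezvousConfig {
    url: String,
}

#[derive(serde::Deserialize)]
struct ProjectConfig {
    rendezvous: Option<RendezvousConfig>,
}
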
// ── AC4: Failure logging escalation ──────────────────────────────────────

/// AC4: Connection errors must be logged at WARN for the first nine
/// consecutive failures and must escalate to ERROR from the tenth onwards.
#[test]
fn failure_counter_warn_below_threshold() {
    let threshold = super::RENDEZVOUS_ERROR_THRESHOLD;
    let mut consecutive_failures: u32 = 0;

    // First threshold-1 failures are below the ERROR threshold.
    for _ in 0..(threshold - 1) {
        consecutive_failures += 1;
        assert!(
            consecutive_failures < threshold,
            "failure {consecutive_failures} must be below ERROR threshold {threshold}"
        );
    }
}

/// AC4: The tenth consecutive failure must trigger ERROR-level logging.
#[test]
fn failure_counter_error_at_threshold() {
    let threshold = super::RENDEZVOUS_ERROR_THRESHOLD;
    let consecutive_failures: u32 = threshold;
    assert!(
        consecutive_failures >= threshold,
        "failure {consecutive_failures} must reach or exceed ERROR threshold {threshold}"
    );
}

/// AC4: A successful connection resets the failure counter so subsequent
/// single failures are again logged at WARN (not ERROR).
#[test]
fn failure_counter_resets_on_success() {
    let threshold = super::RENDEZVOUS_ERROR_THRESHOLD;
    // Simulate sustained failure.
    let mut consecutive_failures: u32 = threshold + 5;
    assert!(consecutive_failures >= threshold);

    // Simulate a clean reconnect.
    consecutive_failures = 0;
    assert_eq!(consecutive_failures, 0, "counter must reset to 0 on success");

    // Next error is attempt 1 — well below the ERROR threshold.
    consecutive_failures += 1;
    assert!(
        consecutive_failures < threshold,
        "first failure after reset must be below ERROR threshold"
    );
}

/// AC4: The RENDEZVOUS_ERROR_THRESHOLD constant must equal 10.
#[test]
fn error_threshold_is_ten() {
    assert_eq!(
        super::RENDEZVOUS_ERROR_THRESHOLD,
        10,
        "ERROR escalation threshold must be 10 consecutive failures"
    );
}

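The counter tests above exercise the WARN/ERROR boundary indirectly, by mirroring the client's arithmetic. If the decision were factored into a pure helper (a hypothetical refactor, not part of this commit), the boundary could be asserted directly:

// Hypothetical pure predicate for the escalation decision.
fn is_error_level(consecutive_failures: u32) -> bool {
    consecutive_failures >= super::RENDEZVOUS_ERROR_THRESHOLD
}

#[test]
fn escalation_boundary_direct() {
    // Failure 9 stays at WARN; failure 10 escalates to ERROR.
    assert!(!is_error_level(9));
    assert!(is_error_level(10));
}
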
// ── AC5: Self-loop dedup ──────────────────────────────────────────────────

/// AC5: Applying the same SignedOp twice returns AlreadySeen on the second
@@ -1125,4 +1204,293 @@ name = "test"
        "slow peer must receive a Lagged error when channel overflows"
    );
}

// ── AC5 (story 508): E2E convergence test via real WebSocket ─────────────

/// AC5: Spin up two in-process WebSocket nodes. Node A serves a
/// `/crdt-sync`-compatible endpoint; Node B connects as a rendezvous client.
/// Node A sends a bulk state; Node B applies it. Assert both nodes see the
/// same items within a bounded time window.
#[tokio::test]
async fn e2e_convergence_two_websocket_nodes() {
    use bft_json_crdt::json_crdt::{BaseCrdt, CrdtNode, JsonValue as JV, OpState};
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use futures::{SinkExt, StreamExt};
    use serde_json::json;
    use std::sync::{Arc, Mutex};
    use tokio::net::TcpListener;
    use tokio_tungstenite::{accept_async, connect_async};
    use tokio_tungstenite::tungstenite::Message as TMsg;

    use crate::crdt_state::PipelineDoc;

    // ── Node A: build local state ──────────────────────────────────────
    let kp_a = make_keypair();
    let mut crdt_a = BaseCrdt::<PipelineDoc>::new(&kp_a);

    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "508_e2e_convergence",
        "stage": "2_current",
        "name": "E2E Convergence Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op1 = crdt_a.doc.items.insert(ROOT_ID, item).sign(&kp_a);
    crdt_a.apply(op1.clone());

    // Serialise A's full state as a bulk message.
    let op1_json = serde_json::to_string(&op1).unwrap();
    let bulk_msg = SyncMessage::Bulk { ops: vec![op1_json] };
    let bulk_wire = serde_json::to_string(&bulk_msg).unwrap();

    // ── Start Node A's WebSocket server on a random port ───────────────
    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();

    let bulk_to_send = bulk_wire.clone();
    let received_by_a: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(vec![]));
    let received_by_a_clone = received_by_a.clone();

    tokio::spawn(async move {
        let (tcp_stream, _) = listener.accept().await.unwrap();
        let ws_stream = accept_async(tcp_stream).await.unwrap();
        let (mut sink, mut stream) = ws_stream.split();

        // Send bulk state to the connecting peer.
        sink.send(TMsg::Text(bulk_to_send.into())).await.unwrap();

        // Also listen for ops sent by the peer.
        if let Some(Ok(TMsg::Text(txt))) = stream.next().await {
            received_by_a_clone.lock().unwrap().push(txt.to_string());
        }
    });

    // ── Node B: connect to Node A and exchange state ───────────────────
    let kp_b = make_keypair();
    let mut crdt_b = BaseCrdt::<PipelineDoc>::new(&kp_b);

    let url = format!("ws://{addr}");
    let (ws_b, _) = connect_async(&url).await.unwrap();
    let (mut sink_b, mut stream_b) = ws_b.split();

    // Node B receives bulk from A.
    if let Some(Ok(TMsg::Text(txt))) = stream_b.next().await {
        let msg: SyncMessage = serde_json::from_str(txt.as_str()).unwrap();
        match msg {
            SyncMessage::Bulk { ops } => {
                for op_str in &ops {
                    let signed: bft_json_crdt::json_crdt::SignedOp =
                        serde_json::from_str(op_str).unwrap();
                    let r = crdt_b.apply(signed);
                    assert!(r == OpState::Ok || r == OpState::AlreadySeen);
                }
            }
            _ => panic!("Expected Bulk from Node A"),
        }
    }

    // Node B also creates a new op and sends it to A.
    let item_b: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "508_e2e_convergence_b",
        "stage": "1_backlog",
        "name": "E2E Convergence B",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op_b1 = crdt_b.doc.items.insert(ROOT_ID, item_b).sign(&kp_b);
    crdt_b.apply(op_b1.clone());

    let op_b1_json = serde_json::to_string(&op_b1).unwrap();
    let msg_to_a = SyncMessage::Op { op: op_b1_json };
    sink_b
        .send(TMsg::Text(serde_json::to_string(&msg_to_a).unwrap().into()))
        .await
        .unwrap();

    // Wait a moment for Node A to process.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;

    // ── Assert convergence ─────────────────────────────────────────────

    // Node B received Node A's item.
    assert_eq!(
        crdt_b.doc.items.view().len(),
        2,
        "Node B must see both items after sync"
    );
    let has_a_item = crdt_b.doc.items.view().iter().any(|item| {
        item.story_id.view() == JV::String("508_e2e_convergence".to_string())
    });
    assert!(has_a_item, "Node B must have Node A's item");

    // Node A received Node B's op via the WebSocket.
    let a_received = received_by_a.lock().unwrap();
    assert!(
        !a_received.is_empty(),
        "Node A must have received an op from Node B"
    );
}

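For context, this is how the pieces presumably fit together at startup; everything here beyond `spawn_rendezvous_client` and the optional `config.rendezvous` field is an assumption, not shown in this diff:

// Hypothetical wiring at server startup: outbound sync is opt-in, gated
// on the presence of the [rendezvous] table in project.toml.
if let Some(rdv) = &config.rendezvous {
    spawn_rendezvous_client(rdv.url.clone());
}
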
// ── AC6 (story 508): Partition healing E2E via real WebSocket ─────────────

/// AC6: Two nodes exchange ops, the connection is dropped (partition), each
/// side mutates independently, then they reconnect and the reconnecting
/// client sends a fresh bulk state. Assert both converge to the same final
/// state.
#[tokio::test]
async fn e2e_partition_healing_websocket() {
    use bft_json_crdt::json_crdt::{BaseCrdt, CrdtNode, JsonValue as JV};
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use futures::{SinkExt, StreamExt};
    use serde_json::json;
    use tokio::net::TcpListener;
    use tokio_tungstenite::{accept_async, connect_async};
    use tokio_tungstenite::tungstenite::Message as TMsg;

    use crate::crdt_state::PipelineDoc;

    // ── Phase 1: Both nodes start with op_a1 (before partition) ───────
    let kp_a = make_keypair();
    let kp_b = make_keypair();
    let mut crdt_a = BaseCrdt::<PipelineDoc>::new(&kp_a);
    let mut crdt_b = BaseCrdt::<PipelineDoc>::new(&kp_b);

    let item_a: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "508_heal_a",
        "stage": "1_backlog",
        "name": "Heal A",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op_a1 = crdt_a.doc.items.insert(ROOT_ID, item_a).sign(&kp_a);
    crdt_a.apply(op_a1.clone());
    // B also starts with op_a1 (shared state before partition).
    crdt_b.apply(op_a1.clone());

    // ── Phase 2: Partition — each side mutates independently ──────────
    // A advances its story stage.
    let op_a2 = crdt_a.doc.items[0]
        .stage
        .set("2_current".to_string())
        .sign(&kp_a);
    crdt_a.apply(op_a2.clone());

    // B inserts a new story that A doesn't know about yet.
    let item_b: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "508_heal_b",
        "stage": "1_backlog",
        "name": "Heal B",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op_b1 = crdt_b.doc.items.insert(ROOT_ID, item_b).sign(&kp_b);
    crdt_b.apply(op_b1.clone());

    // Collect B's full state as bulk (what it will send on reconnect).
    let b_ops: Vec<String> = [&op_a1, &op_b1]
        .iter()
        .map(|op| serde_json::to_string(op).unwrap())
        .collect();
    let b_bulk_wire = serde_json::to_string(&SyncMessage::Bulk { ops: b_ops }).unwrap();

    // Collect A's full state as bulk (what it will send on reconnect).
    let a_ops: Vec<String> = [&op_a1, &op_a2]
        .iter()
        .map(|op| serde_json::to_string(op).unwrap())
        .collect();
    let a_bulk_wire = serde_json::to_string(&SyncMessage::Bulk { ops: a_ops }).unwrap();

    // ── Phase 3: Reconnect — use a real WebSocket to exchange bulk ────
    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();

    let a_bulk_to_send = a_bulk_wire.clone();
    let a_received_bulk: std::sync::Arc<std::sync::Mutex<Option<String>>> =
        std::sync::Arc::new(std::sync::Mutex::new(None));
    let a_received_clone = a_received_bulk.clone();

    tokio::spawn(async move {
        let (tcp, _) = listener.accept().await.unwrap();
        let ws = accept_async(tcp).await.unwrap();
        let (mut sink, mut stream) = ws.split();
        // A sends its bulk state.
        sink.send(TMsg::Text(a_bulk_to_send.into())).await.unwrap();
        // A receives B's bulk state.
        if let Some(Ok(TMsg::Text(txt))) = stream.next().await {
            *a_received_clone.lock().unwrap() = Some(txt.to_string());
        }
    });

    // B connects, exchanges bulk state.
    let (ws_b, _) = connect_async(format!("ws://{addr}")).await.unwrap();
    let (mut sink_b, mut stream_b) = ws_b.split();

    // B receives A's bulk and applies it.
    if let Some(Ok(TMsg::Text(txt))) = stream_b.next().await {
        let msg: SyncMessage = serde_json::from_str(txt.as_str()).unwrap();
        if let SyncMessage::Bulk { ops } = msg {
            for op_str in &ops {
                let signed: bft_json_crdt::json_crdt::SignedOp =
                    serde_json::from_str(op_str).unwrap();
                let _ = crdt_b.apply(signed);
            }
        }
    }

    // B sends its bulk state to A.
    sink_b
        .send(TMsg::Text(b_bulk_wire.into()))
        .await
        .unwrap();

    tokio::time::sleep(std::time::Duration::from_millis(50)).await;

    // Apply A's received ops into crdt_a.
    if let Some(bulk_str) = a_received_bulk.lock().unwrap().take() {
        let msg: SyncMessage = serde_json::from_str(&bulk_str).unwrap();
        if let SyncMessage::Bulk { ops } = msg {
            for op_str in &ops {
                let signed: bft_json_crdt::json_crdt::SignedOp =
                    serde_json::from_str(op_str).unwrap();
                let _ = crdt_a.apply(signed);
            }
        }
    }

    // ── Assert convergence ─────────────────────────────────────────────

    // Both nodes must have 2 items.
    assert_eq!(
        crdt_a.doc.items.view().len(),
        2,
        "A must have 2 items after healing"
    );
    assert_eq!(
        crdt_b.doc.items.view().len(),
        2,
        "B must have 2 items after healing"
    );

    // A must see B's story.
    let b_story_on_a = crdt_a.doc.items.view().iter().any(|item| {
        item.story_id.view() == JV::String("508_heal_b".to_string())
    });
    assert!(b_story_on_a, "A must have B's story after healing");

    // B must see A's story; the stage advance itself is verified by the
    // identical-view check below.
    let a_story_on_b = crdt_b.doc.items.view().iter().any(|item| {
        item.story_id.view() == JV::String("508_heal_a".to_string())
    });
    assert!(a_story_on_b, "B must have A's story after healing");

    // CRDT views must be byte-identical (convergence).
    let view_a = serde_json::to_string(&CrdtNode::view(&crdt_a.doc.items)).unwrap();
    let view_b = serde_json::to_string(&CrdtNode::view(&crdt_b.doc.items)).unwrap();
    assert_eq!(view_a, view_b, "Both nodes must converge to identical state");
}
}