2026-04-09 19:46:29 +01:00
|
|
|
/// WebSocket-based CRDT sync layer for replicating pipeline state between
|
|
|
|
|
/// huskies nodes.
|
|
|
|
|
///
|
|
|
|
|
/// # Protocol
|
|
|
|
|
///
|
2026-04-10 15:49:04 +00:00
|
|
|
/// The sync protocol is a hybrid of two frame types:
|
2026-04-09 19:46:29 +01:00
|
|
|
///
|
2026-04-10 15:49:04 +00:00
|
|
|
/// ## Text frames (bulk initial state)
|
|
|
|
|
/// A JSON object with a `"type"` field:
|
2026-04-09 19:46:29 +01:00
|
|
|
/// - `{"type":"bulk","ops":[...]}` — Initial state dump (array of serialised
|
|
|
|
|
/// `SignedOp` JSON strings). Sent by both sides immediately after connect.
|
2026-04-10 15:49:04 +00:00
|
|
|
///
|
|
|
|
|
/// ## Binary frames (real-time op broadcast)
|
|
|
|
|
/// Individual `SignedOp`s encoded via [`crate::crdt_wire`] (versioned JSON
|
|
|
|
|
/// envelope: `{"v":1,"op":{...}}`). Each locally-applied op is immediately
|
|
|
|
|
/// broadcast as a binary frame to all connected peers.
|
2026-04-09 19:46:29 +01:00
|
|
|
///
|
|
|
|
|
/// Both the server endpoint and the rendezvous client use the same protocol,
|
|
|
|
|
/// making the connection fully symmetric.
|
2026-04-10 15:49:04 +00:00
|
|
|
///
|
|
|
|
|
/// ## Backpressure
|
|
|
|
|
/// Each connected peer has its own [`tokio::sync::broadcast`] receiver. If a
|
|
|
|
|
/// slow peer allows the channel to fill (indicated by a `Lagged` error), the
|
|
|
|
|
/// connection is dropped with a warning log. The peer can reconnect and
|
|
|
|
|
/// receive a fresh bulk state dump to catch up.
|
2026-04-09 19:46:29 +01:00
|
|
|
use bft_json_crdt::json_crdt::SignedOp;
|
|
|
|
|
use futures::{SinkExt, StreamExt};
|
|
|
|
|
use poem::handler;
|
|
|
|
|
use poem::web::Data;
|
|
|
|
|
use poem::web::websocket::{Message as WsMessage, WebSocket};
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
|
|
|
|
|
use crate::crdt_state;
|
2026-04-10 15:49:04 +00:00
|
|
|
use crate::crdt_wire;
|
2026-04-09 19:46:29 +01:00
|
|
|
use crate::http::context::AppContext;
|
|
|
|
|
use crate::slog;
|
|
|
|
|
|
|
|
|
|
// ── Wire protocol types ─────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/// Text-frame wire message for the sync protocol.
///
/// Serialised as a JSON object with an external `"type"` tag
/// (`"bulk"` / `"op"`), matching the protocol described in the
/// module docs.
#[derive(Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum SyncMessage {
    /// Bulk state dump sent on connect. Each element is a serialised
    /// `SignedOp` JSON string.
    Bulk { ops: Vec<String> },
    /// A single new op (legacy text-frame form; real-time ops normally
    /// travel as binary frames via the wire codec).
    Op { op: String },
}
|
|
|
|
|
|
|
|
|
|
// ── Server-side WebSocket handler ───────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/// Server-side WebSocket endpoint for the CRDT sync protocol.
///
/// On upgrade this handler:
/// 1. sends a `SyncMessage::Bulk` text frame containing the full local
///    op log (if the CRDT is initialised),
/// 2. subscribes to the local op broadcast channel,
/// 3. enters a select loop that forwards locally-applied ops as binary
///    frames and applies incoming frames, until the peer closes, errors,
///    or lags behind the broadcast channel.
#[handler]
pub async fn crdt_sync_handler(
    ws: WebSocket,
    // NOTE(review): currently unused; presumably kept so the route can
    // reach app state later — confirm before removing.
    _ctx: Data<&Arc<AppContext>>,
) -> impl poem::IntoResponse {
    ws.on_upgrade(|socket| async move {
        let (mut sink, mut stream) = socket.split();

        slog!("[crdt-sync] Peer connected");

        // Send bulk state dump. A send failure means the peer is already
        // gone, so bail out before subscribing.
        if let Some(ops) = crdt_state::all_ops_json() {
            let msg = SyncMessage::Bulk { ops };
            if let Ok(json) = serde_json::to_string(&msg)
                && sink.send(WsMessage::Text(json)).await.is_err()
            {
                return;
            }
        }

        // Subscribe to new local ops. `None` means the CRDT layer is not
        // initialised, in which case there is nothing to sync.
        let Some(mut op_rx) = crdt_state::subscribe_ops() else {
            return;
        };

        loop {
            tokio::select! {
                // Forward new local ops to the peer encoded via the wire codec.
                result = op_rx.recv() => {
                    match result {
                        Ok(signed_op) => {
                            let bytes = crdt_wire::encode(&signed_op);
                            if sink.send(WsMessage::Binary(bytes)).await.is_err() {
                                break;
                            }
                        }
                        Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                            // The peer cannot keep up; disconnect so it can
                            // reconnect and receive a fresh bulk state dump.
                            slog!("[crdt-sync] Slow peer lagged {n} ops; disconnecting");
                            break;
                        }
                        // Channel closed: no more local ops will arrive.
                        Err(_) => break,
                    }
                }
                // Receive ops from the peer.
                frame = stream.next() => {
                    match frame {
                        Some(Ok(WsMessage::Text(text))) => {
                            // Bulk state dump or legacy text-frame op.
                            handle_incoming_text(&text);
                        }
                        Some(Ok(WsMessage::Binary(bytes))) => {
                            // Real-time op encoded via wire codec.
                            handle_incoming_binary(&bytes);
                        }
                        Some(Ok(WsMessage::Close(_))) | None => break,
                        // Ping/pong and other control frames are ignored.
                        _ => {}
                    }
                }
            }
        }

        slog!("[crdt-sync] Peer disconnected");
    })
}
|
|
|
|
|
|
2026-04-10 15:49:04 +00:00
|
|
|
/// Process an incoming text-frame sync message from a peer.
|
|
|
|
|
///
|
|
|
|
|
/// Text frames carry the bulk state dump (`SyncMessage::Bulk`) or legacy
|
|
|
|
|
/// single-op messages (`SyncMessage::Op`).
|
|
|
|
|
fn handle_incoming_text(text: &str) {
|
2026-04-09 19:46:29 +01:00
|
|
|
let msg: SyncMessage = match serde_json::from_str(text) {
|
|
|
|
|
Ok(m) => m,
|
|
|
|
|
Err(e) => {
|
2026-04-10 15:49:04 +00:00
|
|
|
slog!("[crdt-sync] Bad text message from peer: {e}");
|
2026-04-09 19:46:29 +01:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
match msg {
|
|
|
|
|
SyncMessage::Bulk { ops } => {
|
|
|
|
|
let mut applied = 0u64;
|
|
|
|
|
for op_json in &ops {
|
|
|
|
|
if let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json)
|
|
|
|
|
&& crdt_state::apply_remote_op(signed_op)
|
|
|
|
|
{
|
|
|
|
|
applied += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
slog!(
|
|
|
|
|
"[crdt-sync] Bulk sync: received {} ops, applied {applied}",
|
|
|
|
|
ops.len()
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
SyncMessage::Op { op } => {
|
|
|
|
|
if let Ok(signed_op) = serde_json::from_str::<SignedOp>(&op) {
|
|
|
|
|
crdt_state::apply_remote_op(signed_op);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-10 15:49:04 +00:00
|
|
|
/// Process an incoming binary-frame op from a peer.
|
|
|
|
|
///
|
|
|
|
|
/// Binary frames carry a single `SignedOp` encoded via [`crdt_wire`].
|
|
|
|
|
fn handle_incoming_binary(bytes: &[u8]) {
|
|
|
|
|
match crdt_wire::decode(bytes) {
|
|
|
|
|
Ok(signed_op) => {
|
|
|
|
|
crdt_state::apply_remote_op(signed_op);
|
|
|
|
|
}
|
|
|
|
|
Err(e) => {
|
|
|
|
|
slog!("[crdt-sync] Bad binary frame from peer: {e}");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 19:46:29 +01:00
|
|
|
// ── Rendezvous client ───────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/// Spawn a background task that connects to the configured rendezvous
|
|
|
|
|
/// peer and exchanges CRDT ops bidirectionally.
|
|
|
|
|
///
|
|
|
|
|
/// The client reconnects with exponential backoff if the connection drops.
|
|
|
|
|
pub fn spawn_rendezvous_client(url: String) {
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
let mut backoff_secs = 1u64;
|
|
|
|
|
loop {
|
|
|
|
|
slog!("[crdt-sync] Connecting to rendezvous peer: {url}");
|
|
|
|
|
match connect_and_sync(&url).await {
|
|
|
|
|
Ok(()) => {
|
|
|
|
|
slog!("[crdt-sync] Rendezvous connection closed cleanly");
|
|
|
|
|
backoff_secs = 1;
|
|
|
|
|
}
|
|
|
|
|
Err(e) => {
|
|
|
|
|
slog!("[crdt-sync] Rendezvous connection error: {e}");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
slog!("[crdt-sync] Reconnecting in {backoff_secs}s...");
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
|
|
|
|
|
backoff_secs = (backoff_secs * 2).min(30);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Connect to a remote sync endpoint and exchange ops until disconnect.
///
/// Mirrors the server handler (the protocol is symmetric): connect, send
/// our bulk state as a text frame, then loop forwarding local ops as
/// binary frames while applying everything the peer sends.
///
/// # Errors
/// Returns `Err` if the connection, the bulk send, or the op-channel
/// subscription fails; returns `Ok(())` on a normal disconnect.
async fn connect_and_sync(url: &str) -> Result<(), String> {
    let (ws_stream, _) = tokio_tungstenite::connect_async(url)
        .await
        .map_err(|e| format!("WebSocket connect failed: {e}"))?;

    let (mut sink, mut stream) = ws_stream.split();

    slog!("[crdt-sync] Connected to rendezvous peer");

    // Send our bulk state.
    if let Some(ops) = crdt_state::all_ops_json() {
        let msg = SyncMessage::Bulk { ops };
        if let Ok(json) = serde_json::to_string(&msg) {
            use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
            sink.send(TungsteniteMsg::Text(json.into()))
                .await
                .map_err(|e| format!("Send bulk failed: {e}"))?;
        }
    }

    // Subscribe to new local ops.
    let Some(mut op_rx) = crdt_state::subscribe_ops() else {
        return Err("CRDT not initialised".to_string());
    };

    loop {
        tokio::select! {
            // Forward locally-applied ops to the peer.
            result = op_rx.recv() => {
                match result {
                    Ok(signed_op) => {
                        // Encode via wire codec and send as binary frame.
                        let bytes = crdt_wire::encode(&signed_op);
                        use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                        if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
                            break;
                        }
                    }
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                        // Too far behind the broadcast channel; drop the link
                        // and let the reconnect re-sync via a fresh bulk dump.
                        slog!("[crdt-sync] Slow rendezvous link lagged {n} ops; disconnecting");
                        break;
                    }
                    Err(_) => break,
                }
            }
            // Apply frames arriving from the peer.
            frame = stream.next() => {
                match frame {
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Text(text))) => {
                        handle_incoming_text(text.as_ref());
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Binary(bytes))) => {
                        handle_incoming_binary(&bytes);
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Close(_))) | None => break,
                    Some(Err(e)) => {
                        slog!("[crdt-sync] Rendezvous read error: {e}");
                        break;
                    }
                    // Ping/pong and other control frames are ignored.
                    _ => {}
                }
            }
        }
    }

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
// ── Tests ────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
|
|
/// Round-trip a `Bulk` message through its JSON wire form.
#[test]
fn sync_message_bulk_serialization_roundtrip() {
    let original = SyncMessage::Bulk {
        ops: vec!["op1".to_string(), "op2".to_string()],
    };
    let encoded = serde_json::to_string(&original).unwrap();
    assert!(encoded.contains(r#""type":"bulk""#));

    let SyncMessage::Bulk { ops } = serde_json::from_str(&encoded).unwrap() else {
        panic!("Expected Bulk");
    };
    assert_eq!(ops, vec!["op1".to_string(), "op2".to_string()]);
}
|
|
|
|
|
|
|
|
|
|
/// Round-trip an `Op` message through its JSON wire form.
#[test]
fn sync_message_op_serialization_roundtrip() {
    let payload = r#"{"inner":{}}"#;
    let original = SyncMessage::Op {
        op: payload.to_string(),
    };
    let encoded = serde_json::to_string(&original).unwrap();
    assert!(encoded.contains(r#""type":"op""#));

    let SyncMessage::Op { op } = serde_json::from_str(&encoded).unwrap() else {
        panic!("Expected Op");
    };
    assert_eq!(op, payload);
}
|
|
|
|
|
|
|
|
|
|
/// Arbitrary non-JSON peer input must be logged and dropped, never panic.
#[test]
fn handle_incoming_text_bad_json_does_not_panic() {
    handle_incoming_text("not valid json");
}
|
|
|
|
|
|
|
|
|
|
/// A well-formed bulk frame whose individual ops fail to parse must be
/// skipped gracefully, never panic.
#[test]
fn handle_incoming_text_bulk_with_invalid_ops_does_not_panic() {
    let bad_bulk = SyncMessage::Bulk {
        ops: vec![String::from("not a valid signed op")],
    };
    let wire = serde_json::to_string(&bad_bulk).unwrap();
    handle_incoming_text(&wire);
}
|
|
|
|
|
|
|
|
|
|
/// A well-formed `Op` frame carrying an unparseable op must be dropped
/// gracefully, never panic.
#[test]
fn handle_incoming_text_op_with_invalid_op_does_not_panic() {
    let bad_op = SyncMessage::Op {
        op: String::from("garbage"),
    };
    let wire = serde_json::to_string(&bad_op).unwrap();
    handle_incoming_text(&wire);
}
|
|
|
|
|
|
|
|
|
|
/// Bytes that are not a valid wire-codec envelope must be logged and
/// dropped, never panic.
#[test]
fn handle_incoming_binary_bad_bytes_does_not_panic() {
    handle_incoming_binary(b"not valid wire codec");
}
|
|
|
|
|
|
|
|
|
|
/// An empty binary frame (zero bytes) must be handled without panicking.
#[test]
fn handle_incoming_binary_empty_bytes_does_not_panic() {
    handle_incoming_binary(b"");
}
|
|
|
|
|
|
|
|
|
|
/// `subscribe_ops` must be callable regardless of CRDT init state.
#[test]
fn subscribe_ops_returns_none_before_init() {
    // Before crdt_state::init() the channel doesn't exist yet.
    // In test binaries it may or may not be initialised depending on
    // other tests, so we just verify no panic.
    let _ = crdt_state::subscribe_ops();
}
|
|
|
|
|
|
|
|
|
|
/// `all_ops_json` must be callable regardless of CRDT init state; as with
/// `subscribe_ops`, we only verify that the call does not panic.
#[test]
fn all_ops_json_returns_none_before_init() {
    let _ = crdt_state::all_ops_json();
}
|
|
|
|
|
|
|
|
|
|
/// A `Bulk` message with zero ops must survive a JSON round-trip.
#[test]
fn sync_message_bulk_empty_ops() {
    let empty_bulk = SyncMessage::Bulk { ops: Vec::new() };
    let wire = serde_json::to_string(&empty_bulk).unwrap();

    let SyncMessage::Bulk { ops } = serde_json::from_str(&wire).unwrap() else {
        panic!("Expected Bulk");
    };
    assert!(ops.is_empty());
}
|
|
|
|
|
|
|
|
|
|
/// Simulate the sync protocol by creating real SignedOps on two separate
/// CRDT instances and exchanging them through the SyncMessage wire format.
///
/// Covers both frame payload kinds: a single op via `SyncMessage::Op`
/// (A → B) and a full bulk dump via `SyncMessage::Bulk` (A → C).
#[test]
fn two_node_sync_via_protocol_messages() {
    use bft_json_crdt::json_crdt::{BaseCrdt, CrdtNode, OpState};
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;

    use crate::crdt_state::PipelineDoc;

    // ── Node A: create an item ──
    let kp_a = make_keypair();
    let mut crdt_a = BaseCrdt::<PipelineDoc>::new(&kp_a);

    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "100_story_sync_test",
        "stage": "1_backlog",
        "name": "Sync Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op1 = crdt_a.doc.items.insert(ROOT_ID, item).sign(&kp_a);
    assert_eq!(crdt_a.apply(op1.clone()), OpState::Ok);

    // Serialise op1 into a SyncMessage::Op.
    let op1_json = serde_json::to_string(&op1).unwrap();
    let wire_msg = SyncMessage::Op { op: op1_json.clone() };
    let wire_json = serde_json::to_string(&wire_msg).unwrap();

    // ── Node B: receive the op through protocol ──
    let kp_b = make_keypair();
    let mut crdt_b = BaseCrdt::<PipelineDoc>::new(&kp_b);
    assert!(crdt_b.doc.items.view().is_empty());

    // Parse wire message and apply.
    let parsed: SyncMessage = serde_json::from_str(&wire_json).unwrap();
    match parsed {
        SyncMessage::Op { op } => {
            let signed_op: bft_json_crdt::json_crdt::SignedOp =
                serde_json::from_str(&op).unwrap();
            let result = crdt_b.apply(signed_op);
            assert_eq!(result, OpState::Ok);
        }
        _ => panic!("Expected Op"),
    }

    // Verify Node B has the same state as Node A.
    assert_eq!(crdt_b.doc.items.view().len(), 1);
    assert_eq!(
        crdt_a.doc.items[0].story_id.view(),
        crdt_b.doc.items[0].story_id.view()
    );
    assert_eq!(
        crdt_a.doc.items[0].stage.view(),
        crdt_b.doc.items[0].stage.view()
    );

    // ── Node A: update stage ──
    let op2 = crdt_a.doc.items[0]
        .stage
        .set("2_current".to_string())
        .sign(&kp_a);
    crdt_a.apply(op2.clone());

    // Send via bulk message. Includes op1 as well: a bulk dump carries
    // the full op log, and re-applying is expected to be safe.
    let op2_json = serde_json::to_string(&op2).unwrap();
    let bulk_msg = SyncMessage::Bulk {
        ops: vec![op1_json, op2_json],
    };
    let bulk_wire = serde_json::to_string(&bulk_msg).unwrap();

    // ── Node C: receives full state via bulk ──
    let kp_c = make_keypair();
    let mut crdt_c = BaseCrdt::<PipelineDoc>::new(&kp_c);

    let parsed_bulk: SyncMessage = serde_json::from_str(&bulk_wire).unwrap();
    match parsed_bulk {
        SyncMessage::Bulk { ops } => {
            for op_str in &ops {
                let signed: bft_json_crdt::json_crdt::SignedOp =
                    serde_json::from_str(op_str).unwrap();
                crdt_c.apply(signed);
            }
        }
        _ => panic!("Expected Bulk"),
    }

    // Node C should have the updated stage.
    assert_eq!(crdt_c.doc.items.view().len(), 1);
    assert_eq!(
        crdt_c.doc.items[0].stage.view(),
        bft_json_crdt::json_crdt::JsonValue::String("2_current".to_string())
    );
}
|
|
|
|
|
|
|
|
|
|
/// Verify that a single node's ops (insert + update) can be replayed
/// on another node via bulk sync and produce the same final state.
/// This is the core property needed for partition healing: when a
/// disconnected node reconnects, it sends all its ops as a bulk
/// message and the receiver catches up.
#[test]
fn partition_heal_via_bulk_replay() {
    use bft_json_crdt::json_crdt::{BaseCrdt, CrdtNode, JsonValue as JV};
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;

    use crate::crdt_state::PipelineDoc;

    // Single keypair: both nodes represent the same identity here; the
    // test exercises replay, not multi-author convergence.
    let kp = make_keypair();

    // Node A creates an item and advances it.
    let mut crdt_a = BaseCrdt::<PipelineDoc>::new(&kp);
    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "200_story_heal",
        "stage": "1_backlog",
        "name": "Heal Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();

    let op1 = crdt_a.doc.items.insert(ROOT_ID, item).sign(&kp);
    crdt_a.apply(op1.clone());

    let op2 = crdt_a.doc.items[0]
        .stage
        .set("2_current".to_string())
        .sign(&kp);
    crdt_a.apply(op2.clone());

    let op3 = crdt_a.doc.items[0]
        .stage
        .set("3_qa".to_string())
        .sign(&kp);
    crdt_a.apply(op3.clone());

    // Serialise all ops as a bulk message (simulates partition heal).
    let ops_json: Vec<String> = [&op1, &op2, &op3]
        .iter()
        .map(|op| serde_json::to_string(op).unwrap())
        .collect();
    let bulk = SyncMessage::Bulk { ops: ops_json };
    let wire = serde_json::to_string(&bulk).unwrap();

    // Node B receives the bulk and reconstructs state.
    let mut crdt_b = BaseCrdt::<PipelineDoc>::new(&kp);
    let parsed: SyncMessage = serde_json::from_str(&wire).unwrap();
    match parsed {
        SyncMessage::Bulk { ops } => {
            for op_str in &ops {
                let signed: bft_json_crdt::json_crdt::SignedOp =
                    serde_json::from_str(op_str).unwrap();
                crdt_b.apply(signed);
            }
        }
        _ => panic!("Expected Bulk"),
    }

    // Node B should match Node A exactly.
    assert_eq!(crdt_b.doc.items.view().len(), 1);
    assert_eq!(
        crdt_b.doc.items[0].stage.view(),
        JV::String("3_qa".to_string())
    );
    assert_eq!(
        crdt_a.doc.items[0].stage.view(),
        crdt_b.doc.items[0].stage.view()
    );
    assert_eq!(
        crdt_a.doc.items[0].name.view(),
        crdt_b.doc.items[0].name.view()
    );
}
|
|
|
|
|
|
|
|
|
|
/// The `rendezvous` URL is read from the top level of the project TOML.
#[test]
fn config_rendezvous_parsed_from_toml() {
    let toml_str = r#"
rendezvous = "ws://remote:3001/crdt-sync"

[[agent]]
name = "test"
"#;
    let config: crate::config::ProjectConfig = toml::from_str(toml_str).unwrap();
    assert_eq!(
        config.rendezvous.as_deref(),
        Some("ws://remote:3001/crdt-sync")
    );
}
|
|
|
|
|
|
|
|
|
|
/// Without an explicit `rendezvous` key the client must stay disabled.
#[test]
fn config_rendezvous_defaults_to_none() {
    let config = crate::config::ProjectConfig::default();
    assert!(config.rendezvous.is_none());
}
|
2026-04-10 15:49:04 +00:00
|
|
|
|
|
|
|
|
// ── AC8: peer lifecycle tests ─────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/// AC8: A peer that connects and then receives a subsequently-applied op
/// gets that op encoded via the wire codec (binary frame).
#[test]
fn peer_receives_op_encoded_via_wire_codec() {
    use bft_json_crdt::json_crdt::BaseCrdt;
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;

    use crate::crdt_state::PipelineDoc;
    use crate::crdt_wire;

    let kp = make_keypair();
    let mut crdt = BaseCrdt::<PipelineDoc>::new(&kp);
    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "506_story_lifecycle_test",
        "stage": "1_backlog",
        "name": "Lifecycle Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op = crdt.doc.items.insert(ROOT_ID, item).sign(&kp);

    // Simulate what the broadcast handler does: encode via wire codec.
    let bytes = crdt_wire::encode(&op);

    // The bytes must be a versioned JSON envelope, not a SyncMessage wrapper.
    let text = std::str::from_utf8(&bytes).expect("wire output is valid UTF-8");
    assert!(
        text.contains("\"v\":1"),
        "wire codec version tag must be present: {text}"
    );
    assert!(
        !text.contains("\"type\":\"op\""),
        "must not be wrapped in SyncMessage: {text}"
    );

    // The receiving peer can decode and apply the op.
    let decoded = crdt_wire::decode(&bytes).expect("decode must succeed");
    assert_eq!(op, decoded);
}
|
|
|
|
|
|
|
|
|
|
/// AC8: Multiple connected peers all receive the same broadcast op.
///
/// Uses a local broadcast channel rather than real sockets; each
/// subscriber models one connected peer's receive side.
#[tokio::test]
async fn multiple_peers_all_receive_broadcast_op() {
    use bft_json_crdt::json_crdt::BaseCrdt;
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;
    use tokio::sync::broadcast;

    use crate::crdt_state::PipelineDoc;
    use crate::crdt_wire;

    // Create a broadcast channel (analogous to SYNC_TX).
    let (tx, _) = broadcast::channel::<SignedOp>(16);
    let mut rx_peer1 = tx.subscribe();
    let mut rx_peer2 = tx.subscribe();

    let kp = make_keypair();
    let mut crdt = BaseCrdt::<PipelineDoc>::new(&kp);
    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "506_story_multi_peer_test",
        "stage": "1_backlog",
        "name": "Multi-Peer Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op = crdt.doc.items.insert(ROOT_ID, item).sign(&kp);

    // Broadcast one op.
    tx.send(op.clone()).expect("send must succeed");

    // Both peers receive the same op.
    let received1 = rx_peer1.recv().await.expect("peer 1 must receive");
    let received2 = rx_peer2.recv().await.expect("peer 2 must receive");
    assert_eq!(received1, op);
    assert_eq!(received2, op);

    // Both encode identically via wire codec.
    let bytes1 = crdt_wire::encode(&received1);
    let bytes2 = crdt_wire::encode(&received2);
    assert_eq!(bytes1, bytes2, "wire-encoded bytes must be identical");
}
|
|
|
|
|
|
|
|
|
|
/// AC8: A peer disconnecting mid-broadcast does not panic.
/// Simulated by dropping the receiver before the sender sends an op.
#[test]
fn disconnected_peer_does_not_panic() {
    use bft_json_crdt::json_crdt::BaseCrdt;
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;
    use tokio::sync::broadcast;

    use crate::crdt_state::PipelineDoc;

    let (tx, rx) = broadcast::channel::<SignedOp>(16);
    // Drop the receiver to simulate a peer that disconnected.
    drop(rx);

    let kp = make_keypair();
    let mut crdt = BaseCrdt::<PipelineDoc>::new(&kp);
    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "506_story_disconnect_test",
        "stage": "1_backlog",
        "name": "Disconnect Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op = crdt.doc.items.insert(ROOT_ID, item).sign(&kp);

    // Sending to a channel with no receivers returns an error; must not panic.
    let _ = tx.send(op);
}
|
|
|
|
|
|
|
|
|
|
/// AC8: A lagged receiver gets a `Lagged` error (confirming the
/// disconnect-on-overflow behaviour is reachable).
#[tokio::test]
async fn lagged_peer_gets_lagged_error() {
    use bft_json_crdt::json_crdt::BaseCrdt;
    use bft_json_crdt::keypair::make_keypair;
    use bft_json_crdt::op::ROOT_ID;
    use serde_json::json;
    use tokio::sync::broadcast;

    use crate::crdt_state::PipelineDoc;

    // Tiny capacity so we can trigger Lagged easily.
    let (tx, mut rx) = broadcast::channel::<SignedOp>(2);

    let kp = make_keypair();
    let mut crdt = BaseCrdt::<PipelineDoc>::new(&kp);
    let item: bft_json_crdt::json_crdt::JsonValue = json!({
        "story_id": "506_story_lag_test",
        "stage": "1_backlog",
        "name": "Lag Test",
        "agent": "",
        "retry_count": 0.0,
        "blocked": false,
        "depends_on": "",
    })
    .into();
    let op1 = crdt.doc.items.insert(ROOT_ID, item).sign(&kp);
    crdt.apply(op1.clone());

    // Overflow the tiny buffer by sending more ops than the capacity.
    let op2 = crdt.doc.items[0]
        .stage
        .set("2_current".to_string())
        .sign(&kp);
    crdt.apply(op2.clone());
    let op3 = crdt.doc.items[0]
        .stage
        .set("3_qa".to_string())
        .sign(&kp);
    crdt.apply(op3.clone());
    let op4 = crdt.doc.items[0]
        .stage
        .set("4_merge".to_string())
        .sign(&kp);
    crdt.apply(op4.clone());

    // Send more ops than the channel capacity without consuming.
    let _ = tx.send(op1);
    let _ = tx.send(op2);
    let _ = tx.send(op3);
    let _ = tx.send(op4);

    // The slow peer should now see a Lagged error on next recv.
    // Consume until we hit Lagged or run out.
    let mut got_lagged = false;
    for _ in 0..10 {
        match rx.recv().await {
            Err(broadcast::error::RecvError::Lagged(_)) => {
                got_lagged = true;
                break;
            }
            Ok(_) => continue,
            Err(broadcast::error::RecvError::Closed) => break,
        }
    }
    assert!(
        got_lagged,
        "slow peer must receive a Lagged error when channel overflows"
    );
}
|
2026-04-09 19:46:29 +01:00
|
|
|
}
|