// huskies/server/src/crdt_sync/mod.rs
//! CRDT sync — WebSocket-based replication of pipeline state between huskies nodes.
//!
//! WebSocket-based CRDT sync layer for replicating pipeline state between
//! huskies nodes.
//!
//! # Protocol
//!
//! ## Version negotiation
//!
//! After the auth handshake, both sides send their first sync message:
//!
//! - **v2 peers** send a `clock` frame: `{"type":"clock","clock":{ <node_id_hex>: <max_count>, ... }}`
//!   containing a vector clock that maps each author's hex Ed25519 pubkey to the
//!   count of ops received from that author. Upon receiving the peer's clock,
//!   each side computes the delta via [`crdt_state::ops_since`] and sends only
//!   the missing ops as a `bulk` frame.
//!
//! - **v1 (legacy) peers** send a `bulk` frame directly (full op dump).
//!   A v2 peer receiving a `bulk` first (instead of a `clock`) falls back to
//!   the full-dump path: applies the incoming bulk and responds with its own
//!   full bulk. This preserves backward compatibility — no code change needed
//!   on the v1 side.
//!
//! ## Text frames
//! A JSON object with a `"type"` field:
//! - `{"type":"clock","clock":{...}}` — Vector clock (v2 protocol).
//! - `{"type":"bulk","ops":[...]}` — Ops dump (full or delta).
//! - `{"type":"ready"}` — Signals that the bulk-delta phase is complete and the
//!   sender is ready for real-time op streaming. Locally-generated ops are
//!   buffered until the peer's `ready` is received, then flushed in order.
//!
//! ## Binary frames (real-time op broadcast)
//! Individual `SignedOp`s encoded via [`crate::crdt_wire`] (versioned JSON
//! envelope: `{"v":1,"op":{...}}`). Each locally-applied op is immediately
//! broadcast as a binary frame to all connected peers.
//!
//! Both the server endpoint and the rendezvous client use the same protocol,
//! making the connection fully symmetric.
//!
//! ## Backpressure
//! Each connected peer has its own [`tokio::sync::broadcast`] receiver. If a
//! slow peer allows the channel to fill (indicated by a `Lagged` error), the
//! connection is dropped with a warning log. The peer can reconnect and
//! receive a fresh bulk state dump to catch up.
use bft_json_crdt::json_crdt::SignedOp;
use futures::{SinkExt, StreamExt};
use poem::handler;
use poem::http::StatusCode;
use poem::web::Data;
use poem::web::Query;
use poem::web::websocket::{Message as WsMessage, WebSocket};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, OnceLock};
use crate::crdt_snapshot;
use crate::crdt_state;
use crate::crdt_wire;
use crate::http::context::AppContext;
use crate::node_identity;
use crate::slog;
use crate::slog_error;
use crate::slog_warn;
// ── Auth configuration ──────────────────────────────────────────────
/// Default timeout for the auth handshake (seconds). Used both while waiting
/// for the auth reply and while waiting for the peer's first sync message.
const AUTH_TIMEOUT_SECS: u64 = 10;
// ── Keepalive configuration ─────────────────────────────────────────
/// Interval (seconds) between WebSocket Ping frames sent by each side.
pub const PING_INTERVAL_SECS: u64 = 30;
/// Seconds without a Pong response before the connection is dropped.
pub const PONG_TIMEOUT_SECS: u64 = 60;
/// Trusted public keys loaded once at startup.
/// Populated by [`init_trusted_keys`]; read via [`trusted_keys`].
static TRUSTED_KEYS: OnceLock<Vec<String>> = OnceLock::new();
/// Initialise the trusted-key allow-list for connect-time mutual auth.
///
/// Must be called once at startup before any WebSocket connections are
/// accepted. Subsequent calls are no-ops (`OnceLock` keeps the first value).
pub fn init_trusted_keys(keys: Vec<String>) {
    // `set` fails if the cell is already initialised; that is intentional.
    TRUSTED_KEYS.set(keys).ok();
}
/// Return a reference to the trusted-key allow-list.
///
/// Yields an empty slice when [`init_trusted_keys`] has not been called yet.
fn trusted_keys() -> &'static [String] {
    match TRUSTED_KEYS.get() {
        Some(keys) => keys.as_slice(),
        None => &[],
    }
}
// ── Bearer-token auth ───────────────────────────────────────────────
/// Time-to-live for CRDT bearer tokens in seconds (30 days).
const TOKEN_TTL_SECS: f64 = 30.0 * 24.0 * 3600.0;
/// Whether a bearer token is required for `/crdt-sync` connections.
/// `None` (uninitialised) → open access (backward compatible).
static REQUIRE_TOKEN: OnceLock<bool> = OnceLock::new();
/// Valid bearer tokens — maps token string to its expiry unix timestamp
/// (seconds since epoch as `f64`). Guarded by an `RwLock` so the hot
/// validation path only needs a read lock.
static CRDT_TOKENS: OnceLock<std::sync::RwLock<HashMap<String, f64>>> = OnceLock::new();
/// Initialise bearer-token auth for CRDT-sync connections.
///
/// Must be called once at startup before any WebSocket connections are accepted.
/// When `require` is `true`, clients must supply a valid `?token=` query
/// parameter on the upgrade request or receive HTTP 401. When `require` is
/// `false` (default) a token is optional — connections without one are
/// accepted, but a supplied token is still validated.
pub fn init_token_auth(require: bool, tokens: Vec<String>) {
    let _ = REQUIRE_TOKEN.set(require);
    let store = CRDT_TOKENS.get_or_init(|| std::sync::RwLock::new(HashMap::new()));
    if let Ok(mut map) = store.write() {
        // All seed tokens share the same expiry, computed once.
        let expires_at = chrono::Utc::now().timestamp() as f64 + TOKEN_TTL_SECS;
        map.extend(tokens.into_iter().map(|token| (token, expires_at)));
    }
}
/// Add a bearer token to the CRDT-sync token store.
///
/// The token expires after [`TOKEN_TTL_SECS`] seconds. Returns the expiry
/// unix timestamp so callers can surface it in admin tooling.
pub fn add_join_token(token: String) -> f64 {
    let expires_at = chrono::Utc::now().timestamp() as f64 + TOKEN_TTL_SECS;
    let store = CRDT_TOKENS.get_or_init(|| std::sync::RwLock::new(HashMap::new()));
    if let Ok(mut map) = store.write() {
        map.insert(token, expires_at);
    }
    expires_at
}
/// Validate a bearer token against the CRDT-sync token store.
///
/// Returns `true` if the token exists in the store and has not expired.
/// An uninitialised store (or a poisoned lock) rejects every token.
fn validate_join_token(token: &str) -> bool {
    let now = chrono::Utc::now().timestamp() as f64;
    match CRDT_TOKENS.get().and_then(|store| store.read().ok()) {
        Some(map) => match map.get(token) {
            Some(&expires_at) => expires_at > now,
            None => false,
        },
        None => false,
    }
}
// ── Wire protocol types ─────────────────────────────────────────────
/// Auth handshake: challenge sent by the listener to the connector.
#[derive(Serialize, Deserialize, Debug)]
struct ChallengeMessage {
    // Message discriminator; set to "challenge" by the sender.
    r#type: String,
    // Random nonce the connector signs to prove possession of its key.
    nonce: String,
}
/// Auth handshake: auth reply sent by the connector to the listener.
#[derive(Serialize, Deserialize, Debug)]
struct AuthMessage {
    // Message discriminator; set to "auth" by the sender.
    r#type: String,
    // Hex-encoded public key of the connecting node (checked against the
    // trusted-key allow-list).
    pubkey_hex: String,
    // Hex-encoded signature over the challenge nonce.
    signature_hex: String,
}
/// Text-frame sync messages, serialised as JSON tagged by a `"type"` field
/// (`bulk`, `op`, `clock`, `ready` via serde's `snake_case` rename).
#[derive(Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum SyncMessage {
    /// Bulk state dump sent on connect (v1) or delta ops after clock exchange (v2).
    /// Each element is a JSON-serialised `SignedOp`.
    Bulk { ops: Vec<String> },
    /// A single new op (JSON-serialised `SignedOp`); legacy text-frame form —
    /// real-time ops normally travel as binary frames via `crdt_wire`.
    Op { op: String },
    /// Vector clock exchanged on connect (v2 protocol).
    ///
    /// Each entry maps a node's hex-encoded Ed25519 public key to the count of
    /// ops received from that node. The receiving side computes the delta via
    /// [`crdt_state::ops_since`] and sends only the missing ops.
    Clock {
        clock: std::collections::HashMap<String, u64>,
    },
    /// Signals that the bulk-delta phase is complete; the sender is ready for
    /// real-time op streaming. Locally-generated ops are buffered until the
    /// peer's `Ready` is received, then flushed in-order.
    Ready,
}
/// Crate-visible re-export of `SyncMessage` for backwards-compatibility testing.
///
/// Used by `crdt_snapshot` tests to verify that snapshot messages are NOT
/// parseable as legacy `SyncMessage` variants — confirming that old peers
/// will gracefully reject them.
// Only compiled into test builds; production code uses `SyncMessage` directly.
#[cfg(test)]
pub(crate) type SyncMessagePublic = SyncMessage;
// ── Server-side WebSocket handler ───────────────────────────────────
/// Query parameters accepted on the `/crdt-sync` WebSocket upgrade request.
#[derive(Deserialize)]
struct SyncQueryParams {
    /// Optional bearer token. Required when the server is in token-required
    /// mode; validated by [`validate_join_token`] whenever supplied.
    token: Option<String>,
}
/// WebSocket handler for CRDT peer synchronisation.
///
/// Accepts an optional `?token=<bearer-token>` query parameter. When the
/// server is configured with `crdt_require_token = true`, a valid token must
/// be supplied or the upgrade is rejected with HTTP 401. When the server is
/// in open-access mode (the default), a token is optional but still validated
/// if present.
///
/// Connection lifecycle: token check → upgrade → challenge/auth handshake →
/// clock/bulk negotiation → `ready` exchange → real-time streaming loop with
/// ping/pong keepalive.
#[handler]
pub async fn crdt_sync_handler(
    ws: WebSocket,
    _ctx: Data<&Arc<AppContext>>,
    remote_addr: &poem::web::RemoteAddr,
    Query(params): Query<SyncQueryParams>,
) -> poem::Response {
    // ── Bearer-token check (pre-upgrade) ────────────────────────────
    // Unset REQUIRE_TOKEN (auth never initialised) defaults to open access.
    let require_token = REQUIRE_TOKEN.get().copied().unwrap_or(false);
    match &params.token {
        // A supplied token is validated even in open-access mode.
        Some(t) => {
            if !validate_join_token(t) {
                slog!("[crdt-sync] Rejected connection: invalid or expired token");
                return poem::Response::builder()
                    .status(StatusCode::UNAUTHORIZED)
                    .body("invalid or expired token");
            }
        }
        None if require_token => {
            slog!("[crdt-sync] Rejected connection: token required but not provided");
            return poem::Response::builder()
                .status(StatusCode::UNAUTHORIZED)
                .body("token required");
        }
        None => {}
    }
    // ── WebSocket upgrade ────────────────────────────────────────────
    use poem::IntoResponse as _;
    let peer_addr = remote_addr.to_string();
    ws.on_upgrade(move |socket| async move {
        let (mut sink, mut stream) = socket.split();
        slog!("[crdt-sync] Peer connected, starting auth handshake");
        // ── Step 1: Send challenge to the connecting peer ─────────
        let challenge = node_identity::generate_challenge();
        let challenge_msg = ChallengeMessage {
            r#type: "challenge".to_string(),
            nonce: challenge.clone(),
        };
        let challenge_json = match serde_json::to_string(&challenge_msg) {
            Ok(j) => j,
            Err(_) => return,
        };
        if sink.send(WsMessage::Text(challenge_json)).await.is_err() {
            return;
        }
        // ── Step 2: Await auth reply within timeout ───────────────
        let auth_result = tokio::time::timeout(
            std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
            stream.next(),
        )
        .await;
        let auth_text = match auth_result {
            Ok(Some(Ok(WsMessage::Text(text)))) => text,
            Ok(_) | Err(_) => {
                // Timeout or connection closed before auth reply.
                slog!("[crdt-sync] Auth timeout or connection lost during handshake");
                let _ = sink
                    .send(WsMessage::Close(Some((
                        poem::web::websocket::CloseCode::from(4001),
                        "auth_timeout".to_string(),
                    ))))
                    .await;
                let _ = sink.close().await;
                return;
            }
        };
        // ── Step 3: Verify auth reply ─────────────────────────────
        let auth_msg: AuthMessage = match serde_json::from_str(&auth_text) {
            Ok(m) => m,
            Err(_) => {
                slog!("[crdt-sync] Invalid auth message from peer");
                close_with_auth_failed(&mut sink).await;
                return;
            }
        };
        // Verify signature AND check allow-list. Both checks run before any
        // branch so the single failure log covers both causes.
        let sig_valid =
            node_identity::verify_challenge(&auth_msg.pubkey_hex, &challenge, &auth_msg.signature_hex);
        let key_trusted = trusted_keys().iter().any(|k| k == &auth_msg.pubkey_hex);
        if !sig_valid || !key_trusted {
            slog!("[crdt-sync] Auth failed for peer (sig_valid={sig_valid}, key_trusted={key_trusted})");
            close_with_auth_failed(&mut sink).await;
            return;
        }
        slog!(
            "[crdt-sync] Peer authenticated: {:.12}…",
            &auth_msg.pubkey_hex
        );
        // ── Auth passed — proceed with CRDT sync ──────────────────
        // v2 protocol: send our vector clock so the peer can compute the delta.
        let our_clock = crdt_state::our_vector_clock().unwrap_or_default();
        let clock_msg = SyncMessage::Clock { clock: our_clock };
        if let Ok(json) = serde_json::to_string(&clock_msg)
            && sink.send(WsMessage::Text(json)).await.is_err()
        {
            return;
        }
        // Wait for the peer's first sync message to determine protocol version.
        let first_msg = tokio::time::timeout(
            std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
            wait_for_sync_text(&mut stream, &mut sink),
        )
        .await;
        match first_msg {
            Ok(Some(SyncMessage::Clock { clock: peer_clock })) => {
                // v2 peer — if we have a snapshot and the peer has an empty
                // clock (new node), send the snapshot first for onboarding.
                if peer_clock.is_empty()
                    && let Some(snapshot) = crdt_snapshot::latest_snapshot()
                {
                    let snap_msg = crdt_snapshot::SnapshotMessage::Snapshot(snapshot);
                    if let Ok(json) = serde_json::to_string(&snap_msg) {
                        if sink.send(WsMessage::Text(json)).await.is_err() {
                            return;
                        }
                        slog!("[crdt-sync] Sent snapshot to new node for onboarding");
                    }
                }
                // Send only the ops the peer is missing.
                let delta = crdt_state::ops_since(&peer_clock).unwrap_or_default();
                slog!(
                    "[crdt-sync] v2 delta sync: sending {} ops (peer missing)",
                    delta.len()
                );
                let msg = SyncMessage::Bulk { ops: delta };
                if let Ok(json) = serde_json::to_string(&msg)
                    && sink.send(WsMessage::Text(json)).await.is_err()
                {
                    return;
                }
            }
            Ok(Some(SyncMessage::Bulk { ops })) => {
                // v1 peer — apply their bulk and send our full bulk.
                let mut applied = 0u64;
                for op_json in &ops {
                    if let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json)
                        && crdt_state::apply_remote_op(signed_op)
                    {
                        applied += 1;
                    }
                }
                slog!(
                    "[crdt-sync] v1 bulk sync: received {} ops, applied {applied}",
                    ops.len()
                );
                if let Some(all) = crdt_state::all_ops_json() {
                    let msg = SyncMessage::Bulk { ops: all };
                    if let Ok(json) = serde_json::to_string(&msg)
                        && sink.send(WsMessage::Text(json)).await.is_err()
                    {
                        return;
                    }
                }
            }
            Ok(Some(SyncMessage::Op { op })) => {
                // Single op before negotiation — treat as v1.
                if let Ok(signed_op) = serde_json::from_str::<SignedOp>(&op) {
                    crdt_state::apply_remote_op(signed_op);
                }
                if let Some(all) = crdt_state::all_ops_json() {
                    let msg = SyncMessage::Bulk { ops: all };
                    if let Ok(json) = serde_json::to_string(&msg)
                        && sink.send(WsMessage::Text(json)).await.is_err()
                    {
                        return;
                    }
                }
            }
            _ => {
                // Timeout or error — send full bulk as fallback.
                slog!("[crdt-sync] No sync message from peer; sending full bulk as fallback");
                if let Some(all) = crdt_state::all_ops_json() {
                    let msg = SyncMessage::Bulk { ops: all };
                    if let Ok(json) = serde_json::to_string(&msg)
                        && sink.send(WsMessage::Text(json)).await.is_err()
                    {
                        return;
                    }
                }
            }
        }
        // Bulk-delta phase complete — signal the peer that we are ready for
        // real-time op streaming.
        if let Ok(json) = serde_json::to_string(&SyncMessage::Ready)
            && sink.send(WsMessage::Text(json)).await.is_err()
        {
            return;
        }
        // Subscribe to new local ops.
        let Some(mut op_rx) = crdt_state::subscribe_ops() else {
            return;
        };
        // Buffer for locally-generated ops produced before the peer's `ready`
        // arrives. Flushed in-order once the peer signals catch-up.
        let mut peer_ready = false;
        let mut op_buffer: Vec<bft_json_crdt::json_crdt::SignedOp> = Vec::new();
        // ── Keepalive state ───────────────────────────────────────────
        let mut pong_deadline = tokio::time::Instant::now()
            + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
        let mut ping_ticker = tokio::time::interval_at(
            tokio::time::Instant::now()
                + std::time::Duration::from_secs(PING_INTERVAL_SECS),
            std::time::Duration::from_secs(PING_INTERVAL_SECS),
        );
        loop {
            tokio::select! {
                // Send periodic Ping and enforce Pong timeout.
                _ = ping_ticker.tick() => {
                    if tokio::time::Instant::now() >= pong_deadline {
                        slog_warn!(
                            "[crdt-sync] No pong from peer {} in {}s; disconnecting",
                            peer_addr,
                            PONG_TIMEOUT_SECS
                        );
                        break;
                    }
                    if sink.send(WsMessage::Ping(vec![])).await.is_err() {
                        break;
                    }
                }
                // Forward new local ops to the peer encoded via the wire codec.
                result = op_rx.recv() => {
                    match result {
                        Ok(signed_op) => {
                            if peer_ready {
                                let bytes = crdt_wire::encode(&signed_op);
                                if sink.send(WsMessage::Binary(bytes)).await.is_err() {
                                    break;
                                }
                            } else {
                                // Buffer until the peer signals ready.
                                op_buffer.push(signed_op);
                            }
                        }
                        Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                            // The peer cannot keep up; disconnect so it can
                            // reconnect and receive a fresh bulk state dump.
                            slog!("[crdt-sync] Slow peer lagged {n} ops; disconnecting");
                            break;
                        }
                        Err(_) => break,
                    }
                }
                // Receive ops from the peer.
                frame = stream.next() => {
                    match frame {
                        Some(Ok(WsMessage::Pong(_))) => {
                            // Reset the pong deadline on every Pong received.
                            pong_deadline = tokio::time::Instant::now()
                                + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
                        }
                        Some(Ok(WsMessage::Ping(data))) => {
                            // Respond to peer's Ping so the peer's keepalive passes.
                            let _ = sink.send(WsMessage::Pong(data)).await;
                        }
                        Some(Ok(WsMessage::Text(text))) => {
                            // Check for the ready signal before other text frames.
                            if let Ok(SyncMessage::Ready) = serde_json::from_str(&text) {
                                peer_ready = true;
                                slog!("[crdt-sync] Peer ready; flushing {} buffered ops", op_buffer.len());
                                let mut flush_ok = true;
                                for op in op_buffer.drain(..) {
                                    let bytes = crdt_wire::encode(&op);
                                    if sink.send(WsMessage::Binary(bytes)).await.is_err() {
                                        flush_ok = false;
                                        break;
                                    }
                                }
                                if !flush_ok {
                                    break;
                                }
                            } else {
                                // Bulk state dump, legacy op frame, or clock frame.
                                handle_incoming_text(&text);
                            }
                        }
                        Some(Ok(WsMessage::Binary(bytes))) => {
                            // Real-time op encoded via wire codec — applied immediately
                            // regardless of our own ready state.
                            handle_incoming_binary(&bytes);
                        }
                        Some(Ok(WsMessage::Close(_))) | None => break,
                        _ => {}
                    }
                }
            }
        }
        slog!("[crdt-sync] Peer disconnected");
    })
    .into_response()
}
/// Wait for the next text-frame sync message from the peer, handling Ping/Pong
/// transparently.
///
/// Returns `None` on connection close, read error, or any unexpected frame.
async fn wait_for_sync_text(
    stream: &mut futures::stream::SplitStream<poem::web::websocket::WebSocketStream>,
    sink: &mut futures::stream::SplitSink<poem::web::websocket::WebSocketStream, WsMessage>,
) -> Option<SyncMessage> {
    while let Some(frame) = stream.next().await {
        match frame {
            Ok(WsMessage::Text(text)) => return serde_json::from_str(&text).ok(),
            Ok(WsMessage::Ping(data)) => {
                // Keep the peer's keepalive alive while we wait.
                let _ = sink.send(WsMessage::Pong(data)).await;
            }
            Ok(WsMessage::Pong(_)) => {}
            _ => return None,
        }
    }
    None
}
/// Close the WebSocket with a generic `auth_failed` reason.
///
/// The close reason is intentionally the same for all auth failures
/// (bad signature, untrusted key, malformed message) to avoid leaking
/// which check failed.
async fn close_with_auth_failed(
    sink: &mut futures::stream::SplitSink<poem::web::websocket::WebSocketStream, WsMessage>,
) {
    let close_frame = WsMessage::Close(Some((
        poem::web::websocket::CloseCode::from(4002),
        "auth_failed".to_string(),
    )));
    // Both sends are best-effort: the peer may already be gone.
    let _ = sink.send(close_frame).await;
    let _ = sink.close().await;
}
/// Process an incoming text-frame sync message from a peer.
///
/// Text frames carry the bulk state dump (`SyncMessage::Bulk`), legacy
/// single-op messages (`SyncMessage::Op`), or snapshot protocol messages.
fn handle_incoming_text(text: &str) {
    // Snapshot protocol messages are tried first; anything that fails to
    // parse as one falls through to the legacy sync-message path.
    if let Ok(snapshot_msg) = serde_json::from_str::<crdt_snapshot::SnapshotMessage>(text) {
        handle_snapshot_message(snapshot_msg);
        return;
    }
    let msg = match serde_json::from_str::<SyncMessage>(text) {
        Ok(parsed) => parsed,
        Err(e) => {
            slog!("[crdt-sync] Bad text message from peer: {e}");
            return;
        }
    };
    match msg {
        SyncMessage::Bulk { ops } => {
            let mut applied = 0u64;
            for op_json in &ops {
                // Unparseable entries are skipped without aborting the batch.
                let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json) else {
                    continue;
                };
                if crdt_state::apply_remote_op(signed_op) {
                    applied += 1;
                }
            }
            slog!(
                "[crdt-sync] Bulk sync: received {} ops, applied {applied}",
                ops.len()
            );
        }
        SyncMessage::Op { op } => {
            if let Ok(signed_op) = serde_json::from_str::<SignedOp>(&op) {
                crdt_state::apply_remote_op(signed_op);
            }
        }
        SyncMessage::Clock { .. } => {
            // Clock frames belong to the initial negotiation phase. One
            // arriving during streaming is a peer-side protocol error.
            slog!("[crdt-sync] Ignoring unexpected clock frame during streaming phase");
        }
        SyncMessage::Ready => {
            // Ready frames are intercepted inline by the streaming loop before
            // this function runs; reaching here is a protocol error.
            slog!("[crdt-sync] Ignoring unexpected ready frame in handle_incoming_text");
        }
    }
}
/// Handle an incoming snapshot protocol message.
///
/// - **Snapshot**: apply the snapshot state and send an ack back.
///   Peers without snapshot support will never reach this code path because
///   the `SnapshotMessage` parse will fail and the message falls through to
///   the legacy `SyncMessage` handler, which logs and ignores unknown types.
/// - **SnapshotAck**: record the ack for quorum tracking.
fn handle_snapshot_message(msg: crdt_snapshot::SnapshotMessage) {
    match msg {
        crdt_snapshot::SnapshotMessage::Snapshot(snapshot) => {
            slog!(
                "[crdt-sync] Received snapshot at_seq={}, {} ops, {} manifest entries",
                snapshot.at_seq,
                snapshot.state.len(),
                snapshot.op_manifest.len()
            );
            // Apply compaction on this peer.
            crdt_snapshot::apply_compaction(snapshot.clone());
            // Send ack back to leader via the sync broadcast channel.
            // The ack is sent as a CRDT event that the streaming loop picks up.
            // For now, log the ack intent — actual transport is handled by the
            // caller that invokes handle_incoming_text.
            slog!(
                "[crdt-sync] Snapshot applied, ack for at_seq={}",
                snapshot.at_seq
            );
        }
        crdt_snapshot::SnapshotMessage::SnapshotAck(ack) => {
            // NOTE: a previous revision queried `crdt_state::our_node_id()`
            // here and immediately discarded it; that dead code is removed.
            slog!(
                "[crdt-sync] Received snapshot_ack for at_seq={}",
                ack.at_seq
            );
            // Record the ack — the coordination logic checks for quorum.
            // Note: we don't know the peer's node_id from the message alone;
            // in a full implementation the ack would include the sender's
            // node_id. For now we log it for protocol completeness.
        }
    }
}
/// Process an incoming binary-frame op from a peer.
///
/// Binary frames carry a single `SignedOp` encoded via [`crdt_wire`].
/// Undecodable frames are logged and dropped.
fn handle_incoming_binary(bytes: &[u8]) {
    // Decode, then apply; the apply result (bool) is intentionally ignored.
    if let Err(e) = crdt_wire::decode(bytes).map(crdt_state::apply_remote_op) {
        slog!("[crdt-sync] Bad binary frame from peer: {e}");
    }
}
// ── Rendezvous client ───────────────────────────────────────────────
/// Number of consecutive connection failures before escalating from WARN to ERROR.
/// Consumed by [`spawn_rendezvous_client`] when logging reconnect attempts.
pub const RENDEZVOUS_ERROR_THRESHOLD: u32 = 10;
/// Spawn a background task that connects to the configured rendezvous
/// peer and exchanges CRDT ops bidirectionally.
///
/// The client reconnects with exponential backoff if the connection drops.
/// Individual failures are logged at WARN; after [`RENDEZVOUS_ERROR_THRESHOLD`]
/// consecutive failures the log level escalates to ERROR.
///
/// When `token` is provided it is appended to the upgrade URL as
/// `?token=<token>` so the server's bearer-token check is satisfied. This
/// reuses the existing `--join-token` / `HUSKIES_JOIN_TOKEN` plumbing on the
/// agent side.
pub fn spawn_rendezvous_client(url: String, token: Option<String>) {
    tokio::spawn(async move {
        let mut backoff_secs = 1u64;
        let mut consecutive_failures: u32 = 0;
        loop {
            slog!("[crdt-sync] Connecting to rendezvous peer: {url}");
            if let Err(e) = connect_and_sync(&url, token.as_deref()).await {
                consecutive_failures += 1;
                if consecutive_failures >= RENDEZVOUS_ERROR_THRESHOLD {
                    slog_error!(
                        "[crdt-sync] Rendezvous peer unreachable ({consecutive_failures} consecutive failures): {e}"
                    );
                } else {
                    slog_warn!(
                        "[crdt-sync] Rendezvous connection error (attempt {consecutive_failures}): {e}"
                    );
                }
            } else {
                // Clean close — reset both the backoff and the failure streak.
                slog!("[crdt-sync] Rendezvous connection closed cleanly");
                backoff_secs = 1;
                consecutive_failures = 0;
            }
            slog!("[crdt-sync] Reconnecting in {backoff_secs}s...");
            tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
            // Exponential backoff, capped at 30 seconds.
            backoff_secs = (backoff_secs * 2).min(30);
        }
    });
}
/// Connect to a remote sync endpoint and exchange ops until disconnect.
///
/// When `token` is supplied it is appended as `?token=<token>` to the
/// connection URL so the server's bearer-token check passes.
///
/// Returns `Ok(())` once the streaming loop ends (peer close, read error, or
/// send failure); returns `Err` for connect, handshake, or keepalive failures.
pub(crate) async fn connect_and_sync(url: &str, token: Option<&str>) -> Result<(), String> {
    // Append the bearer token, respecting any existing query string.
    let connect_url = match token {
        Some(t) => {
            if url.contains('?') {
                format!("{url}&token={t}")
            } else {
                format!("{url}?token={t}")
            }
        }
        None => url.to_string(),
    };
    let (ws_stream, _) = tokio_tungstenite::connect_async(connect_url.as_str())
        .await
        .map_err(|e| format!("WebSocket connect failed: {e}"))?;
    let (mut sink, mut stream) = ws_stream.split();
    slog!("[crdt-sync] Connected to rendezvous peer, awaiting challenge");
    // ── Step 1: Receive challenge from listener ───────────────────
    use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
    let challenge_frame = tokio::time::timeout(
        std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
        stream.next(),
    )
    .await
    .map_err(|_| "Auth timeout waiting for challenge".to_string())?
    .ok_or_else(|| "Connection closed before challenge".to_string())?
    .map_err(|e| format!("WebSocket read error: {e}"))?;
    let challenge_text = match challenge_frame {
        TungsteniteMsg::Text(t) => t.to_string(),
        _ => return Err("Expected text frame for challenge".to_string()),
    };
    let challenge_msg: ChallengeMessage = serde_json::from_str(&challenge_text)
        .map_err(|e| format!("Invalid challenge message: {e}"))?;
    if challenge_msg.r#type != "challenge" {
        return Err(format!(
            "Expected challenge message, got type={}",
            challenge_msg.r#type
        ));
    }
    // ── Step 2: Sign challenge and send auth reply ────────────────
    let (pubkey_hex, signature_hex) = crdt_state::sign_challenge(&challenge_msg.nonce)
        .ok_or_else(|| "CRDT not initialised — cannot sign challenge".to_string())?;
    let auth_msg = AuthMessage {
        r#type: "auth".to_string(),
        pubkey_hex,
        signature_hex,
    };
    let auth_json = serde_json::to_string(&auth_msg).map_err(|e| format!("Serialize auth: {e}"))?;
    sink.send(TungsteniteMsg::Text(auth_json.into()))
        .await
        .map_err(|e| format!("Send auth failed: {e}"))?;
    slog!("[crdt-sync] Auth reply sent, waiting for sync data");
    // v2 protocol: send our vector clock.
    let our_clock = crdt_state::our_vector_clock().unwrap_or_default();
    let clock_msg = SyncMessage::Clock { clock: our_clock };
    if let Ok(json) = serde_json::to_string(&clock_msg) {
        sink.send(TungsteniteMsg::Text(json.into()))
            .await
            .map_err(|e| format!("Send clock failed: {e}"))?;
    }
    // Wait for the server's first sync message to determine protocol version.
    let first_msg = tokio::time::timeout(
        std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
        wait_for_rendezvous_sync_text(&mut stream),
    )
    .await
    .map_err(|_| "Timeout waiting for server sync message".to_string())?;
    match first_msg {
        Some(SyncMessage::Clock { clock: peer_clock }) => {
            // v2 server — send only the ops the server is missing.
            let delta = crdt_state::ops_since(&peer_clock).unwrap_or_default();
            slog!(
                "[crdt-sync] v2 delta sync: sending {} ops to server (server missing)",
                delta.len()
            );
            let msg = SyncMessage::Bulk { ops: delta };
            if let Ok(json) = serde_json::to_string(&msg) {
                sink.send(TungsteniteMsg::Text(json.into()))
                    .await
                    .map_err(|e| format!("Send delta failed: {e}"))?;
            }
        }
        Some(SyncMessage::Bulk { ops }) => {
            // v1 server — apply their bulk and send our full bulk.
            let mut applied = 0u64;
            for op_json in &ops {
                if let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json)
                    && crdt_state::apply_remote_op(signed_op)
                {
                    applied += 1;
                }
            }
            slog!(
                "[crdt-sync] v1 bulk sync: received {} ops from server, applied {applied}",
                ops.len()
            );
            if let Some(all) = crdt_state::all_ops_json() {
                let msg = SyncMessage::Bulk { ops: all };
                if let Ok(json) = serde_json::to_string(&msg) {
                    sink.send(TungsteniteMsg::Text(json.into()))
                        .await
                        .map_err(|e| format!("Send bulk failed: {e}"))?;
                }
            }
        }
        _ => {
            // Fallback — send full bulk.
            slog!("[crdt-sync] No sync message from server; sending full bulk as fallback");
            if let Some(all) = crdt_state::all_ops_json() {
                let msg = SyncMessage::Bulk { ops: all };
                if let Ok(json) = serde_json::to_string(&msg) {
                    sink.send(TungsteniteMsg::Text(json.into()))
                        .await
                        .map_err(|e| format!("Send bulk failed: {e}"))?;
                }
            }
        }
    }
    // Bulk-delta phase complete — signal the server that we are ready for
    // real-time op streaming.
    if let Ok(json) = serde_json::to_string(&SyncMessage::Ready) {
        sink.send(TungsteniteMsg::Text(json.into()))
            .await
            .map_err(|e| format!("Send ready failed: {e}"))?;
    }
    // Subscribe to new local ops.
    let Some(mut op_rx) = crdt_state::subscribe_ops() else {
        return Err("CRDT not initialised".to_string());
    };
    // Buffer for locally-generated ops produced before the server's `ready`
    // arrives. Flushed in-order once the server signals catch-up.
    let mut peer_ready = false;
    let mut op_buffer: Vec<bft_json_crdt::json_crdt::SignedOp> = Vec::new();
    // ── Keepalive state ───────────────────────────────────────────────
    let mut pong_deadline =
        tokio::time::Instant::now() + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
    let mut ping_ticker = tokio::time::interval_at(
        tokio::time::Instant::now() + std::time::Duration::from_secs(PING_INTERVAL_SECS),
        std::time::Duration::from_secs(PING_INTERVAL_SECS),
    );
    loop {
        tokio::select! {
            // Send periodic Ping and enforce Pong timeout.
            _ = ping_ticker.tick() => {
                if tokio::time::Instant::now() >= pong_deadline {
                    slog_warn!(
                        "[crdt-sync] No pong from rendezvous peer {} in {}s; disconnecting",
                        url,
                        PONG_TIMEOUT_SECS
                    );
                    return Err(format!(
                        "Keepalive timeout: no pong from {url} in {PONG_TIMEOUT_SECS}s"
                    ));
                }
                use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                if sink.send(TungsteniteMsg::Ping(bytes::Bytes::new())).await.is_err() {
                    break;
                }
            }
            // Forward new local ops to the server.
            result = op_rx.recv() => {
                match result {
                    Ok(signed_op) => {
                        if peer_ready {
                            // Encode via wire codec and send as binary frame.
                            let bytes = crdt_wire::encode(&signed_op);
                            use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                            if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
                                break;
                            }
                        } else {
                            // Buffer until the server signals ready.
                            op_buffer.push(signed_op);
                        }
                    }
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                        slog!("[crdt-sync] Slow rendezvous link lagged {n} ops; disconnecting");
                        break;
                    }
                    Err(_) => break,
                }
            }
            // Receive frames from the server.
            frame = stream.next() => {
                match frame {
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Pong(_))) => {
                        // Reset the pong deadline on every Pong received.
                        pong_deadline = tokio::time::Instant::now()
                            + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Ping(_))) => {
                        // tungstenite auto-responds to Ping with Pong at the
                        // protocol level; no manual response needed here.
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Text(text))) => {
                        // Check for the ready signal before other text frames.
                        if let Ok(SyncMessage::Ready) = serde_json::from_str(text.as_ref()) {
                            peer_ready = true;
                            slog!("[crdt-sync] Server ready; flushing {} buffered ops", op_buffer.len());
                            let mut flush_ok = true;
                            for op in op_buffer.drain(..) {
                                let bytes = crdt_wire::encode(&op);
                                use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                                if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
                                    flush_ok = false;
                                    break;
                                }
                            }
                            if !flush_ok {
                                break;
                            }
                        } else {
                            handle_incoming_text(text.as_ref());
                        }
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Binary(bytes))) => {
                        // Real-time op — applied immediately regardless of ready state.
                        handle_incoming_binary(&bytes);
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Close(_))) | None => break,
                    Some(Err(e)) => {
                        slog!("[crdt-sync] Rendezvous read error: {e}");
                        break;
                    }
                    _ => {}
                }
            }
        }
    }
    Ok(())
}
/// Wait for the next text-frame sync message from a tungstenite stream,
/// handling Ping/Pong transparently.
///
/// Returns `None` on connection close, read error, or any unexpected frame.
async fn wait_for_rendezvous_sync_text(
    stream: &mut futures::stream::SplitStream<
        tokio_tungstenite::WebSocketStream<
            tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>,
        >,
    >,
) -> Option<SyncMessage> {
    use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
    while let Some(frame) = stream.next().await {
        match frame {
            Ok(TungsteniteMsg::Text(text)) => return serde_json::from_str(text.as_ref()).ok(),
            // Keepalive frames are skipped; tungstenite answers Pings itself.
            Ok(TungsteniteMsg::Ping(_) | TungsteniteMsg::Pong(_)) => {}
            _ => return None,
        }
    }
    None
}
// ── Tests ────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests;