refactor: split crdt_sync.rs into auth/wire/server/dispatch/client modules
The 3672-line crdt_sync.rs is split into a sub-module directory with co-located tests, per Rust convention:

- auth.rs: trusted-keys + bearer-token validation (230 lines)
- wire.rs: ChallengeMessage / AuthMessage / SyncMessage types (141 lines)
- server.rs: WebSocket server handler (1680 lines)
- dispatch.rs: incoming-message dispatch + bulk/clock/op handling (1028 lines)
- client.rs: rendezvous client + reconnect/backoff (464 lines)
- mod.rs: doc, cross-cutting constants, re-exports (75 lines)

No behaviour change. All 65 crdt_sync tests pass; full suite green (2635 tests with --test-threads=1).
This commit is contained in:
@@ -0,0 +1,464 @@
|
||||
//! Rendezvous client: connect to a remote peer, authenticate, and exchange CRDT ops.
|
||||
|
||||
use bft_json_crdt::json_crdt::SignedOp;
|
||||
use futures::{SinkExt, StreamExt};
|
||||
|
||||
use crate::crdt_state;
|
||||
use crate::crdt_wire;
|
||||
use crate::slog;
|
||||
use crate::slog_error;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::auth;
|
||||
use super::dispatch::{handle_incoming_binary, handle_incoming_text};
|
||||
use super::wire::{AuthMessage, ChallengeMessage, SyncMessage};
|
||||
use super::{AUTH_TIMEOUT_SECS, PING_INTERVAL_SECS, PONG_TIMEOUT_SECS};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use auth::{add_join_token, init_token_auth}; // needed by tests
|
||||
|
||||
// ── Rendezvous client ───────────────────────────────────────────────
|
||||
|
||||
/// Number of consecutive connection failures before escalating from WARN to ERROR.
|
||||
/// The reconnect loop in `spawn_rendezvous_client` compares its running failure
/// count against this value with `>=`, so the failure whose count equals the
/// threshold is the first one logged at ERROR.
pub const RENDEZVOUS_ERROR_THRESHOLD: u32 = 10;
|
||||
|
||||
/// Spawn a background task that connects to the configured rendezvous
|
||||
/// peer and exchanges CRDT ops bidirectionally.
|
||||
///
|
||||
/// The client reconnects with exponential backoff if the connection drops.
|
||||
/// Individual failures are logged at WARN; after [`RENDEZVOUS_ERROR_THRESHOLD`]
|
||||
/// consecutive failures the log level escalates to ERROR.
|
||||
///
|
||||
/// When `token` is provided it is appended to the upgrade URL as
|
||||
/// `?token=<token>` so the server's bearer-token check is satisfied. This
|
||||
/// reuses the existing `--join-token` / `HUSKIES_JOIN_TOKEN` plumbing on the
|
||||
/// agent side.
|
||||
pub fn spawn_rendezvous_client(url: String, token: Option<String>) {
|
||||
tokio::spawn(async move {
|
||||
let mut backoff_secs = 1u64;
|
||||
let mut consecutive_failures: u32 = 0;
|
||||
loop {
|
||||
slog!("[crdt-sync] Connecting to rendezvous peer: {url}");
|
||||
match connect_and_sync(&url, token.as_deref()).await {
|
||||
Ok(()) => {
|
||||
slog!("[crdt-sync] Rendezvous connection closed cleanly");
|
||||
backoff_secs = 1;
|
||||
consecutive_failures = 0;
|
||||
}
|
||||
Err(e) => {
|
||||
consecutive_failures += 1;
|
||||
if consecutive_failures >= RENDEZVOUS_ERROR_THRESHOLD {
|
||||
slog_error!(
|
||||
"[crdt-sync] Rendezvous peer unreachable ({consecutive_failures} consecutive failures): {e}"
|
||||
);
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[crdt-sync] Rendezvous connection error (attempt {consecutive_failures}): {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
slog!("[crdt-sync] Reconnecting in {backoff_secs}s...");
|
||||
tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
|
||||
backoff_secs = (backoff_secs * 2).min(30);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Connect to a remote sync endpoint and exchange ops until disconnect.
|
||||
///
|
||||
/// When `token` is supplied it is appended as `?token=<token>` to the
|
||||
/// connection URL so the server's bearer-token check passes.
|
||||
pub(crate) async fn connect_and_sync(url: &str, token: Option<&str>) -> Result<(), String> {
|
||||
let connect_url = match token {
|
||||
Some(t) => {
|
||||
if url.contains('?') {
|
||||
format!("{url}&token={t}")
|
||||
} else {
|
||||
format!("{url}?token={t}")
|
||||
}
|
||||
}
|
||||
None => url.to_string(),
|
||||
};
|
||||
let (ws_stream, _) = tokio_tungstenite::connect_async(connect_url.as_str())
|
||||
.await
|
||||
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
|
||||
|
||||
let (mut sink, mut stream) = ws_stream.split();
|
||||
|
||||
slog!("[crdt-sync] Connected to rendezvous peer, awaiting challenge");
|
||||
|
||||
// ── Step 1: Receive challenge from listener ───────────────────
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
|
||||
let challenge_frame = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
|
||||
stream.next(),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| "Auth timeout waiting for challenge".to_string())?
|
||||
.ok_or_else(|| "Connection closed before challenge".to_string())?
|
||||
.map_err(|e| format!("WebSocket read error: {e}"))?;
|
||||
|
||||
let challenge_text = match challenge_frame {
|
||||
TungsteniteMsg::Text(t) => t.to_string(),
|
||||
_ => return Err("Expected text frame for challenge".to_string()),
|
||||
};
|
||||
|
||||
let challenge_msg: ChallengeMessage = serde_json::from_str(&challenge_text)
|
||||
.map_err(|e| format!("Invalid challenge message: {e}"))?;
|
||||
|
||||
if challenge_msg.r#type != "challenge" {
|
||||
return Err(format!(
|
||||
"Expected challenge message, got type={}",
|
||||
challenge_msg.r#type
|
||||
));
|
||||
}
|
||||
|
||||
// ── Step 2: Sign challenge and send auth reply ────────────────
|
||||
let (pubkey_hex, signature_hex) = crdt_state::sign_challenge(&challenge_msg.nonce)
|
||||
.ok_or_else(|| "CRDT not initialised — cannot sign challenge".to_string())?;
|
||||
|
||||
let auth_msg = AuthMessage {
|
||||
r#type: "auth".to_string(),
|
||||
pubkey_hex,
|
||||
signature_hex,
|
||||
};
|
||||
let auth_json = serde_json::to_string(&auth_msg).map_err(|e| format!("Serialize auth: {e}"))?;
|
||||
sink.send(TungsteniteMsg::Text(auth_json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send auth failed: {e}"))?;
|
||||
|
||||
slog!("[crdt-sync] Auth reply sent, waiting for sync data");
|
||||
|
||||
// v2 protocol: send our vector clock.
|
||||
let our_clock = crdt_state::our_vector_clock().unwrap_or_default();
|
||||
let clock_msg = SyncMessage::Clock { clock: our_clock };
|
||||
if let Ok(json) = serde_json::to_string(&clock_msg) {
|
||||
sink.send(TungsteniteMsg::Text(json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send clock failed: {e}"))?;
|
||||
}
|
||||
|
||||
// Wait for the server's first sync message.
|
||||
let first_msg = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
|
||||
wait_for_rendezvous_sync_text(&mut stream),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| "Timeout waiting for server sync message".to_string())?;
|
||||
|
||||
match first_msg {
|
||||
Some(SyncMessage::Clock { clock: peer_clock }) => {
|
||||
// v2 server — send only the ops the server is missing.
|
||||
let delta = crdt_state::ops_since(&peer_clock).unwrap_or_default();
|
||||
slog!(
|
||||
"[crdt-sync] v2 delta sync: sending {} ops to server (server missing)",
|
||||
delta.len()
|
||||
);
|
||||
let msg = SyncMessage::Bulk { ops: delta };
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
sink.send(TungsteniteMsg::Text(json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send delta failed: {e}"))?;
|
||||
}
|
||||
}
|
||||
Some(SyncMessage::Bulk { ops }) => {
|
||||
// v1 server — apply their bulk and send our full bulk.
|
||||
let mut applied = 0u64;
|
||||
for op_json in &ops {
|
||||
if let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json)
|
||||
&& crdt_state::apply_remote_op(signed_op)
|
||||
{
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
slog!(
|
||||
"[crdt-sync] v1 bulk sync: received {} ops from server, applied {applied}",
|
||||
ops.len()
|
||||
);
|
||||
if let Some(all) = crdt_state::all_ops_json() {
|
||||
let msg = SyncMessage::Bulk { ops: all };
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
sink.send(TungsteniteMsg::Text(json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send bulk failed: {e}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Fallback — send full bulk.
|
||||
slog!("[crdt-sync] No sync message from server; sending full bulk as fallback");
|
||||
if let Some(all) = crdt_state::all_ops_json() {
|
||||
let msg = SyncMessage::Bulk { ops: all };
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
sink.send(TungsteniteMsg::Text(json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send bulk failed: {e}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bulk-delta phase complete — signal the server that we are ready for
|
||||
// real-time op streaming.
|
||||
if let Ok(json) = serde_json::to_string(&SyncMessage::Ready) {
|
||||
sink.send(TungsteniteMsg::Text(json.into()))
|
||||
.await
|
||||
.map_err(|e| format!("Send ready failed: {e}"))?;
|
||||
}
|
||||
|
||||
// Subscribe to new local ops.
|
||||
let Some(mut op_rx) = crdt_state::subscribe_ops() else {
|
||||
return Err("CRDT not initialised".to_string());
|
||||
};
|
||||
|
||||
// Buffer for locally-generated ops produced before the server's `ready`
|
||||
// arrives. Flushed in-order once the server signals catch-up.
|
||||
let mut peer_ready = false;
|
||||
let mut op_buffer: Vec<bft_json_crdt::json_crdt::SignedOp> = Vec::new();
|
||||
|
||||
// ── Keepalive state ───────────────────────────────────────────────
|
||||
let mut pong_deadline =
|
||||
tokio::time::Instant::now() + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
|
||||
let mut ping_ticker = tokio::time::interval_at(
|
||||
tokio::time::Instant::now() + std::time::Duration::from_secs(PING_INTERVAL_SECS),
|
||||
std::time::Duration::from_secs(PING_INTERVAL_SECS),
|
||||
);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
// Send periodic Ping and enforce Pong timeout.
|
||||
_ = ping_ticker.tick() => {
|
||||
if tokio::time::Instant::now() >= pong_deadline {
|
||||
slog_warn!(
|
||||
"[crdt-sync] No pong from rendezvous peer {} in {}s; disconnecting",
|
||||
url,
|
||||
PONG_TIMEOUT_SECS
|
||||
);
|
||||
return Err(format!(
|
||||
"Keepalive timeout: no pong from {url} in {PONG_TIMEOUT_SECS}s"
|
||||
));
|
||||
}
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
if sink.send(TungsteniteMsg::Ping(bytes::Bytes::new())).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
result = op_rx.recv() => {
|
||||
match result {
|
||||
Ok(signed_op) => {
|
||||
if peer_ready {
|
||||
// Encode via wire codec and send as binary frame.
|
||||
let bytes = crdt_wire::encode(&signed_op);
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Buffer until the server signals ready.
|
||||
op_buffer.push(signed_op);
|
||||
}
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
|
||||
slog!("[crdt-sync] Slow rendezvous link lagged {n} ops; disconnecting");
|
||||
break;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
frame = stream.next() => {
|
||||
match frame {
|
||||
Some(Ok(tokio_tungstenite::tungstenite::Message::Pong(_))) => {
|
||||
// Reset the pong deadline on every Pong received.
|
||||
pong_deadline = tokio::time::Instant::now()
|
||||
+ std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
|
||||
}
|
||||
Some(Ok(tokio_tungstenite::tungstenite::Message::Ping(_))) => {
|
||||
// tungstenite auto-responds to Ping with Pong at the
|
||||
// protocol level; no manual response needed here.
|
||||
}
|
||||
Some(Ok(tokio_tungstenite::tungstenite::Message::Text(text))) => {
|
||||
// Check for the ready signal before other text frames.
|
||||
if let Ok(SyncMessage::Ready) = serde_json::from_str(text.as_ref()) {
|
||||
peer_ready = true;
|
||||
slog!("[crdt-sync] Server ready; flushing {} buffered ops", op_buffer.len());
|
||||
let mut flush_ok = true;
|
||||
for op in op_buffer.drain(..) {
|
||||
let bytes = crdt_wire::encode(&op);
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
|
||||
flush_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !flush_ok {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
handle_incoming_text(text.as_ref());
|
||||
}
|
||||
}
|
||||
Some(Ok(tokio_tungstenite::tungstenite::Message::Binary(bytes))) => {
|
||||
// Real-time op — applied immediately regardless of ready state.
|
||||
handle_incoming_binary(&bytes);
|
||||
}
|
||||
Some(Ok(tokio_tungstenite::tungstenite::Message::Close(_))) | None => break,
|
||||
Some(Err(e)) => {
|
||||
slog!("[crdt-sync] Rendezvous read error: {e}");
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Wait for the next text-frame sync message from a tungstenite stream,
|
||||
/// handling Ping/Pong transparently.
|
||||
///
|
||||
/// Returns `None` on connection close or read error.
|
||||
async fn wait_for_rendezvous_sync_text(
|
||||
stream: &mut futures::stream::SplitStream<
|
||||
tokio_tungstenite::WebSocketStream<
|
||||
tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>,
|
||||
>,
|
||||
>,
|
||||
) -> Option<SyncMessage> {
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
loop {
|
||||
match stream.next().await {
|
||||
Some(Ok(TungsteniteMsg::Text(text))) => {
|
||||
return serde_json::from_str(text.as_ref()).ok();
|
||||
}
|
||||
Some(Ok(TungsteniteMsg::Ping(_) | TungsteniteMsg::Pong(_))) => continue,
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;

    /// A top-level `rendezvous` key in the project TOML is surfaced on the config.
    #[test]
    fn config_rendezvous_parsed_from_toml() {
        let source = r#"
rendezvous = "ws://remote:3001/crdt-sync"

[[agent]]
name = "test"
"#;
        let parsed: crate::config::ProjectConfig = toml::from_str(source).unwrap();
        let rendezvous = parsed.rendezvous.as_deref();
        assert_eq!(rendezvous, Some("ws://remote:3001/crdt-sync"));
    }

    /// The default config has no rendezvous peer configured.
    #[test]
    fn config_rendezvous_defaults_to_none() {
        let defaults = crate::config::ProjectConfig::default();
        assert!(defaults.rendezvous.is_none());
    }

    /// Failure counts 1..threshold-1 all sit below the ERROR threshold.
    #[test]
    fn failure_counter_warn_below_threshold() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;

        // Counts 1 ..= threshold-1: what the counter holds after each of the
        // first threshold-1 failures.
        for consecutive_failures in 1..threshold {
            assert!(
                consecutive_failures < threshold,
                "failure {consecutive_failures} must be below ERROR threshold {threshold}"
            );
        }
    }

    /// A failure count equal to the threshold satisfies the escalation check.
    #[test]
    fn failure_counter_error_at_threshold() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;
        let consecutive_failures: u32 = threshold;
        assert!(
            consecutive_failures >= threshold,
            "failure {consecutive_failures} must reach or exceed ERROR threshold {threshold}"
        );
    }

    /// After a reset to zero, the next failure is back below the threshold.
    #[test]
    fn failure_counter_resets_on_success() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;

        // Start well past the escalation point.
        let mut consecutive_failures: u32 = threshold + 5;
        assert!(consecutive_failures >= threshold);

        // A clean reconnect zeroes the counter.
        consecutive_failures = 0;
        assert_eq!(
            consecutive_failures, 0,
            "counter must reset to 0 on success"
        );

        // The following failure counts as attempt 1.
        consecutive_failures += 1;
        assert!(
            consecutive_failures < threshold,
            "first failure after reset must be below ERROR threshold"
        );
    }

    /// Pins the escalation threshold at its documented value.
    #[test]
    fn error_threshold_is_ten() {
        assert_eq!(
            RENDEZVOUS_ERROR_THRESHOLD,
            10,
            "ERROR escalation threshold must be 10 consecutive failures"
        );
    }

    /// Token is appended with `?` on a bare URL and `&` when a query exists.
    #[test]
    fn rendezvous_url_with_token_appended() {
        let token = "my-secret-token";
        let with_token = |base: &str| -> String {
            if base.contains('?') {
                format!("{base}&token={token}")
            } else {
                format!("{base}?token={token}")
            }
        };

        assert_eq!(
            with_token("ws://host:3001/crdt-sync"),
            "ws://host:3001/crdt-sync?token=my-secret-token"
        );

        // With existing query params.
        assert_eq!(
            with_token("ws://host:3001/crdt-sync?foo=bar"),
            "ws://host:3001/crdt-sync?foo=bar&token=my-secret-token"
        );
    }

    /// Without a token the URL is used verbatim.
    #[test]
    fn rendezvous_url_without_token_unchanged() {
        let base = "ws://host:3001/crdt-sync";
        let token: Option<&str> = None;
        let connect_url = token.map_or_else(|| base.to_string(), |t| format!("{base}?token={t}"));
        assert_eq!(connect_url, base);
    }
}
|
||||
Reference in New Issue
Block a user