refactor: split crdt_sync.rs into auth/wire/server/dispatch/client modules
The 3672-line crdt_sync.rs is split into a sub-module directory with co-located tests per Rust convention: - auth.rs: trusted-keys + bearer-token validation (230 lines) - wire.rs: ChallengeMessage / AuthMessage / SyncMessage types (141 lines) - server.rs: WebSocket server handler (1680 lines) - dispatch.rs: incoming-message dispatch + bulk/clock/op handling (1028 lines) - client.rs: rendezvous client + reconnect/backoff (464 lines) - mod.rs: doc, cross-cutting constants, re-exports (75 lines) No behaviour change. All 65 crdt_sync tests pass; full suite green (2635 tests with --test-threads=1).
This commit is contained in:
@@ -0,0 +1,230 @@
|
||||
//! Auth: trusted-key allow-list and bearer-token validation for `/crdt-sync` connections.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// Trusted public keys loaded once at startup.
|
||||
static TRUSTED_KEYS: OnceLock<Vec<String>> = OnceLock::new();
|
||||
|
||||
/// Initialise the trusted-key allow-list for connect-time mutual auth.
|
||||
///
|
||||
/// Must be called once at startup before any WebSocket connections are
|
||||
/// accepted. Subsequent calls are no-ops (OnceLock).
|
||||
pub fn init_trusted_keys(keys: Vec<String>) {
|
||||
let _ = TRUSTED_KEYS.set(keys);
|
||||
}
|
||||
|
||||
/// Return a reference to the trusted-key allow-list.
|
||||
pub(super) fn trusted_keys() -> &'static [String] {
|
||||
TRUSTED_KEYS.get().map(|v| v.as_slice()).unwrap_or(&[])
|
||||
}
|
||||
|
||||
// ── Bearer-token auth ───────────────────────────────────────────────

/// Time-to-live for CRDT bearer tokens in seconds (30 days).
///
/// Stored as `f64` because expiries are compared against `f64` unix
/// timestamps in the token store below.
pub(super) const TOKEN_TTL_SECS: f64 = 30.0 * 24.0 * 3600.0;

/// Whether a bearer token is required for `/crdt-sync` connections.
/// `None` (uninitialised) → open access (backward compatible).
pub(super) static REQUIRE_TOKEN: OnceLock<bool> = OnceLock::new();

/// Valid bearer tokens — maps token string to its expiry unix timestamp.
///
/// Lazily created on first use (`get_or_init`) by `init_token_auth` /
/// `add_join_token`; `validate_join_token` treats an uninitialised store
/// as "no valid tokens".
static CRDT_TOKENS: OnceLock<std::sync::RwLock<HashMap<String, f64>>> = OnceLock::new();
|
||||
|
||||
/// Initialise bearer-token auth for CRDT-sync connections.
|
||||
///
|
||||
/// Must be called once at startup before any WebSocket connections are accepted.
|
||||
/// When `require` is `true`, clients must supply a valid `?token=` query
|
||||
/// parameter on the upgrade request or receive HTTP 401. When `require` is
|
||||
/// `false` (default) a token is optional — connections without one are
|
||||
/// accepted, but a supplied token is still validated.
|
||||
pub fn init_token_auth(require: bool, tokens: Vec<String>) {
|
||||
let _ = REQUIRE_TOKEN.set(require);
|
||||
let store = CRDT_TOKENS.get_or_init(|| std::sync::RwLock::new(HashMap::new()));
|
||||
if let Ok(mut map) = store.write() {
|
||||
let now = chrono::Utc::now().timestamp() as f64;
|
||||
for token in tokens {
|
||||
map.insert(token, now + TOKEN_TTL_SECS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a bearer token to the CRDT-sync token store.
|
||||
///
|
||||
/// The token expires after [`TOKEN_TTL_SECS`] seconds. Returns the expiry
|
||||
/// unix timestamp so callers can surface it in admin tooling.
|
||||
pub fn add_join_token(token: String) -> f64 {
|
||||
let store = CRDT_TOKENS.get_or_init(|| std::sync::RwLock::new(HashMap::new()));
|
||||
let now = chrono::Utc::now().timestamp() as f64;
|
||||
let expires_at = now + TOKEN_TTL_SECS;
|
||||
if let Ok(mut map) = store.write() {
|
||||
map.insert(token, expires_at);
|
||||
}
|
||||
expires_at
|
||||
}
|
||||
|
||||
/// Validate a bearer token against the CRDT-sync token store.
|
||||
///
|
||||
/// Returns `true` if the token exists in the store and has not expired.
|
||||
pub(super) fn validate_join_token(token: &str) -> bool {
|
||||
let Some(store) = CRDT_TOKENS.get() else {
|
||||
return false;
|
||||
};
|
||||
let now = chrono::Utc::now().timestamp() as f64;
|
||||
store
|
||||
.read()
|
||||
.ok()
|
||||
.and_then(|map| map.get(token).copied())
|
||||
.is_some_and(|expires_at| expires_at > now)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;

    // Config-field coverage (trusted_keys / crdt_require_token / crdt_tokens)
    // lives here rather than in crate::config because this module is the
    // consumer of those fields.

    #[test]
    fn config_trusted_keys_parsed_from_toml() {
        let toml_str = r#"
trusted_keys = [
    "aabbccdd00112233aabbccdd00112233aabbccdd00112233aabbccdd00112233",
    "11223344556677881122334455667788112233445566778811223344556677ab",
]

[[agent]]
name = "test"
"#;
        let config: crate::config::ProjectConfig =
            crate::config::ProjectConfig::parse(toml_str).unwrap();
        assert_eq!(config.trusted_keys.len(), 2);
        assert_eq!(
            config.trusted_keys[0],
            "aabbccdd00112233aabbccdd00112233aabbccdd00112233aabbccdd00112233"
        );
    }

    #[test]
    fn config_trusted_keys_defaults_to_empty() {
        let config = crate::config::ProjectConfig::default();
        assert!(
            config.trusted_keys.is_empty(),
            "trusted_keys must default to empty (reject all)"
        );
    }

    #[test]
    fn valid_token_passes_validation() {
        // Unique token per run: the token store is a process-wide static
        // shared by every test in this module.
        let token = format!("test-valid-{}", uuid::Uuid::new_v4());
        super::add_join_token(token.clone());
        assert!(
            super::validate_join_token(&token),
            "A freshly added token must pass validation"
        );
    }

    #[test]
    fn bogus_token_fails_validation() {
        let bogus = "this-token-was-never-added-to-the-store";
        assert!(
            !super::validate_join_token(bogus),
            "An unknown token must fail validation"
        );
    }

    #[test]
    fn expired_token_fails_validation() {
        // Insert a token directly with an already-past expiry timestamp.
        let token = format!("test-expired-{}", uuid::Uuid::new_v4());
        let store = super::CRDT_TOKENS
            .get_or_init(|| std::sync::RwLock::new(std::collections::HashMap::new()));
        // expires_at = 1 (way in the past — 1970-01-01T00:00:01Z)
        store.write().unwrap().insert(token.clone(), 1.0_f64);
        assert!(
            !super::validate_join_token(&token),
            "An expired token must fail validation"
        );
    }

    #[test]
    fn no_token_with_require_true_is_rejected() {
        // Simulate: require_token=true, token=None → reject.
        // Mirrors the accept/reject decision made at the upgrade handler.
        let require_token = true;
        let token: Option<&str> = None;
        let should_reject = match token {
            Some(t) => !super::validate_join_token(t),
            None if require_token => true,
            None => false,
        };
        assert!(
            should_reject,
            "Missing token must be rejected when token is required"
        );
    }

    #[test]
    fn no_token_with_require_false_is_accepted() {
        let require_token = false;
        let token: Option<&str> = None;
        let should_reject = match token {
            Some(t) => !super::validate_join_token(t),
            None if require_token => true,
            None => false,
        };
        assert!(
            !should_reject,
            "Missing token must be accepted in open mode"
        );
    }

    #[test]
    fn add_join_token_returns_future_expiry() {
        let token = format!("test-expiry-{}", uuid::Uuid::new_v4());
        let now = chrono::Utc::now().timestamp() as f64;
        let expires_at = super::add_join_token(token);
        assert!(
            expires_at > now,
            "Expiry timestamp must be in the future (got {expires_at}, now={now})"
        );
    }

    #[test]
    fn token_ttl_is_thirty_days() {
        assert_eq!(
            super::TOKEN_TTL_SECS,
            30.0 * 24.0 * 3600.0,
            "TOKEN_TTL_SECS must be 30 days"
        );
    }

    #[test]
    fn config_crdt_require_token_defaults_to_false() {
        let config = crate::config::ProjectConfig::default();
        assert!(
            !config.crdt_require_token,
            "crdt_require_token must default to false (open access)"
        );
    }

    #[test]
    fn config_crdt_tokens_defaults_to_empty() {
        let config = crate::config::ProjectConfig::default();
        assert!(
            config.crdt_tokens.is_empty(),
            "crdt_tokens must default to empty"
        );
    }

    #[test]
    fn config_crdt_token_fields_parsed_from_toml() {
        let toml_str = r#"
crdt_require_token = true
crdt_tokens = ["token-abc", "token-xyz"]

[[agent]]
name = "test"
"#;
        let config: crate::config::ProjectConfig = toml::from_str(toml_str).unwrap();
        assert!(config.crdt_require_token);
        assert_eq!(config.crdt_tokens, vec!["token-abc", "token-xyz"]);
    }
}
|
||||
@@ -0,0 +1,464 @@
|
||||
//! Rendezvous client: connect to a remote peer, authenticate, and exchange CRDT ops.
|
||||
|
||||
use bft_json_crdt::json_crdt::SignedOp;
|
||||
use futures::{SinkExt, StreamExt};
|
||||
|
||||
use crate::crdt_state;
|
||||
use crate::crdt_wire;
|
||||
use crate::slog;
|
||||
use crate::slog_error;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::auth;
|
||||
use super::dispatch::{handle_incoming_binary, handle_incoming_text};
|
||||
use super::wire::{AuthMessage, ChallengeMessage, SyncMessage};
|
||||
use super::{AUTH_TIMEOUT_SECS, PING_INTERVAL_SECS, PONG_TIMEOUT_SECS};
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use auth::{add_join_token, init_token_auth}; // needed by tests
|
||||
|
||||
// ── Rendezvous client ───────────────────────────────────────────────
|
||||
|
||||
/// Number of consecutive connection failures before escalating from WARN to ERROR.
///
/// The counter (and the log level) resets to WARN as soon as a connection
/// closes cleanly — see [`spawn_rendezvous_client`].
pub const RENDEZVOUS_ERROR_THRESHOLD: u32 = 10;
|
||||
|
||||
/// Spawn a background task that connects to the configured rendezvous
|
||||
/// peer and exchanges CRDT ops bidirectionally.
|
||||
///
|
||||
/// The client reconnects with exponential backoff if the connection drops.
|
||||
/// Individual failures are logged at WARN; after [`RENDEZVOUS_ERROR_THRESHOLD`]
|
||||
/// consecutive failures the log level escalates to ERROR.
|
||||
///
|
||||
/// When `token` is provided it is appended to the upgrade URL as
|
||||
/// `?token=<token>` so the server's bearer-token check is satisfied. This
|
||||
/// reuses the existing `--join-token` / `HUSKIES_JOIN_TOKEN` plumbing on the
|
||||
/// agent side.
|
||||
pub fn spawn_rendezvous_client(url: String, token: Option<String>) {
|
||||
tokio::spawn(async move {
|
||||
let mut backoff_secs = 1u64;
|
||||
let mut consecutive_failures: u32 = 0;
|
||||
loop {
|
||||
slog!("[crdt-sync] Connecting to rendezvous peer: {url}");
|
||||
match connect_and_sync(&url, token.as_deref()).await {
|
||||
Ok(()) => {
|
||||
slog!("[crdt-sync] Rendezvous connection closed cleanly");
|
||||
backoff_secs = 1;
|
||||
consecutive_failures = 0;
|
||||
}
|
||||
Err(e) => {
|
||||
consecutive_failures += 1;
|
||||
if consecutive_failures >= RENDEZVOUS_ERROR_THRESHOLD {
|
||||
slog_error!(
|
||||
"[crdt-sync] Rendezvous peer unreachable ({consecutive_failures} consecutive failures): {e}"
|
||||
);
|
||||
} else {
|
||||
slog_warn!(
|
||||
"[crdt-sync] Rendezvous connection error (attempt {consecutive_failures}): {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
slog!("[crdt-sync] Reconnecting in {backoff_secs}s...");
|
||||
tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
|
||||
backoff_secs = (backoff_secs * 2).min(30);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Connect to a remote sync endpoint and exchange ops until disconnect.
///
/// Runs the full connection lifecycle: challenge/response auth, v2 clock
/// exchange (with v1 bulk fallback), `ready` handshake, then the real-time
/// streaming loop with Ping/Pong keepalive.
///
/// Returns `Ok(())` on a clean close (including a lagged-broadcast
/// disconnect) and `Err` on connect/auth failures or keepalive timeout.
///
/// When `token` is supplied it is appended as `?token=<token>` to the
/// connection URL so the server's bearer-token check passes.
pub(crate) async fn connect_and_sync(url: &str, token: Option<&str>) -> Result<(), String> {
    // NOTE(review): the token is appended verbatim (no percent-encoding);
    // fine for hex/uuid-style tokens — confirm tokens can never contain URL
    // metacharacters.
    let connect_url = match token {
        Some(t) => {
            if url.contains('?') {
                format!("{url}&token={t}")
            } else {
                format!("{url}?token={t}")
            }
        }
        None => url.to_string(),
    };
    let (ws_stream, _) = tokio_tungstenite::connect_async(connect_url.as_str())
        .await
        .map_err(|e| format!("WebSocket connect failed: {e}"))?;

    let (mut sink, mut stream) = ws_stream.split();

    slog!("[crdt-sync] Connected to rendezvous peer, awaiting challenge");

    // ── Step 1: Receive challenge from listener ───────────────────
    use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;

    // The entire wait for the challenge frame is bounded by AUTH_TIMEOUT_SECS.
    let challenge_frame = tokio::time::timeout(
        std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
        stream.next(),
    )
    .await
    .map_err(|_| "Auth timeout waiting for challenge".to_string())?
    .ok_or_else(|| "Connection closed before challenge".to_string())?
    .map_err(|e| format!("WebSocket read error: {e}"))?;

    let challenge_text = match challenge_frame {
        TungsteniteMsg::Text(t) => t.to_string(),
        _ => return Err("Expected text frame for challenge".to_string()),
    };

    let challenge_msg: ChallengeMessage = serde_json::from_str(&challenge_text)
        .map_err(|e| format!("Invalid challenge message: {e}"))?;

    if challenge_msg.r#type != "challenge" {
        return Err(format!(
            "Expected challenge message, got type={}",
            challenge_msg.r#type
        ));
    }

    // ── Step 2: Sign challenge and send auth reply ────────────────
    let (pubkey_hex, signature_hex) = crdt_state::sign_challenge(&challenge_msg.nonce)
        .ok_or_else(|| "CRDT not initialised — cannot sign challenge".to_string())?;

    let auth_msg = AuthMessage {
        r#type: "auth".to_string(),
        pubkey_hex,
        signature_hex,
    };
    let auth_json = serde_json::to_string(&auth_msg).map_err(|e| format!("Serialize auth: {e}"))?;
    sink.send(TungsteniteMsg::Text(auth_json.into()))
        .await
        .map_err(|e| format!("Send auth failed: {e}"))?;

    slog!("[crdt-sync] Auth reply sent, waiting for sync data");

    // v2 protocol: send our vector clock.
    let our_clock = crdt_state::our_vector_clock().unwrap_or_default();
    let clock_msg = SyncMessage::Clock { clock: our_clock };
    if let Ok(json) = serde_json::to_string(&clock_msg) {
        sink.send(TungsteniteMsg::Text(json.into()))
            .await
            .map_err(|e| format!("Send clock failed: {e}"))?;
    }

    // Wait for the server's first sync message.
    // The variant received here decides the protocol version (see match below).
    let first_msg = tokio::time::timeout(
        std::time::Duration::from_secs(AUTH_TIMEOUT_SECS),
        wait_for_rendezvous_sync_text(&mut stream),
    )
    .await
    .map_err(|_| "Timeout waiting for server sync message".to_string())?;

    match first_msg {
        Some(SyncMessage::Clock { clock: peer_clock }) => {
            // v2 server — send only the ops the server is missing.
            let delta = crdt_state::ops_since(&peer_clock).unwrap_or_default();
            slog!(
                "[crdt-sync] v2 delta sync: sending {} ops to server (server missing)",
                delta.len()
            );
            let msg = SyncMessage::Bulk { ops: delta };
            if let Ok(json) = serde_json::to_string(&msg) {
                sink.send(TungsteniteMsg::Text(json.into()))
                    .await
                    .map_err(|e| format!("Send delta failed: {e}"))?;
            }
        }
        Some(SyncMessage::Bulk { ops }) => {
            // v1 server — apply their bulk and send our full bulk.
            // Ops that fail to parse or fail signature/apply checks are
            // silently skipped; only successfully applied ops are counted.
            let mut applied = 0u64;
            for op_json in &ops {
                if let Ok(signed_op) = serde_json::from_str::<SignedOp>(op_json)
                    && crdt_state::apply_remote_op(signed_op)
                {
                    applied += 1;
                }
            }
            slog!(
                "[crdt-sync] v1 bulk sync: received {} ops from server, applied {applied}",
                ops.len()
            );
            if let Some(all) = crdt_state::all_ops_json() {
                let msg = SyncMessage::Bulk { ops: all };
                if let Ok(json) = serde_json::to_string(&msg) {
                    sink.send(TungsteniteMsg::Text(json.into()))
                        .await
                        .map_err(|e| format!("Send bulk failed: {e}"))?;
                }
            }
        }
        _ => {
            // Fallback — send full bulk.
            slog!("[crdt-sync] No sync message from server; sending full bulk as fallback");
            if let Some(all) = crdt_state::all_ops_json() {
                let msg = SyncMessage::Bulk { ops: all };
                if let Ok(json) = serde_json::to_string(&msg) {
                    sink.send(TungsteniteMsg::Text(json.into()))
                        .await
                        .map_err(|e| format!("Send bulk failed: {e}"))?;
                }
            }
        }
    }

    // Bulk-delta phase complete — signal the server that we are ready for
    // real-time op streaming.
    if let Ok(json) = serde_json::to_string(&SyncMessage::Ready) {
        sink.send(TungsteniteMsg::Text(json.into()))
            .await
            .map_err(|e| format!("Send ready failed: {e}"))?;
    }

    // Subscribe to new local ops.
    let Some(mut op_rx) = crdt_state::subscribe_ops() else {
        return Err("CRDT not initialised".to_string());
    };

    // Buffer for locally-generated ops produced before the server's `ready`
    // arrives. Flushed in-order once the server signals catch-up.
    let mut peer_ready = false;
    let mut op_buffer: Vec<bft_json_crdt::json_crdt::SignedOp> = Vec::new();

    // ── Keepalive state ───────────────────────────────────────────────
    // `pong_deadline` is pushed forward on every received Pong; the ping
    // ticker both sends Pings and checks the deadline.
    let mut pong_deadline =
        tokio::time::Instant::now() + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
    let mut ping_ticker = tokio::time::interval_at(
        tokio::time::Instant::now() + std::time::Duration::from_secs(PING_INTERVAL_SECS),
        std::time::Duration::from_secs(PING_INTERVAL_SECS),
    );

    loop {
        tokio::select! {
            // Send periodic Ping and enforce Pong timeout.
            _ = ping_ticker.tick() => {
                if tokio::time::Instant::now() >= pong_deadline {
                    slog_warn!(
                        "[crdt-sync] No pong from rendezvous peer {} in {}s; disconnecting",
                        url,
                        PONG_TIMEOUT_SECS
                    );
                    return Err(format!(
                        "Keepalive timeout: no pong from {url} in {PONG_TIMEOUT_SECS}s"
                    ));
                }
                use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                if sink.send(TungsteniteMsg::Ping(bytes::Bytes::new())).await.is_err() {
                    break;
                }
            }
            // Forward locally-generated ops (or buffer them pre-`ready`).
            result = op_rx.recv() => {
                match result {
                    Ok(signed_op) => {
                        if peer_ready {
                            // Encode via wire codec and send as binary frame.
                            let bytes = crdt_wire::encode(&signed_op);
                            use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                            if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
                                break;
                            }
                        } else {
                            // Buffer until the server signals ready.
                            op_buffer.push(signed_op);
                        }
                    }
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                        // Broadcast channel overflowed — this link is too slow.
                        // Drop the connection; a reconnect re-syncs via bulk.
                        slog!("[crdt-sync] Slow rendezvous link lagged {n} ops; disconnecting");
                        break;
                    }
                    Err(_) => break,
                }
            }
            // Handle inbound frames from the server.
            frame = stream.next() => {
                match frame {
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Pong(_))) => {
                        // Reset the pong deadline on every Pong received.
                        pong_deadline = tokio::time::Instant::now()
                            + std::time::Duration::from_secs(PONG_TIMEOUT_SECS);
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Ping(_))) => {
                        // tungstenite auto-responds to Ping with Pong at the
                        // protocol level; no manual response needed here.
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Text(text))) => {
                        // Check for the ready signal before other text frames.
                        if let Ok(SyncMessage::Ready) = serde_json::from_str(text.as_ref()) {
                            peer_ready = true;
                            slog!("[crdt-sync] Server ready; flushing {} buffered ops", op_buffer.len());
                            let mut flush_ok = true;
                            for op in op_buffer.drain(..) {
                                let bytes = crdt_wire::encode(&op);
                                use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
                                if sink.send(TungsteniteMsg::Binary(bytes.into())).await.is_err() {
                                    flush_ok = false;
                                    break;
                                }
                            }
                            if !flush_ok {
                                break;
                            }
                        } else {
                            // All other text frames go through the shared dispatcher.
                            handle_incoming_text(text.as_ref());
                        }
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Binary(bytes))) => {
                        // Real-time op — applied immediately regardless of ready state.
                        handle_incoming_binary(&bytes);
                    }
                    Some(Ok(tokio_tungstenite::tungstenite::Message::Close(_))) | None => break,
                    Some(Err(e)) => {
                        slog!("[crdt-sync] Rendezvous read error: {e}");
                        break;
                    }
                    _ => {}
                }
            }
        }
    }

    Ok(())
}
|
||||
|
||||
/// Wait for the next text-frame sync message from a tungstenite stream,
|
||||
/// handling Ping/Pong transparently.
|
||||
///
|
||||
/// Returns `None` on connection close or read error.
|
||||
async fn wait_for_rendezvous_sync_text(
|
||||
stream: &mut futures::stream::SplitStream<
|
||||
tokio_tungstenite::WebSocketStream<
|
||||
tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>,
|
||||
>,
|
||||
>,
|
||||
) -> Option<SyncMessage> {
|
||||
use tokio_tungstenite::tungstenite::Message as TungsteniteMsg;
|
||||
loop {
|
||||
match stream.next().await {
|
||||
Some(Ok(TungsteniteMsg::Text(text))) => {
|
||||
return serde_json::from_str(text.as_ref()).ok();
|
||||
}
|
||||
Some(Ok(TungsteniteMsg::Ping(_) | TungsteniteMsg::Pong(_))) => continue,
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;

    // The URL-building and failure-escalation checks below mirror the logic
    // inline because spawn_rendezvous_client/connect_and_sync need a live
    // tokio runtime and a network peer.

    #[test]
    fn config_rendezvous_parsed_from_toml() {
        let toml_str = r#"
rendezvous = "ws://remote:3001/crdt-sync"

[[agent]]
name = "test"
"#;
        let config: crate::config::ProjectConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(
            config.rendezvous.as_deref(),
            Some("ws://remote:3001/crdt-sync")
        );
    }

    #[test]
    fn config_rendezvous_defaults_to_none() {
        let config = crate::config::ProjectConfig::default();
        assert!(config.rendezvous.is_none());
    }

    #[test]
    fn failure_counter_warn_below_threshold() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;
        let mut consecutive_failures: u32 = 0;

        // First threshold-1 failures are below the ERROR threshold.
        for _ in 0..(threshold - 1) {
            consecutive_failures += 1;
            assert!(
                consecutive_failures < threshold,
                "failure {consecutive_failures} must be below ERROR threshold {threshold}"
            );
        }
    }

    #[test]
    fn failure_counter_error_at_threshold() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;
        let consecutive_failures: u32 = threshold;
        assert!(
            consecutive_failures >= threshold,
            "failure {consecutive_failures} must reach or exceed ERROR threshold {threshold}"
        );
    }

    #[test]
    fn failure_counter_resets_on_success() {
        let threshold = RENDEZVOUS_ERROR_THRESHOLD;
        // Simulate sustained failure.
        let mut consecutive_failures: u32 = threshold + 5;
        assert!(consecutive_failures >= threshold);

        // Simulate a clean reconnect.
        consecutive_failures = 0;
        assert_eq!(
            consecutive_failures, 0,
            "counter must reset to 0 on success"
        );

        // Next error is attempt 1 — well below the ERROR threshold.
        consecutive_failures += 1;
        assert!(
            consecutive_failures < threshold,
            "first failure after reset must be below ERROR threshold"
        );
    }

    #[test]
    fn error_threshold_is_ten() {
        assert_eq!(
            RENDEZVOUS_ERROR_THRESHOLD,
            10,
            "ERROR escalation threshold must be 10 consecutive failures"
        );
    }

    #[test]
    fn rendezvous_url_with_token_appended() {
        // Mirrors the connect_url construction in connect_and_sync.
        let base = "ws://host:3001/crdt-sync";
        let token = "my-secret-token";
        let url_with_token = if base.contains('?') {
            format!("{base}&token={token}")
        } else {
            format!("{base}?token={token}")
        };
        assert_eq!(
            url_with_token,
            "ws://host:3001/crdt-sync?token=my-secret-token"
        );

        // With existing query params.
        let base_with_query = "ws://host:3001/crdt-sync?foo=bar";
        let url_appended = if base_with_query.contains('?') {
            format!("{base_with_query}&token={token}")
        } else {
            format!("{base_with_query}?token={token}")
        };
        assert_eq!(
            url_appended,
            "ws://host:3001/crdt-sync?foo=bar&token=my-secret-token"
        );
    }

    #[test]
    fn rendezvous_url_without_token_unchanged() {
        let base = "ws://host:3001/crdt-sync";
        let token: Option<&str> = None;
        let connect_url = match token {
            Some(t) => format!("{base}?token={t}"),
            None => base.to_string(),
        };
        assert_eq!(connect_url, base);
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
||||
//! CRDT sync — WebSocket-based replication of pipeline state between huskies nodes.
|
||||
/// WebSocket-based CRDT sync layer for replicating pipeline state between
|
||||
/// huskies nodes.
|
||||
///
|
||||
/// # Protocol
|
||||
///
|
||||
/// ## Version negotiation
|
||||
///
|
||||
/// After the auth handshake, both sides send their first sync message:
|
||||
///
|
||||
/// - **v2 peers** send a `clock` frame: `{"type":"clock","clock":{ <node_id_hex>: <max_count>, ... }}`
|
||||
/// containing a vector clock that maps each author's hex Ed25519 pubkey to the
|
||||
/// count of ops received from that author. Upon receiving the peer's clock,
|
||||
/// each side computes the delta via [`crdt_state::ops_since`] and sends only
|
||||
/// the missing ops as a `bulk` frame.
|
||||
///
|
||||
/// - **v1 (legacy) peers** send a `bulk` frame directly (full op dump).
|
||||
/// A v2 peer receiving a `bulk` first (instead of a `clock`) falls back to
|
||||
/// the full-dump path: applies the incoming bulk and responds with its own
|
||||
/// full bulk. This preserves backward compatibility — no code change needed
|
||||
/// on the v1 side.
|
||||
///
|
||||
/// ## Text frames
|
||||
/// A JSON object with a `"type"` field:
|
||||
/// - `{"type":"clock","clock":{...}}` — Vector clock (v2 protocol).
|
||||
/// - `{"type":"bulk","ops":[...]}` — Ops dump (full or delta).
|
||||
/// - `{"type":"ready"}` — Signals that the bulk-delta phase is complete and the
|
||||
/// sender is ready for real-time op streaming. Locally-generated ops are
|
||||
/// buffered until the peer's `ready` is received, then flushed in order.
|
||||
///
|
||||
/// ## Binary frames (real-time op broadcast)
|
||||
/// Individual `SignedOp`s encoded via [`crate::crdt_wire`] (versioned JSON
|
||||
/// envelope: `{"v":1,"op":{...}}`). Each locally-applied op is immediately
|
||||
/// broadcast as a binary frame to all connected peers.
|
||||
///
|
||||
/// Both the server endpoint and the rendezvous client use the same protocol,
|
||||
/// making the connection fully symmetric.
|
||||
///
|
||||
/// ## Backpressure
|
||||
/// Each connected peer has its own [`tokio::sync::broadcast`] receiver. If a
|
||||
/// slow peer allows the channel to fill (indicated by a `Lagged` error), the
|
||||
/// connection is dropped with a warning log. The peer can reconnect and
|
||||
/// receive a fresh bulk state dump to catch up.
|
||||
|
||||
// ── Cross-cutting constants ─────────────────────────────────────────
|
||||
|
||||
// ── Auth configuration ──────────────────────────────────────────────
|
||||
|
||||
/// Default timeout for the auth handshake (seconds).
|
||||
pub(super) const AUTH_TIMEOUT_SECS: u64 = 10;
|
||||
|
||||
// ── Keepalive configuration ─────────────────────────────────────────
|
||||
|
||||
/// Interval (seconds) between WebSocket Ping frames sent by each side.
|
||||
pub const PING_INTERVAL_SECS: u64 = 30;
|
||||
|
||||
/// Seconds without a Pong response before the connection is dropped.
|
||||
pub const PONG_TIMEOUT_SECS: u64 = 60;
|
||||
|
||||
// ── Sub-modules ─────────────────────────────────────────────────────
|
||||
mod auth;
|
||||
mod client;
|
||||
mod dispatch;
|
||||
mod server;
|
||||
mod wire;
|
||||
|
||||
// ── Public API re-exports ───────────────────────────────────────────
|
||||
pub use auth::{add_join_token, init_token_auth, init_trusted_keys};
|
||||
pub(crate) use client::connect_and_sync;
|
||||
pub use client::{RENDEZVOUS_ERROR_THRESHOLD, spawn_rendezvous_client};
|
||||
pub use server::crdt_sync_handler;
|
||||
|
||||
// Test-only re-export used by `crdt_snapshot` tests.
|
||||
#[cfg(test)]
|
||||
pub(crate) use wire::SyncMessagePublic;
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,141 @@
|
||||
//! Wire-protocol types for the `/crdt-sync` WebSocket protocol.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// ── Wire protocol types ─────────────────────────────────────────────
|
||||
|
||||
/// Auth handshake: challenge sent by the listener to the connector.
#[derive(Serialize, Deserialize, Debug)]
pub(super) struct ChallengeMessage {
    /// Message discriminator — the client rejects anything other than the
    /// literal `"challenge"`.
    pub(super) r#type: String,
    /// Nonce the connector must sign to prove ownership of its key.
    pub(super) nonce: String,
}
|
||||
|
||||
/// Auth handshake: auth reply sent by the connector to the listener.
#[derive(Serialize, Deserialize, Debug)]
pub(super) struct AuthMessage {
    /// Message discriminator — always the literal `"auth"`.
    pub(super) r#type: String,
    /// Hex-encoded public key identifying the connector.
    pub(super) pubkey_hex: String,
    /// Hex-encoded signature over the challenge nonce.
    pub(super) signature_hex: String,
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub(crate) enum SyncMessage {
|
||||
/// Bulk state dump sent on connect (v1) or delta ops after clock exchange (v2).
|
||||
Bulk { ops: Vec<String> },
|
||||
/// A single new op.
|
||||
Op { op: String },
|
||||
/// Vector clock exchanged on connect (v2 protocol).
|
||||
///
|
||||
/// Each entry maps a node's hex-encoded Ed25519 public key to the count of
|
||||
/// ops received from that node. The receiving side computes the delta via
|
||||
/// [`crdt_state::ops_since`] and sends only the missing ops.
|
||||
Clock {
|
||||
clock: std::collections::HashMap<String, u64>,
|
||||
},
|
||||
/// Signals that the bulk-delta phase is complete; the sender is ready for
|
||||
/// real-time op streaming. Locally-generated ops are buffered until the
|
||||
/// peer's `Ready` is received, then flushed in-order.
|
||||
Ready,
|
||||
}
|
||||
|
||||
/// Crate-visible re-export of `SyncMessage` for backwards-compatibility testing.
///
/// Used by `crdt_snapshot` tests to verify that snapshot messages are NOT
/// parseable as legacy `SyncMessage` variants — confirming that old peers
/// will gracefully reject them.
// Compiled only for tests so the alias cannot leak into production code.
#[cfg(test)]
pub(crate) type SyncMessagePublic = SyncMessage;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip every SyncMessage variant through serde_json to pin the
    // internally-tagged wire format ("type" discriminator).

    #[test]
    fn sync_message_bulk_serialization_roundtrip() {
        let msg = SyncMessage::Bulk {
            ops: vec!["op1".to_string(), "op2".to_string()],
        };
        let json = serde_json::to_string(&msg).unwrap();
        assert!(json.contains(r#""type":"bulk""#));
        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        match deserialized {
            SyncMessage::Bulk { ops } => {
                assert_eq!(ops.len(), 2);
                assert_eq!(ops[0], "op1");
                assert_eq!(ops[1], "op2");
            }
            _ => panic!("Expected Bulk"),
        }
    }

    #[test]
    fn sync_message_op_serialization_roundtrip() {
        let msg = SyncMessage::Op {
            op: r#"{"inner":{}}"#.to_string(),
        };
        let json = serde_json::to_string(&msg).unwrap();
        assert!(json.contains(r#""type":"op""#));
        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        match deserialized {
            SyncMessage::Op { op } => {
                // The embedded op JSON must survive as an opaque string.
                assert_eq!(op, r#"{"inner":{}}"#);
            }
            _ => panic!("Expected Op"),
        }
    }

    #[test]
    fn sync_message_bulk_empty_ops() {
        let msg = SyncMessage::Bulk { ops: vec![] };
        let json = serde_json::to_string(&msg).unwrap();
        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        match deserialized {
            SyncMessage::Bulk { ops } => assert!(ops.is_empty()),
            _ => panic!("Expected Bulk"),
        }
    }

    #[test]
    fn sync_message_clock_serialization_roundtrip() {
        let mut clock = std::collections::HashMap::new();
        clock.insert("aabbcc00".to_string(), 42u64);
        clock.insert("ddeeff11".to_string(), 7u64);

        let msg = SyncMessage::Clock { clock };
        let json = serde_json::to_string(&msg).unwrap();
        assert!(json.contains(r#""type":"clock""#));

        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        match deserialized {
            SyncMessage::Clock { clock } => {
                assert_eq!(clock["aabbcc00"], 42);
                assert_eq!(clock["ddeeff11"], 7);
            }
            _ => panic!("Expected Clock"),
        }
    }

    #[test]
    fn sync_message_clock_empty() {
        let msg = SyncMessage::Clock {
            clock: std::collections::HashMap::new(),
        };
        let json = serde_json::to_string(&msg).unwrap();
        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        match deserialized {
            SyncMessage::Clock { clock } => assert!(clock.is_empty()),
            _ => panic!("Expected Clock"),
        }
    }

    #[test]
    fn sync_message_ready_serialization_roundtrip() {
        let msg = SyncMessage::Ready;
        let json = serde_json::to_string(&msg).unwrap();
        // Ready carries no payload — the frame is exactly the tag object.
        assert_eq!(json, r#"{"type":"ready"}"#);
        let deserialized: SyncMessage = serde_json::from_str(&json).unwrap();
        assert!(matches!(deserialized, SyncMessage::Ready));
    }
}
|
||||
Reference in New Issue
Block a user