Adding a working bft-json-crdt implementation for the PoC

2024-05-30 13:51:32 +01:00
parent bcdd3f6a81
commit f8b932b561
27 changed files with 265989 additions and 3 deletions
--- a/crates/bft-json-crdt/tests/byzantine.rs
+++ b/crates/bft-json-crdt/tests/byzantine.rs
@@ -0,0 +1,134 @@
+use bft_json_crdt::{
+    json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, OpState},
+    keypair::make_keypair,
+    list_crdt::ListCrdt,
+    lww_crdt::LwwRegisterCrdt,
+    op::{Op, PathSegment, ROOT_ID},
+};
+use serde_json::json;
+
+// What is potentially Byzantine behaviour?
+// 1. send valid updates
+// 2. send a mix of valid and invalid updates
+//  a) messages with duplicate ID (attempt to overwrite old entries)
+//  b) send incorrect sequence number to multiple nodes (which could lead to divergent state) -- this is called equivocation
+//  c) ‘forge’ updates from another author (could happen when forwarding valid messages from peers)
+// 3. send malformed updates (e.g. missing fields)
+//      this we don't test as we assume transport layer only allows valid messages
+// 4. overwhelm message queue by sending many updates far into the future
+//      also untested! currently we keep an unbounded message queue
+// 5. block actual messages from honest actors (eclipse attack)
+
+// Minimal CRDT document used by the list-based Byzantine tests in this file:
+// its only declared field is `list`, a `ListCrdt<char>`.
+#[add_crdt_fields]
+#[derive(Clone, CrdtNode)]
+struct ListExample {
+    list: ListCrdt<char>,
+}
+
+// case 2a + 2b
+//
+// Tampered copies of an already-signed op (same id, but altered content, or
+// altered sequence number / deletion flag) must be rejected with
+// `ErrHashMismatch` by both the authoring replica and a second replica, while
+// the untouched originals are accepted by the second replica.
+#[test]
+fn test_equivocation() {
+    let author_key = make_keypair();
+    let peer_key = make_keypair();
+    let mut author = BaseCrdt::<ListExample>::new(&author_key);
+    let mut peer = BaseCrdt::<ListExample>::new(&peer_key);
+
+    // two honest ops: 'a' inserted at the root, then 'b' inserted after 'a'
+    let op_a = author.doc.list.insert(ROOT_ID, 'a').sign(&author_key);
+    let op_b = author.doc.list.insert(op_a.id(), 'b').sign(&author_key);
+
+    // forgery 1: same id as `op_b` but different content
+    let mut forged_content = op_b.clone();
+    forged_content.inner.content = Some('c'.into());
+
+    // forgery 2: same id as `op_b` but a different sequence number and
+    // deletion flag
+    let mut forged_seq = op_b.clone();
+    forged_seq.inner.seq = 99;
+    forged_seq.inner.is_deleted = true;
+
+    // the author rejects both forgeries
+    for forged in [&forged_content, &forged_seq] {
+        assert_eq!(author.apply(forged.clone()), OpState::ErrHashMismatch);
+    }
+
+    // the peer rejects them as well (delivered in the opposite order) ...
+    for forged in [forged_seq, forged_content] {
+        assert_eq!(peer.apply(forged), OpState::ErrHashMismatch);
+    }
+    // ... and still accepts the genuine ops afterwards
+    assert_eq!(peer.apply(op_a), OpState::Ok);
+    assert_eq!(peer.apply(op_b), OpState::Ok);
+
+    // make sure neither replica accepted either of the fake operations
+    assert_eq!(author.doc.list.view(), vec!['a', 'b']);
+    assert_eq!(author.doc.list.view(), peer.doc.list.view());
+}
+
+// case 2c
+//
+// An op that names another replica as its author but is signed with an
+// unrelated key must be rejected by both replicas.
+#[test]
+fn test_forge_update() {
+    let owner_key = make_keypair();
+    let peer_key = make_keypair();
+    let mut owner = BaseCrdt::<ListExample>::new(&owner_key);
+    let mut peer = BaseCrdt::<ListExample>::new(&peer_key);
+    let op_a = owner.doc.list.insert(ROOT_ID, 'a').sign(&owner_key);
+
+    // we don't have the private key behind the owner's id, so the attacker
+    // signs with a freshly generated keypair instead
+    let attacker_key = make_keypair();
+    let mut forged = Op {
+        id: ROOT_ID, // placeholder, replaced by the real hash below
+        author: owner.doc.id, // pretend to be the owner of the list
+        origin: op_a.inner.id,
+        seq: 1,
+        content: Some('b'),
+        is_deleted: false,
+        path: vec![PathSegment::Field(String::from("list"))],
+    };
+
+    // the hash and digest are completely valid; the op is just signed by the
+    // wrong person, since the signing key differs from the claimed author
+    forged.id = forged.hash_to_id();
+    let forged_signed = forged.sign(&attacker_key);
+
+    assert_eq!(
+        owner.apply(forged_signed.clone()),
+        OpState::ErrHashMismatch
+    );
+    assert_eq!(peer.apply(forged_signed), OpState::ErrHashMismatch);
+    assert_eq!(peer.apply(op_a), OpState::Ok);
+
+    // make sure the owner didn't accept the fake operation
+    assert_eq!(owner.doc.list.view(), vec!['a']);
+}
+
+// Outer document for the path tests: wraps a `Nested2` under the field `a`,
+// so ops on the inner register are addressed through a multi-segment path.
+#[add_crdt_fields]
+#[derive(Clone, CrdtNode)]
+struct Nested {
+    a: Nested2,
+}
+
+// Inner document for the path tests: a single last-writer-wins boolean
+// register stored under the field `b`.
+#[add_crdt_fields]
+#[derive(Clone, CrdtNode)]
+struct Nested2 {
+    b: LwwRegisterCrdt<bool>,
+}
+
+// Ops whose path does not resolve in the receiving document are rejected with
+// `ErrPathMismatch`; an op whose path was rewritten after signing is rejected
+// with `ErrDigestMismatch`.
+#[test]
+fn test_path_update() {
+    let author_key = make_keypair();
+    let peer_key = make_keypair();
+    let mut author = BaseCrdt::<Nested>::new(&author_key);
+    let mut peer = BaseCrdt::<Nested>::new(&peer_key);
+
+    // op 1: path points at a field ("x") that `Nested` does not declare
+    let mut set_true = author.doc.a.b.set(true);
+    set_true.path = vec![PathSegment::Field(String::from("x"))];
+
+    // op 2: path uses an index segment where the field "b" belongs
+    let mut set_false = author.doc.a.b.set(false);
+    let index_path = vec![
+        PathSegment::Field(String::from("a")),
+        PathSegment::Index(set_false.id),
+    ];
+    set_false.path = index_path;
+
+    let signed_true = set_true.sign(&author_key);
+    let signed_false = set_false.sign(&author_key);
+
+    // op 3: take signed op 2 and swap in the real path ["a", "b"] afterwards
+    let mut signed_false_repathed = signed_false.clone();
+    signed_false_repathed.inner.path = ["a", "b"]
+        .iter()
+        .map(|field| PathSegment::Field(field.to_string()))
+        .collect();
+
+    assert_eq!(peer.apply(signed_true), OpState::ErrPathMismatch);
+    assert_eq!(peer.apply(signed_false), OpState::ErrPathMismatch);
+    assert_eq!(
+        peer.apply(signed_false_repathed),
+        OpState::ErrDigestMismatch
+    );
+
+    // make sure the peer didn't accept any fake operation: the author still
+    // sees its own last local write, the peer's register is still null
+    assert_eq!(author.doc.a.b.view(), json!(false).into());
+    assert_eq!(peer.doc.a.b.view(), json!(null).into());
+}
--- a/crates/bft-json-crdt/tests/commutative.rs
+++ b/crates/bft-json-crdt/tests/commutative.rs
@@ -0,0 +1,91 @@
+use bft_json_crdt::{
+    keypair::make_author,
+    list_crdt::ListCrdt,
+    op::{Op, OpId, ROOT_ID}, json_crdt::{CrdtNode, Value},
+};
+use rand::{rngs::ThreadRng, seq::SliceRandom, Rng};
+
+/// Picks the id of a uniformly random op from `arr`.
+///
+/// Returns `ROOT_ID` when `arr` is empty, so the result can always be used as
+/// the origin/target of a new list operation.
+///
+/// Takes a slice rather than `&Vec<_>`; existing callers that pass `&Vec`
+/// keep working through deref coercion.
+fn random_op<T: CrdtNode>(arr: &[Op<T>], rng: &mut ThreadRng) -> OpId {
+    arr.choose(rng).map(|op| op.id).unwrap_or(ROOT_ID)
+}
+
+// Number of loop iterations in each op-generation phase of the fuzz test.
+const TEST_N: usize = 100;
+
+// Fuzz test for commutativity of list operations.
+//
+// Phase 1: replicas `l1` and `l2` each generate `TEST_N` random ops (insert
+// with probability 4/5, otherwise delete) that only reference their own
+// earlier ops. The two logs are shuffled and delivered to the other replica;
+// `chk` receives both logs. All three views must then be equal.
+//
+// Phase 2: both replicas insert at origins drawn from the combined phase-1
+// log, exchange the new ops, and the three views must be equal again.
+#[test]
+fn test_list_fuzz_commutative() {
+    let mut rng = rand::thread_rng();
+    let mut op_log = Vec::<Op<Value>>::new();
+    let mut op_log1 = Vec::<Op<Value>>::new();
+    let mut op_log2 = Vec::<Op<Value>>::new();
+    let mut l1 = ListCrdt::<char>::new(make_author(1), vec![]);
+    let mut l2 = ListCrdt::<char>::new(make_author(2), vec![]);
+    let mut chk = ListCrdt::<char>::new(make_author(3), vec![]);
+    for _ in 0..TEST_N {
+        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
+        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
+        let op1 = if rng.gen_bool(4.0 / 5.0) {
+            l1.insert(random_op(&op_log1, &mut rng), letter1)
+        } else {
+            l1.delete(random_op(&op_log1, &mut rng))
+        };
+        let op2 = if rng.gen_bool(4.0 / 5.0) {
+            l2.insert(random_op(&op_log2, &mut rng), letter2)
+        } else {
+            l2.delete(random_op(&op_log2, &mut rng))
+        };
+        op_log1.push(op1.clone());
+        op_log2.push(op2.clone());
+        // last use of op1/op2: move them into the combined log, no clone
+        op_log.push(op1);
+        op_log.push(op2);
+    }
+
+    // shuffle ops
+    op_log1.shuffle(&mut rng);
+    op_log2.shuffle(&mut rng);
+
+    // apply to each other
+    for op in op_log1 {
+        l2.apply(op.clone());
+        chk.apply(op);
+    }
+    for op in op_log2 {
+        l1.apply(op.clone());
+        chk.apply(op);
+    }
+
+    // ensure all equal
+    let l1_doc = l1.view();
+    let l2_doc = l2.view();
+    let chk_doc = chk.view();
+    assert_eq!(l1_doc, l2_doc);
+    assert_eq!(l1_doc, chk_doc);
+    assert_eq!(l2_doc, chk_doc);
+
+    // now, allow cross mixing between both: origins come from the combined
+    // phase-1 log, so each replica may insert after the other's ops
+    let mut op_log1 = Vec::<Op<Value>>::new();
+    let mut op_log2 = Vec::<Op<Value>>::new();
+    for _ in 0..TEST_N {
+        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
+        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
+        let op1 = l1.insert(random_op(&op_log, &mut rng), letter1);
+        let op2 = l2.insert(random_op(&op_log, &mut rng), letter2);
+        op_log1.push(op1);
+        op_log2.push(op2);
+    }
+
+    for op in op_log1 {
+        l2.apply(op.clone());
+        chk.apply(op);
+    }
+    for op in op_log2 {
+        l1.apply(op.clone());
+        chk.apply(op);
+    }
+
+    let l1_doc = l1.view();
+    let l2_doc = l2.view();
+    let chk_doc = chk.view();
+    assert_eq!(l1_doc, l2_doc);
+    assert_eq!(l1_doc, chk_doc);
+    assert_eq!(l2_doc, chk_doc);
+}
--- a/crates/bft-json-crdt/tests/editing-trace.js
+++ b/crates/bft-json-crdt/tests/editing-trace.js
--- a/crates/bft-json-crdt/tests/edits.json
+++ b/crates/bft-json-crdt/tests/edits.json
--- a/crates/bft-json-crdt/tests/kleppmann_trace.rs
+++ b/crates/bft-json-crdt/tests/kleppmann_trace.rs
@@ -0,0 +1,76 @@
+use bft_json_crdt::keypair::make_author;
+use bft_json_crdt::list_crdt::ListCrdt;
+use bft_json_crdt::op::{OpId, ROOT_ID};
+use std::{fs::File, io::Read};
+use time::PreciseTime;
+
+use serde::Deserialize;
+
+// One entry of the editing trace, deserialized from JSON (camelCase keys).
+#[derive(Debug, Deserialize, Clone)]
+#[serde(rename_all = "camelCase")]
+struct Edit {
+    // index into the test's list of previously produced op ids; selects the
+    // op this edit is anchored to
+    pos: usize,
+    // true => delete the op at `pos`; false => insert `content` after it
+    delete: bool,
+    // character to insert; `None` when the key is absent from the JSON
+    #[serde(default)]
+    content: Option<char>,
+}
+
+// The whole editing trace as stored in the JSON fixture (camelCase keys, so
+// `final_text` is read from `finalText`).
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct Trace {
+    // text the list is expected to contain after replaying every edit
+    final_text: String,
+    // edits to replay, in order
+    edits: Vec<Edit>,
+}
+
+/// Loads and parses the editing trace from `./tests/edits.json` (resolved
+/// against the current working directory).
+///
+/// Panics if the file cannot be opened, cannot be read as a string, or does
+/// not deserialize into a [`Trace`].
+fn get_trace() -> Trace {
+    let mut file = File::open("./tests/edits.json")
+        .unwrap_or_else(|e| panic!("Open edits.json failed: {:?}", e.kind()));
+
+    let mut raw = String::new();
+    file.read_to_string(&mut raw).expect("Problem reading file");
+
+    serde_json::from_str(&raw).expect("JSON was not well-formatted")
+}
+
+/// Really large test that replays Martin Kleppmann's editing trace of his
+/// paper against a single `ListCrdt<char>` and checks the resulting text.
+/// Elapsed time is printed at two checkpoints and at the end.
+/// Data source: https://github.com/automerge/automerge-perf
+#[test]
+fn test_editing_trace() {
+    let t = get_trace();
+    let mut list = ListCrdt::<char>::new(make_author(1), vec![]);
+    // ids of every op produced so far; slot 0 is the root so that `pos`
+    // values from the trace can be used as indices directly
+    let mut ops: Vec<OpId> = vec![ROOT_ID];
+    let start = PreciseTime::now();
+    for (i, edit) in t.edits.into_iter().enumerate() {
+        let origin = ops[edit.pos];
+        // deletes are recorded too, so every edit adds exactly one id
+        let produced_id = if edit.delete {
+            list.delete(origin).id
+        } else {
+            list.insert(origin, edit.content.unwrap()).id
+        };
+        ops.push(produced_id);
+
+        // progress checkpoints
+        if matches!(i, 10_000 | 100_000) {
+            let end = PreciseTime::now();
+            let runtime_sec = start.to(end);
+            println!("took {runtime_sec:?} to run {i} ops");
+        }
+    }
+
+    let end = PreciseTime::now();
+    let runtime_sec = start.to(end);
+    println!("took {runtime_sec:?} to finish");
+
+    let expected = t.final_text;
+    let result = list.iter().collect::<String>();
+    // compare lengths first so a size mismatch gives a short failure message
+    assert_eq!(result.len(), expected.len());
+    assert_eq!(result, expected);
+}