Adding a working bft-json-crdt implementation for the PoC

This commit is contained in:
Dave Hrycyszyn
2024-05-30 13:51:32 +01:00
parent bcdd3f6a81
commit f8b932b561
27 changed files with 265989 additions and 3 deletions

42
Cargo.lock generated
View File

@@ -299,6 +299,7 @@ dependencies = [
"serde",
"serde_json",
"sha2 0.10.8",
"time 0.1.45",
]
[[package]]
@@ -1995,7 +1996,7 @@ dependencies = [
"serde",
"serde_json",
"serde_with_macros",
"time",
"time 0.3.36",
]
[[package]]
@@ -2266,6 +2267,17 @@ dependencies = [
"num_cpus",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "time"
version = "0.3.36"
@@ -2474,6 +2486,12 @@ version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
@@ -2554,6 +2572,28 @@ dependencies = [
"url",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.52.0"

2
crates/bft-json-crdt/.gitattributes vendored Normal file
View File

@@ -0,0 +1,2 @@
*.js linguist-generated
*.json linguist-generated

1
crates/bft-json-crdt/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
target

1924
crates/bft-json-crdt/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
[package]
name = "bft-json-crdt"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["lib"]
[features]
default = ["bft", "logging-list", "logging-json"]
logging-list = ["logging-base"]
logging-json = ["logging-base"]
logging-base = []
bft = []
[dependencies]
bft-crdt-derive = { path = "bft-crdt-derive" }
colored = "2.0.0"
fastcrypto = "0.1.8"
itertools = "0.10.5"
rand = "0.8.5"
random_color = "0.6.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.85"
sha2 = "0.10.6"
[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.85"
time = "0.1"

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Jacky Zhao
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,58 @@
# Byzantine Fault Tolerant CRDTs
This work is mainly inspired by implementing Martin Kleppmann's 2022 paper on *Making CRDTs Byzantine Fault Tolerant*[^2]
on top of a simplified [Automerge](https://automerge.org/) implementation.
The goal is to show a working prototype that demonstrates in simple code the ideas behind
1. An Automerge-like CRDT
2. How a primitive list CRDT can be composed to create complex CRDTs like JSON
3. How to add Byzantine Fault Tolerance to arbitrary CRDTs
Unlike most other CRDT implementations, I leave out many performance optimizations that would make the basic algorithm harder to understand.
Check out the [accompanying blog post for this project!](https://jzhao.xyz/posts/bft-json-crdt)
## Benchmarks
Although this implementation does not optimize for performance, it nonetheless performs quite well.
Benchmarking happened on a 2019 Macbook Pro with a 2.6GHz i7.
Numbers are compared to Automerge which report their performance benchmarks [here](https://github.com/automerge/automerge-perf)
| # Ops | Raw String (JS) | Ours (basic) | Ours (BFT) | Automerge (JS) | Automerge (Rust) |
|--|--|--|--|--|--|
|10k | n/a | 0.081s | 1.793s | 1.6s | 0.047s |
|100k | n/a | 9.321s | 38.842s | 43.0s | 0.597s |
|All (259k)| 0.61s | 88.610s | 334.960s | Out of Memory| 1.780s |
|Memory | 0.1MB | 27.6MB | 59.5MB | 880MB | 232.5MB |
## Flamegraph
To get some flamegraphs of the time graph on MacOS, run:
```bash
sudo cargo flamegraph --dev --root --bench speed
```
## Further Work
This is mostly a learning/instructional project but there are a few places where performance improvements are obvious:
1. This is backed by `std::Vec` which isn't great for random insert. Replace with a B-tree or something that provides better insert and find performance
1. [Diamond Types](https://github.com/josephg/diamond-types) and [Automerge (Rust)](https://github.com/automerge/automerge-rs) use a B-tree
2. Yjs is backed by a doubly linked-list and caches last ~5-10 accessed locations (assumes that most edits happen sequentially; seeks are rare)
3. (funnily enough, main performance hit is dominated by find and not insert, see [this flamegraph](./flamegraphs/flamegraph_unoptimized.svg))
2. Avoid calling `find` so many times. A few Automerge optimizations that were not implemented
1. Use an index hint (especially for local inserts)
2. Skipping the second `find` operation in `integrate` if sequence number is already larger
3. Improve storage requirement. As of now, a single `Op` weighs in at *over* 168 bytes. This doesn't even fit in a single cache line!
4. Implement 'transactions' for a group of changes that should be considered atomic.
1. This would also speed up Ed25519 signature verification time by batching.
2. For example, a peer might create an atomic 'transaction' that contains a bunch of changes.
5. Currently, each character is a single op. Similar to Yjs, we can combine runs of characters into larger entities like what André, Luc, et al.[^1] suggest
6. Implement proper persistence using SQLite or something similar
7. Compile the project to WASM and implement a transport layer so it can be used in browser. Something similar to [Yjs' WebRTC Connector](https://github.com/yjs/y-webrtc) could work.
[^1]: André, Luc, et al. "Supporting adaptable granularity of changes for massive-scale collaborative editing." 9th IEEE International Conference on Collaborative Computing: Networking, Applications and Worksharing. IEEE, 2013.
[^2]: Kleppmann, Martin. "Making CRDTs Byzantine Fault Tolerant." Proceedings of the 9th Workshop on Principles and Practice of Consistency for Distributed Data. 2022.
## Acknowledgements
Thank you to [Nalin Bhardwaj](https://nibnalin.me/) for helping me with my cryptography questions and [Martin Kleppmann](https://martin.kleppmann.com/)
for his teaching materials and lectures which taught me a significant portion of what I've learned about distributed systems and CRDTs.

View File

@@ -0,0 +1,57 @@
#![feature(test)]
extern crate test;
use bft_json_crdt::{keypair::make_author, list_crdt::ListCrdt, op::Op, op::ROOT_ID, json_crdt::Value};
use rand::seq::SliceRandom;
use test::Bencher;
#[bench]
fn bench_insert_1_000_root(b: &mut Bencher) {
    // Worst case for the list: every insert anchors at ROOT_ID, so each
    // new element conflicts with all previously inserted siblings.
    b.iter(|| {
        let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
        for value in 0..1_000 {
            list.insert(ROOT_ID, value);
        }
    })
}
#[bench]
fn bench_insert_1_000_linear(b: &mut Bencher) {
    // Best case: each insert anchors at the op created by the previous
    // one, simulating sequential typing at the end of the list.
    b.iter(|| {
        let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
        let mut anchor = ROOT_ID;
        for value in 0..1_000 {
            anchor = list.insert(anchor, value).id;
        }
    })
}
#[bench]
fn bench_insert_many_agents_conflicts(b: &mut Bencher) {
    // N agents each insert 5 elements at the root, then every op is
    // delivered to every *other* agent in a random order. At the end all
    // replicas must have converged to an identical view.
    b.iter(|| {
        const N: u8 = 50;
        let mut rng = rand::thread_rng();
        let mut crdts: Vec<ListCrdt<i64>> = Vec::with_capacity(N as usize);
        let mut logs: Vec<Op<Value>> = Vec::new();
        for i in 0..N {
            let list = ListCrdt::new(make_author(i), vec![]);
            crdts.push(list);
            for _ in 0..5 {
                // fix: content must be i64 to match ListCrdt<i64>
                // (was `i as i32`, a type mismatch)
                let op = crdts[i as usize].insert(ROOT_ID, i as i64);
                logs.push(op);
            }
        }
        logs.shuffle(&mut rng);
        for op in logs {
            for c in &mut crdts {
                // don't redeliver an op to the replica that authored it
                if op.author() != c.our_id {
                    c.apply(op.clone());
                }
            }
        }
        // all replicas converged
        assert!(crdts.windows(2).all(|w| w[0].view() == w[1].view()));
    })
}

View File

@@ -0,0 +1,100 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "bft-crdt-derive"
version = "0.1.0"
dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "once_cell"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "proc-macro-crate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eda0fc3b0fb7c975631757e14d9049da17374063edb6ebbcbc54d880d4fe94e9"
dependencies = [
"once_cell",
"thiserror",
"toml",
]
[[package]]
name = "proc-macro2"
version = "1.0.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965"
[[package]]
name = "syn"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "toml"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7"
dependencies = [
"serde",
]
[[package]]
name = "unicode-ident"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"

View File

@@ -0,0 +1,14 @@
[package]
name = "bft-crdt-derive"
version = "0.1.0"
edition = "2021"
publish = false
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.47"
proc-macro-crate = "1.2.1"
quote = "1.0.21"
syn = { version = "1.0.103", features = ["full"] }

View File

@@ -0,0 +1,189 @@
use proc_macro::TokenStream as OgTokenStream;
use proc_macro2::{Ident, Span, TokenStream};
use proc_macro_crate::{crate_name, FoundCrate};
use quote::{quote, quote_spanned, ToTokens};
use syn::{
parse::{self, Parser},
parse_macro_input,
spanned::Spanned,
Data, DeriveInput, Field, Fields, ItemStruct, LitStr, Type
};
/// Resolve a token stream naming the parent crate so generated code works
/// both inside `bft-json-crdt` itself and from downstream crates (which may
/// have renamed the dependency in their Cargo.toml).
fn get_crate_name() -> TokenStream {
    // If lookup fails (e.g. compiling the crate itself), fall back to Itself.
    match crate_name("bft-json-crdt").unwrap_or(FoundCrate::Itself) {
        FoundCrate::Itself => quote! { ::bft_json_crdt },
        FoundCrate::Name(found) => {
            let alias = Ident::new(&found, Span::call_site());
            quote! { ::#alias }
        }
    }
}
/// Proc macro to insert a keypair and path field on a given struct.
///
/// Appends `path: Vec<PathSegment>` and `id: AuthorId` to the struct's named
/// fields; structs without named fields are passed through unchanged.
#[proc_macro_attribute]
pub fn add_crdt_fields(args: OgTokenStream, input: OgTokenStream) -> OgTokenStream {
    let mut item = parse_macro_input!(input as ItemStruct);
    let crate_name = get_crate_name();
    // the attribute takes no arguments
    let _ = parse_macro_input!(args as parse::Nothing);
    if let syn::Fields::Named(ref mut named_fields) = item.fields {
        let injected = [
            quote! { path: Vec<#crate_name::op::PathSegment> },
            quote! { id: #crate_name::keypair::AuthorId },
        ];
        for tokens in injected {
            named_fields
                .named
                .push(Field::parse_named.parse2(tokens).unwrap());
        }
    }
    quote! { #item }.into()
}
/// Proc macro to automatically derive the CRDTNode trait
///
/// For every named field except the bookkeeping `path`/`id` fields injected
/// by `#[add_crdt_fields]`, this generates impls of:
///  - `CrdtNodeFromValue`: rebuild the struct from a JSON `Value::Object`
///  - `Debug`: prints the field names
///  - `CrdtNode`: `apply` routes an op one path segment deeper into the
///    matching field; `view` collects every field's view into an object;
///    `new` constructs each field with its path extended by the field name
///  - `DebugView`: indented pretty-printer (empty unless `logging-base`)
#[proc_macro_derive(CrdtNode)]
pub fn derive_json_crdt(input: OgTokenStream) -> OgTokenStream {
    // parse the input tokens into a syntax tree
    let input = parse_macro_input!(input as DeriveInput);
    let crate_name = get_crate_name();
    // used in the quasi-quotation below as `#name`
    let ident = input.ident;
    let ident_str = LitStr::new(&*ident.to_string(), ident.span());
    let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
    match input.data {
        Data::Struct(data) => match &data.fields {
            Fields::Named(fields) => {
                // parallel vectors: one entry per CRDT field, used in the
                // #(...)* repetitions of the quote! templates below
                let mut field_impls = vec![];
                let mut ident_literals = vec![];
                let mut ident_strings = vec![];
                let mut tys = vec![];
                // parse all named fields
                for field in &fields.named {
                    let ident = field.ident.as_ref().expect("Failed to get struct field identifier");
                    // skip the two fields injected by #[add_crdt_fields]
                    if ident != "path" && ident != "id" {
                        let ty = match &field.ty {
                            Type::Path(t) => t.to_token_stream(),
                            _ => return quote_spanned! { field.span() => compile_error!("Field should be a primitive or struct which implements CRDTNode") }.into(),
                        };
                        let str_literal = LitStr::new(&*ident.to_string(), ident.span());
                        ident_strings.push(str_literal.clone());
                        ident_literals.push(ident.clone());
                        tys.push(ty.clone());
                        // each field is constructed with its own path suffix
                        field_impls.push(quote! {
                            #ident: <#ty as CrdtNode>::new(
                                id,
                                #crate_name::op::join_path(path.clone(), #crate_name::op::PathSegment::Field(#str_literal.to_string()))
                            )
                        });
                    }
                }
                let expanded = quote! {
                    impl #impl_generics #crate_name::json_crdt::CrdtNodeFromValue for #ident #ty_generics #where_clause {
                        fn node_from(value: #crate_name::json_crdt::Value, id: #crate_name::keypair::AuthorId, path: Vec<#crate_name::op::PathSegment>) -> Result<Self, String> {
                            if let #crate_name::json_crdt::Value::Object(mut obj) = value {
                                // NOTE(review): the .unwrap()s here panic if the
                                // object is missing a field or a field fails to
                                // convert — confirm inputs are always complete
                                Ok(#ident {
                                    path: path.clone(),
                                    id,
                                    #(#ident_literals: obj.remove(#ident_strings)
                                        .unwrap()
                                        .into_node(
                                            id,
                                            #crate_name::op::join_path(path.clone(), #crate_name::op::PathSegment::Field(#ident_strings.to_string()))
                                        )
                                        .unwrap()
                                    ),*
                                })
                            } else {
                                Err(format!("failed to convert {:?} -> {}<T>", value, #ident_str.to_string()))
                            }
                        }
                    }
                    impl #impl_generics std::fmt::Debug for #ident #ty_generics #where_clause {
                        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                            // only field names are printed, not values
                            let mut fields = Vec::new();
                            #(fields.push(format!("{}", #ident_strings.to_string()));)*
                            write!(f, "{{ {:?} }}", fields.join(", "))
                        }
                    }
                    impl #impl_generics #crate_name::json_crdt::CrdtNode for #ident #ty_generics #where_clause {
                        fn apply(&mut self, op: #crate_name::op::Op<#crate_name::json_crdt::Value>) -> #crate_name::json_crdt::OpState {
                            let path = op.path.clone();
                            let author = op.id.clone();
                            // op must target a descendant of this node
                            if !#crate_name::op::ensure_subpath(&self.path, &op.path) {
                                #crate_name::debug::debug_path_mismatch(self.path.to_owned(), op.path);
                                return #crate_name::json_crdt::OpState::ErrPathMismatch;
                            }
                            if self.path.len() == op.path.len() {
                                // op addresses this struct itself — structs are static
                                return #crate_name::json_crdt::OpState::ErrApplyOnStruct;
                            } else {
                                // route on the next path segment after our own path
                                let idx = self.path.len();
                                if let #crate_name::op::PathSegment::Field(path_seg) = &op.path[idx] {
                                    match &path_seg[..] {
                                        #(#ident_strings => {
                                            return self.#ident_literals.apply(op.into());
                                        }),*
                                        _ => {},
                                    };
                                };
                                // segment named no known field
                                return #crate_name::json_crdt::OpState::ErrPathMismatch
                            }
                        }
                        fn view(&self) -> #crate_name::json_crdt::Value {
                            let mut view_map = std::collections::HashMap::new();
                            #(view_map.insert(#ident_strings.to_string(), self.#ident_literals.view().into());)*
                            #crate_name::json_crdt::Value::Object(view_map)
                        }
                        fn new(id: #crate_name::keypair::AuthorId, path: Vec<#crate_name::op::PathSegment>) -> Self {
                            Self {
                                path: path.clone(),
                                id,
                                #(#field_impls),*
                            }
                        }
                    }
                    impl #crate_name::debug::DebugView for #ident {
                        #[cfg(feature = "logging-base")]
                        fn debug_view(&self, indent: usize) -> String {
                            let inner_spacing = " ".repeat(indent + 2);
                            let path_str = #crate_name::op::print_path(self.path.clone());
                            let mut inner = vec![];
                            #(inner.push(format!("{}\"{}\": {}", inner_spacing, #ident_strings, self.#ident_literals.debug_view(indent + 4)));)*
                            let inner_str = inner.join("\n");
                            format!("{} @ /{}\n{}", #ident_str, path_str, inner_str)
                        }
                        #[cfg(not(feature = "logging-base"))]
                        fn debug_view(&self, _indent: usize) -> String {
                            "".to_string()
                        }
                    }
                };
                // Hand the output tokens back to the compiler
                expanded.into()
            }
            _ => {
                return quote_spanned! { ident.span() => compile_error!("Cannot derive CRDT on tuple or unit structs"); }
                    .into()
            }
        },
        _ => return quote_spanned! { ident.span() => compile_error!("Cannot derive CRDT on enums or unions"); }.into(),
    }
}

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 502 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 300 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 80 KiB

View File

@@ -0,0 +1,317 @@
use crate::{
json_crdt::{BaseCrdt, CrdtNode, SignedOp},
keypair::SignedDigest,
list_crdt::ListCrdt,
op::{Op, OpId, PathSegment},
};
#[cfg(feature = "logging-base")]
use {
crate::{
keypair::{lsb_32, AuthorId},
op::{print_hex, print_path, ROOT_ID},
},
colored::Colorize,
random_color::{Luminosity, RandomColor},
};
#[cfg(feature = "logging-list")]
use std::collections::HashMap;
use std::fmt::Display;
/// Render the low 32 bits of an author's key as `0x`-prefixed, zero-padded hex.
#[cfg(feature = "logging-base")]
fn author_to_hex(author: AuthorId) -> String {
    let low_bits = lsb_32(author);
    format!("{:#010x}", low_bits)
}
/// Format an op as `[author,seq]`, coloring the author hex with a color
/// seeded from the author key so the same peer always gets the same color.
#[cfg(feature = "logging-base")]
fn display_op_id<T: CrdtNode>(op: &Op<T>) -> String {
    let [r, g, b] = RandomColor::new()
        .luminosity(Luminosity::Light)
        .seed(lsb_32(op.author))
        .to_rgb_array();
    format!(
        "[{},{}]",
        author_to_hex(op.author).bold().truecolor(r, g, b),
        op.seq.to_string().yellow()
    )
}
/// Log that an op's content could not be coerced to the targeted node's type.
/// Compiles to a no-op unless the `logging-base` feature is enabled.
pub fn debug_type_mismatch(_msg: String) {
    #[cfg(feature = "logging-base")]
    {
        let header = "type mismatch! ignoring this node".red();
        println!(" {}\n {_msg}", header);
    }
}
/// Log both the node's path and the op's path when they fail to line up.
/// Compiles to a no-op unless the `logging-base` feature is enabled.
pub fn debug_path_mismatch(_our_path: Vec<PathSegment>, _op_path: Vec<PathSegment>) {
    #[cfg(feature = "logging-base")]
    {
        let ours = print_path(_our_path);
        let theirs = print_path(_op_path);
        println!(
            " {}\n current path: {}\n op path: {}",
            "path mismatch!".red(),
            ours,
            theirs,
        );
    }
}
/// Log that an op was routed to a bare primitive, which cannot accept ops
/// (wrap the primitive in a register CRDT to make it mutable).
/// Compiles to a no-op unless the `logging-base` feature is enabled.
pub fn debug_op_on_primitive(_op_path: Vec<PathSegment>) {
    #[cfg(feature = "logging-base")]
    {
        let rendered = print_path(_op_path);
        println!(
            " {} this is an error, ignoring op.\n op path: {}",
            "trying to apply() on a primitive!".red(),
            rendered,
        );
    }
}
/// Render an author id as a colored badge (hex on a per-author background
/// color), used as the prefix for per-replica log lines.
#[cfg(feature = "logging-base")]
fn display_author(author: AuthorId) -> String {
    let [r, g, b] = RandomColor::new()
        .luminosity(Luminosity::Light)
        .seed(lsb_32(author))
        .to_rgb_array();
    format!(" {} ", author_to_hex(author))
        .black()
        .on_truecolor(r, g, b)
        .to_string()
}
/// A pretty-printable debug representation of a CRDT node.
/// `indent` is the number of spaces the caller wants nested lines shifted by.
pub trait DebugView {
    fn debug_view(&self, indent: usize) -> String;
}
impl<T: CrdtNode + DebugView> BaseCrdt<T> {
    /// Print the whole document tree (only with `logging-json`).
    pub fn debug_view(&self) {
        #[cfg(feature = "logging-json")]
        println!("document is now:\n{}", self.doc.debug_view(0));
    }
    /// Log that we are about to try applying a signed op, showing a short
    /// prefix of its signed digest and both authors involved.
    pub fn log_try_apply(&self, _op: &SignedOp) {
        #[cfg(feature = "logging-json")]
        println!(
            "{} trying to apply operation {} from {}",
            display_author(self.id),
            &print_hex(&_op.signed_digest)[..6],
            display_author(_op.inner.author())
        );
    }
    /// Log that an op's signed digest failed verification against its
    /// claimed author (possible tampering in transit).
    pub fn debug_digest_failure(&self, _op: SignedOp) {
        #[cfg(feature = "logging-json")]
        println!(
            " {} cannot confirm signed_digest from {}",
            "digest failure!".red(),
            display_author(_op.author())
        );
    }
    /// Log that an op was queued because one of its causal dependencies
    /// has not been delivered yet.
    pub fn log_missing_causal_dep(&self, _missing: &SignedDigest) {
        #[cfg(feature = "logging-json")]
        println!(
            " {} haven't received op with digest {}",
            "missing causal dependency".red(),
            print_hex(_missing)
        );
    }
    /// Log the op actually being applied, including its target path and a
    /// rendered view of the op itself.
    pub fn log_actually_apply(&self, _op: &SignedOp) {
        #[cfg(feature = "logging-json")]
        {
            println!(
                " applying op to path: /{}",
                print_path(_op.inner.path.clone())
            );
            println!("{}", _op.inner.debug_view(2));
        }
    }
}
impl<T> Op<T>
where
    T: CrdtNode,
{
    /// Log that an op's claimed id does not equal the hash recomputed from
    /// its contents — an equivocation/tampering signal. No-op unless
    /// `logging-base` is enabled.
    pub fn debug_hash_failure(&self) {
        #[cfg(feature = "logging-base")]
        {
            println!(" {}", "hash failure!".red());
            println!(" expected: {}", print_hex(&self.id));
            println!(" computed: {}", print_hex(&self.hash_to_id()));
        }
    }
}
/// Blanket impl: anything `Display` can be debug-viewed via `to_string`
/// (indentation is ignored for these leaf values). When logging is compiled
/// out, the view is the empty string.
impl<T> DebugView for T
where
    T: Display,
{
    #[cfg(feature = "logging-base")]
    fn debug_view(&self, _indent: usize) -> String {
        self.to_string()
    }
    #[cfg(not(feature = "logging-base"))]
    fn debug_view(&self, _indent: usize) -> String {
        "".to_string()
    }
}
/// Render an op as `[author,seq] content`, striking through deleted content
/// and showing the sentinel root op in blue.
///
/// NOTE(review): the two cfg arms are `not(logging-base)` and `logging-json`
/// — with `logging-base` enabled but `logging-json` disabled (e.g. only
/// `logging-list`), neither arm is compiled and the impl has no method body;
/// confirm that feature combination is not supported.
impl<T> DebugView for Op<T>
where
    T: DebugView + CrdtNode,
{
    #[cfg(not(feature = "logging-base"))]
    fn debug_view(&self, _indent: usize) -> String {
        "".to_string()
    }
    #[cfg(feature = "logging-json")]
    fn debug_view(&self, indent: usize) -> String {
        let op_id = display_op_id(self);
        let content = if self.id == ROOT_ID && self.content.is_none() {
            "root".blue().bold().to_string()
        } else {
            self.content
                .as_ref()
                .map_or("[empty]".to_string(), |c| c.debug_view(indent + 2))
        };
        let content_str = if self.is_deleted && self.id != ROOT_ID {
            content.red().strikethrough().to_string()
        } else {
            content
        };
        format!("{op_id} {content_str}")
    }
}
impl<T> ListCrdt<T>
where
    T: CrdtNode,
{
    /// Pretty-print the op tree as an indented box-drawing diagram, with the
    /// op matching `highlight` (if any) marked as inserted/deleted, followed
    /// by the flattened document string. No-op unless `logging-list`.
    pub fn log_ops(&self, highlight: Option<OpId>) {
        #[cfg(feature = "logging-list")]
        {
            let mut lines = Vec::<String>::new();
            // do in-order traversal
            let res: Vec<&Op<T>> = self.ops.iter().collect();
            if res.is_empty() {
                println!("[empty]");
            }
            // figure out parent-child hierarchies from origins
            let mut parent_child_map: HashMap<OpId, Vec<OpId>> = HashMap::new();
            for op in &res {
                let children = parent_child_map.entry(op.origin).or_default();
                children.push(op.id);
            }
            // an op is "last" if it is its origin's final child (or the root);
            // used to pick the ╰─ vs ├─ branch character
            let is_last = |op: &Op<T>| -> bool {
                if op.id == ROOT_ID {
                    return true;
                }
                if let Some(children) = parent_child_map.get(&op.origin) {
                    return *children.last().unwrap() == op.id;
                }
                false
            };
            // make stack of origins
            let mut stack: Vec<(OpId, &str)> = Vec::new();
            stack.push((ROOT_ID, ""));
            let mut prev = None;
            for op in &res {
                let origin_idx = self.find_idx(op.origin).unwrap();
                let origin = &res[origin_idx];
                let origin_id = origin.id;
                if let Some(prev) = prev {
                    if origin_id == prev {
                        // went down one layer, add to stack
                        let stack_prefix_char = if is_last(origin) { " " } else { "" };
                        stack.push((prev, stack_prefix_char));
                    }
                }
                // pop back up until we reach the right origin
                while stack.last().unwrap().0 != origin_id {
                    stack.pop();
                }
                let cur_char = if is_last(op) { "╰─" } else { "├─" };
                let prefixes = stack.iter().map(|s| s.1).collect::<Vec<_>>().join("");
                let highlight_text = if highlight.is_some() && highlight.unwrap() == op.id {
                    if op.is_deleted {
                        "<- deleted".bold().red()
                    } else {
                        "<- inserted".bold().green()
                    }
                    .to_string()
                } else {
                    "".to_string()
                };
                let content = if op.id == ROOT_ID {
                    "root".blue().bold().to_string()
                } else {
                    op.content
                        .as_ref()
                        .map_or("[empty]".to_string(), |c| c.hash())
                };
                if op.is_deleted && op.id != ROOT_ID {
                    lines.push(format!(
                        "{}{}{} {} {}",
                        prefixes,
                        cur_char,
                        display_op_id(op),
                        content.strikethrough().red(),
                        highlight_text
                    ));
                } else {
                    lines.push(format!(
                        "{}{}{} {} {}",
                        prefixes,
                        cur_char,
                        display_op_id(op),
                        content,
                        highlight_text
                    ));
                }
                prev = Some(op.id);
            }
            // full string
            let flat = self.iter().map(|t| t.hash()).collect::<Vec<_>>().join("");
            lines.push(format!("Flattened result: {}", flat));
            println!("{}", lines.join("\n"));
        }
    }
    /// Log a single insert or delete as it is applied to this replica.
    /// No-op unless `logging-list`.
    pub fn log_apply(&self, op: &Op<T>) {
        #[cfg(feature = "logging-list")]
        {
            if op.is_deleted {
                println!(
                    "{} Performing a delete of {}@{}",
                    display_author(self.our_id),
                    display_op_id(op),
                    op.sequence_num(),
                );
                return;
            }
            if let Some(content) = op.content.as_ref() {
                // NOTE(review): the final argument repeats display_op_id(op),
                // so "after {}" prints the op's own id rather than the id of
                // its origin — looks like a copy-paste slip; confirm the
                // intent was to show the anchor op here.
                println!(
                    "{} Performing an insert of {}@{}: '{}' after {}",
                    display_author(self.our_id),
                    display_op_id(op),
                    op.sequence_num(),
                    content.hash(),
                    display_op_id(op)
                );
            }
        }
    }
}

View File

@@ -0,0 +1,868 @@
use std::{
collections::{HashMap, HashSet},
fmt::Display,
};
use crate::{
debug::{debug_op_on_primitive, DebugView},
keypair::{sha256, sign, AuthorId, SignedDigest},
list_crdt::ListCrdt,
lww_crdt::LwwRegisterCrdt,
op::{print_hex, print_path, Hashable, Op, OpId, PathSegment},
};
pub use bft_crdt_derive::*;
use fastcrypto::traits::VerifyingKey;
use fastcrypto::{
ed25519::{Ed25519KeyPair, Ed25519PublicKey, Ed25519Signature},
traits::{KeyPair, ToFromBytes},
// Verifier,
};
/// Anything that can be nested in a JSON CRDT
pub trait CrdtNode: CrdtNodeFromValue + Hashable + Clone {
    /// Create a new CRDT of this type, owned by `id` and rooted at `path`
    fn new(id: AuthorId, path: Vec<PathSegment>) -> Self;
    /// Apply an operation to this CRDT, forwarding if necessary
    fn apply(&mut self, op: Op<Value>) -> OpState;
    /// Get a JSON representation of the value in this node
    fn view(&self) -> Value;
}
/// Enum representing possible outcomes of applying an operation to a CRDT
#[derive(Debug, PartialEq)]
pub enum OpState {
    /// Operation applied successfully
    Ok,
    /// Tried to apply an operation to a non-CRDT primitive (i.e. f64, bool, etc.)
    /// If you would like a mutable primitive, wrap it in a [`LWWRegisterCRDT`]
    ErrApplyOnPrimitive,
    /// Tried to apply an operation to a static struct CRDT
    /// If you would like a mutable object, use a [`Value`]
    ErrApplyOnStruct,
    /// Tried to apply an operation that contains content of the wrong type.
    /// In other words, the content cannot be coerced to the CRDT at the path specified.
    ErrMismatchedType,
    /// The signed digest of the message did not match the claimed author of the message.
    /// This can happen if the message was tampered with during delivery
    ErrDigestMismatch,
    /// The hash of the message did not match the contents of the message.
    /// This can happen if the author tried to perform an equivocation attack by creating an
    /// operation and modifying it after it has already been created
    ErrHashMismatch,
    /// Tried to apply an operation to a non-existent path. The author may have forgotten to attach
    /// a causal dependency
    ErrPathMismatch,
    /// Trying to modify/delete the sentinel (zero-th) node element that is used for book-keeping
    ErrListApplyToEmpty,
    /// We have not received all of the causal dependencies of this operation. It has been queued
    /// up and will be executed when its causal dependencies have been delivered
    MissingCausalDependencies,
}
/// The following types can be used as a 'terminal' type in CRDTs.
/// Marker trait: a primitive is anything convertible to [`Value`] with a
/// sensible default; primitives get the stub `CrdtNode` impl below.
pub trait MarkPrimitive: Into<Value> + Default {}
impl MarkPrimitive for bool {}
impl MarkPrimitive for i32 {}
impl MarkPrimitive for i64 {}
impl MarkPrimitive for f64 {}
impl MarkPrimitive for char {}
impl MarkPrimitive for String {}
impl MarkPrimitive for Value {}
/// Implement CrdtNode for non-CRDTs
/// This is a stub implementation so most functions don't do anything/log an error
impl<T> CrdtNode for T
where
    T: CrdtNodeFromValue + MarkPrimitive + Hashable + Clone,
{
    /// Primitives are immutable: applying an op is always an error.
    fn apply(&mut self, _op: Op<Value>) -> OpState {
        OpState::ErrApplyOnPrimitive
    }
    /// A primitive's view is just itself converted to a [`Value`].
    fn view(&self) -> Value {
        self.to_owned().into()
    }
    /// NOTE(review): constructing a primitive logs the "apply() on a
    /// primitive" warning and returns `Default::default()` — confirm that
    /// logging on every construction (not just misrouted ops) is intended.
    fn new(_id: AuthorId, _path: Vec<PathSegment>) -> Self {
        debug_op_on_primitive(_path);
        Default::default()
    }
}
/// The base struct for a JSON CRDT. Allows for declaring causal
/// dependencies across fields. It only accepts messages of [`SignedOp`] for BFT.
pub struct BaseCrdt<T: CrdtNode> {
    /// Public key of this CRDT
    pub id: AuthorId,
    /// Internal base CRDT
    pub doc: T,
    /// In a real world scenario, this would be a proper hashgraph that allows for
    /// efficient reconciliation of missing dependencies. We naively keep a hashset
    /// of messages we've seen (represented by their [`SignedDigest`]).
    received: HashSet<SignedDigest>,
    // Ops waiting on an undelivered causal dependency, keyed by the digest
    // of the dependency they are blocked on.
    message_q: HashMap<SignedDigest, Vec<SignedOp>>,
}
/// An [`Op<Value>`] with a few bits of extra metadata
#[derive(Clone)]
pub struct SignedOp {
    // Note that this can be different from the author of the inner op as the inner op could have been created
    // by a different person
    author: AuthorId,
    /// Signed hash using priv key of author. Effectively an [`OpID`]. Use this as the ID to figure out what has been delivered already
    pub signed_digest: SignedDigest,
    /// The underlying operation, with content erased to [`Value`]
    pub inner: Op<Value>,
    /// List of causal dependencies
    pub depends_on: Vec<SignedDigest>,
}
impl SignedOp {
    /// Id of the wrapped inner op.
    pub fn id(&self) -> OpId {
        self.inner.id
    }
    /// Claimed signer of this envelope (may differ from the inner op's author).
    pub fn author(&self) -> AuthorId {
        self.author
    }
    /// Creates a digest of the following fields. Any changes in the fields will change the signed digest
    /// - id (hash of the following)
    /// - origin
    /// - author
    /// - seq
    /// - is_deleted
    /// - path
    /// - dependencies
    fn digest(&self) -> [u8; 32] {
        // the id already commits to origin/author/seq/is_deleted; we add the
        // path and the dependency list explicitly
        let path_string = print_path(self.inner.path.clone());
        let dependency_string = self
            .depends_on
            .iter()
            .map(print_hex)
            .collect::<Vec<_>>()
            .join("");
        let fmt_str = format!("{:?},{path_string},{dependency_string}", self.id());
        sha256(fmt_str)
    }
    /// Sign this digest with the given keypair. Shouldn't need to be called manually,
    /// just use [`SignedOp::from_op`] instead
    fn sign_digest(&mut self, keypair: &Ed25519KeyPair) {
        self.signed_digest = sign(keypair, &self.digest()).sig.to_bytes()
    }
    /// Ensure digest was actually signed by the author it claims to be signed by
    pub fn is_valid_digest(&self) -> bool {
        let digest = Ed25519Signature::from_bytes(&self.signed_digest);
        let pubkey = Ed25519PublicKey::from_bytes(&self.author());
        match (digest, pubkey) {
            // recompute the digest locally and verify the signature over it
            (Ok(digest), Ok(pubkey)) => pubkey.verify(&self.digest(), &digest).is_ok(),
            // malformed signature or key bytes can never validate
            (_, _) => false,
        }
    }
    /// Sign a normal op and add all the needed metadata
    pub fn from_op<T: CrdtNode>(
        value: Op<T>,
        keypair: &Ed25519KeyPair,
        depends_on: Vec<SignedDigest>,
    ) -> Self {
        let author = keypair.public().0.to_bytes();
        let mut new = Self {
            // erase the content type to Value so ops are uniform on the wire
            inner: Op {
                content: value.content.map(|c| c.view()),
                origin: value.origin,
                author: value.author,
                seq: value.seq,
                path: value.path,
                is_deleted: value.is_deleted,
                id: value.id,
            },
            author,
            // placeholder; overwritten by sign_digest below
            signed_digest: [0u8; 64],
            depends_on,
        };
        new.sign_digest(keypair);
        new
    }
}
impl<T: CrdtNode + DebugView> BaseCrdt<T> {
    /// Create a new BaseCRDT of the given type. Multiple BaseCRDTs
    /// can be created from a single keypair but you are responsible for
    /// routing messages to the right BaseCRDT. Usually you should just make a single
    /// struct that contains all the state you need
    pub fn new(keypair: &Ed25519KeyPair) -> Self {
        let id = keypair.public().0.to_bytes();
        Self {
            id,
            doc: T::new(id, vec![]),
            received: HashSet::new(),
            message_q: HashMap::new(),
        }
    }
    /// Apply a signed operation to this BaseCRDT, verifying integrity and routing to the right
    /// nested CRDT
    pub fn apply(&mut self, op: SignedOp) -> OpState {
        self.log_try_apply(&op);
        // signature verification is compiled out when `bft` is disabled
        #[cfg(feature = "bft")]
        if !op.is_valid_digest() {
            self.debug_digest_failure(op);
            return OpState::ErrDigestMismatch;
        }
        let op_id = op.signed_digest;
        if !op.depends_on.is_empty() {
            // NOTE(review): the op is queued under the *first* missing
            // dependency only; later missing deps are rechecked when this
            // op is retried — confirm that is the intended behavior.
            for origin in &op.depends_on {
                if !self.received.contains(origin) {
                    self.log_missing_causal_dep(origin);
                    self.message_q.entry(*origin).or_default().push(op);
                    return OpState::MissingCausalDependencies;
                }
            }
        }
        // apply
        self.log_actually_apply(&op);
        let status = self.doc.apply(op.inner);
        self.debug_view();
        // NOTE(review): the digest is marked received even when the inner
        // apply returned an error status — confirm failed ops should still
        // satisfy causal dependencies.
        self.received.insert(op_id);
        // apply all of its causal dependents if there are any
        let dependent_queue = self.message_q.remove(&op_id);
        if let Some(mut q) = dependent_queue {
            for dependent in q.drain(..) {
                self.apply(dependent);
            }
        }
        status
    }
}
/// An enum representing a JSON value
///
/// All numbers are stored as `f64`, mirroring JSON/JavaScript semantics.
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
    /// JSON `null`.
    Null,
    Bool(bool),
    Number(f64),
    String(String),
    Array(Vec<Value>),
    /// A JSON object; key order is not preserved (backed by a `HashMap`).
    Object(HashMap<String, Value>),
}
impl Display for Value {
    /// Pretty-print the value as JSON-ish text. Arrays with more than one
    /// element span multiple lines; everything else is rendered inline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = match self {
            Value::Null => "null".to_string(),
            Value::Bool(b) => b.to_string(),
            Value::Number(n) => n.to_string(),
            Value::String(s) => format!("\"{s}\""),
            // Multi-element arrays: one indented element per line.
            Value::Array(arr) if arr.len() > 1 => {
                let items: Vec<String> = arr.iter().map(|x| format!(" {x}")).collect();
                format!("[\n{}\n]", items.join(",\n"))
            }
            // Zero- or one-element arrays stay on a single line.
            Value::Array(arr) => {
                let items: Vec<String> = arr.iter().map(|x| x.to_string()).collect();
                format!("[ {} ]", items.join(", "))
            }
            Value::Object(obj) => {
                let entries: Vec<String> = obj
                    .iter()
                    .map(|(k, v)| format!(" \"{k}\": {v}"))
                    .collect();
                format!("{{ {} }}", entries.join(",\n"))
            }
        };
        write!(f, "{rendered}")
    }
}
impl Default for Value {
fn default() -> Self {
Self::Null
}
}
/// Allow easy conversion to and from serde's JSON format. This allows us to use the [`json!`]
/// macro
impl From<Value> for serde_json::Value {
    fn from(value: Value) -> Self {
        match value {
            Value::Null => serde_json::Value::Null,
            Value::Bool(x) => serde_json::Value::Bool(x),
            // `Number::from_f64` returns None for NaN/infinity (JSON has no
            // representation for them); map those to null instead of panicking.
            Value::Number(x) => serde_json::Number::from_f64(x)
                .map(serde_json::Value::Number)
                .unwrap_or(serde_json::Value::Null),
            Value::String(x) => serde_json::Value::String(x),
            // We own `value`, so consume the containers instead of cloning
            // every element.
            Value::Array(x) => {
                serde_json::Value::Array(x.into_iter().map(Into::into).collect())
            }
            Value::Object(x) => serde_json::Value::Object(
                x.into_iter().map(|(k, v)| (k, v.into())).collect(),
            ),
        }
    }
}
impl From<serde_json::Value> for Value {
    fn from(value: serde_json::Value) -> Self {
        match value {
            serde_json::Value::Null => Value::Null,
            serde_json::Value::Bool(x) => Value::Bool(x),
            // `as_f64` is Some for every number serde_json produces with
            // default features; fall back to NaN rather than panicking if a
            // value can't be represented (possible with the crate's
            // arbitrary-precision feature).
            serde_json::Value::Number(x) => Value::Number(x.as_f64().unwrap_or(f64::NAN)),
            serde_json::Value::String(x) => Value::String(x),
            // Consume the owned containers instead of cloning every element.
            serde_json::Value::Array(x) => {
                Value::Array(x.into_iter().map(Into::into).collect())
            }
            serde_json::Value::Object(x) => Value::Object(
                x.into_iter().map(|(k, v)| (k, v.into())).collect(),
            ),
        }
    }
}
impl Value {
    /// Consume this value and convert it into a [`serde_json::Value`].
    pub fn into_json(self) -> serde_json::Value {
        serde_json::Value::from(self)
    }
}
/// Conversions from primitive types to [`Value`]
impl From<bool> for Value {
fn from(val: bool) -> Self {
Value::Bool(val)
}
}
impl From<i64> for Value {
fn from(val: i64) -> Self {
Value::Number(val as f64)
}
}
impl From<i32> for Value {
fn from(val: i32) -> Self {
Value::Number(val as f64)
}
}
impl From<f64> for Value {
fn from(val: f64) -> Self {
Value::Number(val)
}
}
impl From<String> for Value {
fn from(val: String) -> Self {
Value::String(val)
}
}
impl From<char> for Value {
fn from(val: char) -> Self {
Value::String(val.into())
}
}
impl<T> From<Option<T>> for Value
where
T: CrdtNode,
{
fn from(val: Option<T>) -> Self {
match val {
Some(x) => x.view(),
None => Value::Null,
}
}
}
impl<T> From<Vec<T>> for Value
where
T: CrdtNode,
{
fn from(value: Vec<T>) -> Self {
Value::Array(value.iter().map(|x| x.view()).collect())
}
}
/// Fallibly create a CRDT Node from a JSON Value
pub trait CrdtNodeFromValue: Sized {
    /// Build `Self` for the given author at the given document path from a
    /// plain JSON value. Returns a human-readable message on a type mismatch.
    fn node_from(value: Value, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String>;
}
/// Fallibly cast a JSON Value into a CRDT Node
pub trait IntoCrdtNode<T>: Sized {
    /// Convert `self` into a CRDT node of type `T` rooted at `path`.
    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String>;
}
/// [`CrdtNodeFromValue`] implies [`IntoCRDTNode<T>`]
impl<T> IntoCrdtNode<T> for Value
where
    T: CrdtNodeFromValue,
{
    // Blanket impl: delegate straight to the target type's constructor.
    fn into_node(self, id: AuthorId, path: Vec<PathSegment>) -> Result<T, String> {
        T::node_from(self, id, path)
    }
}
/// Trivial conversion from Value to Value as CrdtNodeFromValue
impl CrdtNodeFromValue for Value {
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        Ok(value)
    }
}
/// Conversions from primitives to CRDTs
impl CrdtNodeFromValue for bool {
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        // Only a JSON bool converts; anything else is a type error.
        match value {
            Value::Bool(x) => Ok(x),
            value => Err(format!("failed to convert {value:?} -> bool")),
        }
    }
}
impl CrdtNodeFromValue for f64 {
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        match value {
            Value::Number(x) => Ok(x),
            value => Err(format!("failed to convert {value:?} -> f64")),
        }
    }
}
impl CrdtNodeFromValue for i64 {
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        if let Value::Number(x) = value {
            // Numbers are stored as f64; the cast truncates any fractional part.
            Ok(x as i64)
        } else {
            // Bug fix: the message previously claimed a failed conversion to f64.
            Err(format!("failed to convert {value:?} -> i64"))
        }
    }
}
impl CrdtNodeFromValue for String {
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        // Only a JSON string converts; anything else is a type error.
        match value {
            Value::String(x) => Ok(x),
            value => Err(format!("failed to convert {value:?} -> String")),
        }
    }
}
impl CrdtNodeFromValue for char {
    /// Converts a single-character JSON string to a `char`; rejects empty
    /// strings and non-string values. Extra characters beyond the first are
    /// ignored, matching the previous behavior.
    fn node_from(value: Value, _id: AuthorId, _path: Vec<PathSegment>) -> Result<Self, String> {
        // Borrow instead of cloning the entire value just to inspect it; the
        // original is still available for the error messages.
        if let Value::String(x) = &value {
            x.chars().next().ok_or(format!(
                "failed to convert {value:?} -> char: found a zero-length string"
            ))
        } else {
            Err(format!("failed to convert {value:?} -> char"))
        }
    }
}
impl<T> CrdtNodeFromValue for LwwRegisterCrdt<T>
where
    T: CrdtNode,
{
    /// A register accepts any JSON value: start empty, then set it once.
    fn node_from(value: Value, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
        let mut register = LwwRegisterCrdt::new(id, path);
        register.set(value);
        Ok(register)
    }
}
impl<T> CrdtNodeFromValue for ListCrdt<T>
where
    T: CrdtNode,
{
    /// Build a list CRDT from a JSON array by inserting each element in
    /// order. Any non-array value is rejected.
    fn node_from(value: Value, id: AuthorId, path: Vec<PathSegment>) -> Result<Self, String> {
        if let Value::Array(arr) = value {
            let mut crdt = ListCrdt::new(id, path);
            // The previous closure-with-Result scaffolding could never
            // produce an error; a plain loop says the same thing directly.
            for (i, val) in arr.into_iter().enumerate() {
                crdt.insert_idx(i, val);
            }
            Ok(crdt)
        } else {
            Err(format!("failed to convert {value:?} -> ListCRDT<T>"))
        }
    }
}
#[cfg(test)]
mod test {
use serde_json::json;
use crate::{
json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, OpState, Value},
keypair::make_keypair,
list_crdt::ListCrdt,
lww_crdt::LwwRegisterCrdt,
op::{print_path, ROOT_ID},
};
#[test]
fn test_derive_basic() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Player {
x: LwwRegisterCrdt<f64>,
y: LwwRegisterCrdt<f64>,
}
let keypair = make_keypair();
let crdt = BaseCrdt::<Player>::new(&keypair);
assert_eq!(print_path(crdt.doc.x.path), "x");
assert_eq!(print_path(crdt.doc.y.path), "y");
}
#[test]
fn test_derive_nested() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Position {
x: LwwRegisterCrdt<f64>,
y: LwwRegisterCrdt<f64>,
}
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Player {
pos: Position,
balance: LwwRegisterCrdt<f64>,
messages: ListCrdt<String>,
}
let keypair = make_keypair();
let crdt = BaseCrdt::<Player>::new(&keypair);
assert_eq!(print_path(crdt.doc.pos.x.path), "pos.x");
assert_eq!(print_path(crdt.doc.pos.y.path), "pos.y");
assert_eq!(print_path(crdt.doc.balance.path), "balance");
assert_eq!(print_path(crdt.doc.messages.path), "messages");
}
#[test]
fn test_lww_ops() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Test {
a: LwwRegisterCrdt<f64>,
b: LwwRegisterCrdt<bool>,
c: LwwRegisterCrdt<String>,
}
let kp1 = make_keypair();
let kp2 = make_keypair();
let mut base1 = BaseCrdt::<Test>::new(&kp1);
let mut base2 = BaseCrdt::<Test>::new(&kp2);
let _1_a_1 = base1.doc.a.set(3.0).sign(&kp1);
let _1_b_1 = base1.doc.b.set(true).sign(&kp1);
let _2_a_1 = base2.doc.a.set(1.5).sign(&kp2);
let _2_a_2 = base2.doc.a.set(2.13).sign(&kp2);
let _2_c_1 = base2.doc.c.set("abc".to_string()).sign(&kp2);
assert_eq!(base1.doc.a.view(), json!(3.0).into());
assert_eq!(base2.doc.a.view(), json!(2.13).into());
assert_eq!(base1.doc.b.view(), json!(true).into());
assert_eq!(base2.doc.c.view(), json!("abc").into());
assert_eq!(
base1.doc.view().into_json(),
json!({
"a": 3.0,
"b": true,
"c": null,
})
);
assert_eq!(
base2.doc.view().into_json(),
json!({
"a": 2.13,
"b": null,
"c": "abc",
})
);
assert_eq!(base2.apply(_1_a_1), OpState::Ok);
assert_eq!(base2.apply(_1_b_1), OpState::Ok);
assert_eq!(base1.apply(_2_a_1), OpState::Ok);
assert_eq!(base1.apply(_2_a_2), OpState::Ok);
assert_eq!(base1.apply(_2_c_1), OpState::Ok);
assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
assert_eq!(
base1.doc.view().into_json(),
json!({
"a": 2.13,
"b": true,
"c": "abc"
})
)
}
#[test]
fn test_vec_and_map_ops() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Test {
a: ListCrdt<String>,
}
let kp1 = make_keypair();
let kp2 = make_keypair();
let mut base1 = BaseCrdt::<Test>::new(&kp1);
let mut base2 = BaseCrdt::<Test>::new(&kp2);
let _1a = base1.doc.a.insert(ROOT_ID, "a".to_string()).sign(&kp1);
let _1b = base1.doc.a.insert(_1a.id(), "b".to_string()).sign(&kp1);
let _2c = base2.doc.a.insert(ROOT_ID, "c".to_string()).sign(&kp2);
let _2d = base2.doc.a.insert(_1b.id(), "d".to_string()).sign(&kp2);
assert_eq!(
base1.doc.view().into_json(),
json!({
"a": ["a", "b"],
})
);
// as _1b hasn't been delivered to base2 yet
assert_eq!(
base2.doc.view().into_json(),
json!({
"a": ["c"],
})
);
assert_eq!(base2.apply(_1b), OpState::MissingCausalDependencies);
assert_eq!(base2.apply(_1a), OpState::Ok);
assert_eq!(base1.apply(_2d), OpState::Ok);
assert_eq!(base1.apply(_2c), OpState::Ok);
assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
}
#[test]
fn test_causal_field_dependency() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Item {
name: LwwRegisterCrdt<String>,
soulbound: LwwRegisterCrdt<bool>,
}
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Player {
inventory: ListCrdt<Item>,
balance: LwwRegisterCrdt<f64>,
}
let kp1 = make_keypair();
let kp2 = make_keypair();
let mut base1 = BaseCrdt::<Player>::new(&kp1);
let mut base2 = BaseCrdt::<Player>::new(&kp2);
// require balance update to happen before inventory update
let _add_money = base1.doc.balance.set(5000.0).sign(&kp1);
let _spend_money = base1
.doc
.balance
.set(3000.0)
.sign_with_dependencies(&kp1, vec![&_add_money]);
let sword: Value = json!({
"name": "Sword",
"soulbound": true,
})
.into();
let _new_inventory_item = base1
.doc
.inventory
.insert_idx(0, sword)
.sign_with_dependencies(&kp1, vec![&_spend_money]);
assert_eq!(
base1.doc.view().into_json(),
json!({
"balance": 3000.0,
"inventory": [
{
"name": "Sword",
"soulbound": true
}
]
})
);
// do it completely out of order
assert_eq!(
base2.apply(_new_inventory_item),
OpState::MissingCausalDependencies
);
assert_eq!(
base2.apply(_spend_money),
OpState::MissingCausalDependencies
);
assert_eq!(base2.apply(_add_money), OpState::Ok);
assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
}
#[test]
fn test_2d_grid() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Game {
grid: ListCrdt<ListCrdt<LwwRegisterCrdt<bool>>>,
}
let kp1 = make_keypair();
let kp2 = make_keypair();
let mut base1 = BaseCrdt::<Game>::new(&kp1);
let mut base2 = BaseCrdt::<Game>::new(&kp2);
// init a 2d grid
let row0: Value = json!([true, false]).into();
let row1: Value = json!([false, true]).into();
let construct1 = base1.doc.grid.insert_idx(0, row0).sign(&kp1);
let construct2 = base1.doc.grid.insert_idx(1, row1).sign(&kp1);
assert_eq!(base2.apply(construct1), OpState::Ok);
assert_eq!(base2.apply(construct2.clone()), OpState::Ok);
assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
assert_eq!(
base1.doc.view().into_json(),
json!({
"grid": [[true, false], [false, true]]
})
);
let set1 = base1.doc.grid[0][0].set(false).sign(&kp1);
let set2 = base2.doc.grid[1][1].set(false).sign(&kp2);
assert_eq!(base1.apply(set2), OpState::Ok);
assert_eq!(base2.apply(set1), OpState::Ok);
assert_eq!(base1.doc.view().into_json(), base2.doc.view().into_json());
assert_eq!(
base1.doc.view().into_json(),
json!({
"grid": [[false, false], [false, false]]
})
);
let topright = base1.doc.grid[0].id_at(1).unwrap();
base1.doc.grid[0].delete(topright);
assert_eq!(
base1.doc.view().into_json(),
json!({
"grid": [[false], [false, false]]
})
);
base1.doc.grid.delete(construct2.id());
assert_eq!(
base1.doc.view().into_json(),
json!({
"grid": [[false]]
})
);
}
#[test]
fn test_arb_json() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Test {
reg: LwwRegisterCrdt<Value>,
}
let kp1 = make_keypair();
let mut base1 = BaseCrdt::<Test>::new(&kp1);
let base_val: Value = json!({
"a": true,
"b": "asdf",
"c": {
"d": [],
"e": [ false ]
}
})
.into();
base1.doc.reg.set(base_val).sign(&kp1);
assert_eq!(
base1.doc.view().into_json(),
json!({
"reg": {
"a": true,
"b": "asdf",
"c": {
"d": [],
"e": [ false ]
}
}
})
);
}
#[test]
fn test_wrong_json_types() {
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Nested {
list: ListCrdt<f64>,
}
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Test {
reg: LwwRegisterCrdt<bool>,
strct: ListCrdt<Nested>,
}
let key = make_keypair();
let mut crdt = BaseCrdt::<Test>::new(&key);
// wrong type should not go through
crdt.doc.reg.set(32);
assert_eq!(crdt.doc.reg.view(), json!(null).into());
crdt.doc.reg.set(true);
assert_eq!(crdt.doc.reg.view(), json!(true).into());
// set nested
let mut list_view: Value = crdt.doc.strct.view().into();
assert_eq!(list_view, json!([]).into());
// only keeps actual numbers
let list: Value = json!({"list": [0, 123, -0.45, "char", []]}).into();
crdt.doc.strct.insert_idx(0, list);
list_view = crdt.doc.strct.view().into();
assert_eq!(list_view, json!([{ "list": [0, 123, -0.45]}]).into());
}
}

View File

@@ -0,0 +1,57 @@
use fastcrypto::traits::VerifyingKey;
pub use fastcrypto::{
ed25519::{
Ed25519KeyPair, Ed25519PublicKey, Ed25519Signature, ED25519_PUBLIC_KEY_LENGTH,
ED25519_SIGNATURE_LENGTH,
},
traits::{KeyPair, Signer},
// Verifier,
};
use sha2::{Digest, Sha256};
/// Represents the ID of a unique node. An Ed25519 public key
pub type AuthorId = [u8; ED25519_PUBLIC_KEY_LENGTH];
/// A signed message
pub type SignedDigest = [u8; ED25519_SIGNATURE_LENGTH];
/// Create a fake public key from a u8
///
/// Only the first byte is set; the remaining bytes stay zero. Intended for
/// tests and examples, not for real identities.
pub fn make_author(n: u8) -> AuthorId {
    let mut author = [0u8; ED25519_PUBLIC_KEY_LENGTH];
    author[0] = n;
    author
}
/// Get the least significant 32 bits of a public key
///
/// Note: this combines the *first four bytes* of the key, interpreted
/// big-endian — identical to the previous manual shift-and-add version.
pub fn lsb_32(pubkey: AuthorId) -> u32 {
    u32::from_be_bytes(
        pubkey[..4]
            .try_into()
            .expect("public key is at least 4 bytes"),
    )
}
/// SHA256 hash of a string
pub fn sha256(input: String) -> [u8; 32] {
    // One-shot convenience API; equivalent to new() + update() + finalize().
    let digest = Sha256::digest(input.as_bytes());
    let mut out = [0u8; 32];
    out.copy_from_slice(&digest);
    out
}
/// Generate a random Ed25519 keypair from OS rng
pub fn make_keypair() -> Ed25519KeyPair {
    Ed25519KeyPair::generate(&mut rand::thread_rng())
}
/// Sign a byte array
pub fn sign(keypair: &Ed25519KeyPair, message: &[u8]) -> Ed25519Signature {
    keypair.sign(message)
}
/// Verify a byte array was signed by the given pubkey
///
/// Returns `true` only when `signature` is a valid signature of `message`
/// under `pubkey`; all failure modes collapse to `false`.
pub fn verify(pubkey: Ed25519PublicKey, message: &[u8], signature: Ed25519Signature) -> bool {
    pubkey.verify(message, &signature).is_ok()
}

View File

@@ -0,0 +1,8 @@
// Crate module layout.
pub mod debug;
pub mod json_crdt;
pub mod keypair;
pub mod list_crdt;
pub mod lww_crdt;
pub mod op;
// NOTE(review): `extern crate self as ...` lets code inside this crate refer
// to itself by its external name — presumably so derive-macro output that
// emits `bft_json_crdt::...` paths also compiles within this crate. Confirm
// against the derive implementation.
extern crate self as bft_json_crdt;

View File

@@ -0,0 +1,441 @@
use crate::{
debug::debug_path_mismatch,
json_crdt::{CrdtNode, OpState, Value},
keypair::AuthorId,
op::*,
};
use serde::{Deserialize, Serialize};
use std::{
cmp::{max, Ordering},
collections::HashMap,
fmt::Debug,
ops::{Index, IndexMut},
};
/// An RGA-like list CRDT that can store a CRDT-like datatype
#[derive(Clone, Serialize, Deserialize)]
pub struct ListCrdt<T>
where
    T: CrdtNode,
{
    /// Public key for this node
    pub our_id: AuthorId,
    /// Path to this CRDT
    pub path: Vec<PathSegment>,
    /// List of all the operations we know of, kept in document order.
    /// Index 0 is always the sentinel root op; deleted ops remain as
    /// tombstones.
    pub ops: Vec<Op<T>>,
    /// Queue of messages where K is the ID of the message yet to arrive
    /// and V is the list of operations depending on it
    message_q: HashMap<OpId, Vec<Op<T>>>,
    /// The sequence number of this node (highest sequence number seen so far,
    /// Lamport-clock style)
    our_seq: SequenceNumber,
}
impl<T> ListCrdt<T>
where
    T: CrdtNode,
{
    /// Create a new List CRDT with the given [`AuthorID`] (it should be unique)
    pub fn new(id: AuthorId, path: Vec<PathSegment>) -> ListCrdt<T> {
        // Seed the log with the sentinel root op; every real op anchors to it
        // directly or transitively via `origin`.
        let ops = vec![Op::make_root()];
        ListCrdt {
            our_id: id,
            path,
            ops,
            message_q: HashMap::new(),
            our_seq: 0,
        }
    }
    /// Locally insert some content causally after the given operation
    pub fn insert<U: Into<Value>>(&mut self, after: OpId, content: U) -> Op<Value> {
        let mut op = Op::new(
            after,
            self.our_id,
            self.our_seq + 1,
            false,
            Some(content.into()),
            self.path.to_owned(),
        );
        // we need to know the op ID before setting the path as [`PathSegment::Index`] requires an
        // [`OpID`]
        let new_path = join_path(self.path.to_owned(), PathSegment::Index(op.id));
        op.path = new_path;
        self.apply(op.clone());
        op
    }
    /// Shorthand function to insert at index locally. Indexing ignores deleted items
    ///
    /// Position 0 matches the sentinel root, so the new element is anchored
    /// after the op at the given visible position. Panics when `idx` is out
    /// of range.
    pub fn insert_idx<U: Into<Value> + Clone>(&mut self, idx: usize, content: U) -> Op<Value> {
        let mut i = 0;
        for op in &self.ops {
            if !op.is_deleted {
                if idx == i {
                    return self.insert(op.id, content);
                }
                i += 1;
            }
        }
        panic!("index {idx} out of range (length of {i})")
    }
    /// Shorthand to figure out the OpID of something with a given index.
    /// Useful for declaring a causal dependency if you didn't create the original
    /// (uses the same counting scheme as [`ListCrdt::insert_idx`]).
    pub fn id_at(&self, idx: usize) -> Option<OpId> {
        let mut i = 0;
        for op in &self.ops {
            if !op.is_deleted {
                if idx == i {
                    return Some(op.id);
                }
                i += 1;
            }
        }
        None
    }
    /// Mark a node as deleted. If the node doesn't exist, it will be stuck
    /// waiting for that node to be created.
    pub fn delete(&mut self, id: OpId) -> Op<Value> {
        // A delete is an op with no content whose origin is the target node.
        let op = Op::new(
            id,
            self.our_id,
            self.our_seq + 1,
            true,
            None,
            join_path(self.path.to_owned(), PathSegment::Index(id)),
        );
        self.apply(op.clone());
        op
    }
    /// Find the idx of an operation with the given [`OpID`]
    pub fn find_idx(&self, id: OpId) -> Option<usize> {
        self.ops.iter().position(|op| op.id == id)
    }
    /// Apply an operation (both local and remote) to this local list CRDT.
    /// Forwards it to a nested CRDT if necessary.
    pub fn apply(&mut self, op: Op<Value>) -> OpState {
        // Reject tampered ops whose id doesn't match their contents.
        if !op.is_valid_hash() {
            return OpState::ErrHashMismatch;
        }
        if !ensure_subpath(&self.path, &op.path) {
            return OpState::ErrPathMismatch;
        }
        // haven't reached end yet, navigate to inner CRDT
        if op.path.len() - 1 > self.path.len() {
            // The next path segment names the list element owning the nested CRDT.
            if let Some(PathSegment::Index(op_id)) = op.path.get(self.path.len()) {
                let op_id = op_id.to_owned();
                if let Some(idx) = self.find_idx(op_id) {
                    if self.ops[idx].content.is_none() {
                        return OpState::ErrListApplyToEmpty;
                    } else {
                        // Recurse into the element's own CRDT.
                        return self.ops[idx].content.as_mut().unwrap().apply(op);
                    }
                } else {
                    debug_path_mismatch(
                        join_path(self.path.to_owned(), PathSegment::Index(op_id)),
                        op.path,
                    );
                    return OpState::ErrPathMismatch;
                };
            } else {
                debug_path_mismatch(self.path.to_owned(), op.path);
                return OpState::ErrPathMismatch;
            }
        }
        // otherwise, this is just a direct replacement
        self.integrate(op.into())
    }
    /// Main CRDT logic of integrating an op properly into our local log
    /// without causing conflicts. This is basically a really fancy
    /// insertion sort.
    ///
    /// Effectively, we
    /// 1) find the parent item
    /// 2) find the right spot to insert before the next node
    fn integrate(&mut self, new_op: Op<T>) -> OpState {
        let op_id = new_op.id;
        let seq = new_op.sequence_num();
        let origin_id = self.find_idx(new_op.origin);
        if origin_id.is_none() {
            // We haven't seen this op's parent yet: queue it under the missing
            // origin; it is retried when that origin is integrated below.
            self.message_q
                .entry(new_op.origin)
                .or_default()
                .push(new_op);
            return OpState::MissingCausalDependencies;
        }
        let new_op_parent_idx = origin_id.unwrap();
        // if its a delete operation, we don't need to do much
        self.log_apply(&new_op);
        if new_op.is_deleted {
            // Tombstone the target in place; it stays in `ops` so later ops
            // can still anchor to it.
            let op = &mut self.ops[new_op_parent_idx];
            op.is_deleted = true;
            return OpState::Ok;
        }
        // otherwise, we are in an insert case
        // start looking from right after parent
        // stop when we reach end of document
        let mut i = new_op_parent_idx + 1;
        while i < self.ops.len() {
            let op = &self.ops[i];
            let op_parent_idx = self.find_idx(op.origin).unwrap();
            // idempotency
            if op.id == new_op.id {
                return OpState::Ok;
            }
            // first, lets compare causal origins
            match new_op_parent_idx.cmp(&op_parent_idx) {
                Ordering::Greater => break,
                Ordering::Equal => {
                    // our parents are equal, we are siblings
                    // siblings are sorted first by sequence number then by author id
                    match new_op.sequence_num().cmp(&op.sequence_num()) {
                        Ordering::Greater => break,
                        Ordering::Equal => {
                            // conflict, resolve arbitrarily but deterministically
                            // tie-break on author id as that is unique
                            if new_op.author() > op.author() {
                                break;
                            }
                        }
                        Ordering::Less => (),
                    }
                }
                Ordering::Less => (),
            }
            i += 1;
        }
        // insert at i
        self.ops.insert(i, new_op);
        // Advance our Lamport clock so our next local op sorts after
        // everything we've seen.
        self.our_seq = max(self.our_seq, seq);
        self.log_ops(Some(op_id));
        // apply all of its causal dependents if there are any
        let dependent_queue = self.message_q.remove(&op_id);
        if let Some(mut q) = dependent_queue {
            for dependent in q.drain(..) {
                self.integrate(dependent);
            }
        }
        OpState::Ok
    }
    /// Make an iterator out of list CRDT contents, ignoring deleted items and empty content
    /// (the sentinel root has no content and is therefore skipped too)
    pub fn iter(&self) -> impl Iterator<Item = &T> {
        self.ops
            .iter()
            .filter(|op| !op.is_deleted && op.content.is_some())
            .map(|op| op.content.as_ref().unwrap())
    }
    /// Convenience function to get a vector of visible list elements
    pub fn view(&self) -> Vec<T> {
        self.iter().map(|i| i.to_owned()).collect()
    }
}
impl<T> Debug for ListCrdt<T>
where
    T: CrdtNode,
{
    /// Renders a bracketed, comma-separated list of every op ID in the log
    /// (including the sentinel root and tombstones).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let ids: Vec<String> = self.ops.iter().map(|op| format!("{:?}", op.id)).collect();
        write!(f, "[{}]", ids.join(", "))
    }
}
/// Allows us to index into a List CRDT like we would with an array
impl<T> Index<usize> for ListCrdt<T>
where
    T: CrdtNode,
{
    type Output = T;
    /// Indexes over visible elements only (non-deleted, with content),
    /// panicking like slice indexing when `idx` is out of range.
    fn index(&self, idx: usize) -> &Self::Output {
        let mut i = 0;
        for op in self.ops.iter().filter(|op| !op.is_deleted && op.content.is_some()) {
            if i == idx {
                return op.content.as_ref().unwrap();
            }
            i += 1;
        }
        panic!("index {idx} out of range (length of {i})")
    }
}
/// Allows us to mutably index into a List CRDT like we would with an array
impl<T> IndexMut<usize> for ListCrdt<T>
where
    T: CrdtNode,
{
    /// Mutable twin of the `Index` impl: same visibility rules and the same
    /// panic behavior when `idx` is out of range.
    fn index_mut(&mut self, idx: usize) -> &mut Self::Output {
        let mut i = 0;
        for op in self.ops.iter_mut().filter(|op| !op.is_deleted && op.content.is_some()) {
            if i == idx {
                return op.content.as_mut().unwrap();
            }
            i += 1;
        }
        panic!("index {idx} out of range (length of {i})")
    }
}
impl<T> CrdtNode for ListCrdt<T>
where
    T: CrdtNode,
{
    /// Route a type-erased op to this list's own `apply`.
    fn apply(&mut self, op: Op<Value>) -> OpState {
        self.apply(op.into())
    }
    /// Type-erased view: the visible elements as a JSON array.
    fn view(&self) -> Value {
        self.view().into()
    }
    fn new(id: AuthorId, path: Vec<PathSegment>) -> Self {
        Self::new(id, path)
    }
}
#[cfg(feature = "logging-base")]
use crate::debug::DebugView;
#[cfg(feature = "logging-base")]
impl<T> DebugView for ListCrdt<T>
where
T: CrdtNode + DebugView,
{
fn debug_view(&self, indent: usize) -> String {
let spacing = " ".repeat(indent);
let path_str = print_path(self.path.clone());
let inner = self
.ops
.iter()
.map(|op| {
format!(
"{spacing}{}: {}",
&print_hex(&op.id)[..6],
op.debug_view(indent)
)
})
.collect::<Vec<_>>()
.join("\n");
format!("List CRDT @ /{path_str}\n{inner}")
}
}
#[cfg(test)]
mod test {
use crate::{json_crdt::OpState, keypair::make_author, list_crdt::ListCrdt, op::ROOT_ID};
#[test]
fn test_list_simple() {
let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
let _one = list.insert(ROOT_ID, 1);
let _two = list.insert(_one.id, 2);
let _three = list.insert(_two.id, 3);
let _four = list.insert(_one.id, 4);
assert_eq!(list.view(), vec![1, 4, 2, 3]);
}
#[test]
fn test_list_idempotence() {
let mut list = ListCrdt::<i64>::new(make_author(1), vec![]);
let op = list.insert(ROOT_ID, 1);
for _ in 1..10 {
assert_eq!(list.apply(op.clone()), OpState::Ok);
}
assert_eq!(list.view(), vec![1]);
}
#[test]
fn test_list_delete() {
let mut list = ListCrdt::<char>::new(make_author(1), vec![]);
let _one = list.insert(ROOT_ID, 'a');
let _two = list.insert(_one.id, 'b');
let _three = list.insert(ROOT_ID, 'c');
list.delete(_one.id);
list.delete(_two.id);
assert_eq!(list.view(), vec!['c']);
}
#[test]
fn test_list_interweave_chars() {
let mut list = ListCrdt::<char>::new(make_author(1), vec![]);
let _one = list.insert(ROOT_ID, 'a');
let _two = list.insert(_one.id, 'b');
let _three = list.insert(ROOT_ID, 'c');
assert_eq!(list.view(), vec!['c', 'a', 'b']);
}
#[test]
fn test_list_conflicting_agents() {
let mut list1 = ListCrdt::<char>::new(make_author(1), vec![]);
let mut list2 = ListCrdt::new(make_author(2), vec![]);
let _1_a = list1.insert(ROOT_ID, 'a');
assert_eq!(list2.apply(_1_a.clone()), OpState::Ok);
let _2_b = list2.insert(_1_a.id, 'b');
assert_eq!(list1.apply(_2_b.clone()), OpState::Ok);
let _2_d = list2.insert(ROOT_ID, 'd');
let _2_y = list2.insert(_2_b.id, 'y');
let _1_x = list1.insert(_2_b.id, 'x');
// create artificial delay, then apply out of order
assert_eq!(list2.apply(_1_x), OpState::Ok);
assert_eq!(list1.apply(_2_y), OpState::Ok);
assert_eq!(list1.apply(_2_d), OpState::Ok);
assert_eq!(list1.view(), vec!['d', 'a', 'b', 'y', 'x']);
assert_eq!(list1.view(), list2.view());
}
#[test]
fn test_list_delete_multiple_agent() {
let mut list1 = ListCrdt::<char>::new(make_author(1), vec![]);
let mut list2 = ListCrdt::new(make_author(2), vec![]);
let _1_a = list1.insert(ROOT_ID, 'a');
assert_eq!(list2.apply(_1_a.clone()), OpState::Ok);
let _2_b = list2.insert(_1_a.id, 'b');
let del_1_a = list1.delete(_1_a.id);
assert_eq!(list1.apply(_2_b), OpState::Ok);
assert_eq!(list2.apply(del_1_a), OpState::Ok);
assert_eq!(list1.view(), vec!['b']);
assert_eq!(list1.view(), list2.view());
}
#[test]
fn test_list_nested() {
let mut list1 = ListCrdt::<char>::new(make_author(1), vec![]);
let _c = list1.insert(ROOT_ID, 'c');
let _a = list1.insert(ROOT_ID, 'a');
let _d = list1.insert(_c.id, 'd');
let _b = list1.insert(_a.id, 'b');
assert_eq!(list1.view(), vec!['a', 'b', 'c', 'd']);
}
}

View File

@@ -0,0 +1,192 @@
use crate::debug::DebugView;
use crate::json_crdt::{CrdtNode, OpState, Value};
use crate::op::{join_path, print_path, Op, PathSegment, SequenceNumber};
use std::cmp::{max, Ordering};
use std::fmt::Debug;
use crate::keypair::AuthorId;
/// A simple delete-wins, last-writer-wins (LWW) register CRDT.
/// Basically only for adding support for primitives within a more complex CRDT
#[derive(Clone)]
pub struct LwwRegisterCrdt<T>
where
    T: CrdtNode,
{
    /// Public key for this node
    pub our_id: AuthorId,
    /// Path to this CRDT
    pub path: Vec<PathSegment>,
    /// Internal value of this CRDT. We wrap it in an Op to retain the author/sequence metadata
    /// (starts as the content-less root op, i.e. "unset")
    value: Op<T>,
    /// The sequence number of this node (highest sequence number seen so far)
    our_seq: SequenceNumber,
}
impl<T> LwwRegisterCrdt<T>
where
    T: CrdtNode,
{
    /// Create a new register CRDT with the given [`AuthorID`] (it should be unique)
    pub fn new(id: AuthorId, path: Vec<PathSegment>) -> LwwRegisterCrdt<T> {
        LwwRegisterCrdt {
            our_id: id,
            path,
            // The root op acts as the "never set" placeholder.
            value: Op::make_root(),
            our_seq: 0,
        }
    }
    /// Sets the current value of the register
    pub fn set<U: Into<Value>>(&mut self, content: U) -> Op<Value> {
        let mut op = Op::new(
            self.value.id,
            self.our_id,
            self.our_seq + 1,
            false,
            Some(content.into()),
            self.path.to_owned(),
        );
        // we need to know the op ID before setting the path as [`PathSegment::Index`] requires an
        // [`OpID`]
        let new_path = join_path(self.path.to_owned(), PathSegment::Index(op.id));
        op.path = new_path;
        self.apply(op.clone());
        op
    }
    /// Apply an operation (both local and remote) to this local register CRDT.
    ///
    /// Last-writer-wins: the op with the highest sequence number is kept;
    /// equal sequence numbers are broken deterministically in favor of the
    /// smaller author id.
    pub fn apply(&mut self, op: Op<Value>) -> OpState {
        // Reject tampered ops whose id doesn't match their contents.
        if !op.is_valid_hash() {
            return OpState::ErrHashMismatch;
        }
        let op: Op<T> = op.into();
        let seq = op.sequence_num();
        // take most recent update by sequence number
        match seq.cmp(&self.our_seq) {
            Ordering::Greater => {
                // Keep the register's stable id; replace everything else.
                self.value = Op {
                    id: self.value.id,
                    ..op
                };
            }
            Ordering::Equal => {
                // if we are equal, tie break on author
                if op.author() < self.value.author() {
                    // we want to keep id constant so replace everything but id
                    self.value = Op {
                        id: self.value.id,
                        ..op
                    };
                }
            }
            Ordering::Less => {} // LWW, ignore if its outdated
        };
        // update bookkeeping
        self.our_seq = max(self.our_seq, seq);
        OpState::Ok
    }
    /// Current contents of the register, or `None` if it has never been set.
    fn view(&self) -> Option<T> {
        self.value.content.to_owned()
    }
}
impl<T> CrdtNode for LwwRegisterCrdt<T>
where
T: CrdtNode,
{
fn apply(&mut self, op: Op<Value>) -> OpState {
self.apply(op.into())
}
fn view(&self) -> Value {
self.view().into()
}
fn new(id: AuthorId, path: Vec<PathSegment>) -> Self {
Self::new(id, path)
}
}
impl<T> DebugView for LwwRegisterCrdt<T>
where
T: CrdtNode + DebugView,
{
fn debug_view(&self, indent: usize) -> String {
let spacing = " ".repeat(indent);
let path_str = print_path(self.path.clone());
let inner = self.value.debug_view(indent + 2);
format!("LWW Register CRDT @ /{path_str}\n{spacing}{inner}")
}
}
impl<T> Debug for LwwRegisterCrdt<T>
where
    T: CrdtNode,
{
    /// Debug output is just the ID of the op currently held by the register.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let id = self.value.id;
        write!(f, "{id:?}")
    }
}
#[cfg(test)]
mod test {
use super::LwwRegisterCrdt;
use crate::{json_crdt::OpState, keypair::make_author};
#[test]
fn test_lww_simple() {
let mut register = LwwRegisterCrdt::new(make_author(1), vec![]);
assert_eq!(register.view(), None);
register.set(1);
assert_eq!(register.view(), Some(1));
register.set(99);
assert_eq!(register.view(), Some(99));
}
#[test]
fn test_lww_multiple_writer() {
let mut register1 = LwwRegisterCrdt::new(make_author(1), vec![]);
let mut register2 = LwwRegisterCrdt::new(make_author(2), vec![]);
let _a = register1.set('a');
let _b = register1.set('b');
let _c = register2.set('c');
assert_eq!(register2.view(), Some('c'));
assert_eq!(register1.apply(_c), OpState::Ok);
assert_eq!(register2.apply(_b), OpState::Ok);
assert_eq!(register2.apply(_a), OpState::Ok);
assert_eq!(register1.view(), Some('b'));
assert_eq!(register2.view(), Some('b'));
}
#[test]
fn test_lww_idempotence() {
let mut register = LwwRegisterCrdt::new(make_author(1), vec![]);
let op = register.set(1);
for _ in 1..10 {
assert_eq!(register.apply(op.clone()), OpState::Ok);
}
assert_eq!(register.view(), Some(1));
}
#[test]
fn test_lww_consistent_tiebreak() {
let mut register1 = LwwRegisterCrdt::new(make_author(1), vec![]);
let mut register2 = LwwRegisterCrdt::new(make_author(2), vec![]);
let _a = register1.set('a');
let _b = register2.set('b');
assert_eq!(register1.apply(_b), OpState::Ok);
assert_eq!(register2.apply(_a), OpState::Ok);
let _c = register1.set('c');
let _d = register2.set('d');
assert_eq!(register2.apply(_c), OpState::Ok);
assert_eq!(register1.apply(_d), OpState::Ok);
assert_eq!(register1.view(), register2.view());
assert_eq!(register1.view(), Some('c'));
}
}

View File

@@ -0,0 +1,237 @@
use crate::debug::{debug_path_mismatch, debug_type_mismatch};
use crate::json_crdt::{CrdtNode, CrdtNodeFromValue, IntoCrdtNode, SignedOp, Value};
use crate::keypair::{sha256, AuthorId};
use fastcrypto::ed25519::Ed25519KeyPair;
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
/// A lamport clock timestamp. Used to track document versions
pub type SequenceNumber = u64;
/// A unique ID for a single [`Op<T>`] (a SHA256 hash of the operation)
pub type OpId = [u8; 32];
/// The root/sentinel op
pub const ROOT_ID: OpId = [0u8; 32];
/// Part of a path to get to a specific CRDT in a nested CRDT
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum PathSegment {
    /// A named struct field.
    Field(String),
    /// A list element, addressed by the op that created it.
    Index(OpId),
}
/// Format a byte array as a hex string
///
/// Two lowercase hex digits per byte, concatenated without separators.
pub fn print_hex<const N: usize>(bytes: &[u8; N]) -> String {
    let mut out = String::with_capacity(N * 2);
    for byte in bytes {
        out.push_str(&format!("{byte:02x}"));
    }
    out
}
/// Pretty print a path
///
/// Fields print as-is; index segments print as the first 6 hex digits of
/// their op ID. Segments are joined with ".".
pub fn print_path(path: Vec<PathSegment>) -> String {
    let mut parts: Vec<String> = Vec::with_capacity(path.len());
    for segment in &path {
        parts.push(match segment {
            PathSegment::Field(s) => s.to_string(),
            PathSegment::Index(i) => print_hex(i)[..6].to_string(),
        });
    }
    parts.join(".")
}
/// Ensure our_path is a subpath of op_path. Note that two identical paths are considered subpaths
/// of each other.
pub fn ensure_subpath(our_path: &Vec<PathSegment>, op_path: &Vec<PathSegment>) -> bool {
    // A longer path can never be a prefix; otherwise every element of ours
    // must match the corresponding element of theirs.
    let is_prefix = our_path.len() <= op_path.len()
        && our_path
            .iter()
            .zip(op_path.iter())
            .all(|(ours, theirs)| ours == theirs);
    if !is_prefix {
        debug_path_mismatch(our_path.to_owned(), op_path.to_owned());
    }
    is_prefix
}
/// Helper to easily append a [`PathSegment`] to a path.
pub fn join_path(path: Vec<PathSegment>, segment: PathSegment) -> Vec<PathSegment> {
    // consume the path and extend it with the single trailing segment
    path.into_iter().chain(std::iter::once(segment)).collect()
}
/// Parse out the field name from the last segment of a path, if that segment
/// is a [`PathSegment::Field`].
pub fn parse_field(path: Vec<PathSegment>) -> Option<String> {
    match path.last() {
        Some(PathSegment::Field(key)) => Some(key.to_string()),
        _ => None,
    }
}
/// Represents a single node in a CRDT
#[derive(Clone, Serialize, Deserialize)]
pub struct Op<T>
where
    T: CrdtNode,
{
    /// Id of the op this op is anchored to; `ROOT_ID` for the sentinel root
    pub origin: OpId,
    pub author: AuthorId, // pub key of author
    /// Lamport sequence number of this op
    pub seq: SequenceNumber,
    /// Payload; `None` is only legal for deletions (enforced by `is_valid_hash`)
    pub content: Option<T>,
    pub path: Vec<PathSegment>, // path to get to target CRDT
    /// Tombstone flag: true if this op deletes its target
    pub is_deleted: bool,
    pub id: OpId, // hash of the operation
}
/// Something that can be turned into a string. This allows us to use [`content`] as an
/// input into the SHA256 hash
pub trait Hashable {
    fn hash(&self) -> String;
}
/// Anything that implements Debug is trivially hashable: the hash input is
/// simply the `Debug` representation.
/// NOTE(review): this ties op ids to the stability of `Debug` output across
/// compiler/crate versions — worth confirming that is acceptable.
impl<T> Hashable for T
where
    T: Debug,
{
    fn hash(&self) -> String {
        format!("{self:?}")
    }
}
/// Conversion from Op<Value> -> Op<T> given that T is a CRDT that can be created from a JSON value
impl Op<Value> {
    /// Convert the untyped JSON payload into a typed CRDT node. A payload that
    /// fails to convert is logged as a type mismatch and replaced with `None`.
    pub fn into<T: CrdtNodeFromValue + CrdtNode>(self) -> Op<T> {
        let Op {
            origin,
            author,
            seq,
            content,
            path,
            is_deleted,
            id,
        } = self;
        // attempt the typed conversion; failures are logged, not propagated
        let converted = content.and_then(|value| match value.into_node(id, path.clone()) {
            Ok(node) => Some(node),
            Err(msg) => {
                debug_type_mismatch(msg);
                None
            }
        });
        Op {
            origin,
            author,
            seq,
            content: converted,
            path,
            is_deleted,
            id,
        }
    }
}
impl<T> Op<T>
where
    T: CrdtNode,
{
    /// Sign this op with `keypair`, producing a [`SignedOp`] with no dependencies.
    pub fn sign(self, keypair: &Ed25519KeyPair) -> SignedOp {
        SignedOp::from_op(self, keypair, vec![])
    }
    /// Sign this op, embedding the signed digests of `dependencies`.
    /// NOTE(review): presumably the digests let receivers enforce causal
    /// delivery ordering — confirm against `SignedOp::from_op`.
    pub fn sign_with_dependencies(
        self,
        keypair: &Ed25519KeyPair,
        dependencies: Vec<&SignedOp>,
    ) -> SignedOp {
        SignedOp::from_op(
            self,
            keypair,
            dependencies
                .iter()
                .map(|dep| dep.signed_digest)
                .collect::<Vec<_>>(),
        )
    }
    /// Public key of this op's author.
    pub fn author(&self) -> AuthorId {
        self.author
    }
    /// Lamport sequence number of this op.
    pub fn sequence_num(&self) -> SequenceNumber {
        self.seq
    }
    /// Construct a new op; its `id` is derived by hashing the other fields
    /// (see [`Op::hash_to_id`]).
    pub fn new(
        origin: OpId,
        author: AuthorId,
        seq: SequenceNumber,
        is_deleted: bool,
        content: Option<T>,
        path: Vec<PathSegment>,
    ) -> Op<T> {
        let mut op = Self {
            origin,
            id: ROOT_ID, // placeholder, overwritten below with the content hash
            author,
            seq,
            is_deleted,
            content,
            path,
        };
        op.id = op.hash_to_id();
        op
    }
    /// Generate OpID by hashing our contents. Hash includes
    /// - content
    /// - origin
    /// - author
    /// - seq
    /// - is_deleted
    ///
    /// NOTE(review): `path` and `id` are excluded from the hash, so path
    /// tampering must be caught elsewhere (e.g. the signed digest) — confirm.
    pub fn hash_to_id(&self) -> OpId {
        // content hashes via its Debug representation (blanket `Hashable` impl);
        // a missing payload contributes the empty string
        let content_str = match self.content.as_ref() {
            Some(content) => content.hash(),
            None => "".to_string(),
        };
        let fmt_str = format!(
            "{:?},{:?},{:?},{:?},{content_str}",
            self.origin, self.author, self.seq, self.is_deleted,
        );
        sha256(fmt_str)
    }
    /// Rehashes the contents to make sure it matches the ID
    pub fn is_valid_hash(&self) -> bool {
        // make sure content is only none for deletion events
        if self.content.is_none() && !self.is_deleted {
            return false;
        }
        // cheap hash recomputation; lets callers skip an expensive
        // signature check when the hash already fails
        let res = self.hash_to_id() == self.id;
        if !res {
            self.debug_hash_failure();
        }
        res
    }
    /// Special constructor for defining the sentinel root node
    pub fn make_root() -> Op<T> {
        Self {
            origin: ROOT_ID,
            id: ROOT_ID, // the root keeps the all-zero sentinel id (not a hash)
            author: [0u8; 32], // no real author: zeroed key
            seq: 0,
            is_deleted: false,
            content: None,
            path: vec![],
        }
    }
}

View File

@@ -0,0 +1,134 @@
use bft_json_crdt::{
json_crdt::{add_crdt_fields, BaseCrdt, CrdtNode, IntoCrdtNode, OpState},
keypair::make_keypair,
list_crdt::ListCrdt,
lww_crdt::LwwRegisterCrdt,
op::{Op, PathSegment, ROOT_ID},
};
use serde_json::json;
// What is potentially Byzantine behaviour?
// 1. send valid updates
// 2. send a mix of valid and invalid updates
// a) messages with duplicate ID (attempt to overwrite old entries)
// b) send incorrect sequence number to multiple nodes (which could lead to divergent state) -- this is called equivocation
// c) forge updates from another author (could happen when forwarding valid messages from peers)
// 3. send malformed updates (e.g. missing fields)
// this we don't test as we assume transport layer only allows valid messages
// 4. overwhelm message queue by sending many updates far into the future
// also untested! currently we keep an unbounded message queue
// 5. block actual messages from honest actors (eclipse attack)
/// Minimal document fixture: a single list CRDT field under test.
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct ListExample {
    list: ListCrdt<char>,
}
// case 2a + 2b: duplicate-id overwrites and forged sequence numbers
#[test]
fn test_equivocation() {
    let author_key = make_keypair();
    let observer_key = make_keypair();
    let mut author_crdt = BaseCrdt::<ListExample>::new(&author_key);
    let mut observer_crdt = BaseCrdt::<ListExample>::new(&observer_key);
    let op_a = author_crdt.doc.list.insert(ROOT_ID, 'a').sign(&author_key);
    let op_b = author_crdt.doc.list.insert(op_a.id(), 'b').sign(&author_key);
    // forge an operation that reuses op_b's id but carries different content
    let mut forged_content = op_b.clone();
    forged_content.inner.content = Some('c'.into());
    // forge another that tampers with the sequence number and delete flag
    let mut forged_seq = op_b.clone();
    forged_seq.inner.seq = 99;
    forged_seq.inner.is_deleted = true;
    // every replica must reject both forgeries: the id no longer matches the hash
    assert_eq!(author_crdt.apply(forged_content.clone()), OpState::ErrHashMismatch);
    assert_eq!(author_crdt.apply(forged_seq.clone()), OpState::ErrHashMismatch);
    assert_eq!(observer_crdt.apply(forged_seq), OpState::ErrHashMismatch);
    assert_eq!(observer_crdt.apply(forged_content), OpState::ErrHashMismatch);
    // the genuine operations are still accepted
    assert_eq!(observer_crdt.apply(op_a), OpState::Ok);
    assert_eq!(observer_crdt.apply(op_b), OpState::Ok);
    // neither replica accepted any forged operation
    assert_eq!(author_crdt.doc.list.view(), vec!['a', 'b']);
    assert_eq!(author_crdt.doc.list.view(), observer_crdt.doc.list.view());
}
// case 2c: forging an update on behalf of another author
#[test]
fn test_forge_update() {
    let author_key = make_keypair();
    let observer_key = make_keypair();
    let mut author_crdt = BaseCrdt::<ListExample>::new(&author_key);
    let mut observer_crdt = BaseCrdt::<ListExample>::new(&observer_key);
    let op_a = author_crdt.doc.list.insert(ROOT_ID, 'a').sign(&author_key);
    // the attacker lacks the private key matching list.our_id, so they mint a new one
    let attacker_key = make_keypair();
    let mut forged = Op {
        origin: op_a.inner.id,
        author: author_crdt.doc.id, // pretend to be the owner of list
        content: Some('b'),
        path: vec![PathSegment::Field("list".to_string())],
        seq: 1,
        is_deleted: false,
        id: ROOT_ID, // placeholder, filled in below
    };
    // hash and digest are perfectly valid — only the signing key is wrong,
    // as attacker_key.public != list.public
    forged.id = forged.hash_to_id();
    let signed_forgery = forged.sign(&attacker_key);
    assert_eq!(author_crdt.apply(signed_forgery.clone()), OpState::ErrHashMismatch);
    assert_eq!(observer_crdt.apply(signed_forgery), OpState::ErrHashMismatch);
    assert_eq!(observer_crdt.apply(op_a), OpState::Ok);
    // the forged insert was not applied anywhere
    assert_eq!(author_crdt.doc.list.view(), vec!['a']);
}
/// Two-level nesting fixture: `Nested.a.b` is the leaf register under test.
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Nested {
    a: Nested2,
}
/// Inner level of the nesting fixture holding the leaf LWW register.
#[add_crdt_fields]
#[derive(Clone, CrdtNode)]
struct Nested2 {
    b: LwwRegisterCrdt<bool>,
}
// tampering with op paths: wrong paths are rejected, and re-pointing a signed
// op at the correct path breaks its digest
#[test]
fn test_path_update() {
    let author_key = make_keypair();
    let observer_key = make_keypair();
    let mut author_crdt = BaseCrdt::<Nested>::new(&author_key);
    let mut observer_crdt = BaseCrdt::<Nested>::new(&observer_key);
    // op whose path names a field that does not exist
    let mut set_true = author_crdt.doc.a.b.set(true);
    set_true.path = vec![PathSegment::Field("x".to_string())];
    // op whose path ends in an index segment instead of the register's field
    let mut set_false = author_crdt.doc.a.b.set(false);
    set_false.path = vec![
        PathSegment::Field("a".to_string()),
        PathSegment::Index(set_false.id),
    ];
    let signed_true = set_true.sign(&author_key);
    let signed_false = set_false.sign(&author_key);
    // fixing the path *after* signing invalidates the signed digest
    let mut signed_false_fixed_path = signed_false.clone();
    signed_false_fixed_path.inner.path = vec![
        PathSegment::Field("a".to_string()),
        PathSegment::Field("b".to_string()),
    ];
    assert_eq!(observer_crdt.apply(signed_true), OpState::ErrPathMismatch);
    assert_eq!(observer_crdt.apply(signed_false), OpState::ErrPathMismatch);
    assert_eq!(
        observer_crdt.apply(signed_false_fixed_path),
        OpState::ErrDigestMismatch
    );
    // no tampered operation was accepted on either side
    assert_eq!(author_crdt.doc.a.b.view(), json!(false).into());
    assert_eq!(observer_crdt.doc.a.b.view(), json!(null).into());
}

View File

@@ -0,0 +1,91 @@
use bft_json_crdt::{
keypair::make_author,
list_crdt::ListCrdt,
op::{Op, OpId, ROOT_ID}, json_crdt::{CrdtNode, Value},
};
use rand::{rngs::ThreadRng, seq::SliceRandom, Rng};
/// Pick the id of a random previously-seen op from `arr`, or `ROOT_ID` when
/// the log is still empty. Taking `&[_]` instead of `&Vec<_>` (clippy::ptr_arg)
/// is backward compatible: `&vec` deref-coerces to a slice at call sites.
fn random_op<T: CrdtNode>(arr: &[Op<T>], rng: &mut ThreadRng) -> OpId {
    arr.choose(rng).map(|op| op.id).unwrap_or(ROOT_ID)
}
// number of random operations generated per replica in each phase
const TEST_N: usize = 100;
/// Fuzz test: two replicas generate random inserts/deletes independently, then
/// exchange their op logs in shuffled order; both replicas and a third,
/// apply-only checker replica must converge to the same document.
#[test]
fn test_list_fuzz_commutative() {
    let mut rng = rand::thread_rng();
    // combined log of all ops from both replicas (origin pool for phase 2)
    let mut op_log = Vec::<Op<Value>>::new();
    let mut op_log1 = Vec::<Op<Value>>::new();
    let mut op_log2 = Vec::<Op<Value>>::new();
    let mut l1 = ListCrdt::<char>::new(make_author(1), vec![]);
    let mut l2 = ListCrdt::<char>::new(make_author(2), vec![]);
    // checker replica that never generates ops of its own
    let mut chk = ListCrdt::<char>::new(make_author(3), vec![]);
    for _ in 0..TEST_N {
        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
        // 80% inserts / 20% deletes, each anchored at a random local op
        let op1 = if rng.gen_bool(4.0 / 5.0) {
            l1.insert(random_op(&op_log1, &mut rng), letter1)
        } else {
            l1.delete(random_op(&op_log1, &mut rng))
        };
        let op2 = if rng.gen_bool(4.0 / 5.0) {
            l2.insert(random_op(&op_log2, &mut rng), letter2)
        } else {
            l2.delete(random_op(&op_log2, &mut rng))
        };
        op_log1.push(op1.clone());
        op_log2.push(op2.clone());
        op_log.push(op1.clone());
        op_log.push(op2.clone());
    }
    // shuffle ops to exercise out-of-order delivery
    op_log1.shuffle(&mut rng);
    op_log2.shuffle(&mut rng);
    // apply to each other
    for op in op_log1 {
        l2.apply(op.clone());
        // NOTE(review): this loop converts via `.into()` while the loop below
        // applies the op directly — presumably equivalent for Op<Value>; confirm
        chk.apply(op.into());
    }
    for op in op_log2 {
        l1.apply(op.clone());
        chk.apply(op);
    }
    // ensure all replicas converged
    let l1_doc = l1.view();
    let l2_doc = l2.view();
    let chk_doc = chk.view();
    assert_eq!(l1_doc, l2_doc);
    assert_eq!(l1_doc, chk_doc);
    assert_eq!(l2_doc, chk_doc);
    // now, allow cross mixing between both: phase 2 anchors new inserts
    // at ops drawn from EITHER replica's history
    let mut op_log1 = Vec::<Op<Value>>::new();
    let mut op_log2 = Vec::<Op<Value>>::new();
    for _ in 0..TEST_N {
        let letter1: char = rng.gen_range(b'a'..=b'z') as char;
        let letter2: char = rng.gen_range(b'a'..=b'z') as char;
        let op1 = l1.insert(random_op(&op_log, &mut rng), letter1);
        let op2 = l2.insert(random_op(&op_log, &mut rng), letter2);
        op_log1.push(op1);
        op_log2.push(op2);
    }
    for op in op_log1 {
        l2.apply(op.clone());
        chk.apply(op);
    }
    for op in op_log2 {
        l1.apply(op.clone());
        chk.apply(op);
    }
    // all three replicas must have converged again
    let l1_doc = l1.view();
    let l2_doc = l2.view();
    let chk_doc = chk.view();
    assert_eq!(l1_doc, l2_doc);
    assert_eq!(l1_doc, chk_doc);
    assert_eq!(l2_doc, chk_doc);
}

259883
crates/bft-json-crdt/tests/editing-trace.js generated Normal file

File diff suppressed because one or more lines are too long

1
crates/bft-json-crdt/tests/edits.json generated Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,76 @@
use bft_json_crdt::keypair::make_author;
use bft_json_crdt::list_crdt::ListCrdt;
use bft_json_crdt::op::{OpId, ROOT_ID};
use std::{fs::File, io::Read};
use time::PreciseTime;
use serde::Deserialize;
/// One edit from the automerge-perf editing trace.
#[derive(Debug, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
struct Edit {
    // index into the sequence of previously-created op ids
    pos: usize,
    // true for a deletion, false for an insert
    delete: bool,
    // character to insert; absent for deletions
    #[serde(default)]
    content: Option<char>,
}
/// The full trace: every edit plus the expected final document text.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Trace {
    final_text: String,
    edits: Vec<Edit>,
}
/// Load and deserialize the editing trace fixture from `./tests/edits.json`.
/// Panics (failing the test) if the file is missing, unreadable, or malformed.
fn get_trace() -> Trace {
    let fp = "./tests/edits.json";
    // std::fs::read_to_string replaces the manual File::open + read_to_string
    // dance and sizes the buffer from file metadata up front
    let content = std::fs::read_to_string(fp)
        .unwrap_or_else(|e| panic!("Open edits.json failed: {:?}", e.kind()));
    serde_json::from_str(&content).expect("JSON was not well-formatted")
}
/// Really large test running Martin Kleppmann's editing trace from his paper.
/// Data source: https://github.com/automerge/automerge-perf
#[test]
fn test_editing_trace() {
    let t = get_trace();
    let mut list = ListCrdt::<char>::new(make_author(1), vec![]);
    // ops[k] is the id produced by edit k-1; ops[0] is the sentinel root
    let mut ops: Vec<OpId> = vec![ROOT_ID];
    // std::time::Instant replaces the unmaintained `time` 0.1 `PreciseTime`
    // API, which also lets the crate drop the time 0.1 dependency (and its
    // legacy winapi/wasi transitive deps) entirely
    let start = std::time::Instant::now();
    for (i, op) in t.edits.into_iter().enumerate() {
        let origin = ops[op.pos];
        let new_id = if op.delete {
            list.delete(origin).id
        } else {
            // inserts in the trace always carry content
            list.insert(origin, op.content.unwrap()).id
        };
        ops.push(new_id);
        // progress checkpoints at 10k and 100k applied ops
        if matches!(i, 10_000 | 100_000) {
            println!("took {:?} to run {i} ops", start.elapsed());
        }
    }
    println!("took {:?} to finish", start.elapsed());
    let result = list.iter().collect::<String>();
    let expected = t.final_text;
    assert_eq!(result.len(), expected.len());
    assert_eq!(result, expected);
}

View File

@@ -9,8 +9,8 @@ edition = "2021"
clap = { version = "4.5.4", features = ["derive"] }
tokio = { version = "1.37.0", features = ["time"] }
websockets = "0.3.0"
bft-json-crdt = { path = "../../bft-json-crdt" }
bft-crdt-derive = { path = "../../bft-json-crdt/bft-crdt-derive" }
bft-json-crdt = { path = "../crates/bft-json-crdt" }
bft-crdt-derive = { path = "../crates/bft-json-crdt/bft-crdt-derive" }
# serde_cbor = "0.11.2" # move to this once we need to pack things in CBOR
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.117"