diff --git a/src/authenticated_tree_ops.rs b/src/authenticated_tree_ops.rs index 692868f..549086c 100644 --- a/src/authenticated_tree_ops.rs +++ b/src/authenticated_tree_ops.rs @@ -288,7 +288,10 @@ pub trait AuthenticatedTreeOps { (r_node.clone(), false, false, true, Some(r.value)) } Some(v) => { // update value - assert!(self.tree().value_length.filter(|len|v.len() != *len).is_none()); + ensure!( + self.tree().value_length.filter(|len| v.len() != *len).is_none(), + "AVL operation value length does not match the tree's fixed value length" + ); let old_value = Some(r.value); let r_new = LeafNode::update(r_node, &r.hdr.key.unwrap(), &v, &r.next_node_key); self.on_node_visit(r_node, operation, false); @@ -311,7 +314,10 @@ pub trait AuthenticatedTreeOps { (r_node.clone(), false, false, false, None) } Some(v) => { // insert new value - assert!(self.tree().value_length.filter(|len|v.len() != *len).is_none()); + ensure!( + self.tree().value_length.filter(|len| v.len() != *len).is_none(), + "AVL operation value length does not match the tree's fixed value length" + ); self.on_node_visit(r_node, operation, false); (self.add_node(r_node, &key, &v), true, true, false, None) } diff --git a/src/batch_avl_prover.rs b/src/batch_avl_prover.rs index 7dbf169..128a65a 100644 --- a/src/batch_avl_prover.rs +++ b/src/batch_avl_prover.rs @@ -470,7 +470,7 @@ mod tests { fn empty_digest_hash_is_correct() { let prover = BatchAVLProver::new( AVLTree::new( - |digest| Node::LabelOnly(NodeHeader::new(Some(*digest), None)), + |digest: &Digest32| Node::LabelOnly(NodeHeader::new(Some(*digest), None)), 32, None, ), diff --git a/src/batch_avl_verifier.rs b/src/batch_avl_verifier.rs index 956bfaa..a666b83 100644 --- a/src/batch_avl_verifier.rs +++ b/src/batch_avl_verifier.rs @@ -33,6 +33,24 @@ pub struct BatchAVLVerifier { replay_index: usize, } +/// Read `n` bytes from `proof` starting at `*i`, advancing `*i` past them. +/// +/// Returns an error (rather than panicking with a slice-bounds or overflow +/// panic) when the proof is too short or the requested length overflows. A +/// malformed, truncated, or empty proof must fail gracefully: the reference +/// `BatchAVLVerifier` reconstructs the tree inside a `Try`, so any failure +/// there simply yields a verifier with no tree rather than crashing. +fn read_proof_slice<'a>(proof: &'a [u8], i: &mut usize, n: usize) -> Result<&'a [u8]> { + let end = i + .checked_add(n) + .ok_or_else(|| anyhow!("Malformed AVL proof: length overflow"))?; + let slice = proof + .get(*i..end) + .ok_or_else(|| anyhow!("Malformed AVL proof: unexpected end of input"))?; + *i = end; + Ok(slice) +} + impl BatchAVLVerifier { pub fn new( starting_digest: &ADDigest, @@ -92,7 +110,12 @@ impl BatchAVLVerifier { let mut previous_leaf: Option = None; let mut stack: Vec = Vec::new(); let key_length = self.base.tree.key_length; - while self.proof[i] != END_OF_TREE_IN_PACKAGED_PROOF { + while *self + .proof + .get(i) + .ok_or_else(|| anyhow!("Malformed AVL proof: unexpected end of input"))? + != END_OF_TREE_IN_PACKAGED_PROOF + { let n = self.proof[i]; i += 1; num_nodes += 1; @@ -100,8 +123,7 @@ impl BatchAVLVerifier { match n { LABEL_IN_PACKAGED_PROOF => { let mut label: Digest32 = Default::default(); - label.copy_from_slice(&self.proof[i..i + DIGEST_LENGTH]); - i += DIGEST_LENGTH; + label.copy_from_slice(read_proof_slice(&self.proof, &mut i, DIGEST_LENGTH)?); stack.push(Node::new_label(&label)); previous_leaf = None; } @@ -109,26 +131,32 @@ impl BatchAVLVerifier { let key = if let Some(prev) = previous_leaf { Bytes::copy_from_slice(&self.base.tree.next_node_key(&prev)) } else { - let start = i; - i += self.base.tree.key_length; - Bytes::copy_from_slice(&self.proof[start..i]) + Bytes::copy_from_slice(read_proof_slice(&self.proof, &mut i, key_length)?) + }; + let next_leaf_key = + Bytes::copy_from_slice(read_proof_slice(&self.proof, &mut i, key_length)?); + let value_length = match self.base.tree.value_length { + Some(vl) => vl, + None => { + BigEndian::read_u32(read_proof_slice(&self.proof, &mut i, 4)?) as usize + } }; - let next_leaf_key = Bytes::copy_from_slice(&self.proof[i..i + key_length]); - i += key_length; - let value_length = self.base.tree.value_length.unwrap_or_else(|| { - let vl = BigEndian::read_u32(&self.proof[i..i + 4]) as usize; - i += 4; - vl - }); - let value = Bytes::copy_from_slice(&self.proof[i..i + value_length]); - i += value_length; + let value = Bytes::copy_from_slice(read_proof_slice( + &self.proof, + &mut i, + value_length, + )?); let leaf = LeafNode::new(&key, &value, &next_leaf_key); stack.push(leaf.clone()); previous_leaf = Some(leaf); } _ => { - let right = stack.pop().unwrap(); - let left = stack.pop().unwrap(); + let right = stack + .pop() + .ok_or_else(|| anyhow!("Malformed AVL proof: stack underflow"))?; + let left = stack + .pop() + .ok_or_else(|| anyhow!("Malformed AVL proof: stack underflow"))?; stack.push(InternalNode::new(None, &left, &right, n as Balance)); } } diff --git a/src/batch_node.rs b/src/batch_node.rs index ba36222..6db12e7 100644 --- a/src/batch_node.rs +++ b/src/batch_node.rs @@ -18,7 +18,7 @@ pub(crate) const END_OF_TREE_IN_PACKAGED_PROOF: u8 = 4; pub type Balance = i8; pub type SerializedAdProof = Bytes; pub type NodeId = Rc>; -pub type Resolver = fn(&Digest32) -> Node; +pub type Resolver = alloc::sync::Arc Node + Send + Sync>; pub type Blake2b256 = Blake2b; #[derive(Debug, Clone)] @@ -284,11 +284,18 @@ pub struct AVLTree { } impl AVLTree { - pub fn new(resolver: Resolver, key_length: usize, value_length: Option) -> AVLTree { + /// Accepts any closure (wrapped into the `Arc` internally), so callers written + /// against the old `Resolver = fn(&Digest32) -> Node` signature compile unchanged. + /// To reuse a prebuilt [`Resolver`], construct the struct literally (`resolver` is pub). + pub fn new( + resolver: impl Fn(&Digest32) -> Node + Send + Sync + 'static, + key_length: usize, + value_length: Option, + ) -> AVLTree { AVLTree { key_length, value_length, - resolver, + resolver: alloc::sync::Arc::new(resolver), height: 0, root: None, } @@ -426,30 +433,49 @@ impl AVLTree { key_found: bool, ) -> bool { if &self.label(node) == label { - true - } else { - if let Node::Internal(r) = &mut *node.borrow_mut() { - if key_found { - self.contains_recursive(&self.resolve(&mut r.left), key, label, true) - } else { - match (*key).cmp(r.hdr.key.as_ref().unwrap()) { - Ordering::Equal => - // found in the tree -- go one step right, then left to the leaf - { - self.contains_recursive(&self.resolve(&mut r.right), key, label, true) - } - Ordering::Less => - // going left, not yet found - { - self.contains_recursive(&self.resolve(&mut r.left), key, label, false) - } - Ordering::Greater => { - self.contains_recursive(&self.resolve(&mut r.right), key, label, false) + return true; + } + + // Discriminate the node kind in a scoped immutable borrow so we can + // re-borrow mutably below for the Internal walk without a conflict. + enum Kind { Internal, Leaf, LabelOnly } + let kind = { + let n = node.borrow(); + match &*n { + Node::Internal(_) => Kind::Internal, + Node::Leaf(_) => Kind::Leaf, + Node::LabelOnly(_) => Kind::LabelOnly, + } + }; + + match kind { + Kind::Leaf => false, + // Reached a LabelOnly that the resolver could not materialize + // (digest not in storage). We cannot conclude whether the target + // label is present in the unresolved subtree, so fail safe by + // returning true. Used by `removed_nodes()` to decide what to + // delete from storage; treating an unresolvable subtree as + // "definitely absent" silently deletes nodes that may still be + // referenced from this subtree, producing dangling parent->child + // references on disk and later walks bailing with + // "should never reach this point". + Kind::LabelOnly => true, + Kind::Internal => { + if let Node::Internal(r) = &mut *node.borrow_mut() { + if key_found { + self.contains_recursive(&self.resolve(&mut r.left), key, label, true) + } else { + match (*key).cmp(r.hdr.key.as_ref().unwrap()) { + // found in the tree -- go one step right, then left to the leaf + Ordering::Equal => self.contains_recursive(&self.resolve(&mut r.right), key, label, true), + // going left, not yet found + Ordering::Less => self.contains_recursive(&self.resolve(&mut r.left), key, label, false), + Ordering::Greater => self.contains_recursive(&self.resolve(&mut r.right), key, label, false), } } + } else { + unreachable!("kind already verified Internal") } - } else { - false } } } diff --git a/src/versioned_avl_storage.rs b/src/versioned_avl_storage.rs index b820dbc..fcbf20f 100644 --- a/src/versioned_avl_storage.rs +++ b/src/versioned_avl_storage.rs @@ -53,4 +53,17 @@ pub trait VersionedAVLStorage { /// @return versions store keeps /// fn rollback_versions<'a>(&'a self) -> Box + 'a>; + + //// + /// Force a durable commit of outstanding writes. Implementations that + /// use deferred/non-durable writes (e.g. `Durability::None` in redb) + /// should fsync the backing store here. Default is a no-op for + /// in-memory or always-durable storages. + /// + /// Callers should invoke this periodically during long-running write + /// sequences and on graceful shutdown to bound crash data loss. + /// + fn flush(&self) -> Result<()> { + Ok(()) + } } diff --git a/tests/malformed_proof.rs b/tests/malformed_proof.rs new file mode 100644 index 0000000..b2fefec --- /dev/null +++ b/tests/malformed_proof.rs @@ -0,0 +1,124 @@ +//! Regression tests: malformed, truncated, or empty proofs — and out-of-range +//! tree parameters — must fail gracefully (returning an `Err`) rather than +//! panicking with a slice-bounds, stack-underflow, or arithmetic-overflow +//! panic. This mirrors the reference (scorex) `BatchAVLVerifier`, whose tree +//! reconstruction is wrapped in a `Try`: any failure simply yields a verifier +//! with no reconstructed tree, and subsequent operations then fail cleanly. + +use bytes::Bytes; +use ergo_avltree_rust::authenticated_tree_ops::AuthenticatedTreeOps; +use ergo_avltree_rust::batch_avl_verifier::BatchAVLVerifier; +use ergo_avltree_rust::operation::*; + +mod common; +use common::*; + +// Proof structure opcodes (see `batch_node`): a leaf node, a label-only node, +// and the end-of-tree marker, respectively. Referenced by value here because +// they are crate-private. +const LEAF: u8 = 2; +const LABEL: u8 = 3; + +/// A syntactically well-formed 33-byte digest: 32-byte root hash + height byte. +fn dummy_digest() -> Bytes { + Bytes::from(vec![7u8; 33]) +} + +#[test] +fn empty_proof_is_err_not_panic() { + // Used to panic indexing `self.proof[0]` on an empty proof. + let v = BatchAVLVerifier::new( + &dummy_digest(), + &Bytes::new(), + generate_tree(KEY_LENGTH, None), + None, + None, + ); + assert!(v.is_err()); +} + +#[test] +fn internal_node_opcode_with_empty_stack_is_err_not_panic() { + // A single non-leaf/non-label opcode reaches the internal-node arm and used + // to panic on `stack.pop().unwrap()` with nothing on the stack. + let v = BatchAVLVerifier::new( + &dummy_digest(), + &Bytes::from(vec![0u8]), + generate_tree(KEY_LENGTH, None), + None, + None, + ); + assert!(v.is_err()); +} + +#[test] +fn truncated_label_proof_is_err_not_panic() { + // A label opcode promises 32 following bytes; with none present the digest + // slice used to panic out of bounds. + let v = BatchAVLVerifier::new( + &dummy_digest(), + &Bytes::from(vec![LABEL]), + generate_tree(KEY_LENGTH, None), + None, + None, + ); + assert!(v.is_err()); +} + +#[test] +fn truncated_real_proof_is_err_not_panic() { + // A valid proof cut in half: a genuine reconstruction that runs out of bytes + // partway through, rather than a hand-crafted shape. + let (mut prover, _) = generate_and_populate_prover(16); + let proof = prover.generate_proof(); + let digest = prover.digest().unwrap(); + let half = Bytes::copy_from_slice(&proof[..proof.len() / 2]); + let v = BatchAVLVerifier::new(&digest, &half, generate_tree(KEY_LENGTH, None), None, None); + assert!(v.is_err()); +} + +#[test] +fn oversized_key_length_is_err_not_panic() { + // A wrapped/out-of-range key length used to overflow `i + key_length` or + // slice the proof out of bounds while reading the first leaf key. + let v = BatchAVLVerifier::new( + &dummy_digest(), + &Bytes::from(vec![LEAF]), + generate_tree(usize::MAX, None), + None, + None, + ); + assert!(v.is_err()); +} + +#[test] +fn wrong_value_length_operation_is_err_not_panic() { + // A fixed-value-length tree: performing an operation whose value length does + // not match used to fire `assert!(value_length matches)` inside the op. + let mut prover = generate_prover(KEY_LENGTH, Some(VALUE_LENGTH)); + let initial_digest = prover.digest().unwrap(); + let key = Bytes::from(vec![1u8; KEY_LENGTH]); + prover + .perform_one_operation(&Operation::Insert(KeyValue { + key: key.clone(), + value: Bytes::from(vec![0u8; VALUE_LENGTH]), + })) + .unwrap(); + let proof = prover.generate_proof(); + + let mut verifier = BatchAVLVerifier::new( + &initial_digest, + &proof, + generate_tree(KEY_LENGTH, Some(VALUE_LENGTH)), + None, + None, + ) + .unwrap(); + // Replay the same insert, but with a value whose length differs from the + // tree's fixed value length. + let res = verifier.perform_one_operation(&Operation::Insert(KeyValue { + key, + value: Bytes::from(vec![0u8; VALUE_LENGTH + 1]), + })); + assert!(res.is_err()); +}