From 28862a1eef688071e0ec80b0fbbaea0c82d920fb Mon Sep 17 00:00:00 2001 From: Muad'Dib Date: Sat, 4 Apr 2026 16:35:47 +0200 Subject: [PATCH 1/3] Change Resolver type from fn pointer to Arc for persistence support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Resolver type was defined as a bare function pointer (fn(&Digest32) -> Node), which cannot capture state. This makes it impossible to implement VersionedAVLStorage with a real storage backend — the resolver needs to load nodes from a database, but a function pointer cannot hold a database reference. Changed to Arc Node + Send + Sync> which allows closures that capture storage handles. Arc (not Box) because AVLTree derives Clone. Send + Sync for thread safety with concurrent readers. All 22 existing tests pass unchanged (modulo wrapping bare functions in Arc::new). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/batch_avl_prover.rs | 2 +- src/batch_node.rs | 2 +- tests/common/mod.rs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/batch_avl_prover.rs b/src/batch_avl_prover.rs index 7dbf169..520d603 100644 --- a/src/batch_avl_prover.rs +++ b/src/batch_avl_prover.rs @@ -470,7 +470,7 @@ mod tests { fn empty_digest_hash_is_correct() { let prover = BatchAVLProver::new( AVLTree::new( - |digest| Node::LabelOnly(NodeHeader::new(Some(*digest), None)), + alloc::sync::Arc::new(|digest: &Digest32| Node::LabelOnly(NodeHeader::new(Some(*digest), None))), 32, None, ), diff --git a/src/batch_node.rs b/src/batch_node.rs index ba36222..eba2692 100644 --- a/src/batch_node.rs +++ b/src/batch_node.rs @@ -18,7 +18,7 @@ pub(crate) const END_OF_TREE_IN_PACKAGED_PROOF: u8 = 4; pub type Balance = i8; pub type SerializedAdProof = Bytes; pub type NodeId = Rc>; -pub type Resolver = fn(&Digest32) -> Node; +pub type Resolver = alloc::sync::Arc Node + Send + Sync>; pub type Blake2b256 = Blake2b; #[derive(Debug, Clone)] diff --git a/tests/common/mod.rs b/tests/common/mod.rs index ec2ba17..94a0040 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -10,6 +10,7 @@ use ergo_avltree_rust::versioned_avl_storage::*; use rand::prelude::*; use sha2::{Digest, Sha256}; use std::collections::HashMap; +use std::sync::Arc; pub const INITIAL_TREE_SIZE: usize = 1000; pub const KEY_LENGTH: usize = 32; @@ -97,7 +98,7 @@ pub fn generate_verifier( } pub fn generate_tree(key_length: usize, value_length: Option) -> AVLTree { - AVLTree::new(dummy_resolver, key_length, value_length) + AVLTree::new(Arc::new(dummy_resolver), key_length, value_length) } pub fn generate_prover(key_length: usize, value_length: Option) -> BatchAVLProver { From de1a79a4acc25592662701245f2d3b78a2cc5d01 Mon Sep 17 00:00:00 2001 From: Muad'Dib Date: Tue, 14 Apr 2026 04:14:15 +0200 Subject: [PATCH 2/3] feat: add VersionedAVLStorage::flush() for durable commits on demand Adds a default-no-op method on the storage trait so callers can force a durable commit (fsync) of outstanding writes. Implementations using deferred writes (e.g. redb Durability::None) should override this to fsync the backing store. In-memory or always-durable storages can keep the default no-op. Motivation: PersistentBatchAVLProver::generate_proof_and_update_storage uses non-durable writes for throughput. Without a caller-triggered flush, graceful SIGTERM leaves uncommitted state pending; on reopen, redb sees no valid commit and treats the storage as empty. Periodic flush calls bound crash data loss. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/versioned_avl_storage.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/versioned_avl_storage.rs b/src/versioned_avl_storage.rs index b820dbc..fcbf20f 100644 --- a/src/versioned_avl_storage.rs +++ b/src/versioned_avl_storage.rs @@ -53,4 +53,17 @@ pub trait VersionedAVLStorage { /// @return versions store keeps /// fn rollback_versions<'a>(&'a self) -> Box + 'a>; + + //// + /// Force a durable commit of outstanding writes. Implementations that + /// use deferred/non-durable writes (e.g. `Durability::None` in redb) + /// should fsync the backing store here. Default is a no-op for + /// in-memory or always-durable storages. + /// + /// Callers should invoke this periodically during long-running write + /// sequences and on graceful shutdown to bound crash data loss. + /// + fn flush(&self) -> Result<()> { + Ok(()) + } } From 879545c8bde6dfd7853b66f799f4b29b1e241571 Mon Sep 17 00:00:00 2001 From: Muad'Dib Date: Sat, 2 May 2026 14:04:55 +0200 Subject: [PATCH 3/3] fix(contains): treat unresolvable LabelOnly as 'maybe present', not absent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `contains_recursive` previously returned false at the catch-all `else` arm whenever it reached a non-Internal node. That branch fires for both Leaf nodes (terminal — return false is correct) and LabelOnly nodes that the resolver could not materialise (return false is *unsafe*). Used by `removed_nodes()` to decide which digests to delete from persistent storage: for cn in &self.base.changed_nodes_buffer_to_check { if !self.contains(cn) { self.base.changed_nodes_buffer.push(cn.clone()) } } If `contains()` walks into an unresolvable subtree, returning false says "definitely not in tree → delete it". The caller deletes the node from storage. But the node may still be referenced from the very subtree we couldn't resolve. The next walk into that subtree hits a LabelOnly with the deleted digest and bails: ERROR ergo_sync::state: apply_state failed error=UTXO state operation failed: operation N failed: Should never reach this point. If in prover, this is a bug. If in verifier, this proof is wrong. Observed in production on a Rust full-node implementation of the Ergo mainnet at v0.4.x. After ~250 blocks of steady-state validation with the typical "most subtrees are LabelOnly, only walked paths are materialised" prover state, an on-disk scan revealed: Reachable nodes: 6,353,404 Missing references: 135 (parent in storage, child digest not in NODES_TABLE) Orphan nodes: 378,274 (in storage but unreachable from root) Fix: distinguish Leaf from LabelOnly at the catch-all. Leaf with non-matching label remains terminal → false. LabelOnly that the resolver couldn't materialise → true (fail safe). At worst this leaks orphan nodes; never silently corrupts. Refactored to use a `Kind` enum so we can scope the immutable borrow that does the discrimination separately from the mutable borrow that drives the Internal-node walk. All 22 existing tests pass unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/batch_node.rs | 61 +++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/src/batch_node.rs b/src/batch_node.rs index eba2692..386e1fe 100644 --- a/src/batch_node.rs +++ b/src/batch_node.rs @@ -426,30 +426,49 @@ impl AVLTree { key_found: bool, ) -> bool { if &self.label(node) == label { - true - } else { - if let Node::Internal(r) = &mut *node.borrow_mut() { - if key_found { - self.contains_recursive(&self.resolve(&mut r.left), key, label, true) - } else { - match (*key).cmp(r.hdr.key.as_ref().unwrap()) { - Ordering::Equal => - // found in the tree -- go one step right, then left to the leaf - { - self.contains_recursive(&self.resolve(&mut r.right), key, label, true) - } - Ordering::Less => - // going left, not yet found - { - self.contains_recursive(&self.resolve(&mut r.left), key, label, false) - } - Ordering::Greater => { - self.contains_recursive(&self.resolve(&mut r.right), key, label, false) + return true; + } + + // Discriminate the node kind in a scoped immutable borrow so we can + // re-borrow mutably below for the Internal walk without a conflict. + enum Kind { Internal, Leaf, LabelOnly } + let kind = { + let n = node.borrow(); + match &*n { + Node::Internal(_) => Kind::Internal, + Node::Leaf(_) => Kind::Leaf, + Node::LabelOnly(_) => Kind::LabelOnly, + } + }; + + match kind { + Kind::Leaf => false, + // Reached a LabelOnly that the resolver could not materialize + // (digest not in storage). We cannot conclude whether the target + // label is present in the unresolved subtree, so fail safe by + // returning true. Used by `removed_nodes()` to decide what to + // delete from storage; treating an unresolvable subtree as + // "definitely absent" silently deletes nodes that may still be + // referenced from this subtree, producing dangling parent->child + // references on disk and later walks bailing with + // "should never reach this point". + Kind::LabelOnly => true, + Kind::Internal => { + if let Node::Internal(r) = &mut *node.borrow_mut() { + if key_found { + self.contains_recursive(&self.resolve(&mut r.left), key, label, true) + } else { + match (*key).cmp(r.hdr.key.as_ref().unwrap()) { + // found in the tree -- go one step right, then left to the leaf + Ordering::Equal => self.contains_recursive(&self.resolve(&mut r.right), key, label, true), + // going left, not yet found + Ordering::Less => self.contains_recursive(&self.resolve(&mut r.left), key, label, false), + Ordering::Greater => self.contains_recursive(&self.resolve(&mut r.right), key, label, false), } } + } else { + unreachable!("kind already verified Internal") } - } else { - false } } }