-
Notifications
You must be signed in to change notification settings - Fork 130
test(core-consensus): add minimal deterministic harness + in-memory WAL capture/replay #1470
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a6e2364
adcf72b
b6c4ada
925bd63
b0801a5
b66babb
529239e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| #![allow(clippy::needless_update)] | ||
|
|
||
| use informalsystems_malachitebft_core_consensus::{ | ||
| process, Effect, Error, Input, Params, Resumable, Resume, State, WalEntry, | ||
| }; | ||
| use malachitebft_core_types::{Round, ThresholdParams, ValuePayload}; | ||
| use malachitebft_metrics::Metrics; | ||
| use malachitebft_test::utils::validators::make_validators; | ||
| use malachitebft_test::{Height, TestContext, ValidatorSet}; | ||
|
|
||
| use super::utils::{propose_timeout, wal_entry_to_input}; | ||
|
|
||
| /// Minimal harness for driving core-consensus deterministically. | ||
| /// | ||
| /// This is intentionally small and does **not** attempt to simulate full networking or timing. | ||
| /// It records WAL entries emitted via `Effect::WalAppend` and can replay them after a simulated crash. | ||
| struct Harness { | ||
| state: State<TestContext>, | ||
| // In-memory WAL: (height, entry) | ||
| wal: Vec<(Height, WalEntry<TestContext>)>, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should be able to set the WAL, emulating the recovery after a crash. |
||
| } | ||
|
|
||
| impl Harness { | ||
| fn new(height: Height, vs: ValidatorSet) -> Self { | ||
| let ctx = TestContext::new(); | ||
| let params = Params { | ||
| address: vs | ||
| .get_by_index(0) | ||
| .expect("validator set must be non-empty") | ||
| .address, | ||
| threshold_params: ThresholdParams::default(), | ||
| value_payload: ValuePayload::ProposalOnly, | ||
| enabled: true, | ||
| }; | ||
|
|
||
| Self { | ||
| state: State::new(ctx, height, vs, params, 128), | ||
| wal: Vec::new(), | ||
| } | ||
| } | ||
|
|
||
| fn run(&mut self, input: Input<TestContext>) { | ||
| let metrics = Metrics::new(); | ||
|
|
||
| // Split borrows so the effect handler doesn't need to borrow `self` while `state` is mutably borrowed. | ||
| let state = &mut self.state; | ||
| let wal = &mut self.wal; | ||
|
|
||
| // Metrics expects step_start/step_end pairing; initialize it to the current driver step. | ||
| metrics.step_start(state.driver.step()); | ||
|
|
||
| let _res: Result<(), informalsystems_malachitebft_core_consensus::Error<TestContext>> = process!( | ||
| input: input, | ||
| state: state, | ||
| metrics: &metrics, | ||
| with: effect => { | ||
| let res: Result<Resume<TestContext>, Error<TestContext>> = match effect { | ||
| Effect::WalAppend(height, entry, r) => { | ||
| wal.push((height, entry)); | ||
| Ok(r.resume_with(())) | ||
| } | ||
| // For this PR we keep the effect handler conservative: always continue. | ||
| // Follow-up PRs will add specific effect simulation (signing, publishing, etc.). | ||
| other => { | ||
| let _ = other; | ||
| Ok(Resume::Continue) | ||
| } | ||
| }; | ||
| res | ||
| } | ||
| ); | ||
|
|
||
| let _ = _res; | ||
| } | ||
|
|
||
| fn drain_wal_entries(&self, height: Height) -> Vec<WalEntry<TestContext>> { | ||
| self.wal | ||
| .iter() | ||
| .filter_map(|(h, e)| (*h == height).then(|| e.clone())) | ||
| .collect() | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn wal_entries_can_be_captured_and_replayed_in_memory() { | ||
| let [(v1, _sk1), (v2, _sk2), (v3, _sk3), (v4, _sk4)] = make_validators([1, 1, 1, 1]); | ||
| let vs = ValidatorSet::new(vec![v1, v2, v3, v4]); | ||
|
|
||
| // First run: start height and trigger a persisted timeout. | ||
| let mut h1 = Harness::new(Height::new(1), vs.clone()); | ||
|
|
||
| h1.run(Input::StartHeight(Height::new(1), vs.clone(), false, None)); | ||
|
|
||
| h1.run(Input::TimeoutElapsed(propose_timeout(0))); | ||
|
|
||
| let wal_entries = h1.drain_wal_entries(Height::new(1)); | ||
| assert!( | ||
| wal_entries | ||
| .iter() | ||
| .any(|e| matches!(e, WalEntry::Timeout(t) if t.round == Round::new(0))), | ||
| "expected a timeout WAL entry for round 0" | ||
| ); | ||
|
|
||
| // Simulated crash/restart: new harness, replay WAL entries as inputs. | ||
| let vs2 = vs; | ||
| let mut h2 = Harness::new(Height::new(1), vs2.clone()); | ||
| h2.run(Input::StartHeight(Height::new(1), vs2, true, None)); | ||
|
|
||
| for entry in wal_entries { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is something supposed to occur as part of the "engine" implementation. Namely, once you setup a new instance and attached the WAL, the content of the WAL should be replayed.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for your response @cason. I have gone through all your comment and I'll work on it. Updated revision coming shortly. |
||
| h2.run(wal_entry_to_input(entry)); | ||
| } | ||
|
|
||
| // After replay, we should have persisted the same timeout again deterministically. | ||
| let wal_entries_2 = h2.drain_wal_entries(Height::new(1)); | ||
| assert!( | ||
| wal_entries_2 | ||
| .iter() | ||
| .any(|e| matches!(e, WalEntry::Timeout(t) if t.round == Round::new(0))), | ||
| "expected timeout WAL entry after replay" | ||
| ); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| pub mod basic; | ||
|
|
||
| mod utils; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| use informalsystems_malachitebft_core_consensus::{Input, WalEntry}; | ||
| use malachitebft_core_types::{Context, Timeout}; | ||
|
|
||
| /// Convert a WAL entry back into a core-consensus input for deterministic replay. | ||
| pub fn wal_entry_to_input<Ctx: Context>(entry: WalEntry<Ctx>) -> Input<Ctx> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not something we would need, in my opinion, as we should be able to write |
||
| match entry { | ||
| WalEntry::ConsensusMsg(msg) => match msg { | ||
| informalsystems_malachitebft_core_consensus::SignedConsensusMsg::Vote(v) => { | ||
| Input::Vote(v) | ||
| } | ||
| informalsystems_malachitebft_core_consensus::SignedConsensusMsg::Proposal(p) => { | ||
| Input::Proposal(p) | ||
| } | ||
| }, | ||
| WalEntry::Timeout(timeout) => Input::TimeoutElapsed(timeout), | ||
| WalEntry::ProposedValue(v) => { | ||
| // For now, treat replay as if the value arrived from consensus gossip. | ||
| // (Scenarios that require ValueOrigin::Sync will be added in follow-up PRs.) | ||
| Input::ProposedValue(v, malachitebft_core_types::ValueOrigin::Consensus) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| pub fn propose_timeout(round: u32) -> Timeout { | ||
| Timeout::propose(malachitebft_core_types::Round::new(round)) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, not sure how this is going to evolve, but at the moment this is a
StateWithWAL?