From cfbe895384ad8ce8042ee415bcdc48c2742c1ba8 Mon Sep 17 00:00:00 2001
From: Muad'Dib
Date: Mon, 1 Jun 2026 16:37:34 +0200
Subject: [PATCH 1/2] fix: substConstants no-ops out-of-range positions (match sigma-state)

`SubstConstants` eval full-parsed the script bytes
(`ErgoTree::sigma_parse_bytes`) and errored when a requested position
was >= the tree's constant count. sigma-state's
`ErgoTreeSerializer.substituteConstants` is byte-surgery: it parses only
the header and constants segment, keeps the tree body verbatim (never
deserializing it), substitutes constants at in-range positions, and
silently no-ops out-of-range ones. sigma-rust therefore rejected scripts
the JVM accepts -- an out-of-range position, or an unparseable tree
body -- a latent consensus fork (not seen on mainnet).

Add `ErgoTree::substitute_constants` mirroring the Scala serializer
(header + constants parsed, body bytes kept raw, OOB positions dropped,
first position per index wins) and call it from `SubstConstants::eval`.

Verified byte-exact against SANTA's JVM-blessed v5 vectors
(`substConstants_equivalence.json`): the five out-of-range entries
(incl. `[0,0,8,-45]`, whose body the full parser rejects with
InvalidTypeCode) return the input unchanged; in-range substitution stays
covered by the existing proptests. New `substitute_constants_oob_is_noop`
regression test.

Co-Authored-By: Claude Opus 4.8 (1M context) --- ergotree-interpreter/src/eval/subst_const.rs | 27 ++--- ergotree-ir/src/ergo_tree.rs | 120 +++++++++++++++++++ 2 files changed, 129 insertions(+), 18 deletions(-) diff --git a/ergotree-interpreter/src/eval/subst_const.rs b/ergotree-interpreter/src/eval/subst_const.rs index f035a48ad..57b647006 100644 --- a/ergotree-interpreter/src/eval/subst_const.rs +++ b/ergotree-interpreter/src/eval/subst_const.rs @@ -11,7 +11,6 @@ use ergotree_ir::mir::subst_const::SubstConstants; use ergotree_ir::mir::value::CollKind; use ergotree_ir::mir::value::NativeColl; use ergotree_ir::mir::value::Value; -use ergotree_ir::serialization::SigmaSerializable; use sigma_util::AsVecI8; use sigma_util::AsVecU8; @@ -55,24 +54,16 @@ impl Evaluable for SubstConstants { } if let Value::Coll(CollKind::NativeColl(NativeColl::CollByte(b))) = script_bytes_v { - // Substitue constants with repeated calls to `ErgoTree::with_constant`. - let mut ergo_tree = ErgoTree::sigma_parse_bytes(&b.as_vec_u8())?; - let num_constants = ergo_tree.constants_len().map_err(to_misc_err)?; - for (ix, i) in positions.iter().enumerate() { - if *i < num_constants { - ergo_tree = ergo_tree - .with_constant(*i, new_constants[ix].clone()) - .map_err(to_misc_err)?; - } else { - return Err(EvalError::Misc(format!( - "SubstConstants: positions[{}] == {} is an out of bound index with \ - respect to the serialized ErgoTree's constant list", - ix, *i - ))); - } - } + // Byte-level substitution mirroring sigma-state's + // `ErgoTreeSerializer.substituteConstants`: the tree body is never + // parsed and out-of-range positions are a no-op, so a malformed + // body or an OOB position returns the original bytes (JVM parity) + // instead of erroring. 
+ let (new_bytes, _num_constants) = + ErgoTree::substitute_constants(b.as_vec_u8(), &positions, &new_constants) + .map_err(to_misc_err)?; Ok(Value::Coll(CollKind::NativeColl(NativeColl::CollByte( - ergo_tree.sigma_serialize_bytes()?.as_vec_i8().into(), + new_bytes.as_vec_i8().into(), )))) } else { Err(EvalError::Misc(format!( diff --git a/ergotree-ir/src/ergo_tree.rs b/ergotree-ir/src/ergo_tree.rs index a1d52708d..ab3b582f9 100644 --- a/ergotree-ir/src/ergo_tree.rs +++ b/ergotree-ir/src/ergo_tree.rs @@ -308,6 +308,96 @@ impl ErgoTree { pub fn template_bytes(&self) -> Result, ErgoTreeError> { self.clone().parsed_tree()?.template_bytes() } + + /// Replaces constants at the given `positions` with `new_values` in a + /// serialized ErgoTree, mirroring sigma-state's + /// `ErgoTreeSerializer.substituteConstants`. Only the header and the + /// constants segment are parsed; the body bytes are kept verbatim and + /// never deserialized, so an unparseable body is tolerated. Positions + /// outside the tree's constants list are silently ignored (no-op), and + /// the first position referencing a given constant index wins. Returns + /// the resulting bytes and the number of constants in the tree; + /// `positions.len()` must equal `new_values.len()`. + pub fn substitute_constants( + script_bytes: Vec, + positions: &[usize], + new_values: &[Constant], + ) -> Result<(Vec, usize), ErgoTreeError> { + use core2::io::Write; + use sigma_ser::vlq_encode::ReadSigmaVlqExt; + // Parse only the header + constants segment; keep the body raw. 
+ let (header, mut constants, body_start) = { + let mut r = SigmaByteReader::new( + Cursor::new(script_bytes.as_slice()), + ConstantStore::empty(), + ); + let header = ErgoTreeHeader::sigma_parse(&mut r)?; + let (constants, body_start) = r.with_tree_version( + header.version(), + |r| -> Result<(Vec, usize), SigmaParsingError> { + if header.has_size() { + let _ = r.get_u32()?; + } + let constants = if header.is_constant_segregation() { + ErgoTree::sigma_parse_constants(r)? + } else { + Vec::new() + }; + let body_start = r.position()? as usize; + Ok((constants, body_start)) + }, + )?; + (header, constants, body_start) + }; + let num_constants = constants.len(); + let tree_bytes = script_bytes.get(body_start..).unwrap_or_default().to_vec(); + + // First position referencing a given index wins (matches Scala's + // `getPositionsBackref`); out-of-range positions are dropped. + let mut already_set = vec![false; num_constants]; + for (i_pos, &pos) in positions.iter().enumerate() { + if pos < num_constants && !already_set[pos] { + let new_c = &new_values[i_pos]; + if new_c.tpe != constants[pos].tpe { + return Err(ErgoTreeConstantError::SetConstantError( + SetConstantError::TypeMismatch(format!( + "substitute_constants: position {} expected type {:?}, got {:?}", + pos, constants[pos].tpe, new_c.tpe + )), + ) + .into()); + } + constants[pos] = new_c.clone(); + already_set[pos] = true; + } + } + + // Re-emit header + [size] + [count + constants (if segregated)] + + // verbatim body, mirroring ``. 
+ let body_section = { + let mut data = Vec::new(); + let mut inner_w = SigmaByteWriter::new(&mut data, None); + inner_w.with_tree_version(header.version(), |inner_w| -> SigmaSerializeResult { + if header.is_constant_segregation() { + inner_w.put_usize_as_u32_unwrapped(constants.len())?; + constants + .iter() + .try_for_each(|c| c.sigma_serialize(inner_w))?; + } + inner_w.write_all(&tree_bytes)?; + Ok(()) + })?; + data + }; + let mut out = Vec::new(); + let mut w = SigmaByteWriter::new(&mut out, None); + header.sigma_serialize(&mut w)?; + if header.has_size() { + w.put_usize_as_u32_unwrapped(body_section.len())?; + } + w.write_all(&body_section)?; + Ok((out, num_constants)) + } } /// Constants related errors @@ -653,6 +743,36 @@ mod tests { assert_eq!(ergo_tree.get_constant(0).unwrap().unwrap(), false.into()); } + // JVM parity (jvm:sigma-state-6.0.3 LanguageSpecificationV5 substConstants): + // a position outside the tree's constant list is a no-op that returns the + // original bytes, not an error. substitute_constants never parses the body, + // so even #1 (`[0,0,8,-45]`), whose body sigma-rust's full parser rejects + // with InvalidTypeCode, no-ops cleanly. (`-45` == `0xd3`.) 
+ #[test] + fn substitute_constants_oob_is_noop() { + let dummy: Constant = 0i32.into(); + let run = |bytes: Vec, pos: usize| -> (Vec, usize) { + ErgoTree::substitute_constants(bytes, &[pos], core::slice::from_ref(&dummy)).unwrap() + }; + // #0: non-segregated header, 0 constants + assert_eq!(run(vec![0x00, 0x08, 0xd3], 0), (vec![0x00, 0x08, 0xd3], 0)); + // #1: non-segregated, body unparseable by the full deserializer + assert_eq!( + run(vec![0x00, 0x00, 0x08, 0xd3], 0), + (vec![0x00, 0x00, 0x08, 0xd3], 0) + ); + // #2/#3: segregated header, 0 constants + assert_eq!( + run(vec![0x10, 0x00, 0x08, 0xd3], 0), + (vec![0x10, 0x00, 0x08, 0xd3], 0) + ); + // #6: segregated, 1 constant, position 1 is out of range + assert_eq!( + run(vec![0x10, 0x01, 0x08, 0xd3, 0x73, 0x00], 1), + (vec![0x10, 0x01, 0x08, 0xd3, 0x73, 0x00], 1) + ); + } + #[test] fn test_set_constant() { let expr = Expr::Const(Constant { From 7ed08da155aa87c49bdcfca6ce19e59e18fc227a Mon Sep 17 00:00:00 2001 From: Muad'Dib Date: Mon, 1 Jun 2026 21:25:49 +0200 Subject: [PATCH 2/2] fix: version-gate substConstants tree-size slot (match sigma-state) ErgoTree::substitute_constants re-emitted the tree-size slot whenever the template header has_size bit was set. sigma-state's ErgoTreeSerializer.substituteConstants re-emits it only when the evaluation's ErgoTree version is >= V3 (the V6 soft-fork isV3OrLaterErgoTreeVersion gate); for v<=2 the slot is dropped even though the bit stays set. Thread the evaluation's tree_version (ctx.tree_version()) into the fn and gate the size emit on it. Completes the substConstants serializer-level parity (body-verbatim and first-wins were already handled here). The v<=2 hasSize path is not mainnet-reachable, so it is covered by a unit test (v2 drops / v3 keeps the slot) certified against the Scala source rather than a blessed vector. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ergotree-interpreter/src/eval/subst_const.rs | 10 +++- ergotree-ir/src/ergo_tree.rs | 62 ++++++++++++++++++-- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/ergotree-interpreter/src/eval/subst_const.rs b/ergotree-interpreter/src/eval/subst_const.rs index 57b647006..5ae94c549 100644 --- a/ergotree-interpreter/src/eval/subst_const.rs +++ b/ergotree-interpreter/src/eval/subst_const.rs @@ -59,9 +59,13 @@ impl Evaluable for SubstConstants { // parsed and out-of-range positions are a no-op, so a malformed // body or an OOB position returns the original bytes (JVM parity) // instead of erroring. - let (new_bytes, _num_constants) = - ErgoTree::substitute_constants(b.as_vec_u8(), &positions, &new_constants) - .map_err(to_misc_err)?; + let (new_bytes, _num_constants) = ErgoTree::substitute_constants( + b.as_vec_u8(), + &positions, + &new_constants, + ctx.tree_version(), + ) + .map_err(to_misc_err)?; Ok(Value::Coll(CollKind::NativeColl(NativeColl::CollByte( new_bytes.as_vec_i8().into(), )))) diff --git a/ergotree-ir/src/ergo_tree.rs b/ergotree-ir/src/ergo_tree.rs index ab3b582f9..08568f422 100644 --- a/ergotree-ir/src/ergo_tree.rs +++ b/ergotree-ir/src/ergo_tree.rs @@ -318,19 +318,25 @@ impl ErgoTree { /// the first position referencing a given constant index wins. Returns /// the resulting bytes and the number of constants in the tree; /// `positions.len()` must equal `new_values.len()`. + /// + /// `tree_version` is the *evaluation's* ErgoTree version (not the + /// template header's). The tree-size slot is re-emitted only when it is + /// `>= V3` — the V6 soft-fork `isV3OrLaterErgoTreeVersion` gate in + /// `ErgoTreeSerializer.scala`; for `<= V2` the slot is dropped even + /// though the header's `has_size` bit stays set, a JVM quirk we mirror + /// byte-for-byte. 
pub fn substitute_constants( script_bytes: Vec, positions: &[usize], new_values: &[Constant], + tree_version: ErgoTreeVersion, ) -> Result<(Vec, usize), ErgoTreeError> { use core2::io::Write; use sigma_ser::vlq_encode::ReadSigmaVlqExt; // Parse only the header + constants segment; keep the body raw. let (header, mut constants, body_start) = { - let mut r = SigmaByteReader::new( - Cursor::new(script_bytes.as_slice()), - ConstantStore::empty(), - ); + let mut r = + SigmaByteReader::new(Cursor::new(script_bytes.as_slice()), ConstantStore::empty()); let header = ErgoTreeHeader::sigma_parse(&mut r)?; let (constants, body_start) = r.with_tree_version( header.version(), @@ -392,7 +398,10 @@ impl ErgoTree { let mut out = Vec::new(); let mut w = SigmaByteWriter::new(&mut out, None); header.sigma_serialize(&mut w)?; - if header.has_size() { + // V6 soft-fork: re-emit the size slot only when the evaluation's tree + // version is >= V3 (`isV3OrLaterErgoTreeVersion`); for <= V2 it is + // dropped even with the has_size bit set (JVM parity). + if tree_version >= ErgoTreeVersion::V3 && header.has_size() { w.put_usize_as_u32_unwrapped(body_section.len())?; } w.write_all(&body_section)?; @@ -752,7 +761,13 @@ mod tests { fn substitute_constants_oob_is_noop() { let dummy: Constant = 0i32.into(); let run = |bytes: Vec, pos: usize| -> (Vec, usize) { - ErgoTree::substitute_constants(bytes, &[pos], core::slice::from_ref(&dummy)).unwrap() + ErgoTree::substitute_constants( + bytes, + &[pos], + core::slice::from_ref(&dummy), + ErgoTreeVersion::V3, + ) + .unwrap() }; // #0: non-segregated header, 0 constants assert_eq!(run(vec![0x00, 0x08, 0xd3], 0), (vec![0x00, 0x08, 0xd3], 0)); @@ -773,6 +788,41 @@ mod tests { ); } + // JVM parity (jvm:sigma-state-6.0.3 substituteConstants): the tree-size + // slot is re-emitted only when the evaluation's ErgoTree version is >= V3 + // (the V6 soft-fork `isV3OrLaterErgoTreeVersion` gate, + // ErgoTreeSerializer.scala:369). 
For v<=2 the slot is dropped even though + // the header's has_size bit stays set. No SANTA substConstants vector is a + // has_size template, so this path is certified against the Scala source. + #[test] + fn substitute_constants_v3_gates_size_slot() { + // A v1 (has_size) segregated template with a single constant. + let expr = Expr::Const(Constant { + tpe: SType::SBoolean, + v: Literal::Boolean(false), + }); + let bytes = ErgoTree::new(ErgoTreeHeader::v1(true), &expr) + .unwrap() + .sigma_serialize_bytes() + .unwrap(); + assert!(ErgoTreeHeader::new(bytes[0]).unwrap().has_size()); + // Tiny tree => single-byte size VLQ, so it can be stripped positionally. + assert!(bytes[1] < 0x80, "test assumes a single-byte size VLQ"); + + // No substitution: the only inter-version difference is the size slot. + let (out_v3, _) = + ErgoTree::substitute_constants(bytes.clone(), &[], &[], ErgoTreeVersion::V3).unwrap(); + let (out_v2, _) = + ErgoTree::substitute_constants(bytes.clone(), &[], &[], ErgoTreeVersion::V2).unwrap(); + + // v>=3: size slot kept => byte-identical round-trip. + assert_eq!(out_v3, bytes, "v3 must re-emit the size slot"); + // v<=2: size slot dropped => header byte then the bytes after the slot. + let mut expected_v2 = vec![bytes[0]]; + expected_v2.extend_from_slice(&bytes[2..]); + assert_eq!(out_v2, expected_v2, "v<=2 must drop the size slot"); + } + #[test] fn test_set_constant() { let expr = Expr::Const(Constant {