From e0771d219d7133302bdb0b25a336cec0fcdb8db0 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 18 May 2026 09:26:07 +0200 Subject: [PATCH] Remove the `pop_except_from!` macro This is the first step toward a more generic SIMD-optimized path for scanning ahead and also for a potentially easier way of bailing out of this loop using the `?` Rust operator. Signed-off-by: Martin Robinson --- html5ever/src/tokenizer/mod.rs | 78 +++++++++++++++++++++++++++------- xml5ever/src/tokenizer/mod.rs | 39 ++++++++++++----- 2 files changed, 90 insertions(+), 27 deletions(-) diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 08abc17b..ba9a095c 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -707,10 +707,6 @@ macro_rules! peek ( ($me:expr, $input:expr) => ( unwrap_or_return!($me.peek($input), ProcessResult::Suspend) )); -macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => ( - unwrap_or_return!($me.pop_except_from($input, $set), ProcessResult::Suspend) -)); - macro_rules! 
eat ( ($me:expr, $input:expr, $pat:expr) => ( unwrap_or_return!($me.eat($input, $pat, u8::eq_ignore_ascii_case), ProcessResult::Suspend) )); @@ -798,7 +794,13 @@ impl Tokenizer { //§ rcdata-state states::RawData(Rcdata) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\0' '&' '<' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -812,7 +814,13 @@ impl Tokenizer { //§ rawtext-state states::RawData(Rawtext) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\0' '<' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -825,7 +833,13 @@ impl Tokenizer { //§ script-data-state states::RawData(ScriptData) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\0' '<' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -838,7 +852,13 @@ impl Tokenizer { //§ script-data-escaped-state states::RawData(ScriptDataEscaped(Escaped)) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\0' '-' '<' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -857,7 +877,13 @@ impl Tokenizer { //§ script-data-double-escaped-state states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' 
'\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\0' '-' '<' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -877,7 +903,12 @@ impl Tokenizer { //§ plaintext-state states::Plaintext => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) { + let Some(set_result) = self.pop_except_from(input, small_char_set!('\r' '\0' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\0') => { self.bad_char_error(); self.emit_char('\u{fffd}'); @@ -1239,7 +1270,13 @@ impl Tokenizer { //§ attribute-value-(double-quoted)-state states::AttributeValue(DoubleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '"' '&' '\0' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('"') => go!(self: to State::AfterAttributeValueQuoted), FromSet('&') => go!(self: consume_char_ref), FromSet('\0') => { @@ -1253,7 +1290,13 @@ impl Tokenizer { //§ attribute-value-(single-quoted)-state states::AttributeValue(SingleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) { + let Some(set_result) = + self.pop_except_from(input, small_char_set!('\r' '\'' '&' '\0' '\n')) + else { + return ProcessResult::Suspend; + }; + + match set_result { FromSet('\'') => go!(self: to State::AfterAttributeValueQuoted), FromSet('&') => go!(self: consume_char_ref), FromSet('\0') => { @@ -1267,11 +1310,14 @@ impl Tokenizer { //§ attribute-value-(unquoted)-state states::AttributeValue(Unquoted) => loop { - match pop_except_from!( - self, + let Some(set_result) = self.pop_except_from( input, - small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0') - ) { + small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0'), + ) else { + return 
ProcessResult::Suspend; + }; + + match set_result { FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => { go!(self: to State::BeforeAttributeName) }, diff --git a/xml5ever/src/tokenizer/mod.rs b/xml5ever/src/tokenizer/mod.rs index 7b0c0577..0d87e637 100644 --- a/xml5ever/src/tokenizer/mod.rs +++ b/xml5ever/src/tokenizer/mod.rs @@ -627,13 +627,6 @@ macro_rules! get_char ( ($me:expr, $input:expr) => {{ character }}); -macro_rules! pop_except_from ( ($me:expr, $input:expr, $set:expr) => {{ - let Some(popped_element) = $me.pop_except_from($input, $set) else { - return ProcessResult::Done; - }; - popped_element -}}); - macro_rules! eat ( ($me:expr, $input:expr, $pat:expr) => {{ let Some(value) = $me.eat($input, $pat) else { return ProcessResult::Done; @@ -664,7 +657,13 @@ impl XmlTokenizer { match self.state.get() { //§ data-state XmlState::Data => loop { - match pop_except_from!(self, input, small_char_set!('\r' '&' '<')) { + let Some(popped_element) = + self.pop_except_from(input, small_char_set!('\r' '&' '<')) + else { + return ProcessResult::Done; + }; + + match popped_element { FromSet('&') => go!(self: consume_char_ref), FromSet('<') => go!(self: to XmlState::TagState), FromSet(c) => go!(self: emit c), @@ -925,7 +924,13 @@ impl XmlTokenizer { }, //§ tag-attribute-value-double-quoted-state XmlState::TagAttrValue(DoubleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\n' '"' '&')) { + let Some(popped_element) = + self.pop_except_from(input, small_char_set!('\n' '"' '&')) + else { + return ProcessResult::Done; + }; + + match popped_element { FromSet('"') => go!(self: to XmlState::TagAttrNameBefore), FromSet('&') => go!(self: consume_char_ref '"' ), FromSet(c) => go!(self: push_value c), @@ -934,7 +939,13 @@ impl XmlTokenizer { }, //§ tag-attribute-value-single-quoted-state XmlState::TagAttrValue(SingleQuoted) => loop { - match pop_except_from!(self, input, small_char_set!('\n' '\'' '&')) { + let Some(popped_element) = + 
self.pop_except_from(input, small_char_set!('\n' '\'' '&')) + else { + return ProcessResult::Done; + }; + + match popped_element { FromSet('\'') => go!(self: to XmlState::TagAttrNameBefore), FromSet('&') => go!(self: consume_char_ref '\''), FromSet(c) => go!(self: push_value c), @@ -943,7 +954,13 @@ impl XmlTokenizer { }, //§ tag-attribute-value-double-quoted-state XmlState::TagAttrValue(Unquoted) => loop { - match pop_except_from!(self, input, small_char_set!('\n' '\t' ' ' '&' '>')) { + let Some(popped_element) = + self.pop_except_from(input, small_char_set!('\n' '\t' ' ' '&' '>')) + else { + return ProcessResult::Done; + }; + + match popped_element { FromSet('\t') | FromSet('\n') | FromSet(' ') => { go!(self: to XmlState::TagAttrNameBefore) },