//!
//! Determinism validation: rejects `NOW()`, `RANDOM()`, `NEXTVAL()`, `UUID()`.
1818
19+ mod tokenizer;
20+
1921use super :: expr:: { BinaryOp , SqlExpr } ;
2022use nodedb_types:: Value ;
23+ use tokenizer:: { Token , TokenKind , tokenize} ;
2124
2225/// Parse a SQL expression string into an SqlExpr AST.
2326///
@@ -46,160 +49,11 @@ pub fn parse_generated_expr(text: &str) -> Result<(SqlExpr, Vec<String>), String
4649 Ok ( ( expr, deps) )
4750}
4851
49- // ── Tokenizer ─────────────────────────────────────────────────────────
50-
51- #[ derive( Debug , Clone ) ]
52- struct Token {
53- text : String ,
54- kind : TokenKind ,
55- }
56-
57- #[ derive( Debug , Clone , Copy , PartialEq ) ]
58- enum TokenKind {
59- Ident ,
60- Number ,
61- StringLit ,
62- LParen ,
63- RParen ,
64- Comma ,
65- Op ,
66- }
67-
68- fn tokenize ( input : & str ) -> Result < Vec < Token > , String > {
69- let bytes = input. as_bytes ( ) ;
70- let mut tokens = Vec :: new ( ) ;
71- let mut i = 0 ;
72-
73- while i < bytes. len ( ) {
74- let b = bytes[ i] ;
75-
76- // Skip whitespace.
77- if b. is_ascii_whitespace ( ) {
78- i += 1 ;
79- continue ;
80- }
81-
82- // Single-char tokens.
83- if b == b'(' {
84- tokens. push ( Token {
85- text : "(" . into ( ) ,
86- kind : TokenKind :: LParen ,
87- } ) ;
88- i += 1 ;
89- continue ;
90- }
91- if b == b')' {
92- tokens. push ( Token {
93- text : ")" . into ( ) ,
94- kind : TokenKind :: RParen ,
95- } ) ;
96- i += 1 ;
97- continue ;
98- }
99- if b == b',' {
100- tokens. push ( Token {
101- text : "," . into ( ) ,
102- kind : TokenKind :: Comma ,
103- } ) ;
104- i += 1 ;
105- continue ;
106- }
107-
108- // Two-char operators.
109- if i + 1 < bytes. len ( ) {
110- let two = & input[ i..i + 2 ] ;
111- if matches ! ( two, "<=" | ">=" | "!=" | "<>" ) {
112- tokens. push ( Token {
113- text : two. into ( ) ,
114- kind : TokenKind :: Op ,
115- } ) ;
116- i += 2 ;
117- continue ;
118- }
119- if two == "||" {
120- tokens. push ( Token {
121- text : "||" . into ( ) ,
122- kind : TokenKind :: Op ,
123- } ) ;
124- i += 2 ;
125- continue ;
126- }
127- }
128-
129- // Single-char operators.
130- if matches ! ( b, b'+' | b'-' | b'*' | b'/' | b'%' | b'=' | b'<' | b'>' ) {
131- tokens. push ( Token {
132- text : ( b as char ) . to_string ( ) ,
133- kind : TokenKind :: Op ,
134- } ) ;
135- i += 1 ;
136- continue ;
137- }
138-
139- // String literal.
140- if b == b'\'' {
141- let mut s = String :: new ( ) ;
142- i += 1 ;
143- while i < bytes. len ( ) {
144- if bytes[ i] == b'\'' {
145- if i + 1 < bytes. len ( ) && bytes[ i + 1 ] == b'\'' {
146- s. push ( '\'' ) ;
147- i += 2 ;
148- continue ;
149- }
150- i += 1 ;
151- break ;
152- }
153- s. push ( bytes[ i] as char ) ;
154- i += 1 ;
155- }
156- tokens. push ( Token {
157- text : s,
158- kind : TokenKind :: StringLit ,
159- } ) ;
160- continue ;
161- }
162-
163- // Number.
164- if b. is_ascii_digit ( ) || ( b == b'.' && i + 1 < bytes. len ( ) && bytes[ i + 1 ] . is_ascii_digit ( ) )
165- {
166- let start = i;
167- while i < bytes. len ( ) && ( bytes[ i] . is_ascii_digit ( ) || bytes[ i] == b'.' ) {
168- i += 1 ;
169- }
170- tokens. push ( Token {
171- text : input[ start..i] . to_string ( ) ,
172- kind : TokenKind :: Number ,
173- } ) ;
174- continue ;
175- }
176-
177- // Identifier or keyword.
178- if b. is_ascii_alphabetic ( ) || b == b'_' {
179- let start = i;
180- while i < bytes. len ( ) && ( bytes[ i] . is_ascii_alphanumeric ( ) || bytes[ i] == b'_' ) {
181- i += 1 ;
182- }
183- tokens. push ( Token {
184- text : input[ start..i] . to_string ( ) ,
185- kind : TokenKind :: Ident ,
186- } ) ;
187- continue ;
188- }
189-
190- return Err ( format ! ( "unexpected character: '{}'" , b as char ) ) ;
191- }
192-
193- Ok ( tokens)
194- }
195-
19652// ── Recursive descent parser ──────────────────────────────────────────
19753
/// Maximum recursion depth for nested parentheses / sub-expressions.
/// Exceeding this limit returns `Err` instead of overflowing the stack.
const MAX_EXPR_DEPTH: usize = 128;
20156
/// Parse an expression at the lowest precedence level (OR).
///
/// Entry point of the recursive-descent chain; delegates to `parse_or`.
fn parse_expr(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result<SqlExpr, String> {
    parse_or(tokens, pos, depth)
}
@@ -304,13 +158,11 @@ fn parse_multiplicative(
304158}
305159
306160fn parse_unary ( tokens : & [ Token ] , pos : & mut usize , depth : & mut usize ) -> Result < SqlExpr , String > {
307- // Unary minus.
308161 if * pos < tokens. len ( ) && tokens[ * pos] . kind == TokenKind :: Op && tokens[ * pos] . text == "-" {
309162 * pos += 1 ;
310163 let expr = parse_primary ( tokens, pos, depth) ?;
311164 return Ok ( SqlExpr :: Negate ( Box :: new ( expr) ) ) ;
312165 }
313- // NOT
314166 if peek_keyword ( tokens, * pos, "NOT" ) {
315167 * pos += 1 ;
316168 let expr = parse_primary ( tokens, pos, depth) ?;
@@ -327,7 +179,6 @@ fn parse_primary(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result
327179 let token = & tokens[ * pos] ;
328180
329181 match token. kind {
330- // Parenthesized expression.
331182 TokenKind :: LParen => {
332183 * depth += 1 ;
333184 if * depth > MAX_EXPR_DEPTH {
@@ -342,7 +193,6 @@ fn parse_primary(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result
342193 Ok ( expr)
343194 }
344195
345- // Number literal.
346196 TokenKind :: Number => {
347197 * pos += 1 ;
348198 if let Ok ( i) = token. text . parse :: < i64 > ( ) {
@@ -354,13 +204,11 @@ fn parse_primary(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result
354204 }
355205 }
356206
357- // String literal.
358207 TokenKind :: StringLit => {
359208 * pos += 1 ;
360209 Ok ( SqlExpr :: Literal ( Value :: String ( token. text . clone ( ) ) ) )
361210 }
362211
363- // Identifier: column ref, function call, keyword (NULL, TRUE, FALSE, CASE, COALESCE).
364212 TokenKind :: Ident => {
365213 let name = token. text . clone ( ) ;
366214 let upper = name. to_uppercase ( ) ;
@@ -376,15 +224,13 @@ fn parse_primary(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result
376224 Ok ( SqlExpr :: Coalesce ( args) )
377225 }
378226 _ => {
379- // Function call: IDENT(args).
380227 if * pos < tokens. len ( ) && tokens[ * pos] . kind == TokenKind :: LParen {
381228 let args = parse_arg_list ( tokens, pos, depth) ?;
382229 Ok ( SqlExpr :: Function {
383230 name : name. to_lowercase ( ) ,
384231 args,
385232 } )
386233 } else {
387- // Column reference.
388234 Ok ( SqlExpr :: Column ( name. to_lowercase ( ) ) )
389235 }
390236 }
@@ -395,7 +241,6 @@ fn parse_primary(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result
395241 }
396242}
397243
398- /// Parse `CASE WHEN cond THEN result [WHEN ... THEN ...] [ELSE result] END`.
399244fn parse_case ( tokens : & [ Token ] , pos : & mut usize , depth : & mut usize ) -> Result < SqlExpr , String > {
400245 let mut when_thens = Vec :: new ( ) ;
401246 let mut else_expr = None ;
@@ -429,7 +274,6 @@ fn parse_case(tokens: &[Token], pos: &mut usize, depth: &mut usize) -> Result<Sq
429274 } )
430275}
431276
432- /// Parse a parenthesized, comma-separated argument list: `(expr, expr, ...)`.
433277fn parse_arg_list (
434278 tokens : & [ Token ] ,
435279 pos : & mut usize ,
@@ -488,7 +332,6 @@ fn expect_token(
488332
489333// ── Validation ────────────────────────────────────────────────────────
490334
491- /// Non-deterministic functions that are rejected in GENERATED ALWAYS AS.
492335const NON_DETERMINISTIC : & [ & str ] = & [
493336 "now" ,
494337 "current_timestamp" ,
@@ -613,7 +456,6 @@ mod tests {
613456 assert_eq ! ( deps, vec![ "price" , "tax_rate" ] ) ;
614457 let doc = Value :: from ( serde_json:: json!( { "price" : 100.0 , "tax_rate" : 0.08 } ) ) ;
615458 let result = expr. eval ( & doc) ;
616- // eval returns integer when result is whole number.
617459 assert_eq ! ( result. as_f64( ) , Some ( 108.0 ) ) ;
618460 }
619461
@@ -693,15 +535,27 @@ mod tests {
693535
    #[test]
    fn deeply_nested_parentheses_return_error_not_stack_overflow() {
        // Spec: the parser must enforce a recursion depth limit so that
        // pathologically deep nesting returns Err rather than overflowing
        // the call stack and crashing the process.
        let depth = 10_000;
        let input = format!("{}x{}", "(".repeat(depth), ")".repeat(depth));
        let result = parse_generated_expr(&input);
        assert!(
            result.is_err(),
            "parse_generated_expr must return Err for {depth}-deep nesting"
        );
    }
546+
547+ #[ test]
548+ fn cjk_string_in_concat ( ) {
549+ let ( expr, _) = parse_ok ( "CONCAT('你好', name)" ) ;
550+ let doc = Value :: from ( serde_json:: json!( { "name" : "world" } ) ) ;
551+ assert_eq ! ( expr. eval( & doc) , Value :: String ( "你好world" . into( ) ) ) ;
552+ }
553+
554+ #[ test]
555+ fn comparison_with_utf8_literal ( ) {
556+ let ( expr, deps) = parse_ok ( "name != '禁止'" ) ;
557+ assert_eq ! ( deps, vec![ "name" ] ) ;
558+ let doc = Value :: from ( serde_json:: json!( { "name" : "allowed" } ) ) ;
559+ assert_eq ! ( expr. eval( & doc) , Value :: Bool ( true ) ) ;
560+ }
707561}
0 commit comments