Skip to content

Commit d870fc6

Browse files
genezhangclaude
andcommitted
fix(tck): address review comments on array subscript validation
Four improvements following code review: 1. Remove Literal::String from infer_non_integer_kind — string-keyed bracket subscripts are valid Cypher map/property access (e.g. map['key']). String-on-array errors are caught at SQL execution time by ClickHouse. 2. Expand find_invalid_array_subscript to cover all expression variants that walk_expression handles: AggregateFnCall, Operator (legacy), Case, ReduceExpr, MapLiteral, ArraySlicing. Previously nested subscripts inside CASE branches, reduce expressions, etc. were silently skipped. 3. Extend plan coverage beyond Projection: also validate subscript indices in Filter predicates and WithClause items / where_clause / order_by expressions. Add check_items() helper. 4. Drop toInt64() cast for non-literal array indices — emit (expr)+1 instead, letting ClickHouse's own type checker catch bad types (floats, strings) rather than silently coercing them. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6a33244 commit d870fc6

2 files changed

Lines changed: 79 additions & 19 deletions

File tree

src/clickhouse_query_generator/to_sql_query.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5133,14 +5133,16 @@ impl RenderExpr {
51335133
// Array subscript in ClickHouse: array[index]
51345134
// Cypher uses 0-based indexing; ClickHouse uses 1-based.
51355135
// For integer literals we add +1 at compile time.
5136-
// For variable / expression indices we wrap with toInt64(...)+1.
5137-
// Exception: string-literal indices are ClickHouse map-key accesses
5136+
// For expression indices we emit (expr)+1 without an explicit cast
5137+
// so ClickHouse's own type checker catches bad types (e.g. floats,
5138+
// strings) rather than silently coercing them.
5139+
// Exception: string-literal indices are map-key accesses
51385140
// (e.g. top['score']) and must NOT be offset.
51395141
let array_sql = array.to_sql();
51405142
let index_sql = match index.as_ref() {
51415143
RenderExpr::Literal(Literal::Integer(n)) => format!("{}", n + 1),
51425144
RenderExpr::Literal(Literal::String(_)) => index.to_sql(),
5143-
_ => format!("toInt64({})+1", index.to_sql()),
5145+
_ => format!("({})+1", index.to_sql()),
51445146
};
51455147
format!("{}[{}]", array_sql, index_sql)
51465148
}

src/query_planner/analyzer/query_validation.rs

Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,16 @@ impl fmt::Display for NonIntegerKind {
4343
}
4444

4545
/// Returns `Some(kind)` when `expr` is a clearly non-integer literal; `None` otherwise.
46+
///
47+
/// Note: `Literal::String` is intentionally excluded — bracket subscripts with string keys
48+
/// (`map['key']`, `node['prop']`) are valid Cypher map/property access and must not be
49+
/// rejected. String-keyed array subscripts (invalid) are caught at SQL execution time.
4650
fn infer_non_integer_kind(expr: &LogicalExpr) -> Option<NonIntegerKind> {
4751
match expr {
4852
LogicalExpr::Literal(lit) => match lit {
4953
Literal::Boolean(_) => Some(NonIntegerKind::Boolean),
5054
Literal::Float(_) => Some(NonIntegerKind::Float),
51-
Literal::String(_) => Some(NonIntegerKind::Str),
52-
Literal::Integer(_) | Literal::Null => None,
55+
Literal::Integer(_) | Literal::String(_) | Literal::Null => None,
5356
},
5457
LogicalExpr::List(_) => Some(NonIntegerKind::List),
5558
LogicalExpr::MapLiteral(_) => Some(NonIntegerKind::Map),
@@ -74,10 +77,19 @@ fn build_non_integer_alias_map(items: &[ProjectionItem]) -> HashMap<String, NonI
7477
/// Walk `expr` recursively looking for an `ArraySubscript` whose index is a
7578
/// known non-integer type (either a direct literal or an alias bound to one).
7679
/// Returns `Some(error_message)` on the first violation found.
80+
///
81+
/// Covers all expression variants that `walk_expression` traverses so that
82+
/// subscripts nested inside CASE branches, aggregate calls, reduce expressions,
83+
/// map literals, array slicings, etc. are also checked.
7784
fn find_invalid_array_subscript(
7885
expr: &LogicalExpr,
7986
alias_types: &HashMap<String, NonIntegerKind>,
8087
) -> Option<String> {
88+
macro_rules! recurse {
89+
($e:expr) => {
90+
find_invalid_array_subscript($e, alias_types)
91+
};
92+
}
8193
match expr {
8294
LogicalExpr::ArraySubscript { array, index } => {
8395
// Direct non-integer literal index.
@@ -101,25 +113,45 @@ fn find_invalid_array_subscript(
101113
));
102114
}
103115
}
104-
// Recurse into sub-expressions.
105-
find_invalid_array_subscript(array, alias_types)
106-
.or_else(|| find_invalid_array_subscript(index, alias_types))
116+
recurse!(array).or_else(|| recurse!(index))
107117
}
108-
LogicalExpr::OperatorApplicationExp(op) => op
109-
.operands
110-
.iter()
111-
.find_map(|o| find_invalid_array_subscript(o, alias_types)),
112-
LogicalExpr::ScalarFnCall(f) => f
113-
.args
114-
.iter()
115-
.find_map(|a| find_invalid_array_subscript(a, alias_types)),
116-
LogicalExpr::List(items) => items
117-
.iter()
118-
.find_map(|i| find_invalid_array_subscript(i, alias_types)),
118+
LogicalExpr::OperatorApplicationExp(op) | LogicalExpr::Operator(op) => {
119+
op.operands.iter().find_map(|o| recurse!(o))
120+
}
121+
LogicalExpr::ScalarFnCall(f) => f.args.iter().find_map(|a| recurse!(a)),
122+
LogicalExpr::AggregateFnCall(f) => f.args.iter().find_map(|a| recurse!(a)),
123+
LogicalExpr::List(items) => items.iter().find_map(|i| recurse!(i)),
124+
LogicalExpr::MapLiteral(entries) => entries.iter().find_map(|(_, v)| recurse!(v)),
125+
LogicalExpr::Case(c) => c
126+
.expr
127+
.as_ref()
128+
.and_then(|e| recurse!(e))
129+
.or_else(|| {
130+
c.when_then
131+
.iter()
132+
.find_map(|(w, t)| recurse!(w).or_else(|| recurse!(t)))
133+
})
134+
.or_else(|| c.else_expr.as_ref().and_then(|e| recurse!(e))),
135+
LogicalExpr::ReduceExpr(r) => recurse!(&r.initial_value)
136+
.or_else(|| recurse!(&r.list))
137+
.or_else(|| recurse!(&r.expression)),
138+
LogicalExpr::ArraySlicing { array, from, to } => recurse!(array)
139+
.or_else(|| from.as_ref().and_then(|f| recurse!(f)))
140+
.or_else(|| to.as_ref().and_then(|t| recurse!(t))),
119141
_ => None,
120142
}
121143
}
122144

145+
/// Check all expressions in a slice of projection items.
146+
fn check_items(
147+
items: &[ProjectionItem],
148+
alias_types: &HashMap<String, NonIntegerKind>,
149+
) -> Option<String> {
150+
items
151+
.iter()
152+
.find_map(|item| find_invalid_array_subscript(&item.expression, alias_types))
153+
}
154+
123155
pub struct QueryValidation;
124156

125157
impl AnalyzerPass for QueryValidation {
@@ -355,6 +387,11 @@ impl AnalyzerPass for QueryValidation {
355387
graph_joins.rebuild_or_clone(child_tf, logical_plan.clone())
356388
}
357389
LogicalPlan::Filter(filter) => {
390+
// Validate subscript indices in WHERE predicates.
391+
let empty_map = HashMap::new();
392+
if let Some(err) = find_invalid_array_subscript(&filter.predicate, &empty_map) {
393+
return Err(AnalyzerError::InvalidPlan(err));
394+
}
358395
let child_tf =
359396
self.analyze_with_graph_schema(filter.input.clone(), plan_ctx, graph_schema)?;
360397
filter.rebuild_or_clone(child_tf, logical_plan.clone())
@@ -434,6 +471,27 @@ impl AnalyzerPass for QueryValidation {
434471
}
435472
}
436473
LogicalPlan::WithClause(with_clause) => {
474+
// Validate subscript indices inside WITH projection expressions.
475+
let empty_map = HashMap::new();
476+
if let Some(err) = check_items(&with_clause.items, &empty_map) {
477+
return Err(AnalyzerError::InvalidPlan(err));
478+
}
479+
// Build alias→type map so WHERE/ORDER BY can resolve alias references.
480+
let alias_types = build_non_integer_alias_map(&with_clause.items);
481+
if let Some(ref wc) = with_clause.where_clause {
482+
if let Some(err) = find_invalid_array_subscript(wc, &alias_types) {
483+
return Err(AnalyzerError::InvalidPlan(err));
484+
}
485+
}
486+
if let Some(ref order_by) = with_clause.order_by {
487+
for item in order_by {
488+
if let Some(err) =
489+
find_invalid_array_subscript(&item.expression, &alias_types)
490+
{
491+
return Err(AnalyzerError::InvalidPlan(err));
492+
}
493+
}
494+
}
437495
let child_tf = self.analyze_with_graph_schema(
438496
with_clause.input.clone(),
439497
plan_ctx,

0 commit comments

Comments
 (0)