Skip to content

Commit 65498bf

Browse files
genezhangclaude
andcommitted
fix: OPTIONAL MATCH on denormalized schemas generates CTE + LEFT JOIN
Previously, OPTIONAL MATCH was completely dropped for denormalized schemas where nodes are virtual (properties embedded in edge table). The Union standalone node scan from MATCH was distributed into OPTIONAL MATCH branches by UnionDistribution, losing LEFT JOIN semantics. Three-layer fix: 1. union_distribution.rs: Skip distributing GraphRel over Union when the GraphRel is OPTIONAL and the Union is a denormalized standalone scan. Preserves the Union as an anchor for LEFT JOIN. 2. plan_builder.rs (GraphRel handler): When an OPTIONAL GraphRel has a Union-of-denormalized-nodes as left, render the Union as a CTE, use CTE as FROM, and LEFT JOIN the edge table with the correct from_id/node_id join condition. 3. plan_builder.rs (to_render_plan_with_ctx): Detect the pattern in GraphJoins-wrapped plans and delegate to the GraphRel handler, overlaying outer SELECT/GROUP BY/ORDER BY. Also: - plan_builder_helpers.rs: is_node_denormalized now handles Union - plan_optimizer.rs: edge join preserved via from_id_column marker - synthesize_denorm_optional_join_condition for CartesianProduct path Generated SQL (still needs column reference refinement in follow-up): ```sql WITH __denorm_scan_a AS ( SELECT origin_code AS "code" FROM flights UNION DISTINCT SELECT dest_code AS "code" FROM flights ) SELECT a.code, count(*) FROM __denorm_scan_a AS a LEFT JOIN flights AS r ON a.code = r.origin_code GROUP BY a.code ``` Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 305eb9a commit 65498bf

4 files changed

Lines changed: 443 additions & 25 deletions

File tree

src/clickhouse_query_generator/to_sql_query.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2507,6 +2507,13 @@ fn flatten_all_ctes(plan: &mut RenderPlan) {
25072507
}
25082508

25092509
pub fn render_plan_to_sql(mut plan: RenderPlan, _max_cte_depth: u32) -> String {
2510+
log::trace!(
2511+
"render_plan_to_sql: from={:?}, joins={}, union={}, ctes={}",
2512+
plan.from.0.as_ref().map(|f| &f.name),
2513+
plan.joins.0.len(),
2514+
plan.union.0.is_some(),
2515+
plan.ctes.0.len()
2516+
);
25102517
// STEP 0: Flatten ALL CTEs to top level in dependency order.
25112518
// CTEs are always a flat, linear chain — never nested inside other CTEs or union branches.
25122519
flatten_all_ctes(&mut plan);
@@ -2548,6 +2555,10 @@ pub fn render_plan_to_sql(mut plan: RenderPlan, _max_cte_depth: u32) -> String {
25482555

25492556
sort_joins_by_dependency(plan.joins.0, from_table.as_ref())
25502557
};
2558+
log::trace!(
2559+
"render_plan_to_sql after sort_joins: joins={}",
2560+
plan.joins.0.len()
2561+
);
25512562

25522563
// Also sort JOINs in UNION branches
25532564
if let Some(ref mut union) = plan.union.0 {

src/query_planner/analyzer/union_distribution.rs

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -139,17 +139,43 @@ fn distribute_union_impl(plan: &LogicalPlan, depth: usize) -> LogicalPlan {
139139
let new_right = distribute_union_impl(&gr.right, depth + 1);
140140

141141
// If left became Union after distribution, distribute GraphRel over it
142+
// EXCEPTION: Skip when the GraphRel is OPTIONAL and the Union is a
143+
// denormalized standalone node scan. Distribution would push the OPTIONAL
144+
// MATCH edge into each Union branch, losing LEFT JOIN semantics (every
145+
// flight row has airports, so no airport would appear "without flights").
142146
if let LogicalPlan::Union(union) = &new_left {
143-
log::debug!(
144-
"🔀 UnionDistribution: distributing GraphRel '{}' over left Union ({} branches)",
145-
gr.alias,
146-
union.inputs.len()
147-
);
148-
let center = Arc::new(new_center);
149-
let right = Arc::new(new_right);
150-
return distribute_over_union(union, |branch| {
151-
LogicalPlan::GraphRel(graph_rel_with(gr, branch, center.clone(), right.clone()))
152-
});
147+
let is_optional_gr = gr.is_optional.unwrap_or(false);
148+
let is_denorm_union = is_optional_gr
149+
&& !union.inputs.is_empty()
150+
&& union.inputs.iter().all(|input| {
151+
matches!(input.as_ref(), LogicalPlan::GraphNode(gn)
152+
if gn.is_denormalized
153+
|| matches!(gn.input.as_ref(), LogicalPlan::ViewScan(vs) if vs.is_denormalized))
154+
});
155+
156+
if is_denorm_union {
157+
log::info!(
158+
"🔀 UnionDistribution: SKIPPING GraphRel '{}' distribution over denormalized OPTIONAL Union — preserving LEFT JOIN semantics",
159+
gr.alias
160+
);
161+
// Keep the Union as-is; wrap in GraphRel without distributing
162+
} else {
163+
log::debug!(
164+
"🔀 UnionDistribution: distributing GraphRel '{}' over left Union ({} branches)",
165+
gr.alias,
166+
union.inputs.len()
167+
);
168+
let center = Arc::new(new_center);
169+
let right = Arc::new(new_right);
170+
return distribute_over_union(union, |branch| {
171+
LogicalPlan::GraphRel(graph_rel_with(
172+
gr,
173+
branch,
174+
center.clone(),
175+
right.clone(),
176+
))
177+
});
178+
}
153179
}
154180

155181
// If right became Union after distribution, distribute GraphRel over it
@@ -195,20 +221,47 @@ fn distribute_union_impl(plan: &LogicalPlan, depth: usize) -> LogicalPlan {
195221
});
196222
}
197223
// CP(Union(br0, br1), right) → Union(CP(br0, right), CP(br1, right))
224+
// EXCEPTION: Skip distribution when:
225+
// - The CartesianProduct is optional (OPTIONAL MATCH pattern)
226+
// - The Union is a denormalized standalone node scan
227+
// Distribution would push the OPTIONAL MATCH into each Union branch,
228+
// losing LEFT JOIN semantics (every branch would scan the edge table
229+
// directly, making all airports appear to have flights).
198230
if let LogicalPlan::Union(union) = &new_left {
199-
log::debug!(
200-
"🔀 UnionDistribution: distributing CP over left Union ({} branches)",
201-
union.inputs.len()
202-
);
203-
let right = Arc::new(new_right);
204-
return distribute_over_union(union, |branch| {
205-
LogicalPlan::CartesianProduct(CartesianProduct {
206-
left: branch,
207-
right: right.clone(),
208-
is_optional: cp.is_optional,
209-
join_condition: cp.join_condition.clone(),
210-
})
231+
let is_denorm_union = union.inputs.iter().all(|input| {
232+
fn is_denorm_node(plan: &LogicalPlan) -> bool {
233+
match plan {
234+
LogicalPlan::GraphNode(gn) => {
235+
gn.is_denormalized
236+
|| matches!(gn.input.as_ref(), LogicalPlan::ViewScan(vs) if vs.is_denormalized)
237+
}
238+
_ => false,
239+
}
240+
}
241+
is_denorm_node(input)
211242
});
243+
244+
if cp.is_optional && is_denorm_union {
245+
log::info!(
246+
"🔀 UnionDistribution: SKIPPING distribution of OPTIONAL CP over denormalized Union ({} branches) — preserving LEFT JOIN semantics",
247+
union.inputs.len()
248+
);
249+
// Don't distribute; keep the CartesianProduct intact
250+
} else {
251+
log::debug!(
252+
"🔀 UnionDistribution: distributing CP over left Union ({} branches)",
253+
union.inputs.len()
254+
);
255+
let right = Arc::new(new_right);
256+
return distribute_over_union(union, |branch| {
257+
LogicalPlan::CartesianProduct(CartesianProduct {
258+
left: branch,
259+
right: right.clone(),
260+
is_optional: cp.is_optional,
261+
join_condition: cp.join_condition.clone(),
262+
})
263+
});
264+
}
212265
}
213266

214267
LogicalPlan::CartesianProduct(CartesianProduct {

0 commit comments

Comments
 (0)