diff --git a/src/clickhouse_query_generator/to_sql_query.rs b/src/clickhouse_query_generator/to_sql_query.rs index e9ebc51a..1056fa7d 100644 --- a/src/clickhouse_query_generator/to_sql_query.rs +++ b/src/clickhouse_query_generator/to_sql_query.rs @@ -1153,16 +1153,38 @@ pub(crate) fn rewrite_expr_for_vlp( }) } - RenderExpr::AggregateFnCall(agg) => RenderExpr::AggregateFnCall(AggregateFnCall { - name: agg.name.clone(), - args: agg - .args - .iter() - .map(|a| { - rewrite_expr_for_vlp(a, start_alias, end_alias, path_variable, skip_start_alias) - }) - .collect(), - }), + RenderExpr::AggregateFnCall(agg) => { + // COUNT(path_variable) → COUNT(*) since each row represents a path + if let Some(path_var) = path_variable { + if agg.args.len() == 1 && agg.name.to_lowercase() == "count" { + if let RenderExpr::TableAlias(alias) = &agg.args[0] { + if &alias.0 == path_var { + log::info!("🔧 VLP path aggregate: count({}) → count(*)", path_var); + return RenderExpr::AggregateFnCall(AggregateFnCall { + name: agg.name.clone(), + args: vec![RenderExpr::Star], + }); + } + } + } + } + RenderExpr::AggregateFnCall(AggregateFnCall { + name: agg.name.clone(), + args: agg + .args + .iter() + .map(|a| { + rewrite_expr_for_vlp( + a, + start_alias, + end_alias, + path_variable, + skip_start_alias, + ) + }) + .collect(), + }) + } RenderExpr::ColumnAlias(ColumnAlias(alias_str)) if path_variable.as_ref() == Some(alias_str) => @@ -1563,14 +1585,31 @@ fn rewrite_expr_for_fixed_path( }) } - RenderExpr::AggregateFnCall(agg) => RenderExpr::AggregateFnCall(AggregateFnCall { - name: agg.name.clone(), - args: agg - .args - .iter() - .map(|a| rewrite_expr_for_fixed_path(a, path_variable, hop_count)) - .collect(), - }), + RenderExpr::AggregateFnCall(agg) => { + // COUNT(path_variable) → COUNT(*) since each row represents a path + if agg.args.len() == 1 { + if let RenderExpr::TableAlias(alias) = &agg.args[0] { + if alias.0 == *path_variable && agg.name.to_lowercase() == "count" { + log::info!( + "🔧 Fixed path aggregate: count({}) → count(*)", + path_variable + ); + return RenderExpr::AggregateFnCall(AggregateFnCall { + name: agg.name.clone(), + args: vec![RenderExpr::Star], + }); + } + } + } + RenderExpr::AggregateFnCall(AggregateFnCall { + name: agg.name.clone(), + args: agg + .args + .iter() + .map(|a| rewrite_expr_for_fixed_path(a, path_variable, hop_count)) + .collect(), + }) + } // Leave other expressions unchanged other => other.clone(), diff --git a/src/open_cypher_parser/use_clause.rs b/src/open_cypher_parser/use_clause.rs index 975103c7..f3febf49 100644 --- a/src/open_cypher_parser/use_clause.rs +++ b/src/open_cypher_parser/use_clause.rs @@ -1,6 +1,8 @@ use nom::{ - bytes::complete::{tag_no_case, take_while1}, + branch::alt, + bytes::complete::{tag, tag_no_case, take_while1}, error::context, + sequence::delimited, IResult, Parser, }; @@ -10,7 +12,7 @@ use super::{ast::UseClause, common::ws, errors::OpenCypherParsingError}; /// Examples: /// USE social_network /// USE ecommerce -/// USE mydb +/// USE `my-database` pub fn parse_use_clause<'a>( input: &'a str, ) -> IResult<&'a str, UseClause<'a>, OpenCypherParsingError<'a>> { @@ -18,9 +20,12 @@ pub fn parse_use_clause<'a>( let (input, database_name) = context( "Error parsing database name in USE clause", - ws(take_while1(|c: char| { - c.is_alphanumeric() || c == '_' || c == '.' - })), + ws(alt(( + // Backtick-quoted identifier: USE `my-database` + delimited(tag("`"), take_while1(|c: char| c != '`'), tag("`")), + // Unquoted identifier + take_while1(|c: char| c.is_alphanumeric() || c == '_' || c == '.'), + ))), ) .parse(input)?; @@ -92,6 +97,32 @@ mod tests { assert!(res.is_err(), "Should fail when database name is missing"); } + #[test] + fn test_parse_use_clause_backtick_quoted() { + let input = "USE `my-database` MATCH (n) RETURN n"; + let res = parse_use_clause(input); + match res { + Ok((remaining, use_clause)) => { + assert_eq!(use_clause.database_name, "my-database"); + assert_eq!(remaining, "MATCH (n) RETURN n"); + } + Err(e) => panic!("Failed to parse backtick-quoted USE clause: {:?}", e), + } + } + + #[test] + fn test_parse_use_clause_backtick_with_special_chars() { + let input = "USE `zeek_logs` MATCH (n) RETURN n"; + let res = parse_use_clause(input); + match res { + Ok((remaining, use_clause)) => { + assert_eq!(use_clause.database_name, "zeek_logs"); + assert_eq!(remaining, "MATCH (n) RETURN n"); + } + Err(e) => panic!("Failed to parse backtick-quoted USE clause: {:?}", e), + } + } + #[test] fn test_parse_use_clause_numeric_start() { let input = "USE 123db"; diff --git a/src/query_planner/logical_plan/match_clause/helpers.rs b/src/query_planner/logical_plan/match_clause/helpers.rs index db2cf686..88a9483c 100644 --- a/src/query_planner/logical_plan/match_clause/helpers.rs +++ b/src/query_planner/logical_plan/match_clause/helpers.rs @@ -422,16 +422,15 @@ pub fn register_path_variable( ); // Then register TableCtx for backward compatibility with code that uses alias_table_ctx_map - plan_ctx.insert_table_ctx( + let mut table_ctx = TableCtx::build( path_var.to_string(), - TableCtx::build( - path_var.to_string(), - None, // Path variables don't have labels - vec![], // Path variables don't have properties - false, // Not a relationship - true, // Explicitly named by user - ), + None, // Path variables don't have labels + vec![], // Path variables don't have properties + false, // Not a relationship + true, // Explicitly named by user ); + table_ctx.set_is_path(true); + plan_ctx.insert_table_ctx(path_var.to_string(), table_ctx); log::info!( "📍 Registered path variable '{}' with TypedVariable::Path (start={}, end={}, bounds={:?})", diff --git a/src/query_planner/optimizer/collect_unwind_elimination.rs b/src/query_planner/optimizer/collect_unwind_elimination.rs index 107b727c..3440b159 100644 --- a/src/query_planner/optimizer/collect_unwind_elimination.rs +++ b/src/query_planner/optimizer/collect_unwind_elimination.rs @@ -518,12 +518,24 @@ impl CollectUnwindElimination { // Add the source variable as a passthrough item so it's exported // (the UNWIND alias will be rewritten to reference this) - new_items.push(ProjectionItem { - expression: LogicalExpr::TableAlias(TableAlias( - source.clone(), - )), - col_alias: Some(ColumnAlias(source.clone())), - }); + // Only add if source isn't already in the remaining items + let source_already_present = + new_items.iter().any(|item| { + match &item.expression { + LogicalExpr::TableAlias(ta) => { + ta.0 == source + } + _ => false, + } + }); + if !source_already_present { + new_items.push(ProjectionItem { + expression: LogicalExpr::TableAlias( + TableAlias(source.clone()), + ), + col_alias: Some(ColumnAlias(source.clone())), + }); + } // Update exported aliases: remove collection, add source let mut new_exported_aliases: Vec = with diff --git a/src/query_planner/plan_ctx/table_ctx.rs b/src/query_planner/plan_ctx/table_ctx.rs index d1c5bba3..41c017da 100644 --- a/src/query_planner/plan_ctx/table_ctx.rs +++ b/src/query_planner/plan_ctx/table_ctx.rs @@ -37,6 +37,8 @@ pub struct TableCtx { /// For relationships: the label of the connected to_node (target) /// Used to resolve polymorphic relationships to_node_label: Option, + /// Whether this alias represents a path variable (from `MATCH p = ...`) + is_path: bool, } impl TableCtx { @@ -63,6 +65,7 @@ impl TableCtx { cte_reference: None, from_node_label: None, to_node_label: None, + is_path: false, } } @@ -108,6 +111,7 @@ impl TableCtx { cte_reference: Some(cte_name), from_node_label: None, to_node_label: None, + is_path: false, } } @@ -135,6 +139,9 @@ impl TableCtx { /// Path variables have no label (None) and are not relationships. /// Anonymous nodes also have None labels, so we exclude aliases matching "t\d+" pattern. pub fn is_path_variable(&self) -> bool { + if self.is_path { + return true; + } if self.is_rel { return false; } @@ -143,9 +150,6 @@ impl TableCtx { } // Computed/scalar aliases from WITH (e.g., `WITH size(...) AS cnt`) // have explicit_alias=true but no labels — they are NOT path variables. - // Note: Real path variables (e.g., `p` in `MATCH p = ...`) are also - // explicit_alias=true, but they are registered via define_path() directly - // in match_clause/traversal.rs, not through this heuristic. if self.explicit_alias { return false; } @@ -160,6 +164,11 @@ impl TableCtx { true // No labels, not a relationship, not an anonymous node → path variable } + /// Mark this TableCtx as a path variable. + pub fn set_is_path(&mut self, val: bool) { + self.is_path = val; + } + // ======================================================================== // Label/Type Access // ======================================================================== diff --git a/src/render_plan/cte_generation.rs b/src/render_plan/cte_generation.rs index c0b9036e..f3ea9b8f 100644 --- a/src/render_plan/cte_generation.rs +++ b/src/render_plan/cte_generation.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; use crate::clickhouse_query_generator::variable_length_cte::WeightCteConfig; use crate::clickhouse_query_generator::NodeProperty; use crate::graph_catalog::config::Identifier; +use crate::graph_catalog::expression_parser::PropertyValue; use crate::graph_catalog::graph_schema::GraphSchema; use crate::query_planner::logical_expr::LogicalExpr; use crate::query_planner::logical_plan::LogicalPlan; @@ -783,6 +784,67 @@ pub fn map_property_to_column_with_relationship_context( Ok(column.raw().to_string()) } +/// Like `map_property_to_column_with_relationship_context` but preserves +/// the `PropertyValue` variant (Column vs Expression). Use this when the +/// caller needs to generate correct SQL for expression-based mappings. +pub fn map_property_to_property_value( + property: &str, + node_label: &str, +) -> Result { + let current_schema = crate::server::query_context::get_current_schema_with_fallback(); + let schema = current_schema.as_ref().ok_or_else(|| { + format!( + "No schema available for property '{}' on node '{}'", + property, node_label + ) + })?; + + let node_schema = schema + .all_node_schemas() + .get(node_label) + .ok_or_else(|| format!("Node label '{}' not found in schema", node_label))?; + + node_schema + .property_mappings + .get(property) + .cloned() + .ok_or_else(|| { + format!( + "Property '{}' not found for node label '{}'", + property, node_label + ) + }) +} + +/// Like `map_property_to_property_value` but for relationship properties. +pub fn map_rel_property_to_property_value( + property: &str, + relationship_type: &str, +) -> Result { + let current_schema = crate::server::query_context::get_current_schema_with_fallback(); + let schema = current_schema.as_ref().ok_or_else(|| { + format!( + "No schema available for property '{}' on rel '{}'", + property, relationship_type + ) + })?; + + let rel_schema = schema + .get_rel_schema(relationship_type) + .map_err(|e| format!("Relationship type '{}' not found: {}", relationship_type, e))?; + + rel_schema + .property_mappings + .get(property) + .cloned() + .ok_or_else(|| { + format!( + "Property '{}' not found for relationship type '{}'", + property, relationship_type + ) + }) +} + /// Map a relationship property to its corresponding column name in the schema. /// This is the relationship equivalent of map_property_to_column_with_schema. /// diff --git a/src/render_plan/plan_builder.rs b/src/render_plan/plan_builder.rs index e80abf34..48782d10 100644 --- a/src/render_plan/plan_builder.rs +++ b/src/render_plan/plan_builder.rs @@ -1777,6 +1777,12 @@ impl RenderPlanBuilder for LogicalPlan { .iter() .any(|item| matches!(item.expression, LogicalExpr::AggregateFnCall(_))); + // Extract VLP path variable from input (for path function rewriting in WITH items) + let vlp_path_variable = { + use crate::render_plan::cte_extraction::get_path_variable; + get_path_variable(&with.input) + }; + let cte_content = if is_denormalized_input || is_bidirectional_union { // Union path: use to_render_plan() which correctly renders both // UNION branches with per-branch property resolution and FROM/JOINs @@ -1791,6 +1797,29 @@ impl RenderPlanBuilder for LogicalPlan { "Failed to convert where clause".to_string(), ) })?; + + // 🔧 FIX: Rewrite renamed aliases in WHERE clause back to source aliases. + // Same rewrite as the standard path — needed for Union/bidirectional inputs too. + let render_where = { + let mut reverse_alias_map = HashMap::new(); + for item in &with.items { + if let (LogicalExpr::TableAlias(ta), Some(col_alias)) = + (&item.expression, &item.col_alias) + { + reverse_alias_map.insert(col_alias.0.clone(), ta.0.clone()); + } + } + if reverse_alias_map.is_empty() { + render_where + } else { + log::info!( + "🔧 Rewriting WITH WHERE aliases (Union path): {:?}", + reverse_alias_map + ); + rewrite_render_expr_aliases(&render_where, &reverse_alias_map) + } + }; + if has_aggregation { input_plan.having_clause = Some(render_where); } else { @@ -1838,6 +1867,30 @@ impl RenderPlanBuilder for LogicalPlan { "Failed to convert where clause".to_string(), ) })?; + + // 🔧 FIX: Rewrite renamed aliases in WHERE clause back to source aliases. + // For `WITH u AS person WHERE person.user_id = 1`, the WHERE uses "person" + // but inside the CTE body only "u" exists. Reverse the alias mapping. + let render_where = { + let mut reverse_alias_map = HashMap::new(); + for item in &with.items { + if let (LogicalExpr::TableAlias(ta), Some(col_alias)) = + (&item.expression, &item.col_alias) + { + reverse_alias_map.insert(col_alias.0.clone(), ta.0.clone()); + } + } + if reverse_alias_map.is_empty() { + render_where + } else { + log::info!( + "🔧 Rewriting WITH WHERE aliases: {:?}", + reverse_alias_map + ); + rewrite_render_expr_aliases(&render_where, &reverse_alias_map) + } + }; + if has_aggregation { if cte_having.is_some() { return Err(RenderBuildError::InvalidRenderPlan( @@ -1911,6 +1964,56 @@ impl RenderPlanBuilder for LogicalPlan { }); } + // 🔧 FIX: Rewrite VLP path function expressions in WITH items + // When WITH contains `length(path)`, `nodes(path)`, `relationships(path)`, + // these must be rewritten to reference VLP CTE columns (t.hop_count, etc.) + if let Some(ref path_var) = vlp_path_variable { + // Find start/end aliases from the VLP GraphRel + fn find_vlp_endpoints(plan: &LogicalPlan) -> Option<(String, String)> { + match plan { + LogicalPlan::GraphRel(gr) if gr.variable_length.is_some() => { + Some((gr.left_connection.clone(), gr.right_connection.clone())) + } + LogicalPlan::GraphRel(gr) => find_vlp_endpoints(&gr.left) + .or_else(|| find_vlp_endpoints(&gr.right)), + LogicalPlan::GraphNode(gn) => find_vlp_endpoints(&gn.input), + LogicalPlan::Filter(f) => find_vlp_endpoints(&f.input), + LogicalPlan::Projection(p) => find_vlp_endpoints(&p.input), + LogicalPlan::GraphJoins(gj) => find_vlp_endpoints(&gj.input), + LogicalPlan::GroupBy(gb) => find_vlp_endpoints(&gb.input), + LogicalPlan::Union(u) => { + u.inputs.iter().find_map(|i| find_vlp_endpoints(i)) + } + _ => None, + } + } + + let endpoints = find_vlp_endpoints(&with.input); + let (start_alias, end_alias) = match endpoints { + Some((s, e)) => (Some(s), Some(e)), + None => (None, None), + }; + + log::info!( + "🔧 VLP path rewriting in WITH: path_var={}, start={:?}, end={:?}", + path_var, + start_alias, + end_alias + ); + + use crate::clickhouse_query_generator::to_sql_query::rewrite_expr_for_vlp; + let path_variable_opt = Some(path_var.clone()); + for item in cte_select_items.iter_mut() { + item.expression = rewrite_expr_for_vlp( + &item.expression, + &start_alias, + &end_alias, + &path_variable_opt, + false, + ); + } + } + // ✅ FIX (Phase 6): Remap column aliases to match exported aliases // When we have `WITH u AS person`, the select items will have aliases like `u.name` // but they need to be remapped to `person.name` for the CTE output @@ -1980,6 +2083,7 @@ impl RenderPlanBuilder for LogicalPlan { }); let mut cte = Cte::new(cte_name.clone(), cte_content, false); cte.with_exported_aliases = with.exported_aliases.clone(); + cte.vlp_path_variable = vlp_path_variable.clone(); let ctes = CteItems(vec![cte]); let from = FromTableItem(Some(ViewTableRef { @@ -3993,6 +4097,63 @@ fn build_with_alias_mapping( mapping } +/// Rewrite table aliases in a RenderExpr using an alias mapping. +/// Used to rewrite post-WITH WHERE clauses: `person.user_id` → `u.user_id` +/// when the WHERE uses a renamed alias but the CTE body uses the original. +fn rewrite_render_expr_aliases( + expr: &RenderExpr, + alias_map: &HashMap, +) -> RenderExpr { + match expr { + RenderExpr::PropertyAccessExp(pa) => { + if let Some(source_alias) = alias_map.get(&pa.table_alias.0) { + RenderExpr::PropertyAccessExp(PropertyAccess { + table_alias: TableAlias(source_alias.clone()), + column: pa.column.clone(), + }) + } else { + expr.clone() + } + } + RenderExpr::TableAlias(ta) => { + if let Some(source_alias) = alias_map.get(&ta.0) { + RenderExpr::TableAlias(TableAlias(source_alias.clone())) + } else { + expr.clone() + } + } + RenderExpr::OperatorApplicationExp(op) => { + RenderExpr::OperatorApplicationExp(OperatorApplication { + operator: op.operator.clone(), + operands: op + .operands + .iter() + .map(|o| rewrite_render_expr_aliases(o, alias_map)) + .collect(), + }) + } + RenderExpr::ScalarFnCall(func) => { + RenderExpr::ScalarFnCall(super::render_expr::ScalarFnCall { + name: func.name.clone(), + args: func + .args + .iter() + .map(|a| rewrite_render_expr_aliases(a, alias_map)) + .collect(), + }) + } + RenderExpr::AggregateFnCall(func) => RenderExpr::AggregateFnCall(AggregateFnCall { + name: func.name.clone(), + args: func + .args + .iter() + .map(|a| rewrite_render_expr_aliases(a, alias_map)) + .collect(), + }), + _ => expr.clone(), + } +} + /// Remap select item aliases to match WITH clause exported aliases /// Changes column aliases like "u_name" to "person_name" when WITH u AS person fn remap_select_item_aliases( diff --git a/src/render_plan/plan_builder_helpers.rs b/src/render_plan/plan_builder_helpers.rs index 0fe89d91..539aad34 100644 --- a/src/render_plan/plan_builder_helpers.rs +++ b/src/render_plan/plan_builder_helpers.rs @@ -2271,7 +2271,11 @@ pub(super) fn plan_type_name(plan: &LogicalPlan) -> &'static str { /// and looks up the property in its property_mapping. /// /// Returns the mapped column name if found, None otherwise. -fn get_property_from_viewscan(alias: &str, property: &str, plan: &LogicalPlan) -> Option { +fn get_property_from_viewscan( + alias: &str, + property: &str, + plan: &LogicalPlan, +) -> Option { match plan { LogicalPlan::GraphNode(node) if node.alias == alias => { // Found the matching GraphNode - check its input for ViewScan @@ -2279,17 +2283,17 @@ fn get_property_from_viewscan(alias: &str, property: &str, plan: &LogicalPlan) - LogicalPlan::ViewScan(scan) => { // Look up property in the ViewScan's property_mapping if let Some(prop_value) = scan.property_mapping.get(property) { - return Some(prop_value.raw().to_string()); + return Some(prop_value.clone()); } // Also check from_node_properties and to_node_properties if let Some(from_props) = &scan.from_node_properties { if let Some(prop_value) = from_props.get(property) { - return Some(prop_value.raw().to_string()); + return Some(prop_value.clone()); } } if let Some(to_props) = &scan.to_node_properties { if let Some(prop_value) = to_props.get(property) { - return Some(prop_value.raw().to_string()); + return Some(prop_value.clone()); } } None @@ -2367,16 +2371,16 @@ pub(super) fn apply_property_mapping_to_expr(expr: &mut RenderExpr, plan: &Logic // For denormalized virtual nodes, try to get property mapping from ViewScan first // This is needed because denormalized nodes have position-specific mappings // (from_node_properties vs to_node_properties) - if let Some(column) = + if let Some(mapped_pv) = get_property_from_viewscan(&prop.table_alias.0, prop.column.raw(), plan) { log::debug!( "🔍 PROPERTY MAPPING (ViewScan): '{}.{}' -> '{}'", prop.table_alias.0, prop.column.raw(), - column + mapped_pv.raw() ); - prop.column = PropertyValue::Column(column); + prop.column = mapped_pv; } else if let Some(node_label) = get_node_label_for_alias(&prop.table_alias.0, plan) { log::debug!( "🔍 PROPERTY MAPPING: Alias '{}' -> Label '{}', Property '{}' (before mapping)", @@ -2385,22 +2389,34 @@ pub(super) fn apply_property_mapping_to_expr(expr: &mut RenderExpr, plan: &Logic prop.column.raw() ); - // Map the property to the correct column - let mapped_column = crate::render_plan::cte_generation::map_property_to_column_with_relationship_context( - prop.column.raw(), - &node_label, - None, // relationship_type - None, // node_role - None, // schema_name will be resolved from task-local - ).unwrap_or_else(|_| prop.column.raw().to_string()); - - log::debug!( - "🔍 PROPERTY MAPPING: '{}' -> '{}'", - prop.column.raw(), - mapped_column - ); + // Map the property to the correct column, preserving Expression variant + if let Ok(mapped_pv) = + crate::render_plan::cte_generation::map_property_to_property_value( + prop.column.raw(), + &node_label, + ) + { + log::debug!( + "🔍 PROPERTY MAPPING: '{}' -> '{}'", + prop.column.raw(), + mapped_pv.raw() + ); + prop.column = mapped_pv; + } else { + // Fallback to string-based mapping for denormalized/complex cases + let mapped_column = crate::render_plan::cte_generation::map_property_to_column_with_relationship_context( + prop.column.raw(), + &node_label, + None, None, None, + ).unwrap_or_else(|_| prop.column.raw().to_string()); - prop.column = PropertyValue::Column(mapped_column); + log::debug!( + "🔍 PROPERTY MAPPING (fallback): '{}' -> '{}'", + prop.column.raw(), + mapped_column + ); + prop.column = PropertyValue::Column(mapped_column); + } } else if let Some(rel_type) = get_relationship_type_for_alias(&prop.table_alias.0, plan) { @@ -2412,22 +2428,36 @@ pub(super) fn apply_property_mapping_to_expr(expr: &mut RenderExpr, plan: &Logic prop.column.raw() ); - // Map the relationship property to the correct column - let mapped_column = - crate::render_plan::cte_generation::map_relationship_property_to_column( + // Map the relationship property, preserving Expression variant + if let Ok(mapped_pv) = + crate::render_plan::cte_generation::map_rel_property_to_property_value( prop.column.raw(), &rel_type, - None, // Use task-local schema context ) - .unwrap_or_else(|_| prop.column.raw().to_string()); - - log::debug!( - "🔍 RELATIONSHIP PROPERTY MAPPING: '{}' -> '{}'", - prop.column.raw(), - mapped_column - ); + { + log::debug!( + "🔍 RELATIONSHIP PROPERTY MAPPING: '{}' -> '{}'", + prop.column.raw(), + mapped_pv.raw() + ); + prop.column = mapped_pv; + } else { + // Fallback to string-based mapping + let mapped_column = + crate::render_plan::cte_generation::map_relationship_property_to_column( + prop.column.raw(), + &rel_type, + None, + ) + .unwrap_or_else(|_| prop.column.raw().to_string()); - prop.column = PropertyValue::Column(mapped_column); + log::debug!( + "🔍 RELATIONSHIP PROPERTY MAPPING (fallback): '{}' -> '{}'", + prop.column.raw(), + mapped_column + ); + prop.column = PropertyValue::Column(mapped_column); + } } // For denormalized nodes, remap the table alias to the edge alias diff --git a/src/render_plan/plan_builder_utils.rs b/src/render_plan/plan_builder_utils.rs index 357e8782..864c476c 100644 --- a/src/render_plan/plan_builder_utils.rs +++ b/src/render_plan/plan_builder_utils.rs @@ -8721,6 +8721,28 @@ pub(crate) fn build_chained_with_match_cte_plan( "🔧 build_chained_with_match_cte_plan: Applying WHERE clause from WITH" ); + // 🔧 FIX: Rewrite renamed aliases back to source aliases in WHERE clause. + // For `WITH u AS person WHERE person.user_id = 1`, "person" must become "u" + // so that property mapping resolution can find the correct schema mappings. + let where_predicate = if let Some(ref items) = with_items { + let mut reverse_map = std::collections::HashMap::new(); + for item in items { + if let (LogicalExpr::TableAlias(ta), Some(col_alias)) = + (&item.expression, &item.col_alias) + { + reverse_map.insert(col_alias.0.clone(), ta.0.clone()); + } + } + if reverse_map.is_empty() { + where_predicate + } else { + log::info!("🔧 Rewriting WITH WHERE aliases: {:?}", reverse_map); + rewrite_logical_expr_aliases(&where_predicate, &reverse_map) + } + } else { + where_predicate + }; + // Rewrite through scope to map Cypher properties to CTE columns let where_rewritten = rewrite_expression_with_property_mapping( &where_predicate, @@ -17226,3 +17248,66 @@ fn replace_arraycount_placeholders_in_expr( _ => {} } } + +/// Rewrite table aliases in a LogicalExpr using an alias mapping. +/// Used for post-WITH WHERE clauses: `person.user_id` → `u.user_id` +/// when the WHERE uses a renamed alias but the input uses the original. +fn rewrite_logical_expr_aliases( + expr: &LogicalExpr, + alias_map: &std::collections::HashMap, +) -> LogicalExpr { + match expr { + LogicalExpr::PropertyAccessExp(pa) => { + if let Some(source_alias) = alias_map.get(&pa.table_alias.0) { + LogicalExpr::PropertyAccessExp(crate::query_planner::logical_expr::PropertyAccess { + table_alias: crate::query_planner::logical_expr::TableAlias( + source_alias.clone(), + ), + column: pa.column.clone(), + }) + } else { + expr.clone() + } + } + LogicalExpr::TableAlias(ta) => { + if let Some(source_alias) = alias_map.get(&ta.0) { + LogicalExpr::TableAlias(crate::query_planner::logical_expr::TableAlias( + source_alias.clone(), + )) + } else { + expr.clone() + } + } + LogicalExpr::OperatorApplicationExp(op) => LogicalExpr::OperatorApplicationExp( + crate::query_planner::logical_expr::OperatorApplication { + operator: op.operator.clone(), + operands: op + .operands + .iter() + .map(|o| rewrite_logical_expr_aliases(o, alias_map)) + .collect(), + }, + ), + LogicalExpr::ScalarFnCall(func) => { + LogicalExpr::ScalarFnCall(crate::query_planner::logical_expr::ScalarFnCall { + name: func.name.clone(), + args: func + .args + .iter() + .map(|a| rewrite_logical_expr_aliases(a, alias_map)) + .collect(), + }) + } + LogicalExpr::AggregateFnCall(func) => { + LogicalExpr::AggregateFnCall(crate::query_planner::logical_expr::AggregateFnCall { + name: func.name.clone(), + args: func + .args + .iter() + .map(|a| rewrite_logical_expr_aliases(a, alias_map)) + .collect(), + }) + } + _ => expr.clone(), + } +} diff --git a/src/render_plan/render_expr.rs b/src/render_plan/render_expr.rs index ddc3d32d..40eebf3e 100644 --- a/src/render_plan/render_expr.rs +++ b/src/render_plan/render_expr.rs @@ -937,6 +937,22 @@ pub struct AggregateFnCall { pub args: Vec, } +/// Check if a RenderExpr is a literal or effectively a literal (e.g., negated numeric literal). +/// Used to decide whether list elements need toString() wrapping. +fn is_literal_like(expr: &RenderExpr) -> bool { + match expr { + RenderExpr::Literal(_) | RenderExpr::Parameter(_) => true, + // Unary minus: parser generates `0 - x` for `-x` + RenderExpr::OperatorApplicationExp(op) + if op.operator == Operator::Subtraction && op.operands.len() == 2 => + { + matches!(&op.operands[0], RenderExpr::Literal(Literal::Integer(0))) + && matches!(&op.operands[1], RenderExpr::Literal(_)) + } + _ => false, + } +} + impl TryFrom for RenderExpr { type Error = RenderBuildError; @@ -985,21 +1001,20 @@ impl TryFrom for RenderExpr { // // Pure literal lists (e.g., [1, 2, 3] for IN clauses, UNWIND) are left as-is // so they preserve their literal element types. - let has_non_literal = items - .iter() - .any(|e| !matches!(e, RenderExpr::Literal(_) | RenderExpr::Parameter(_))); + let has_non_literal = items.iter().any(|e| !is_literal_like(e)); // Skip toString() wrapping when all non-literal elements are bare aliases // (TableAlias, ColumnAlias, or wildcard PropertyAccessExp). These represent // node variables that will be resolved to ID columns by rewrite_bare_variables. - let all_non_literal_are_aliases = items - .iter() - .filter(|e| !matches!(e, RenderExpr::Literal(_) | RenderExpr::Parameter(_))) - .all(|e| match e { - RenderExpr::TableAlias(_) | RenderExpr::ColumnAlias(_) => true, - RenderExpr::PropertyAccessExp(pa) if pa.column.raw() == "*" => true, - _ => false, - }); + let all_non_literal_are_aliases = + items + .iter() + .filter(|e| !is_literal_like(e)) + .all(|e| match e { + RenderExpr::TableAlias(_) | RenderExpr::ColumnAlias(_) => true, + RenderExpr::PropertyAccessExp(pa) if pa.column.raw() == "*" => true, + _ => false, + }); if has_non_literal && !all_non_literal_are_aliases { RenderExpr::List( diff --git a/tests/integration/test_with_cte_node_expansion.py b/tests/integration/test_with_cte_node_expansion.py index f40704de..4d41cf28 100644 --- a/tests/integration/test_with_cte_node_expansion.py +++ b/tests/integration/test_with_cte_node_expansion.py @@ -17,7 +17,7 @@ Each test verifies: - WITH-exported variables expand to multiple columns -- Column names follow pattern: . +- Column names follow pattern: . or _ - Correct number of properties returned - No regressions in base table expansion """ @@ -41,18 +41,22 @@ def get_columns_from_response(response): return [] +def get_var_columns(columns, var): + """Get columns for a variable, accepting both dot and underscore notation.""" + return [col for col in columns if col.startswith(f"{var}.") or col.startswith(f"{var}_")] + + class TestWithBasicNodeExpansion: """Test 1: Basic WITH node export.""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_single_node_export(self): """ Test basic WITH node export. - + MATCH (a:User) WITH a RETURN a - + Expected: a expands to a.user_id, a.name, a.email, etc. """ response = execute_cypher( @@ -64,16 +68,16 @@ def test_with_single_node_export(self): """, schema_name="social_integration" ) - + assert_query_success(response) assert_row_count(response, 1) - + columns = get_columns_from_response(response) - - # Should have multiple columns like "a_user_id", "a_name", etc. - a_columns = [col for col in columns if col.startswith("a_")] - assert len(a_columns) >= 2, f"Expected multiple a_* columns, got: {columns}" - + + # Should have multiple columns like "a.user_id", "a.name", etc. + a_columns = get_var_columns(columns, "a") + assert len(a_columns) >= 2, f"Expected multiple a.* columns, got: {columns}" + # Verify common properties are present assert any("user_id" in col for col in a_columns), \ f"user_id not found in columns: {columns}" @@ -83,16 +87,15 @@ def test_with_single_node_export(self): class TestWithMultipleVariableExport: """Test 2: Multi-variable WITH export.""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_two_node_export(self): """ Test WITH exporting two related nodes. - + MATCH (a:User)-[r:FOLLOWS]->(b:User) WITH a, b RETURN a, b - + Expected: Both a and b expand to properties """ response = execute_cypher( @@ -104,29 +107,28 @@ def test_with_two_node_export(self): """, schema_name="social_integration" ) - + assert_query_success(response) assert_row_count(response, 1) - + columns = get_columns_from_response(response) - + # Check a.* columns - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Expected multiple a.* columns, got: {columns}" - + # Check b.* columns - b_columns = [col for col in columns if col.startswith("b_")] + b_columns = get_var_columns(columns, "b") assert len(b_columns) >= 2, \ f"Expected multiple b.* columns, got: {columns}" - + # Verify both have properties assert any("user_id" in col for col in a_columns), \ f"a.user_id not found in columns: {columns}" assert any("user_id" in col for col in b_columns), \ f"b.user_id not found in columns: {columns}" - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") def test_with_three_node_export(self): """Test WITH exporting three nodes from multi-hop pattern.""" response = execute_cypher( @@ -138,33 +140,32 @@ def test_with_three_node_export(self): """, schema_name="social_integration" ) - + assert_query_success(response) assert_row_count(response, 1) - + columns = get_columns_from_response(response) - + # Check all three variables expanded for var in ["a", "b", "c"]: - var_columns = [col for col in columns if col.startswith(f"{var}_")] + var_columns = get_var_columns(columns, var) assert len(var_columns) >= 2, \ f"Expected multiple {var}.* columns, got: {columns}" class TestWithChaining: """Test 3: WITH chaining (nested CTEs).""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_chaining_two_levels(self): """ Test WITH chaining - nested WITH clauses. - + MATCH (a:User) WITH a MATCH (b:User) WITH a, b RETURN a, b - + Expected: Both a and b expand (a comes from first CTE, b from base table) """ response = execute_cypher( @@ -178,23 +179,22 @@ def test_with_chaining_two_levels(self): """, schema_name="social_integration" ) - + assert_query_success(response) assert_row_count(response, 1) - + columns = get_columns_from_response(response) - + # Check a.* columns (from first CTE) - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Expected multiple a.* columns from first CTE, got: {columns}" - + # Check b.* columns (from base table) - b_columns = [col for col in columns if col.startswith("b_")] + b_columns = get_var_columns(columns, "b") assert len(b_columns) >= 2, \ f"Expected multiple b.* columns from base table, got: {columns}" - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") def test_with_chaining_three_levels(self): """Test WITH chaining with three levels.""" response = execute_cypher( @@ -210,28 +210,28 @@ def test_with_chaining_three_levels(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # All three should have properties for var in ["a", "b", "c"]: - var_columns = [col for col in columns if col.startswith(f"{var}_")] + var_columns = get_var_columns(columns, var) assert len(var_columns) >= 2, \ f"Expected {var}.* properties at level 3, got: {columns}" class TestWithScalarExport: """Test 4: WITH scalar export (aggregates).""" - + def test_with_scalar_count(self): """ Test WITH scalar - aggregation should NOT expand. - + MATCH (a:User) WITH COUNT(a) AS count RETURN count - + Expected: count is single column (scalar), not expanded """ response = execute_cypher( @@ -242,25 +242,24 @@ def test_with_scalar_count(self): """, schema_name="social_integration" ) - + assert_query_success(response) assert_row_count(response, 1) - + columns = get_columns_from_response(response) - + # Should have exactly one column for the count assert "user_count" in columns, \ f"Expected 'user_count' column, got: {columns}" - + # Count should NOT expand (no .* columns) assert not any("user_count." in col for col in columns), \ f"Scalar 'user_count' should not expand, got: {columns}" - + # Verify value is numeric value = get_single_value(response, "user_count", convert_to_int=True) assert value > 0, f"Count should be > 0, got: {value}" - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") def test_with_scalar_and_node(self): """Test WITH mixing scalar aggregation and node export.""" response = execute_cypher( @@ -272,16 +271,16 @@ def test_with_scalar_and_node(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # Node a should expand - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Expected a.* columns, got: {columns}" - + # Scalar should NOT expand assert "follower_count" in columns, \ f"Expected follower_count column, got: {columns}" @@ -291,15 +290,14 @@ def test_with_scalar_and_node(self): class TestWithPropertyRename: """Test 5: WITH property rename.""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_node_rename(self): """ Test WITH node aliased with AS clause. - + MATCH (a:User) WITH a AS person RETURN person - + Expected: person expands to person.user_id, person.name, etc. """ response = execute_cypher( @@ -311,22 +309,21 @@ def test_with_node_rename(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # person should expand (not as 'a') - person_columns = [col for col in columns if col.startswith("person_")] + person_columns = get_var_columns(columns, "person") assert len(person_columns) >= 2, \ f"Expected person.* columns, got: {columns}" - + # Should NOT have 'a.' columns - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) == 0, \ f"Should not have 'a.' columns (renamed to person), got: {columns}" - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") def test_with_multi_rename(self): """Test WITH multiple nodes with renames.""" response = execute_cypher( @@ -338,34 +335,33 @@ def test_with_multi_rename(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # Both renamed variables should expand - follower_columns = [col for col in columns if col.startswith("follower_")] + follower_columns = get_var_columns(columns, "follower") assert len(follower_columns) >= 2, \ f"Expected follower.* columns, got: {columns}" - - followed_columns = [col for col in columns if col.startswith("followed_")] + + followed_columns = get_var_columns(columns, "followed") assert len(followed_columns) >= 2, \ f"Expected followed.* columns, got: {columns}" class TestWithCrossTable: """Test 6: Cross-table WITH patterns.""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_cross_table_multi_hop(self): """ Test complex WITH with multiple hops and different node types. - + MATCH (a:User)-[:FOLLOWS]->(b:User) WITH a, b MATCH (c:Post) WHERE c.user_id = a.user_id RETURN a, b, c - + Expected: a and b from WITH expand, c from base table expands """ response = execute_cypher( @@ -378,18 +374,17 @@ def test_with_cross_table_multi_hop(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # Check all three expand for var in ["a", "b", "c"]: - var_columns = [col for col in columns if col.startswith(f"{var}_")] + var_columns = get_var_columns(columns, var) assert len(var_columns) >= 1, \ f"Expected {var}.* columns, got: {columns}" - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") def test_with_where_filter(self): """Test WITH followed by WHERE filter.""" response = execute_cypher( @@ -401,28 +396,27 @@ def test_with_where_filter(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Expected a.* columns after WHERE, got: {columns}" class TestWithOptionalMatch: """Test 7: Optional match with WITH.""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_optional_match_with_export(self): """ Test OPTIONAL MATCH followed by WITH. - + MATCH (a:User) OPTIONAL MATCH (a)-[:FOLLOWS]->(b:User) WITH a, b RETURN a, b - + Expected: a expands, b may be NULL (OPTIONAL) """ response = execute_cypher( @@ -435,18 +429,18 @@ def test_optional_match_with_export(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) - + # a should expand - a_columns = [col for col in columns if col.startswith("a_")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Expected a.* columns, got: {columns}" - + # b should also expand (or be NULL for OPTIONAL matches) - b_columns = [col for col in columns if col.startswith("b_")] + b_columns = get_var_columns(columns, "b") # b might not have values but should have columns assert len(b_columns) >= 1, \ f"Expected b.* columns (optional), got: {columns}" @@ -454,12 +448,11 @@ def test_optional_match_with_export(self): class TestWithPolymorphicLabels: """Test 8: Polymorphic node labels (edge case).""" - - @pytest.mark.xfail(reason="Code bug: WITH CTE node expansion not properly implemented") + def test_with_multi_label_node(self): """ Test WITH when node might have multiple labels. - + In social_integration, User and Post are distinct. Test WITH on either type. """ @@ -473,12 +466,12 @@ def test_with_multi_label_node(self): """, schema_name="social_integration" ) - + assert_query_success(response_user) user_columns = get_columns_from_response(response_user) - user_a_columns = [col for col in user_columns if col.startswith("a_")] + user_a_columns = get_var_columns(user_columns, "a") assert len(user_a_columns) >= 2 - + # Test with Post response_post = execute_cypher( """ @@ -489,20 +482,20 @@ def test_with_multi_label_node(self): """, schema_name="social_integration" ) - + assert_query_success(response_post) post_columns = get_columns_from_response(response_post) - post_p_columns = [col for col in post_columns if col.startswith("p_")] + post_p_columns = get_var_columns(post_columns, "p") assert len(post_p_columns) >= 1 class TestWithRegressionCases: """Regression tests - ensure existing behavior not broken.""" - + def test_base_table_expansion_unchanged(self): """ Verify base table expansion still works (not changed by fix). - + MATCH (a:User) RETURN a """ @@ -514,19 +507,19 @@ def test_base_table_expansion_unchanged(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) # Base table expansion can use either format (a_name or a.name) - a_columns = [col for col in columns if col.startswith("a_") or col.startswith("a.")] + a_columns = get_var_columns(columns, "a") assert len(a_columns) >= 2, \ f"Base table expansion regression - got: {columns}" def test_property_access_unchanged(self): """ Verify explicit property access still works. - + MATCH (a:User) RETURN a.user_id, a.name """ @@ -538,9 +531,9 @@ def test_property_access_unchanged(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) # Should have exactly these columns assert "a.user_id" in columns or "user_id" in columns, \ @@ -549,7 +542,7 @@ def test_property_access_unchanged(self): def test_aggregation_without_with(self): """ Verify aggregation still works without WITH. - + MATCH (a:User) RETURN COUNT(a) """ @@ -560,16 +553,16 @@ def test_aggregation_without_with(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + value = get_single_value(response, "total", convert_to_int=True) assert value > 0, f"Aggregation without WITH returned: {value}" def test_multi_hop_without_with(self): """ Verify multi-hop still works without WITH. - + MATCH (a)-[:FOLLOWS]->(b)-[:FOLLOWS]->(c) RETURN a, b, c """ @@ -581,12 +574,11 @@ def test_multi_hop_without_with(self): """, schema_name="social_integration" ) - + assert_query_success(response) - + columns = get_columns_from_response(response) for var in ["a", "b", "c"]: - # Base table expansion can use either format (a_name or a.name) - var_columns = [col for col in columns if col.startswith(f"{var}_") or col.startswith(f"{var}.")] + var_columns = get_var_columns(columns, var) assert len(var_columns) >= 1, \ f"Expected {var}.* or {var}_* columns, got: {columns}" diff --git a/tests/rust/integration/complex_feature_tests.rs b/tests/rust/integration/complex_feature_tests.rs index c5cf32ed..64a32add 100644 --- a/tests/rust/integration/complex_feature_tests.rs +++ b/tests/rust/integration/complex_feature_tests.rs @@ -1204,3 +1204,275 @@ async fn test_reversed_anchor_optional_match_with_where_predicate() { sql ); } + +/// Test VLP path function `length(path)` inside WITH clause +#[tokio::test] +async fn test_vlp_length_path_in_with_clause() { + let schema = create_test_schema(); + + let cypher = r#" + MATCH path = (a:User)-[:FOLLOWS*1..3]->(b:User) + WITH a, b, length(path) as hops + WHERE hops = 2 + RETURN a.name, b.name, hops + "#; + + let ast = parse_query(cypher).expect("Failed to parse VLP length(path) in WITH"); + + let result = evaluate_read_query(ast, &schema, None, None); + assert!(result.is_ok(), "Failed to build plan: {:?}", result.err()); + + let (logical_plan, _plan_ctx) = result.unwrap(); + let render_result = logical_plan_to_render_plan(logical_plan, &schema); + assert!( + render_result.is_ok(), + "Failed to render SQL: {:?}", + render_result.err() + ); + + let render_plan = render_result.unwrap(); + let sql = render_plan.to_sql(); + println!("Generated SQL:\n{}", sql); + + let sql_lower = sql.to_lowercase(); + + // VLP should generate recursive CTE + assert!( + sql_lower.contains("recursive"), + "Should contain RECURSIVE CTE for VLP.\nSQL:\n{}", + sql + ); + + // length(path) should be rewritten to t.hop_count, not literal "length(path)" + assert!( + !sql_lower.contains("length(path)"), + "length(path) should be rewritten to hop_count, not left as literal.\nSQL:\n{}", + sql + ); + + // Should contain hop_count reference (from VLP CTE) + assert!( + sql_lower.contains("hop_count"), + "Should contain hop_count from VLP CTE rewriting.\nSQL:\n{}", + sql + ); +} + +/// Test VLP path functions `nodes(path)` and `relationships(path)` inside WITH clause +#[tokio::test] +async fn test_vlp_nodes_relationships_path_in_with_clause() { + let schema = create_test_schema(); + + let cypher = r#" + MATCH path = (a:User)-[:FOLLOWS*1..2]->(b:User) + WITH a, b, nodes(path) as path_nodes, relationships(path) as path_rels + RETURN a.name, size(path_nodes) as node_count, size(path_rels) as rel_count + "#; + + let ast = parse_query(cypher).expect("Failed to parse VLP nodes/relationships in WITH"); + + let result = evaluate_read_query(ast, &schema, None, None); + assert!(result.is_ok(), "Failed to build plan: {:?}", result.err()); + + let (logical_plan, _plan_ctx) = result.unwrap(); + let render_result = logical_plan_to_render_plan(logical_plan, &schema); + assert!( + render_result.is_ok(), + "Failed to render SQL: {:?}", + render_result.err() + ); + + let render_plan = render_result.unwrap(); + let sql = render_plan.to_sql(); + println!("Generated SQL:\n{}", sql); + + let sql_lower = sql.to_lowercase(); + + // nodes(path) should be rewritten to path_nodes CTE column + assert!( + !sql_lower.contains("nodes(path)"), + "nodes(path) should be rewritten, not left as literal.\nSQL:\n{}", + sql + ); + + // relationships(path) should be rewritten to path_relationships CTE column + assert!( + !sql_lower.contains("relationships(path)"), + "relationships(path) should be rewritten, not left as literal.\nSQL:\n{}", + sql + ); + + // Should contain path_nodes and path_relationships references + assert!( + sql_lower.contains("path_nodes"), + "Should contain path_nodes from VLP CTE rewriting.\nSQL:\n{}", + sql + ); + assert!( + sql_lower.contains("path_relationships"), + "Should contain path_relationships from VLP CTE rewriting.\nSQL:\n{}", + sql + ); +} + +/// Test WITH alias rename + WHERE uses the renamed alias +#[tokio::test] +async fn test_with_alias_rename_where_filter() { + use clickgraph::server::query_context::{set_current_schema, with_query_context, QueryContext}; + use std::sync::Arc; + + let schema = create_test_schema(); + + let cypher = r#" + MATCH (u:User) + WITH u AS person + WHERE person.name = 'Alice' + RETURN person.name + "#; + + let schema_clone = schema.clone(); + let ctx = QueryContext::new(Some("default".to_string())); + let sql = with_query_context(ctx, async { + set_current_schema(Arc::new(schema_clone)); + + let ast = parse_query(cypher).expect("Failed to parse WITH alias rename + WHERE"); + + let result = evaluate_read_query(ast, &schema, None, None); + assert!(result.is_ok(), "Failed to build plan: {:?}", result.err()); + + let (logical_plan, _plan_ctx) = result.unwrap(); + let render_result = logical_plan_to_render_plan(logical_plan, &schema); + assert!( + render_result.is_ok(), + "Failed to render SQL: {:?}", + render_result.err() + ); + + let render_plan = render_result.unwrap(); + render_plan.to_sql() + }) + .await; + + println!("Generated SQL:\n{}", sql); + + let sql_lower = sql.to_lowercase(); + + // WHERE clause should NOT reference "person" inside the CTE body + // It should be rewritten to use the original alias "u" + assert!( + !sql_lower.contains("where person."), + "WHERE should not use renamed alias 'person' inside CTE.\nSQL:\n{}", + sql + ); + + // Should contain a WHERE filter referencing the table column + assert!( + sql_lower.contains("where") && sql_lower.contains("full_name"), + "WHERE should filter on the mapped column.\nSQL:\n{}", + sql + ); +} + +/// Test: collect+unwind CTE does not produce duplicate columns after elimination +#[tokio::test] +async fn test_collect_unwind_no_duplicate_cte_columns() { + use clickgraph::server::query_context::{set_current_schema, with_query_context, QueryContext}; + use std::sync::Arc; + + let config = clickgraph::graph_catalog::config::GraphSchemaConfig::from_yaml_file( + "schemas/test/social_integration.yaml", + ) + .expect("Failed to load social_integration schema"); + let schema = config.to_graph_schema().expect("Failed to convert schema"); + + let cypher = r#" + MATCH (u:User) + WITH u, collect(u) as users + UNWIND users as user + RETURN user.name, user.email, user.city + LIMIT 3 + "#; + + let schema_clone = schema.clone(); + let ctx = QueryContext::new(Some("default".to_string())); + let sql = with_query_context(ctx, async { + set_current_schema(Arc::new(schema_clone)); + + let (_remaining, statement) = + clickgraph::open_cypher_parser::parse_cypher_statement(cypher) + .unwrap_or_else(|e| panic!("Failed to parse: {:?}", e)); + + let (logical_plan, _plan_ctx) = clickgraph::query_planner::evaluate_read_statement( + statement, &schema, None, None, None, + ) + .unwrap_or_else(|e| panic!("Failed to plan: {:?}", e)); + + let render_plan = + clickgraph::render_plan::logical_plan_to_render_plan(logical_plan, &schema) + .unwrap_or_else(|e| panic!("Failed to render: {:?}", e)); + render_plan.to_sql() + }) + .await; + + let sql_lower = sql.to_lowercase(); + assert!( + sql_lower.contains("select"), + "Should have SELECT.\nSQL:\n{}", + sql + ); + + // Verify no duplicate columns in CTE body (case-insensitive split on FROM) + let sql_upper = sql.to_uppercase(); + let cte_body = sql_upper.split("FROM").next().unwrap_or(""); + let name_count = cte_body.matches("P1_U_NAME").count(); + assert_eq!( + name_count, 1, + "P1_U_NAME should appear once in CTE body, found {}.\nSQL:\n{}", + name_count, sql + ); +} + +/// Test: COUNT(p) on a fixed-hop path variable resolves to COUNT(*) +#[tokio::test] +async fn test_count_path_variable_fixed_hop() { + use clickgraph::server::query_context::{set_current_schema, with_query_context, QueryContext}; + use std::sync::Arc; + + let schema = create_test_schema(); + + let cypher = r#" + MATCH p = (a:User)-[:FOLLOWS]->(b:User) + RETURN COUNT(p) AS path_count + "#; + + let schema_clone = schema.clone(); + let ctx = QueryContext::new(Some("default".to_string())); + let sql = with_query_context(ctx, async { + set_current_schema(Arc::new(schema_clone)); + + let ast = parse_query(cypher).expect("Failed to parse COUNT(p) query"); + let result = evaluate_read_query(ast, &schema, None, None); + assert!(result.is_ok(), "Failed to plan: {:?}", result.err()); + + let (logical_plan, _plan_ctx) = result.unwrap(); + let render_result = logical_plan_to_render_plan(logical_plan, &schema); + assert!( + render_result.is_ok(), + "Failed to render: {:?}", + render_result.err() + ); + + let render_plan = render_result.unwrap(); + render_plan.to_sql() + }) + .await; + + println!("Generated SQL for COUNT(p):\n{}", sql); + + let sql_lower = sql.to_lowercase(); + assert!( + sql_lower.contains("count(*)") || sql_lower.contains("count(1)"), + "COUNT(p) should be rewritten to COUNT(*) for fixed-hop path.\nSQL:\n{}", + sql + ); +}