Skip to content

Commit f641ec7

Browse files
genezhang and claude committed
fix: denormalized VLP cycle prevention skips table lookup for virtual nodes
Fixed-length VLP (*2, *3) on denormalized schemas failed with "Missing table information for start node in cycle prevention" because extract_table_name returns None for virtual nodes that have no separate table.

Fix: move the extract_table_name calls inside the non-denormalized branch in both filter_builder.rs and plan_builder_utils.rs. For denormalized patterns, cycle prevention uses the relationship columns (from_id/to_id) directly — no table name is needed.

Promotes 19 xfails:
- 9 denormalized VLP tests (vlp_exact, vlp_range, vlp_path_var, shortest_path)
- 10 polymorphic VLP tests (already generating correct SQL)

Converts 3 invalid-property tests to skips (a.airport is not a valid property).
12 coupled schema xfails remain (multi-relationship VLP untestable).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e060e19 commit f641ec7

3 files changed

Lines changed: 45 additions & 66 deletions

File tree

src/render_plan/filter_builder.rs

Lines changed: 15 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -267,32 +267,30 @@ impl FilterBuilder for LogicalPlan {
267267
let is_denormalized = is_node_denormalized(&graph_rel.left)
268268
&& is_node_denormalized(&graph_rel.right);
269269

270-
// Extract table/column info for cycle prevention
271-
// Use extract_table_name directly to avoid wrong fallbacks
272-
let start_table =
273-
extract_table_name(&graph_rel.left).ok_or_else(|| {
274-
RenderBuildError::MissingTableInfo(
275-
"start node in cycle prevention".to_string(),
276-
)
277-
})?;
278-
let end_table =
279-
extract_table_name(&graph_rel.right).ok_or_else(|| {
280-
RenderBuildError::MissingTableInfo(
281-
"end node in cycle prevention".to_string(),
282-
)
283-
})?;
284-
285270
let rel_cols = extract_relationship_columns(&graph_rel.center)
286271
.unwrap_or(RelationshipColumns {
287272
from_id: Identifier::Single("from_node_id".to_string()),
288273
to_id: Identifier::Single("to_node_id".to_string()),
289274
});
290275

291-
// For denormalized, use relationship columns directly
292-
// For normal, use node ID columns
276+
// For denormalized, use relationship columns directly (nodes
277+
// have no separate table — extract_table_name would fail).
278+
// For normal schemas, use node ID columns from node tables.
293279
let (start_id_col, end_id_col) = if is_denormalized {
294280
(rel_cols.from_id.to_string(), rel_cols.to_id.to_string())
295281
} else {
282+
let start_table =
283+
extract_table_name(&graph_rel.left).ok_or_else(|| {
284+
RenderBuildError::MissingTableInfo(
285+
"start node in cycle prevention".to_string(),
286+
)
287+
})?;
288+
let end_table =
289+
extract_table_name(&graph_rel.right).ok_or_else(|| {
290+
RenderBuildError::MissingTableInfo(
291+
"end node in cycle prevention".to_string(),
292+
)
293+
})?;
296294
let start = extract_id_column(&graph_rel.left)
297295
.unwrap_or_else(|| table_to_id_column(&start_table));
298296
let end = extract_id_column(&graph_rel.right)

src/render_plan/plan_builder_utils.rs

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1725,31 +1725,31 @@ pub fn extract_filters(plan: &LogicalPlan) -> RenderPlanBuilderResult<Option<Ren
17251725
let is_denormalized = is_node_denormalized(&graph_rel.left)
17261726
&& is_node_denormalized(&graph_rel.right);
17271727

1728-
// Extract table/column info for cycle prevention
1729-
// Use extract_table_name directly to avoid wrong fallbacks
1730-
let start_table = extract_table_name(&graph_rel.left).ok_or_else(|| {
1731-
RenderBuildError::MissingTableInfo(
1732-
"start node in cycle prevention".to_string(),
1733-
)
1734-
})?;
1735-
let end_table = extract_table_name(&graph_rel.right).ok_or_else(|| {
1736-
RenderBuildError::MissingTableInfo(
1737-
"end node in cycle prevention".to_string(),
1738-
)
1739-
})?;
1740-
17411728
let rel_cols = extract_relationship_columns(&graph_rel.center).unwrap_or(
17421729
RelationshipColumns {
17431730
from_id: Identifier::Single("from_node_id".to_string()),
17441731
to_id: Identifier::Single("to_node_id".to_string()),
17451732
},
17461733
);
17471734

1748-
// For denormalized, use relationship columns directly
1749-
// For normal, use node ID columns
1735+
// For denormalized, use relationship columns directly (nodes
1736+
// have no separate table — extract_table_name would fail).
1737+
// For normal schemas, use node ID columns from node tables.
17501738
let (start_id_col, end_id_col) = if is_denormalized {
17511739
(rel_cols.from_id.to_string(), rel_cols.to_id.to_string())
17521740
} else {
1741+
let start_table =
1742+
extract_table_name(&graph_rel.left).ok_or_else(|| {
1743+
RenderBuildError::MissingTableInfo(
1744+
"start node in cycle prevention".to_string(),
1745+
)
1746+
})?;
1747+
let end_table =
1748+
extract_table_name(&graph_rel.right).ok_or_else(|| {
1749+
RenderBuildError::MissingTableInfo(
1750+
"end node in cycle prevention".to_string(),
1751+
)
1752+
})?;
17531753
// Use extract_end_node_id_column for nested GraphRel patterns
17541754
// (e.g., (a)-[:R]->(b)-[:VLP*]->(c) where left is a GraphRel).
17551755
// extract_id_column follows rel.center (relationship table) returning FKs

tests/integration/query_patterns/test_pattern_schema_matrix.py

Lines changed: 15 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -685,7 +685,6 @@ def test_multi_hop_2(self):
685685
result = execute_query(query, "ontime_flights")
686686
assert "error" not in result, f"Query failed: {result}"
687687

688-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
689688
def test_vlp_exact_0(self):
690689
"""
691690
Variable-length path with exact hops
@@ -695,7 +694,7 @@ def test_vlp_exact_0(self):
695694
result = execute_query(query, "ontime_flights")
696695
assert "error" not in result, f"Query failed: {result}"
697696

698-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
697+
@pytest.mark.skip(reason="Invalid test: Airport has no 'airport' property (valid: code, city, state)")
699698
def test_vlp_exact_1(self):
700699
"""
701700
Variable-length path with exact hops
@@ -705,7 +704,6 @@ def test_vlp_exact_1(self):
705704
result = execute_query(query, "ontime_flights")
706705
assert "error" not in result, f"Query failed: {result}"
707706

708-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
709707
def test_vlp_exact_2(self):
710708
"""
711709
Variable-length path with exact hops
@@ -715,7 +713,6 @@ def test_vlp_exact_2(self):
715713
result = execute_query(query, "ontime_flights")
716714
assert "error" not in result, f"Query failed: {result}"
717715

718-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
719716
def test_vlp_range_0(self):
720717
"""
721718
Variable-length path with range
@@ -725,7 +722,6 @@ def test_vlp_range_0(self):
725722
result = execute_query(query, "ontime_flights")
726723
assert "error" not in result, f"Query failed: {result}"
727724

728-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
729725
def test_vlp_range_1(self):
730726
"""
731727
Variable-length path with range
@@ -735,7 +731,6 @@ def test_vlp_range_1(self):
735731
result = execute_query(query, "ontime_flights")
736732
assert "error" not in result, f"Query failed: {result}"
737733

738-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
739734
def test_vlp_range_2(self):
740735
"""
741736
Variable-length path with range
@@ -745,7 +740,6 @@ def test_vlp_range_2(self):
745740
result = execute_query(query, "ontime_flights")
746741
assert "error" not in result, f"Query failed: {result}"
747742

748-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
749743
def test_vlp_path_var_0(self):
750744
"""
751745
Path variable with functions
@@ -755,7 +749,6 @@ def test_vlp_path_var_0(self):
755749
result = execute_query(query, "ontime_flights")
756750
assert "error" not in result, f"Query failed: {result}"
757751

758-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
759752
def test_vlp_path_var_1(self):
760753
"""
761754
Path variable with functions
@@ -765,7 +758,6 @@ def test_vlp_path_var_1(self):
765758
result = execute_query(query, "ontime_flights")
766759
assert "error" not in result, f"Query failed: {result}"
767760

768-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
769761
def test_vlp_path_var_2(self):
770762
"""
771763
Path variable with functions
@@ -966,7 +958,7 @@ def test_order_limit_2(self):
966958
result = execute_query(query, "ontime_flights")
967959
assert "error" not in result, f"Query failed: {result}"
968960

969-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
961+
@pytest.mark.skip(reason="Invalid test: Airport has no 'airport' property (valid: code, city, state)")
970962
def test_shortest_path_0(self):
971963
"""
972964
Shortest path query
@@ -976,7 +968,7 @@ def test_shortest_path_0(self):
976968
result = execute_query(query, "ontime_flights")
977969
assert "error" not in result, f"Query failed: {result}"
978970

979-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
971+
@pytest.mark.skip(reason="Invalid test: Airport has no 'airport' property (valid: code, city, state)")
980972
def test_shortest_path_1(self):
981973
"""
982974
Shortest path query
@@ -986,7 +978,6 @@ def test_shortest_path_1(self):
986978
result = execute_query(query, "ontime_flights")
987979
assert "error" not in result, f"Query failed: {result}"
988980

989-
@pytest.mark.xfail(reason="Denormalized schema: node property resolution or VLP CTE gap")
990981
def test_shortest_path_2(self):
991982
"""
992983
Shortest path query
@@ -1182,7 +1173,6 @@ def test_vlp_exact_1(self):
11821173
result = execute_query(query, "social_polymorphic")
11831174
assert "error" not in result, f"Query failed: {result}"
11841175

1185-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
11861176
def test_vlp_exact_2(self):
11871177
"""
11881178
Variable-length path with exact hops
@@ -1192,7 +1182,6 @@ def test_vlp_exact_2(self):
11921182
result = execute_query(query, "social_polymorphic")
11931183
assert "error" not in result, f"Query failed: {result}"
11941184

1195-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
11961185
def test_vlp_range_0(self):
11971186
"""
11981187
Variable-length path with range
@@ -1202,7 +1191,6 @@ def test_vlp_range_0(self):
12021191
result = execute_query(query, "social_polymorphic")
12031192
assert "error" not in result, f"Query failed: {result}"
12041193

1205-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
12061194
def test_vlp_range_1(self):
12071195
"""
12081196
Variable-length path with range
@@ -1212,7 +1200,6 @@ def test_vlp_range_1(self):
12121200
result = execute_query(query, "social_polymorphic")
12131201
assert "error" not in result, f"Query failed: {result}"
12141202

1215-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
12161203
def test_vlp_range_2(self):
12171204
"""
12181205
Variable-length path with range
@@ -1222,7 +1209,6 @@ def test_vlp_range_2(self):
12221209
result = execute_query(query, "social_polymorphic")
12231210
assert "error" not in result, f"Query failed: {result}"
12241211

1225-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
12261212
def test_vlp_path_var_0(self):
12271213
"""
12281214
Path variable with functions
@@ -1232,7 +1218,6 @@ def test_vlp_path_var_0(self):
12321218
result = execute_query(query, "social_polymorphic")
12331219
assert "error" not in result, f"Query failed: {result}"
12341220

1235-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
12361221
def test_vlp_path_var_1(self):
12371222
"""
12381223
Path variable with functions
@@ -1242,7 +1227,6 @@ def test_vlp_path_var_1(self):
12421227
result = execute_query(query, "social_polymorphic")
12431228
assert "error" not in result, f"Query failed: {result}"
12441229

1245-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
12461230
def test_vlp_path_var_2(self):
12471231
"""
12481232
Path variable with functions
@@ -1441,7 +1425,6 @@ def test_order_limit_2(self):
14411425
result = execute_query(query, "social_polymorphic")
14421426
assert "error" not in result, f"Query failed: {result}"
14431427

1444-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
14451428
def test_shortest_path_0(self):
14461429
"""
14471430
Shortest path query
@@ -1451,7 +1434,6 @@ def test_shortest_path_0(self):
14511434
result = execute_query(query, "social_polymorphic")
14521435
assert "error" not in result, f"Query failed: {result}"
14531436

1454-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
14551437
def test_shortest_path_1(self):
14561438
"""
14571439
Shortest path query
@@ -1461,7 +1443,6 @@ def test_shortest_path_1(self):
14611443
result = execute_query(query, "social_polymorphic")
14621444
assert "error" not in result, f"Query failed: {result}"
14631445

1464-
@pytest.mark.xfail(reason="Polymorphic schema: VLP or shortest path gap")
14651446
def test_shortest_path_2(self):
14661447
"""
14671448
Shortest path query
@@ -1642,7 +1623,7 @@ def test_multi_hop_2(self):
16421623
result = execute_query(query, "zeek_dns")
16431624
assert "error" not in result, f"Query failed: {result}"
16441625

1645-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1626+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16461627
def test_vlp_exact_0(self):
16471628
"""
16481629
Variable-length path with exact hops
@@ -1652,7 +1633,7 @@ def test_vlp_exact_0(self):
16521633
result = execute_query(query, "zeek_dns")
16531634
assert "error" not in result, f"Query failed: {result}"
16541635

1655-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1636+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16561637
def test_vlp_exact_1(self):
16571638
"""
16581639
Variable-length path with exact hops
@@ -1662,7 +1643,7 @@ def test_vlp_exact_1(self):
16621643
result = execute_query(query, "zeek_dns")
16631644
assert "error" not in result, f"Query failed: {result}"
16641645

1665-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1646+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16661647
def test_vlp_exact_2(self):
16671648
"""
16681649
Variable-length path with exact hops
@@ -1672,7 +1653,7 @@ def test_vlp_exact_2(self):
16721653
result = execute_query(query, "zeek_dns")
16731654
assert "error" not in result, f"Query failed: {result}"
16741655

1675-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1656+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16761657
def test_vlp_range_0(self):
16771658
"""
16781659
Variable-length path with range
@@ -1682,7 +1663,7 @@ def test_vlp_range_0(self):
16821663
result = execute_query(query, "zeek_dns")
16831664
assert "error" not in result, f"Query failed: {result}"
16841665

1685-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1666+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16861667
def test_vlp_range_1(self):
16871668
"""
16881669
Variable-length path with range
@@ -1692,7 +1673,7 @@ def test_vlp_range_1(self):
16921673
result = execute_query(query, "zeek_dns")
16931674
assert "error" not in result, f"Query failed: {result}"
16941675

1695-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1676+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
16961677
def test_vlp_range_2(self):
16971678
"""
16981679
Variable-length path with range
@@ -1702,7 +1683,7 @@ def test_vlp_range_2(self):
17021683
result = execute_query(query, "zeek_dns")
17031684
assert "error" not in result, f"Query failed: {result}"
17041685

1705-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1686+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
17061687
def test_vlp_path_var_0(self):
17071688
"""
17081689
Path variable with functions
@@ -1712,7 +1693,7 @@ def test_vlp_path_var_0(self):
17121693
result = execute_query(query, "zeek_dns")
17131694
assert "error" not in result, f"Query failed: {result}"
17141695

1715-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1696+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
17161697
def test_vlp_path_var_1(self):
17171698
"""
17181699
Path variable with functions
@@ -1722,7 +1703,7 @@ def test_vlp_path_var_1(self):
17221703
result = execute_query(query, "zeek_dns")
17231704
assert "error" not in result, f"Query failed: {result}"
17241705

1725-
@pytest.mark.xfail(reason="Coupled schema: VLP not supported on denormalized edge tables")
1706+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
17261707
def test_vlp_path_var_2(self):
17271708
"""
17281709
Path variable with functions
@@ -1924,7 +1905,7 @@ def test_order_limit_2(self):
19241905
result = execute_query(query, "zeek_dns")
19251906
assert "error" not in result, f"Query failed: {result}"
19261907

1927-
@pytest.mark.xfail(reason="Coupled schema: VLP/shortestPath not supported on denormalized edge tables")
1908+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
19281909
def test_shortest_path_0(self):
19291910
"""
19301911
Shortest path query
@@ -1934,7 +1915,7 @@ def test_shortest_path_0(self):
19341915
result = execute_query(query, "zeek_dns")
19351916
assert "error" not in result, f"Query failed: {result}"
19361917

1937-
@pytest.mark.xfail(reason="Coupled schema: VLP/shortestPath not supported on denormalized edge tables")
1918+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
19381919
def test_shortest_path_1(self):
19391920
"""
19401921
Shortest path query
@@ -1944,7 +1925,7 @@ def test_shortest_path_1(self):
19441925
result = execute_query(query, "zeek_dns")
19451926
assert "error" not in result, f"Query failed: {result}"
19461927

1947-
@pytest.mark.xfail(reason="Coupled schema: VLP/shortestPath not supported on denormalized edge tables")
1928+
@pytest.mark.xfail(reason="Coupled schema: VLP on multi-relationship denormalized tables")
19481929
def test_shortest_path_2(self):
19491930
"""
19501931
Shortest path query

0 commit comments

Comments
 (0)