Skip to content

Commit 209857a

Browse files
committed
chore: Release v0.5.3
Features: - label() function for scalar label return - EXISTS subquery support - WITH + MATCH chaining support - Regex match (=~) operator - collect() function mapping Bug Fixes: - Column alias for type(), id(), labels() functions - Parser rejects invalid syntax with unparsed input Documentation: - Anonymous nodes limitation documented - Quick Start guide fixes
1 parent ee7c457 commit 209857a

4 files changed

Lines changed: 120 additions & 45 deletions

File tree

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,26 @@
1+
## [0.5.3] - 2025-12-02
2+
3+
### 🚀 Features
4+
5+
- Add `label()` function for scalar label return
6+
- Add EXISTS subquery support
7+
- Add WITH + MATCH chaining support
8+
- Add regex match (`=~`) operator → `match()` function
9+
- Add `collect()` function → `groupArray()` mapping
10+
11+
### 🐛 Bug Fixes
12+
13+
- Fix column alias for `type()`, `id()`, `labels()` graph introspection functions
14+
- Parser now rejects queries that leave unparsed input behind, catching invalid syntax such as `WHERE AND`
15+
16+
### 📚 Documentation
17+
18+
- Add anonymous nodes limitation to KNOWN_ISSUES.md
19+
- Fix Quick Start guide to include required `GRAPH_CONFIG_PATH`
20+
- Clean up resolved issues from KNOWN_ISSUES.md
21+
22+
---
23+
124
## [0.5.2] - 2025-11-30
225

326
### 🚀 Features

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ members = [
55

66
[package]
77
name = "clickgraph"
8-
version = "0.5.2"
8+
version = "0.5.3"
99
edition = "2021"
1010
rust-version = "1.85"
1111

KNOWN_ISSUES.md

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,25 @@
88

99
## Active Issues
1010

11-
### 1. Undirected Patterns - ClickHouse OR-in-JOIN Limitation
11+
### 1. ~~Undirected Patterns - Direction Logic Bug in UNION ALL~~ ✅ FIXED
1212

13-
**Status**: 🔧 Needs UNION ALL implementation
14-
**Severity**: HIGH
15-
**Identified**: November 29, 2025
16-
17-
**Problem**: Undirected patterns `(a)-[r]-(b)` generate OR conditions in JOINs, which ClickHouse handles incorrectly (missing rows).
18-
19-
**Current SQL** (problematic):
20-
```sql
21-
INNER JOIN follows AS r ON (r.follower_id = a.user_id OR r.followed_id = a.user_id)
22-
```
13+
**Status**: ✅ FIXED (December 2, 2025)
14+
**Fix**: Ensured each UNION branch has independent `joined_entities` state and correctly swaps `from_id`/`to_id` columns based on direction.
2315

24-
**Solution**: Generate UNION ALL of two directed queries instead:
16+
**Correct SQL now generated**:
2517
```sql
26-
-- Direction 1
27-
SELECT ... FROM users AS a JOIN follows AS r ON r.follower_id = a.user_id ...
18+
SELECT ... FROM users AS u1
19+
JOIN follows AS r ON r.follower_id = u1.user_id -- Branch 1: outgoing
20+
JOIN users AS u2 ON u2.user_id = r.followed_id
2821
UNION ALL
29-
-- Direction 2
30-
SELECT ... FROM users AS a JOIN follows AS r ON r.followed_id = a.user_id ...
22+
SELECT ... FROM users AS u1
23+
JOIN follows AS r ON r.followed_id = u1.user_id -- Branch 2: incoming (swapped!)
24+
JOIN users AS u2 ON u2.user_id = r.follower_id
3125
```
3226

33-
**Affected Tests**: `test_relationship_degree`, `test_undirected_relationship`
34-
35-
**Design Doc**: `notes/bidirectional-union-approach.md`
36-
3727
---
3828

39-
### 2. Undirected Patterns - Relationship Uniqueness
29+
### 2. Undirected Patterns - Relationship Uniqueness
4030

4131
**Status**: 🔧 Requires relationship IDs in schema
4232
**Severity**: HIGH
@@ -60,7 +50,31 @@ relationships:
6050

6151
---
6252

63-
### 3. Disconnected Patterns Generate Invalid SQL
53+
### 3. Anonymous Nodes Without Labels Not Supported
54+
55+
**Status**: 📋 Limitation
56+
**Severity**: LOW
57+
**Identified**: December 2, 2025
58+
59+
**Problem**: Anonymous nodes without labels cannot be resolved to tables:
60+
```cypher
61+
MATCH ()-[r:FOLLOWS]->() RETURN r LIMIT 5 -- ❌ Broken SQL
62+
MATCH ()-[r]->() RETURN r LIMIT 5 -- ❌ Also broken
63+
```
64+
65+
**Root Cause**: Without a label, the query planner cannot determine which node table to use. The anonymous node gets a generated alias (e.g., `aeba9f1d7f`) but no `table_name`, causing invalid SQL with dangling references.
66+
67+
**Workaround**: Always specify node labels:
68+
```cypher
69+
MATCH (:User)-[r:FOLLOWS]->(:User) RETURN r LIMIT 5 -- ✅ Works
70+
MATCH (a:User)-[r:FOLLOWS]->(b:User) RETURN r LIMIT 5 -- ✅ Works
71+
```
72+
73+
**Future Enhancement**: For schemas with a single relationship type or a polymorphic edge table, the system could infer node types from the relationship's `from_node_label`/`to_node_label` configuration. Deferred for now.
74+
75+
---
76+
77+
### 4. Disconnected Patterns Generate Invalid SQL
6478

6579
**Status**: 🐛 Bug
6680
**Severity**: MEDIUM

src/query_planner/analyzer/graph_join_inference.rs

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ impl AnalyzerPass for GraphJoinInference {
241241
// Empty joins vector = fully denormalized pattern (no JOINs needed)
242242
// Without this wrapper, RenderPlan will try to generate JOINs from raw GraphRel
243243
let optional_aliases = plan_ctx.get_optional_aliases().clone();
244-
Self::build_graph_joins(logical_plan, &mut collected_graph_joins, optional_aliases, plan_ctx)
244+
Self::build_graph_joins(logical_plan, &mut collected_graph_joins, optional_aliases, plan_ctx, graph_schema)
245245
}
246246
}
247247

@@ -775,19 +775,40 @@ impl GraphJoinInference {
775775
collected_graph_joins: &mut Vec<Join>,
776776
optional_aliases: std::collections::HashSet<String>,
777777
plan_ctx: &PlanCtx,
778+
graph_schema: &GraphSchema,
778779
) -> AnalyzerResult<Transformed<Arc<LogicalPlan>>> {
779780
let transformed_plan = match logical_plan.as_ref() {
780-
// If input is a Union, push GraphJoins into each branch
781+
// If input is a Union, process each branch INDEPENDENTLY
782+
// Each branch needs its own collect_graph_joins + build_graph_joins pass
781783
LogicalPlan::Union(union) => {
782-
log::info!("🔄 Union detected in build_graph_joins, processing {} branches", union.inputs.len());
784+
log::info!("🔄 Union detected in build_graph_joins, processing {} branches independently", union.inputs.len());
783785
let mut any_transformed = false;
786+
let graph_join_inference = GraphJoinInference::new();
787+
784788
let transformed_branches: Result<Vec<Arc<LogicalPlan>>, _> = union.inputs.iter().map(|branch| {
785-
let mut branch_joins = collected_graph_joins.clone();
789+
// CRITICAL: Each branch needs fresh state - collect and build separately
790+
let mut branch_joins: Vec<Join> = vec![];
791+
let mut branch_joined_entities: HashSet<String> = HashSet::new();
792+
793+
// Collect joins for this specific branch only
794+
graph_join_inference.collect_graph_joins(
795+
branch.clone(),
796+
branch.clone(),
797+
&mut plan_ctx.clone(), // Clone PlanCtx for each branch
798+
graph_schema,
799+
&mut branch_joins,
800+
&mut branch_joined_entities,
801+
)?;
802+
803+
eprintln!("🔹 Union branch collected {} joins", branch_joins.len());
804+
805+
// Build GraphJoins for this branch with its own collected joins
786806
let result = Self::build_graph_joins(
787807
branch.clone(),
788808
&mut branch_joins,
789809
optional_aliases.clone(),
790810
plan_ctx,
811+
graph_schema,
791812
)?;
792813
if matches!(result, Transformed::Yes(_)) {
793814
any_transformed = true;
@@ -834,6 +855,7 @@ impl GraphJoinInference {
834855
collected_graph_joins,
835856
optional_aliases.clone(),
836857
plan_ctx,
858+
graph_schema,
837859
)?;
838860

839861
// is_denormalized flag is set by view_optimizer pass - just rebuild
@@ -845,18 +867,21 @@ impl GraphJoinInference {
845867
collected_graph_joins,
846868
optional_aliases.clone(),
847869
plan_ctx,
870+
graph_schema,
848871
)?;
849872
let center_tf = Self::build_graph_joins(
850873
graph_rel.center.clone(),
851874
collected_graph_joins,
852875
optional_aliases.clone(),
853876
plan_ctx,
877+
graph_schema,
854878
)?;
855879
let right_tf = Self::build_graph_joins(
856880
graph_rel.right.clone(),
857881
collected_graph_joins,
858882
optional_aliases.clone(),
859883
plan_ctx,
884+
graph_schema,
860885
)?;
861886

862887
graph_rel.rebuild_or_clone(left_tf, center_tf, right_tf, logical_plan.clone())
@@ -867,6 +892,7 @@ impl GraphJoinInference {
867892
collected_graph_joins,
868893
optional_aliases,
869894
plan_ctx,
895+
graph_schema,
870896
)?;
871897
cte.rebuild_or_clone(child_tf, logical_plan.clone())
872898
}
@@ -878,6 +904,7 @@ impl GraphJoinInference {
878904
collected_graph_joins,
879905
optional_aliases,
880906
plan_ctx,
907+
graph_schema,
881908
)?;
882909
graph_joins.rebuild_or_clone(child_tf, logical_plan.clone())
883910
}
@@ -887,6 +914,7 @@ impl GraphJoinInference {
887914
collected_graph_joins,
888915
optional_aliases,
889916
plan_ctx,
917+
graph_schema,
890918
)?;
891919
filter.rebuild_or_clone(child_tf, logical_plan.clone())
892920
}
@@ -896,6 +924,7 @@ impl GraphJoinInference {
896924
collected_graph_joins,
897925
optional_aliases,
898926
plan_ctx,
927+
graph_schema,
899928
)?;
900929
group_by.rebuild_or_clone(child_tf, logical_plan.clone())
901930
}
@@ -905,6 +934,7 @@ impl GraphJoinInference {
905934
collected_graph_joins,
906935
optional_aliases,
907936
plan_ctx,
937+
graph_schema,
908938
)?;
909939
order_by.rebuild_or_clone(child_tf, logical_plan.clone())
910940
}
@@ -914,6 +944,7 @@ impl GraphJoinInference {
914944
collected_graph_joins,
915945
optional_aliases,
916946
plan_ctx,
947+
graph_schema,
917948
)?;
918949
skip.rebuild_or_clone(child_tf, logical_plan.clone())
919950
}
@@ -923,6 +954,7 @@ impl GraphJoinInference {
923954
collected_graph_joins,
924955
optional_aliases,
925956
plan_ctx,
957+
graph_schema,
926958
)?;
927959
limit.rebuild_or_clone(child_tf, logical_plan.clone())
928960
}
@@ -934,6 +966,7 @@ impl GraphJoinInference {
934966
collected_graph_joins,
935967
optional_aliases.clone(),
936968
plan_ctx,
969+
graph_schema,
937970
)?;
938971
inputs_tf.push(child_tf);
939972
}
@@ -947,6 +980,7 @@ impl GraphJoinInference {
947980
collected_graph_joins,
948981
optional_aliases,
949982
plan_ctx,
983+
graph_schema,
950984
)?;
951985
match child_tf {
952986
Transformed::Yes(new_input) => Transformed::Yes(Arc::new(LogicalPlan::Unwind(crate::query_planner::logical_plan::Unwind {
@@ -1153,17 +1187,11 @@ impl GraphJoinInference {
11531187
)
11541188
}
11551189
LogicalPlan::Union(union) => {
1156-
eprintln!("� ? Union, recursing into {} inputs", union.inputs.len());
1157-
for input_plan in union.inputs.iter() {
1158-
self.collect_graph_joins(
1159-
input_plan.clone(),
1160-
root_plan.clone(),
1161-
plan_ctx,
1162-
graph_schema,
1163-
collected_graph_joins,
1164-
joined_entities,
1165-
)?;
1166-
}
1190+
// CRITICAL: Don't recurse into UNION branches here!
1191+
// Each branch will be processed independently by build_graph_joins,
1192+
// which gives each branch its own fresh join list and joined-entities set.
1193+
// If we recurse here with shared state, branches pollute each other.
1194+
eprintln!("🔀 Union detected in collect_graph_joins - skipping recursion (handled by build_graph_joins)");
11671195
Ok(())
11681196
}
11691197
LogicalPlan::PageRank(_) => {
@@ -1529,12 +1557,18 @@ impl GraphJoinInference {
15291557
to_id: "to_node_id".to_string(),
15301558
},
15311559
);
1532-
let rel_from_col = rel_cols.from_id;
1533-
let rel_to_col = rel_cols.to_id;
1560+
1561+
// For Direction::Incoming (from BidirectionalUnion), swap the columns
1562+
// so that the "from" side of the relationship connects to the "to" node
1563+
let (rel_from_col, rel_to_col) = if graph_rel.direction == Direction::Incoming {
1564+
(rel_cols.to_id, rel_cols.from_id) // Swapped for incoming direction
1565+
} else {
1566+
(rel_cols.from_id, rel_cols.to_id) // Normal for outgoing/either
1567+
};
15341568

15351569
eprintln!(
1536-
" � ?? DEBUG REL COLUMNS: rel_from_col = '{}', rel_to_col = '{}'",
1537-
rel_from_col, rel_to_col
1570+
" 🔹 DEBUG REL COLUMNS: direction={:?}, rel_from_col = '{}', rel_to_col = '{}'",
1571+
graph_rel.direction, rel_from_col, rel_to_col
15381572
);
15391573

15401574
// If both nodes are of the same type, check the direction to determine where the left and right nodes appear in the edge list.
@@ -1923,6 +1957,7 @@ impl GraphJoinInference {
19231957
// DON'T mark as joined - denormalized nodes are virtual, not physical tables
19241958
} else {
19251959
// Traditional: Join LEFT node first
1960+
eprintln!(" 🔹 CREATING LEFT JOIN: u1 ON r.{}", rel_from_col);
19261961
let left_graph_join = Join {
19271962
table_name: left_cte_name.clone(),
19281963
table_alias: left_alias.to_string(),
@@ -1944,7 +1979,7 @@ impl GraphJoinInference {
19441979
};
19451980
collected_graph_joins.push(left_graph_join);
19461981
joined_entities.insert(left_alias.to_string());
1947-
eprintln!(" � ? LEFT node '{}' joined first", left_alias);
1982+
eprintln!(" LEFT node '{}' joined first", left_alias);
19481983
}
19491984
}
19501985

@@ -3623,6 +3658,9 @@ mod tests {
36233658
assert_eq!(rel_join_condition.operands.len(), 2);
36243659

36253660
// For incoming direction, the relationship connects differently
3661+
// Pattern (p2)<-[f1]-(p1) means p1 FOLLOWS p2, so:
3662+
// - f1.from_id = p1.id (source)
3663+
// - f1.to_id = p2.id (target) ← p2 is the anchor, connects via to_id
36263664
match (
36273665
&rel_join_condition.operands[0],
36283666
&rel_join_condition.operands[1],
@@ -3632,7 +3670,7 @@ mod tests {
36323670
LogicalExpr::PropertyAccessExp(right_prop),
36333671
) => {
36343672
assert_eq!(rel_prop.table_alias.0, "f1");
3635-
assert_eq!(rel_prop.column.raw(), "from_id");
3673+
assert_eq!(rel_prop.column.raw(), "to_id"); // p2 is target, connects via to_id
36363674
assert_eq!(right_prop.table_alias.0, "p2");
36373675
assert_eq!(right_prop.column.raw(), "id");
36383676
}
@@ -3659,7 +3697,7 @@ mod tests {
36593697
assert_eq!(p1_prop.table_alias.0, "p1");
36603698
assert_eq!(p1_prop.column.raw(), "id");
36613699
assert_eq!(rel_prop.table_alias.0, "f1");
3662-
assert_eq!(rel_prop.column.raw(), "to_id");
3700+
assert_eq!(rel_prop.column.raw(), "from_id"); // p1 is source, connects via from_id
36633701
}
36643702
_ => panic!("Expected PropertyAccessExp operands for p1 join"),
36653703
}

0 commit comments

Comments
 (0)