Skip to content

Commit a6f3e6a

Browse files
ser-vasilich authored and
singaraiona committed
patch
1 parent f16f29e commit a6f3e6a

File tree

4 files changed

+149
-81
lines changed

4 files changed

+149
-81
lines changed

core/index.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3105,7 +3105,7 @@ static obj_p __join_probe_chunk(i64_t len, i64_t offset, void *raw_ctx) {
31053105
for (i = offset; i < end; i++) {
31063106
if (i + 8 < end) {
31073107
u64_t ph = __index_list_hash_get(i + 8, &ctx->list_ctx) & jht->mask;
3108-
__builtin_prefetch(&jht->heads[ph], 0, 0);
3108+
__builtin_prefetch(&AS_I64(jht->heads)[ph], 0, 0);
31093109
}
31103110
match_rids[i] = __jht_probe(jht, i, &ctx->list_ctx);
31113111
}

core/join.c

Lines changed: 41 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -36,116 +36,79 @@
3636
#include "query.h"
3737

3838
obj_p select_column(obj_p left_col, obj_p right_col, i64_t ids[], i64_t len) {
39-
i64_t i, idx;
39+
i64_t i;
4040
i8_t type;
4141
obj_p res;
42+
b8_t has_left = !is_null(left_col);
4243

43-
// there is no such column in the right table
4444
if (is_null(right_col))
4545
return clone_obj(left_col);
4646

47-
// Column exists only in right table — all ids are valid, use at_ids fast path
48-
if (is_null(left_col))
49-
return at_ids(right_col, ids, len);
50-
51-
type = left_col->type;
47+
type = has_left ? left_col->type : right_col->type;
5248

53-
if (right_col->type != type)
49+
if (has_left && right_col->type != type)
5450
return err_type(type, right_col->type, 0, 0);
5551

5652
res = vector(type, len);
5753

54+
#define MERGE_LOOP(T, AS, NULL_VAL) { \
55+
T *ro = AS(right_col), *out = AS(res); \
56+
if (has_left) { \
57+
T *lo = AS(left_col); \
58+
for (i = 0; i < len; i++) out[i] = (ids[i] != NULL_I64) ? ro[ids[i]] : lo[i]; \
59+
} else { \
60+
for (i = 0; i < len; i++) out[i] = (ids[i] != NULL_I64) ? ro[ids[i]] : NULL_VAL; \
61+
} \
62+
break; \
63+
}
64+
5865
switch (type) {
5966
case TYPE_B8:
6067
case TYPE_U8:
61-
case TYPE_C8: {
62-
u8_t *lo = AS_U8(left_col);
63-
u8_t *ro = AS_U8(right_col);
64-
u8_t *out = AS_U8(res);
65-
for (i = 0; i < len; i++) {
66-
idx = ids[i];
67-
out[i] = (idx != NULL_I64) ? ro[idx] : lo[i];
68-
}
69-
break;
70-
}
71-
case TYPE_I16: {
72-
i16_t *lo = AS_I16(left_col);
73-
i16_t *ro = AS_I16(right_col);
74-
i16_t *out = AS_I16(res);
75-
for (i = 0; i < len; i++) {
76-
idx = ids[i];
77-
out[i] = (idx != NULL_I64) ? ro[idx] : lo[i];
78-
}
79-
break;
80-
}
68+
case TYPE_C8: MERGE_LOOP(u8_t, AS_U8, 0)
69+
case TYPE_I16: MERGE_LOOP(i16_t, AS_I16, NULL_I16)
8170
case TYPE_I32:
8271
case TYPE_DATE:
83-
case TYPE_TIME: {
84-
i32_t *lo = AS_I32(left_col);
85-
i32_t *ro = AS_I32(right_col);
86-
i32_t *out = AS_I32(res);
87-
for (i = 0; i < len; i++) {
88-
idx = ids[i];
89-
out[i] = (idx != NULL_I64) ? ro[idx] : lo[i];
90-
}
91-
break;
92-
}
72+
case TYPE_TIME: MERGE_LOOP(i32_t, AS_I32, NULL_I32)
9373
case TYPE_I64:
9474
case TYPE_SYMBOL:
95-
case TYPE_TIMESTAMP: {
96-
i64_t *lo = AS_I64(left_col);
97-
i64_t *ro = AS_I64(right_col);
98-
i64_t *out = AS_I64(res);
99-
for (i = 0; i < len; i++) {
100-
idx = ids[i];
101-
out[i] = (idx != NULL_I64) ? ro[idx] : lo[i];
102-
}
103-
break;
104-
}
105-
case TYPE_F64: {
106-
f64_t *lo = AS_F64(left_col);
107-
f64_t *ro = AS_F64(right_col);
108-
f64_t *out = AS_F64(res);
109-
for (i = 0; i < len; i++) {
110-
idx = ids[i];
111-
out[i] = (idx != NULL_I64) ? ro[idx] : lo[i];
112-
}
113-
break;
114-
}
75+
case TYPE_TIMESTAMP: MERGE_LOOP(i64_t, AS_I64, NULL_I64)
76+
case TYPE_F64: MERGE_LOOP(f64_t, AS_F64, NULL_F64)
11577
case TYPE_GUID: {
116-
guid_t *lo = AS_GUID(left_col);
117-
guid_t *ro = AS_GUID(right_col);
118-
guid_t *out = AS_GUID(res);
119-
for (i = 0; i < len; i++) {
120-
idx = ids[i];
121-
memcpy(out[i], (idx != NULL_I64) ? ro[idx] : lo[i], sizeof(guid_t));
78+
guid_t *ro = AS_GUID(right_col), *out = AS_GUID(res);
79+
guid_t null_guid = {0};
80+
if (has_left) {
81+
guid_t *lo = AS_GUID(left_col);
82+
for (i = 0; i < len; i++) memcpy(out[i], (ids[i] != NULL_I64) ? ro[ids[i]] : lo[i], sizeof(guid_t));
83+
} else {
84+
for (i = 0; i < len; i++) memcpy(out[i], (ids[i] != NULL_I64) ? ro[ids[i]] : null_guid, sizeof(guid_t));
12285
}
12386
break;
12487
}
12588
case TYPE_LIST: {
126-
obj_p *lo = AS_LIST(left_col);
127-
obj_p *ro = AS_LIST(right_col);
128-
obj_p *out = AS_LIST(res);
129-
for (i = 0; i < len; i++) {
130-
idx = ids[i];
131-
out[i] = clone_obj((idx != NULL_I64) ? ro[idx] : lo[i]);
89+
obj_p *ro = AS_LIST(right_col), *out = AS_LIST(res);
90+
if (has_left) {
91+
obj_p *lo = AS_LIST(left_col);
92+
for (i = 0; i < len; i++) out[i] = clone_obj((ids[i] != NULL_I64) ? ro[ids[i]] : lo[i]);
93+
} else {
94+
for (i = 0; i < len; i++) out[i] = (ids[i] != NULL_I64) ? clone_obj(ro[ids[i]]) : NULL_OBJ;
13295
}
13396
break;
13497
}
13598
default: {
136-
obj_p v;
137-
for (i = 0; i < len; i++) {
138-
idx = ids[i];
139-
if (idx != NULL_I64)
140-
v = at_idx(right_col, idx);
141-
else
142-
v = at_idx(left_col, i);
143-
ins_obj(&res, i, v);
99+
if (has_left) {
100+
for (i = 0; i < len; i++)
101+
ins_obj(&res, i, (ids[i] != NULL_I64) ? at_idx(right_col, ids[i]) : at_idx(left_col, i));
102+
} else {
103+
for (i = 0; i < len; i++)
104+
ins_obj(&res, i, (ids[i] != NULL_I64) ? at_idx(right_col, ids[i]) : NULL_OBJ);
144105
}
145106
break;
146107
}
147108
}
148109

110+
#undef MERGE_LOOP
111+
149112
return res;
150113
}
151114

tests/join_tests.c

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,76 @@ test_result_t test_join_left_basic() {
101101
PASS();
102102
}
103103

104+
// ==================== LEFT JOIN SHARED COLUMN TYPES ====================
105+
test_result_t test_join_left_shared_column_types() {
106+
// Shared I64 column, partial match — exercises select_column I64 branch
107+
TEST_ASSERT_EQ(
108+
"(set t1 (table [id shared] (list [1 2 3] [10 20 30])))"
109+
"(set t2 (table [id shared] (list [1 3] [100 300])))"
110+
"(at (left-join [id] t1 t2) 'shared)",
111+
"[100 20 300]");
112+
113+
// Shared F64 column, partial match
114+
TEST_ASSERT_EQ(
115+
"(set t1 (table [id shared] (list [1 2 3] [1.1 2.2 3.3])))"
116+
"(set t2 (table [id shared] (list [1 3] [10.1 30.3])))"
117+
"(at (left-join [id] t1 t2) 'shared)",
118+
"[10.1 2.2 30.3]");
119+
120+
// Shared Symbol column, partial match
121+
TEST_ASSERT_EQ(
122+
"(set t1 (table [id shared] (list [1 2 3] [aaa bbb ccc])))"
123+
"(set t2 (table [id shared] (list [1 3] [xxx zzz])))"
124+
"(at (left-join [id] t1 t2) 'shared)",
125+
"[xxx bbb zzz]");
126+
127+
// Shared Date column, partial match
128+
TEST_ASSERT_EQ(
129+
"(set t1 (table [id shared] (list [1 2 3] [2024.01.01 2024.01.02 2024.01.03])))"
130+
"(set t2 (table [id shared] (list [1 3] [2025.06.01 2025.06.03])))"
131+
"(at (left-join [id] t1 t2) 'shared)",
132+
"[2025.06.01 2024.01.02 2025.06.03]");
133+
134+
// Shared Time column, partial match
135+
TEST_ASSERT_EQ(
136+
"(set t1 (table [id shared] (list [1 2 3] [10:00:00.000 10:00:01.000 10:00:02.000])))"
137+
"(set t2 (table [id shared] (list [1 3] [20:00:00.000 20:00:02.000])))"
138+
"(at (left-join [id] t1 t2) 'shared)",
139+
"[20:00:00.000 10:00:01.000 20:00:02.000]");
140+
141+
// Shared Timestamp column, partial match
142+
TEST_ASSERT_EQ(
143+
"(set t1 (table [id shared] (list [1 2 3] "
144+
"[2024.01.01D10:00:00.000000000 2024.01.01D10:00:01.000000000 2024.01.01D10:00:02.000000000])))"
145+
"(set t2 (table [id shared] (list [1 3] "
146+
"[2025.06.01D20:00:00.000000000 2025.06.01D20:00:02.000000000])))"
147+
"(at (left-join [id] t1 t2) 'shared)",
148+
"[2025.06.01D20:00:00.000000000 2024.01.01D10:00:01.000000000 2025.06.01D20:00:02.000000000]");
149+
150+
// Shared B8 column, partial match
151+
TEST_ASSERT_EQ(
152+
"(set t1 (table [id shared] (list [1 2 3] [true false true])))"
153+
"(set t2 (table [id shared] (list [1 3] [false true])))"
154+
"(at (left-join [id] t1 t2) 'shared)",
155+
"[false false true]");
156+
157+
// Shared GUID column, partial match
158+
TEST_ASSERT_EQ(
159+
"(set t1 (table [id shared] (list [1 2 3] (guid 3))))"
160+
"(set t2 (table [id shared] (list [1 3] (guid 2))))"
161+
"(count (left-join [id] t1 t2))",
162+
"3");
163+
164+
// Shared I16 column (cast via as), partial match
165+
TEST_ASSERT_EQ(
166+
"(set t1 (table [id shared] (list [1 2 3] (as 'I16 [10 20 30]))))"
167+
"(set t2 (table [id shared] (list [1 3] (as 'I16 [100 300]))))"
168+
"(at (left-join [id] t1 t2) 'shared)",
169+
"[100 20 300]");
170+
171+
PASS();
172+
}
173+
104174
// ==================== JOIN ON SINGLE KEY TYPES ====================
105175
test_result_t test_join_single_key_types() {
106176
// Join on I32 key
@@ -574,8 +644,7 @@ test_result_t test_join_parallel() {
574644
"(sum (at (left-join [id] t1 t2) 'val1))",
575645
"199990000");
576646

577-
// Verify left join — right values correct for matched rows
578-
// val2 = 2*id for ids 0..9999, should be filled from right; rest from left (val1)
647+
// Left join — right-only column sum
579648
TEST_ASSERT_EQ(
580649
"(set t1 (table [id val1] (list (til 20000) (til 20000))))"
581650
"(set t2 (table [id val2] (list (til 10000) (* 2 (til 10000)))))"
@@ -599,5 +668,40 @@ test_result_t test_join_parallel() {
599668
"(sum (at (left-join [id k] t1 t2) 'val1))",
600669
"199990000");
601670

671+
// Left join, right-only column with partial match (multi-key)
672+
// t1 has no val2 column. t2 has val2. Rows (3,a) and (4,a) are unmatched.
673+
// val2 for unmatched rows must be null-filled, not crash.
674+
TEST_ASSERT_EQ(
675+
"(set t1 (table [k1 k2] (list [1 2 3 4] [a a a a])))"
676+
"(set t2 (table [k1 k2 val2] (list [1 2] [a a] [100 200])))"
677+
"(count (left-join [k1 k2] t1 t2))",
678+
"4");
679+
680+
TEST_ASSERT_EQ(
681+
"(set t1 (table [k1 k2] (list [1 2 3 4] [a a a a])))"
682+
"(set t2 (table [k1 k2 val2] (list [1 2] [a a] [100 200])))"
683+
"(at (left-join [k1 k2] t1 t2) 'val2)",
684+
"[100 200 0Nl 0Nl]");
685+
686+
// Left join, right-only column, >16K rows, half unmatched
687+
TEST_ASSERT_EQ(
688+
"(set t1 (table [id k] (list (til 20000) (* 10 (til 20000)))))"
689+
"(set t2 (table [id k val2] (list (til 10000) (* 10 (til 10000)) (* 2 (til 10000)))))"
690+
"(count (left-join [id k] t1 t2))",
691+
"20000");
692+
693+
// Multi-key join with duplicate composite keys in right table
694+
TEST_ASSERT_EQ(
695+
"(set t1 (table [k1 k2 val1] (list [1 1 2 2] [a b a b] [10 20 30 40])))"
696+
"(set t2 (table [k1 k2 val2] (list [1 1 2] [a a b] [100 101 200])))"
697+
"(count (inner-join [k1 k2] t1 t2))",
698+
"2");
699+
700+
TEST_ASSERT_EQ(
701+
"(set t1 (table [k1 k2 val1] (list [1 1 2 2] [a b a b] [10 20 30 40])))"
702+
"(set t2 (table [k1 k2 val2] (list [1 1 2] [a a b] [100 101 200])))"
703+
"(count (left-join [k1 k2] t1 t2))",
704+
"4");
705+
602706
PASS();
603707
}

tests/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ test_entry_t tests[] = {
564564
// Join tests
565565
{"test_join_inner_basic", test_join_inner_basic},
566566
{"test_join_left_basic", test_join_left_basic},
567+
{"test_join_left_shared_column_types", test_join_left_shared_column_types},
567568
{"test_join_single_key_types", test_join_single_key_types},
568569
{"test_join_multi_key", test_join_multi_key},
569570
{"test_join_empty_tables", test_join_empty_tables},

0 commit comments

Comments
 (0)