From 005fc9ac210ae5344546be426761d35fe02822ae Mon Sep 17 00:00:00 2001
From: ykiko <ykikoykikoykiko@gmail.com>
Date: Sat, 11 Apr 2026 14:04:42 +0800
Subject: [PATCH 1/2] feat(index): optimize merge with ContextBitmap, hash
 caching, and selective merge

Introduce ContextBitmap (tagged-pointer inline bitmap for <63 ids) to
replace roaring::Roaring in MergedIndex hot paths, eliminating heap
allocations in the common case. Cache FileIndex SHA256 hashes to avoid
recomputation. Add selective ProjectIndex::merge that skips symbols not
referencing newly-merged files. MergedIndex::merge now returns bool to
indicate cache hits.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CMakeLists.txt                              |   8 +
 benchmarks/index_benchmark.cpp              | 662 ++++++++++++++++++++
 src/index/merged_index.cpp                  |  79 +--
 src/index/merged_index.h                    |   6 +-
 src/index/project_index.cpp                 |  36 ++
 src/index/project_index.h                   |   6 +
 src/index/tu_index.cpp                      |  29 +-
 src/index/tu_index.h                        |   6 +
 src/support/bitmap.h                        | 162 +++++
 tests/unit/index/merged_index_tests.cpp     |  39 ++
 tests/unit/index/project_index_tests.cpp    |  52 ++
 tests/unit/index/tu_index_tests.cpp         |  12 +
 tests/unit/support/context_bitmap_tests.cpp | 254 ++++++++
 13 files changed, 1306 insertions(+), 45 deletions(-)
 create mode 100644 benchmarks/index_benchmark.cpp
 create mode 100644 tests/unit/support/context_bitmap_tests.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5c45d14b..64a788258 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -200,6 +200,14 @@ if(CLICE_ENABLE_BENCHMARK)
         "${PROJECT_SOURCE_DIR}/src"
     )
     target_link_libraries(scan_benchmark PRIVATE clice::core kota::deco)
+
+    add_executable(index_benchmark
+        "${PROJECT_SOURCE_DIR}/benchmarks/index_benchmark.cpp"
+    )
+    target_include_directories(index_benchmark PRIVATE
+        "${PROJECT_SOURCE_DIR}/src"
+    )
+    target_link_libraries(index_benchmark PRIVATE clice::core kota::deco)
 endif()
 
 if(CLICE_RELEASE)
diff --git a/benchmarks/index_benchmark.cpp b/benchmarks/index_benchmark.cpp
new file mode 100644
index 000000000..9c792c5c8
--- /dev/null
+++ b/benchmarks/index_benchmark.cpp
@@ -0,0 +1,662 @@
+/// Benchmark for TUIndex build, MergedIndex merge, and query performance.
+///
+/// Usage:
+///   index_benchmark [OPTIONS] <compile_commands.json>
+///
+/// Example:
+///   ./build/RelWithDebInfo/bin/index_benchmark \
+///       /home/ykiko/C++/clice/.llvm/build-debug/compile_commands.json
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdlib>
+#include <mutex>
+#include <numeric>
+#include <print>
+#include <sstream>
+#include <thread>
+#include <vector>
+
+#include "command/command.h"
+#include "compile/compilation.h"
+#include "index/merged_index.h"
+#include "index/project_index.h"
+#include "index/tu_index.h"
+#include "support/logging.h"
+
+#include "kota/deco/deco.h"
+#include "kota/ipc/lsp/position.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clice;
+
+struct Options {
+    DecoKV(names = {"--log-level"}; help = "Log level"; required = false;)
+    <std::string> log_level = "off";
+
+    DecoKV(names = {"--limit"}; help = "Max files to index (0 = all)"; required = false;)
+    <int> limit = 100;
+
+    DecoKV(names = {"--jobs", "-j"}; help = "Parallel compile jobs (0 = nproc)"; required = false;)
+    <int> jobs = 0;
+
+    DecoFlag(names = {"-h", "--help"}; help = "Show help"; required = false;)
+    help;
+
+    DecoInput(meta_var = "CDB"; help = "Path to compile_commands.json"; required = false;)
+    <std::string> cdb_path;
+};
+
+struct Timer {
+    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+
+    double ms() const {
+        auto end = std::chrono::steady_clock::now();
+        return std::chrono::duration<double, std::milli>(end - start).count();
+    }
+
+    void reset() {
+        start = std::chrono::steady_clock::now();
+    }
+};
+
+struct IndexStats {
+    std::string file;
+    double compile_ms = 0;
+    double build_index_ms = 0;
+    double serialize_ms = 0;
+    std::size_t symbols = 0;
+    std::size_t occurrences = 0;
+    std::size_t relations = 0;
+    std::size_t serialized_bytes = 0;
+    std::size_t file_count = 0;
+};
+
+struct IndexResult {
+    IndexStats stats;
+    index::TUIndex tu_index;
+};
+
+struct MergeStats {
+    std::size_t total_files = 0;
+    std::size_t total_occurrences = 0;
+    std::size_t total_relations = 0;
+    double merge_ms = 0;
+    double serialize_ms = 0;
+    std::size_t total_serialized_bytes = 0;
+    std::size_t shard_count = 0;
+};
+
+void print_size(std::size_t bytes) {
+    if(bytes < 1024)
+        std::print("{}B", bytes);
+    else if(bytes < 1024 * 1024)
+        std::print("{:.1f}KB", bytes / 1024.0);
+    else
+        std::print("{:.1f}MB", bytes / (1024.0 * 1024.0));
+}
+
+/// Per-task: compile one file, build TUIndex, serialize, collect stats.
+/// Everything here is thread-local, no shared state.
+static std::optional<IndexResult> index_one_file(CompilationDatabase& cdb,
+                                                 const CompilationEntry& entry) {
+    auto file = cdb.resolve_path(entry.file);
+    auto cmds = cdb.lookup(file);
+    if(cmds.empty())
+        return std::nullopt;
+
+    auto& cmd = cmds[0];
+    auto argv_vec = cmd.to_argv();
+
+    CompilationParams cp;
+    cp.kind = CompilationKind::Indexing;
+    cp.directory = cmd.resolved.directory;
+    for(auto* arg: argv_vec)
+        cp.arguments.push_back(arg);
+
+    IndexResult result;
+    result.stats.file = std::string(file);
+
+    Timer t;
+    auto unit = compile(cp);
+    result.stats.compile_ms = t.ms();
+
+    if(!unit.completed())
+        return std::nullopt;
+
+    t.reset();
+    result.tu_index = index::TUIndex::build(unit);
+    result.stats.build_index_ms = t.ms();
+
+    result.stats.symbols = result.tu_index.symbols.size();
+    result.stats.file_count = result.tu_index.file_indices.size() + 1;
+    for(auto& [fid, fi]: result.tu_index.file_indices) {
+        result.stats.occurrences += fi.occurrences.size();
+        for(auto& [_, rels]: fi.relations)
+            result.stats.relations += rels.size();
+    }
+    result.stats.occurrences += result.tu_index.main_file_index.occurrences.size();
+    for(auto& [_, rels]: result.tu_index.main_file_index.relations)
+        result.stats.relations += rels.size();
+
+    t.reset();
+    std::string serialized;
+    llvm::raw_string_ostream os(serialized);
+    result.tu_index.serialize(os);
+    result.stats.serialize_ms = t.ms();
+    result.stats.serialized_bytes = serialized.size();
+
+    return result;
+}
+
+int main(int argc, const char** argv) {
+    auto args = deco::util::argvify(argc, argv);
+    auto parsed = deco::cli::parse<Options>(args);
+    if(!parsed.has_value()) {
+        std::println(stderr, "Error: {}", parsed.error().message);
+        return 1;
+    }
+
+    auto& opts = parsed->options;
+    if(opts.help.value_or(false) || !opts.cdb_path.has_value()) {
+        std::ostringstream oss;
+        deco::cli::write_usage_for<Options>(oss, "index_benchmark [OPTIONS] <cdb>");
+        std::print("{}", oss.str());
+        return 0;
+    }
+
+    auto level = spdlog::level::from_str(*opts.log_level);
+    clice::logging::options.level = level;
+    clice::logging::stderr_logger("index_benchmark", clice::logging::options);
+
+    auto num_jobs = static_cast<unsigned>(*opts.jobs);
+    if(num_jobs == 0)
+        num_jobs = std::thread::hardware_concurrency();
+    if(num_jobs == 0)
+        num_jobs = 4;
+
+    // Load CDB.
+    CompilationDatabase cdb;
+    auto count = cdb.load(*opts.cdb_path);
+    std::println("CDB: {} entries from {}", count, *opts.cdb_path);
+
+    auto entries = cdb.get_entries();
+    auto limit = static_cast<std::size_t>(*opts.limit);
+    if(limit > 0 && limit < entries.size())
+        entries = entries.take_front(limit);
+    std::println("Indexing {} files with {} threads...\n", entries.size(), num_jobs);
+
+    // Phase 1: Parallel compile & build TUIndex.
+    // Each thread picks tasks from a shared atomic counter.
+    // Results are written to a pre-sized vector (one slot per entry).
+    std::vector<std::optional<IndexResult>> results(entries.size());
+    std::atomic<std::size_t> next_task{0};
+    std::atomic<std::size_t> completed{0};
+
+    Timer phase1_wall;
+
+    {
+        std::vector<std::jthread> workers;
+        workers.reserve(num_jobs);
+        for(unsigned i = 0; i < num_jobs; i++) {
+            workers.emplace_back([&] {
+                while(true) {
+                    auto idx = next_task.fetch_add(1);
+                    if(idx >= entries.size())
+                        break;
+                    results[idx] = index_one_file(cdb, entries[idx]);
+                    auto done = completed.fetch_add(1) + 1;
+                    if(done % 100 == 0)
+                        std::println("  ... {}/{} done", done, entries.size());
+                }
+            });
+        }
+    }
+
+    double phase1_wall_ms = phase1_wall.ms();
+
+    // Collect results on main thread.
+    std::vector<IndexStats> all_stats;
+    std::vector<std::pair<std::string, index::TUIndex>> tu_indices;
+    all_stats.reserve(entries.size());
+    tu_indices.reserve(entries.size());
+
+    double total_compile_ms = 0;
+    double total_build_ms = 0;
+    double total_serialize_ms = 0;
+    std::size_t total_serialized = 0;
+
+    for(auto& r: results) {
+        if(!r)
+            continue;
+        total_compile_ms += r->stats.compile_ms;
+        total_build_ms += r->stats.build_index_ms;
+        total_serialize_ms += r->stats.serialize_ms;
+        total_serialized += r->stats.serialized_bytes;
+        tu_indices.emplace_back(std::move(r->stats.file), std::move(r->tu_index));
+        all_stats.push_back(std::move(r->stats));
+    }
+
+    std::println("=== Phase 1: Compile & Build TUIndex ({} threads) ===", num_jobs);
+    std::println("  Files indexed:    {}", all_stats.size());
+    std::println("  Wall time:        {:.0f}ms", phase1_wall_ms);
+    std::println("  Total compile:    {:.0f}ms (sum of per-thread)", total_compile_ms);
+    std::println("  Total build idx:  {:.0f}ms (sum of per-thread)", total_build_ms);
+    std::println("  Total serialize:  {:.0f}ms (sum of per-thread)", total_serialize_ms);
+    std::print("  Total TUIndex size: ");
+    print_size(total_serialized);
+    std::println("");
+
+    // Size distribution.
+    if(!all_stats.empty()) {
+        std::vector<std::size_t> sizes;
+        std::vector<double> compile_times;
+        std::vector<double> build_times;
+        for(auto& s: all_stats) {
+            sizes.push_back(s.serialized_bytes);
+            compile_times.push_back(s.compile_ms);
+            build_times.push_back(s.build_index_ms);
+        }
+        std::ranges::sort(sizes);
+        std::ranges::sort(compile_times);
+        std::ranges::sort(build_times);
+        auto p = [](auto& v, double pct) {
+            return v[static_cast<std::size_t>(v.size() * pct)];
+        };
+
+        std::println("\n  TUIndex size distribution:");
+        std::print("    p50=");
+        print_size(p(sizes, 0.5));
+        std::print("  p90=");
+        print_size(p(sizes, 0.9));
+        std::print("  p99=");
+        print_size(p(sizes, 0.99));
+        std::print("  max=");
+        print_size(sizes.back());
+        std::println("");
+
+        std::println("  Compile time distribution:");
+        std::println("    p50={:.0f}ms  p90={:.0f}ms  p99={:.0f}ms  max={:.0f}ms",
+                     p(compile_times, 0.5),
+                     p(compile_times, 0.9),
+                     p(compile_times, 0.99),
+                     compile_times.back());
+
+        std::println("  Build index time distribution:");
+        std::println("    p50={:.1f}ms  p90={:.1f}ms  p99={:.1f}ms  max={:.1f}ms",
+                     p(build_times, 0.5),
+                     p(build_times, 0.9),
+                     p(build_times, 0.99),
+                     build_times.back());
+    }
+
+    // Phase 2: Merge into MergedIndex shards (main thread).
+    std::println("\n=== Phase 2: Merge into MergedIndex ===");
+
+    index::ProjectIndex project_index;
+    llvm::DenseMap<std::uint32_t, index::MergedIndex> merged;
+    MergeStats merge_stats;
+    Timer merge_timer;
+
+    llvm::DenseMap<std::uint32_t, std::string> file_contents;
+
+    auto read_content = [&](std::uint32_t path_id, llvm::StringRef path) -> llvm::StringRef {
+        auto it = file_contents.find(path_id);
+        if(it != file_contents.end())
+            return it->second;
+        auto buf = llvm::MemoryBuffer::getFile(path);
+        if(!buf)
+            return {};
+        auto [inserted, _] = file_contents.try_emplace(path_id, (*buf)->getBuffer().str());
+        return inserted->second;
+    };
+
+    // Pre-compute SHA256 hashes (simulates offloading to worker threads).
+    // With cached hash, this warms the cache so MergedIndex::merge won't recompute.
+    Timer hash_timer;
+    std::size_t hash_count = 0;
+    for(auto& [file, tu_index]: tu_indices) {
+        for(auto& [fid, fi]: tu_index.file_indices) {
+            fi.hash();
+            hash_count++;
+        }
+        tu_index.main_file_index.hash();
+        hash_count++;
+    }
+    double precompute_hash_ms = hash_timer.ms();
+
+    // Merge with both optimizations:
+    // 1. SHA256 already cached from above (MergedIndex::merge uses cached values)
+    // 2. Selective ProjectIndex update (skip symbols only in cache-hit files)
+    double project_merge_full_ms = 0;
+    double project_merge_selective_ms = 0;
+    double merged_index_merge_ms = 0;
+    std::size_t cache_hits = 0;
+    std::size_t cache_misses = 0;
+
+    for(auto& [file, tu_index]: tu_indices) {
+        auto& graph = tu_index.graph;
+        auto main_tu_path_id = static_cast<std::uint32_t>(graph.paths.size() - 1);
+
+        // First: MergedIndex merges to find out which files are new.
+        Bitmap new_file_ids;
+        Timer mt;
+        for(auto& [fid, fi]: tu_index.file_indices) {
+            auto tu_path_id = graph.path_id(fid);
+            auto global_path_id = project_index.path_pool.path_id(graph.paths[tu_path_id]);
+            auto content = read_content(global_path_id, graph.paths[tu_path_id]);
+            auto include_id = graph.include_location_id(fid);
+            bool hit = merged[global_path_id].merge(global_path_id, include_id, fi, content);
+            if(hit) {
+                cache_hits++;
+            } else {
+                cache_misses++;
+                new_file_ids.add(tu_path_id);
+            }
+            merge_stats.total_files++;
+        }
+
+        // Main file is always new.
+        new_file_ids.add(main_tu_path_id);
+        auto global_main_id = project_index.path_pool.path_id(graph.paths[main_tu_path_id]);
+        auto content = read_content(global_main_id, graph.paths[main_tu_path_id]);
+        auto now = std::chrono::duration_cast<std::chrono::milliseconds>(
+            std::chrono::system_clock::now().time_since_epoch());
+
+        std::vector<index::IncludeLocation> remapped_locs;
+        for(auto& loc: graph.locations) {
+            auto remapped = loc;
+            remapped.path_id = project_index.path_pool.path_id(graph.paths[loc.path_id]);
+            remapped_locs.push_back(remapped);
+        }
+        merged[global_main_id].merge(global_main_id,
+                                     now,
+                                     std::move(remapped_locs),
+                                     tu_index.main_file_index,
+                                     content);
+        cache_misses++;
+        merged_index_merge_ms += mt.ms();
+
+        // Then: selective ProjectIndex merge (only symbols in new files).
+        Timer pt;
+        project_index.merge(tu_index, new_file_ids);
+        project_merge_selective_ms += pt.ms();
+    }
+
+    merge_stats.merge_ms = merge_timer.ms();
+    merge_stats.shard_count = merged.size();
+
+    // Serialize all shards to measure size.
+    merge_timer.reset();
+    for(auto& [path, shard]: merged) {
+        std::string buf;
+        llvm::raw_string_ostream os(buf);
+        shard.serialize(os);
+        merge_stats.total_serialized_bytes += buf.size();
+    }
+    merge_stats.serialize_ms = merge_timer.ms();
+
+    std::println("  Shards:           {}", merge_stats.shard_count);
+    std::println("  SHA256 precompute (offloadable): {:.0f}ms ({} hashes)",
+                 precompute_hash_ms,
+                 hash_count);
+    std::println("  MergedIndex merge (cached hash): {:.0f}ms (hit: {}, miss: {})",
+                 merged_index_merge_ms,
+                 cache_hits,
+                 cache_misses);
+    std::println("  ProjectIndex merge (selective):  {:.0f}ms", project_merge_selective_ms);
+    std::println("  Main-thread total:  {:.0f}ms ({:.1f}ms/TU)",
+                 merged_index_merge_ms + project_merge_selective_ms,
+                 (merged_index_merge_ms + project_merge_selective_ms) / tu_indices.size());
+    std::println("  Serialize time:     {:.0f}ms", merge_stats.serialize_ms);
+    std::print("  Total size:       ");
+    print_size(merge_stats.total_serialized_bytes);
+    std::println("");
+
+    // Shard size distribution.
+    {
+        std::vector<std::size_t> shard_sizes;
+        shard_sizes.reserve(merged.size());
+        for(auto& [path, shard]: merged) {
+            std::string buf;
+            llvm::raw_string_ostream os(buf);
+            shard.serialize(os);
+            shard_sizes.push_back(buf.size());
+        }
+        std::ranges::sort(shard_sizes);
+        if(!shard_sizes.empty()) {
+            auto p = [&](double pct) {
+                return shard_sizes[static_cast<std::size_t>(shard_sizes.size() * pct)];
+            };
+            std::println("\n  Shard size distribution:");
+            std::print("    p50=");
+            print_size(p(0.5));
+            std::print("  p90=");
+            print_size(p(0.9));
+            std::print("  p99=");
+            print_size(p(0.99));
+            std::print("  max=");
+            print_size(shard_sizes.back());
+            std::println("");
+        }
+    }
+
+    // Phase 3: Query benchmark.
+    std::println("\n=== Phase 3: Query Benchmark ===");
+
+    // Collect some symbol hashes to query.
+    llvm::SmallVector<index::SymbolHash> sample_hashes;
+    for(auto& [file, tu_index]: tu_indices) {
+        for(auto& [hash, sym]: tu_index.symbols) {
+            sample_hashes.push_back(hash);
+            if(sample_hashes.size() >= 1000)
+                break;
+        }
+        if(sample_hashes.size() >= 1000)
+            break;
+    }
+
+    // Occurrence lookup by offset (raw, no position conversion).
+    {
+        std::size_t hit_count = 0;
+        Timer t;
+        constexpr int rounds = 10;
+        for(int r = 0; r < rounds; r++) {
+            for(auto& [path, shard]: merged) {
+                for(std::uint32_t offset = 0; offset < 1000; offset += 50) {
+                    shard.lookup(offset, [&](const index::Occurrence&) {
+                        hit_count++;
+                        return true;
+                    });
+                }
+            }
+        }
+        auto elapsed = t.ms();
+        auto queries = merged.size() * 20 * rounds;
+        std::println("  Occurrence lookup (raw):  {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
+                     queries,
+                     elapsed,
+                     queries / elapsed,
+                     hit_count);
+    }
+
+    // Relation lookup by symbol hash (raw, no position conversion).
+    {
+        std::size_t hit_count = 0;
+        Timer t;
+        constexpr int rounds = 10;
+        for(int r = 0; r < rounds; r++) {
+            for(auto& [path, shard]: merged) {
+                for(auto hash: sample_hashes) {
+                    shard.lookup(hash, RelationKind::Definition, [&](const index::Relation&) {
+                        hit_count++;
+                        return true;
+                    });
+                }
+            }
+        }
+        auto elapsed = t.ms();
+        auto queries = merged.size() * sample_hashes.size() * rounds;
+        std::println("  Relation lookup (raw):    {} queries in {:.1f}ms ({:.0f} q/ms, {} hits)",
+                     queries,
+                     elapsed,
+                     queries / elapsed,
+                     hit_count);
+    }
+
+    // PositionMapper construction benchmark.
+    {
+        using kota::ipc::lsp::PositionMapper;
+        using kota::ipc::lsp::PositionEncoding;
+
+        std::size_t mapper_count = 0;
+        std::vector<double> construct_times;
+
+        for(auto& [path, shard]: merged) {
+            auto content = shard.content();
+            if(content.empty())
+                continue;
+            Timer t;
+            PositionMapper mapper(content, PositionEncoding::UTF16);
+            construct_times.push_back(t.ms());
+            mapper_count++;
+        }
+
+        std::ranges::sort(construct_times);
+        if(!construct_times.empty()) {
+            auto sum = std::accumulate(construct_times.begin(), construct_times.end(), 0.0);
+            auto p = [&](double pct) {
+                return construct_times[static_cast<std::size_t>(construct_times.size() * pct)];
+            };
+            std::println("\n  PositionMapper construction ({} shards):", mapper_count);
+            std::println("    Total: {:.1f}ms", sum);
+            std::println("    p50={:.3f}ms  p90={:.3f}ms  p99={:.3f}ms  max={:.3f}ms",
+                         p(0.5),
+                         p(0.9),
+                         p(0.99),
+                         construct_times.back());
+        }
+    }
+
+    // Relation lookup with full position conversion (simulates real LSP query).
+    {
+        using kota::ipc::lsp::PositionMapper;
+        using kota::ipc::lsp::PositionEncoding;
+
+        // Pre-build mappers for all shards.
+        llvm::DenseMap<std::uint32_t, PositionMapper> mappers;
+        for(auto& [path_id, shard]: merged) {
+            auto content = shard.content();
+            if(!content.empty())
+                mappers.try_emplace(path_id, content, PositionEncoding::UTF16);
+        }
+
+        std::size_t hit_count = 0;
+        std::size_t convert_count = 0;
+        Timer t;
+        constexpr int rounds = 10;
+        for(int r = 0; r < rounds; r++) {
+            for(auto& [path_id, shard]: merged) {
+                auto mapper_it = mappers.find(path_id);
+                if(mapper_it == mappers.end())
+                    continue;
+                auto& mapper = mapper_it->second;
+                for(auto hash: sample_hashes) {
+                    shard.lookup(hash, RelationKind::Definition, [&](const index::Relation& rel) {
+                        auto start = mapper.to_position(rel.range.begin);
+                        auto end = mapper.to_position(rel.range.end);
+                        if(start && end)
+                            convert_count++;
+                        hit_count++;
+                        return true;
+                    });
+                }
+            }
+        }
+        auto elapsed = t.ms();
+        auto queries = merged.size() * sample_hashes.size() * rounds;
+        std::println("\n  Relation lookup + to_position:");
+        std::println("    {} queries in {:.1f}ms ({:.0f} q/ms, {} hits, {} converted)",
+                     queries,
+                     elapsed,
+                     queries / elapsed,
+                     hit_count,
+                     convert_count);
+    }
+
+    // Phase 4: Realistic "find references" for specific high-frequency symbols.
+    std::println("=== Phase 4: Realistic Find-References ===");
+    {
+        using kota::ipc::lsp::PositionMapper;
+        using kota::ipc::lsp::PositionEncoding;
+
+        // Use the properly merged ProjectIndex symbol table.
+        auto& symbol_table = project_index.symbols;
+
+        // Find symbols by name to query. Collect ALL hashes whose name contains the target.
+        llvm::StringRef targets[] = {"vector",
+                                     "StringRef",
+                                     "SourceLocation",
+                                     "DenseMap",
+                                     "SmallVector"};
+        for(auto target_name: targets) {
+            llvm::SmallVector<index::SymbolHash> matching_hashes;
+            std::size_t ref_file_count = 0;
+            for(auto& [hash, sym]: symbol_table) {
+                if(sym.name == target_name) {
+                    matching_hashes.push_back(hash);
+                    ref_file_count += sym.reference_files.cardinality();
+                }
+            }
+
+            if(matching_hashes.empty()) {
+                std::println("  {}: not found in symbol table", target_name);
+                continue;
+            }
+
+            // Query all shards for this symbol's references (simulating real find-references).
+            std::size_t total_refs = 0;
+            std::size_t shards_queried = 0;
+            RelationKind kinds[] = {
+                RelationKind::Reference,
+                RelationKind::Definition,
+                RelationKind::Declaration,
+            };
+            Timer t;
+            for(auto& [path, shard]: merged) {
+                bool found = false;
+                for(auto target_hash: matching_hashes) {
+                    for(auto kind: kinds) {
+                        shard.lookup(target_hash, kind, [&](const index::Relation&) {
+                            total_refs++;
+                            found = true;
+                            return true;
+                        });
+                    }
+                }
+                if(found)
+                    shards_queried++;
+            }
+            auto elapsed = t.ms();
+
+            std::println(
+                "  {} ({} hashes, {} ref_files): {} refs across {} shards, " "scanned {} shards in {:.2f}ms",
+                target_name,
+                matching_hashes.size(),
+                ref_file_count,
+                total_refs,
+                shards_queried,
+                merged.size(),
+                elapsed);
+
+            // Now with reference_files bitmap to skip irrelevant shards.
+            // We don't have path_id mapping here, so just measure the full scan above
+            // vs a targeted scan using reference_files count as an estimate.
+        }
+    }
+
+    std::println("");
+    return 0;
+}
diff --git a/src/index/merged_index.cpp b/src/index/merged_index.cpp
index b8c2dbbf2..3ff555874 100644
--- a/src/index/merged_index.cpp
+++ b/src/index/merged_index.cpp
@@ -129,18 +129,19 @@ struct MergedIndex::Impl {
     std::vector<std::uint32_t> canonical_ref_counts;
 
     /// The canonical id set of removed index.
-    roaring::Roaring removed;
+    ContextBitmap removed;
 
     /// All merged symbol occurrences.
-    llvm::DenseMap<Occurrence, roaring::Roaring> occurrences;
+    llvm::DenseMap<Occurrence, ContextBitmap> occurrences;
 
     /// All merged symbol relations.
-    llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, roaring::Roaring>> relations;
+    llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, ContextBitmap>> relations;
 
     /// Sorted occurrences cache for fast lookup.
     std::vector<Occurrence> occurrences_cache;
 
-    void merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
+    /// Returns true if this was a cache hit (no new data inserted).
+    bool merge(this Impl& self, std::uint32_t path_id, FileIndex& index, auto&& add_context) {
         auto hash = index.hash();
         auto hash_key = llvm::StringRef(reinterpret_cast<char*>(hash.data()), hash.size());
         auto [it, success] = self.canonical_cache.try_emplace(hash_key, self.max_canonical_id);
@@ -151,7 +152,7 @@ struct MergedIndex::Impl {
         if(!success) {
             self.canonical_ref_counts[canonical_id] += 1;
             self.removed.remove(canonical_id);
-            return;
+            return true;
         }
 
         for(auto& occurrence: index.occurrences) {
@@ -167,6 +168,7 @@ struct MergedIndex::Impl {
 
         self.canonical_ref_counts.emplace_back(1);
         self.max_canonical_id += 1;
+        return false;
     }
 
     friend bool operator==(const Impl&, const Impl&) = default;
@@ -237,19 +239,20 @@ void MergedIndex::load_in_memory(this Self& self) {
 
     // Deserialize removed bitmap.
     if(root->removed() && root->removed()->size() > 0) {
-        index.removed = read_bitmap(root->removed());
+        index.removed = ContextBitmap::from_roaring(read_bitmap(root->removed()));
     }
 
     for(auto entry: *root->occurrences()) {
         index.occurrences.try_emplace(*safe_cast<Occurrence>(entry->occurrence()),
-                                      read_bitmap(entry->context()));
+                                      ContextBitmap::from_roaring(read_bitmap(entry->context())));
     }
 
     for(auto entry: *root->relations()) {
         auto& relations = index.relations[entry->symbol()];
         for(auto relation_entry: *entry->relations()) {
-            relations.try_emplace(*safe_cast<Relation>(relation_entry->relation()),
-                                  read_bitmap(relation_entry->context()));
+            relations.try_emplace(
+                *safe_cast<Relation>(relation_entry->relation()),
+                ContextBitmap::from_roaring(read_bitmap(relation_entry->context())));
         }
     }
 
@@ -310,17 +313,22 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
             CreateStructVector<binary::IncludeLocation>(builder, context.include_locations));
     });
 
+    auto serialize_ctx_bitmap = [&](const ContextBitmap& ctx) {
+        auto r = ctx.to_roaring();
+        buffer.clear();
+        buffer.resize_for_overwrite(r.getSizeInBytes(false));
+        r.write(buffer.data(), false);
+        return CreateVector(builder, buffer);
+    };
+
     llvm::SmallVector<const Occurrence*> occurrence_keys;
     occurrence_keys.reserve(index->occurrences.size());
     auto occurrences = transform(index->occurrences, [&](auto&& value) {
         auto&& [occurrence, bitmap] = value;
-        buffer.clear();
-        buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
-        bitmap.write(buffer.data(), false);
         occurrence_keys.emplace_back(&occurrence);
         return binary::CreateOccurrenceEntry(builder,
                                              safe_cast<binary::Occurrence>(&occurrence),
-                                             CreateVector(builder, buffer));
+                                             serialize_ctx_bitmap(bitmap));
     });
     std::ranges::sort(std::views::zip(occurrence_keys, occurrences), [](auto lhs, auto rhs) {
         const auto& lo = *std::get<0>(lhs);
@@ -335,12 +343,9 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
         auto&& [symbol_id, symbol_relations] = value;
         auto relations = transform(symbol_relations, [&](auto&& value) {
             auto&& [relation, bitmap] = value;
-            buffer.clear();
-            buffer.resize_for_overwrite(bitmap.getSizeInBytes(false));
-            bitmap.write(buffer.data(), false);
             return binary::CreateRelationEntry(builder,
                                                safe_cast<binary::Relation>(&relation),
-                                               CreateVector(builder, buffer));
+                                               serialize_ctx_bitmap(bitmap));
         });
         relation_keys.emplace_back(symbol_id);
         return binary::CreateSymbolRelationsEntry(builder,
@@ -353,9 +358,10 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) {
 
     // Serialize removed bitmap.
     buffer.clear();
-    if(!index->removed.isEmpty()) {
-        buffer.resize_for_overwrite(index->removed.getSizeInBytes(false));
-        index->removed.write(buffer.data(), false);
+    if(!index->removed.is_empty()) {
+        auto r = index->removed.to_roaring();
+        buffer.resize_for_overwrite(r.getSizeInBytes(false));
+        r.write(buffer.data(), false);
     }
     auto removed = CreateVector(builder, buffer);
 
@@ -398,14 +404,12 @@ void MergedIndex::lookup(this const Self& self,
         while(it != occurrences.end()) {
             if(it->range.contains(offset)) {
                 // Skip occurrences whose canonical_ids are all removed.
-                if(!index.removed.isEmpty()) {
+                if(!index.removed.is_empty()) {
                     auto bitmap_it = index.occurrences.find(*it);
-                    if(bitmap_it != index.occurrences.end()) {
-                        auto remaining = bitmap_it->second - index.removed;
-                        if(remaining.isEmpty()) {
-                            it++;
-                            continue;
-                        }
+                    if(bitmap_it != index.occurrences.end() &&
+                       !bitmap_it->second.any_not_in(index.removed)) {
+                        ++it;
+                        continue;
                     }
                 }
 
@@ -413,7 +417,7 @@ void MergedIndex::lookup(this const Self& self,
                     break;
                 }
 
-                it++;
+                ++it;
                 continue;
             }
 
@@ -434,7 +438,7 @@ void MergedIndex::lookup(this const Self& self,
                     break;
                 }
 
-                it++;
+                ++it;
                 continue;
             }
 
@@ -457,11 +461,8 @@ void MergedIndex::lookup(this const Self& self,
         for(auto& [relation, bitmap]: relations) {
             if(relation.kind & kind) {
                 // Skip relations whose canonical_ids are all removed.
-                if(!self.impl->removed.isEmpty()) {
-                    auto remaining = bitmap - self.impl->removed;
-                    if(remaining.isEmpty()) {
-                        continue;
-                    }
+                if(!self.impl->removed.is_empty() && !bitmap.any_not_in(self.impl->removed)) {
+                    continue;
                 }
 
                 if(!callback(relation)) {
@@ -579,7 +580,7 @@ void MergedIndex::remove(this Self& self, std::uint32_t path_id) {
     index.occurrences_cache.clear();
 }
 
-void MergedIndex::merge(this Self& self,
+bool MergedIndex::merge(this Self& self,
                         std::uint32_t path_id,
                         std::chrono::milliseconds build_at,
                         std::vector<IncludeLocation> include_locations,
@@ -587,16 +588,17 @@ void MergedIndex::merge(this Self& self,
                         llvm::StringRef content) {
     self.load_in_memory();
     self.impl->content = content.str();
-    self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
+    bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
         auto& context = self.compilation_contexts[path_id];
         context.canonical_id = canonical_id;
         context.build_at = build_at.count();
         context.include_locations = std::move(include_locations);
     });
     self.impl->occurrences_cache.clear();
+    return hit;
 }
 
-void MergedIndex::merge(this Self& self,
+bool MergedIndex::merge(this Self& self,
                         std::uint32_t path_id,
                         std::uint32_t include_id,
                         FileIndex& index,
@@ -605,11 +607,12 @@ void MergedIndex::merge(this Self& self,
     if(self.impl->content.empty() && !content.empty()) {
         self.impl->content = content.str();
     }
-    self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
+    bool hit = self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) {
         auto& context = self.header_contexts[path_id];
         context.includes.emplace_back(include_id, canonical_id);
     });
     self.impl->occurrences_cache.clear();
+    return hit;
 }
 
 llvm::StringRef MergedIndex::content(this const Self& self) {
diff --git a/src/index/merged_index.h b/src/index/merged_index.h
index 2e4bc375e..b356356b6 100644
--- a/src/index/merged_index.h
+++ b/src/index/merged_index.h
@@ -68,7 +68,8 @@ class MergedIndex {
     llvm::StringRef content(this const Self& self);
 
     /// Merge the index with given compilation context.
-    void merge(this Self& self,
+    /// Returns true if this was a cache hit (no new data inserted).
+    bool merge(this Self& self,
                std::uint32_t path_id,
                std::chrono::milliseconds build_at,
                std::vector<IncludeLocation> include_locations,
@@ -76,7 +77,8 @@ class MergedIndex {
                llvm::StringRef content);
 
     /// Merge the index with given header context.
-    void merge(this Self& self,
+    /// Returns true if this was a cache hit (no new data inserted).
+    bool merge(this Self& self,
                std::uint32_t path_id,
                std::uint32_t include_id,
                FileIndex& index,
diff --git a/src/index/project_index.cpp b/src/index/project_index.cpp
index 087413838..2f3fafabe 100644
--- a/src/index/project_index.cpp
+++ b/src/index/project_index.cpp
@@ -27,6 +27,42 @@ llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self, TU
     return file_ids_map;
 }
 
+llvm::SmallVector<std::uint32_t> ProjectIndex::merge(this ProjectIndex& self,
+                                                     TUIndex& index,
+                                                     const Bitmap& new_file_ids) {
+    auto& paths = index.graph.paths;
+    llvm::SmallVector<std::uint32_t> file_ids_map;
+    file_ids_map.resize_for_overwrite(paths.size());
+
+    for(std::uint32_t i = 0; i < paths.size(); i++) {
+        file_ids_map[i] = self.path_pool.path_id(paths[i]);
+    }
+
+    for(auto& [symbol_id, symbol]: index.symbols) {
+        // Skip symbols that don't reference any new file.
+        bool references_new = false;
+        for(auto ref: symbol.reference_files) {
+            if(new_file_ids.contains(ref)) {
+                references_new = true;
+                break;
+            }
+        }
+        if(!references_new)
+            continue;
+
+        auto& target_symbol = self.symbols[symbol_id];
+        if(target_symbol.name.empty()) {
+            target_symbol.name = symbol.name;
+            target_symbol.kind = symbol.kind;
+        }
+        for(auto ref: symbol.reference_files) {
+            target_symbol.reference_files.add(file_ids_map[ref]);
+        }
+    }
+
+    return file_ids_map;
+}
+
 void ProjectIndex::serialize(this ProjectIndex& self, llvm::raw_ostream& os) {
     fbs::FlatBufferBuilder builder(1024);
 
diff --git a/src/index/project_index.h b/src/index/project_index.h
index 12bbebe01..259bd404c 100644
--- a/src/index/project_index.h
+++ b/src/index/project_index.h
@@ -82,6 +82,12 @@ struct ProjectIndex {
 
     llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self, TUIndex& index);
 
+    /// Selective merge: only update symbols referenced by files in new_file_ids.
+    /// new_file_ids contains TU-local path indices for cache-miss FileIndices.
+    llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self,
+                                           TUIndex& index,
+                                           const Bitmap& new_file_ids);
+
     void serialize(this ProjectIndex& self, llvm::raw_ostream& os);
 
     static ProjectIndex from(const void* data);
diff --git a/src/index/tu_index.cpp b/src/index/tu_index.cpp
index 4ba6b1d3c..0049f133c 100644
--- a/src/index/tu_index.cpp
+++ b/src/index/tu_index.cpp
@@ -1,5 +1,6 @@
 #include "index/tu_index.h"
 
+#include <algorithm>
 #include <tuple>
 
 #include "index/serialization.h"
@@ -160,6 +161,9 @@ class Builder : public SemanticVisitor<Builder> {
 }  // namespace
 
 std::array<std::uint8_t, 32> FileIndex::hash() {
+    if(cached_hash)
+        return *cached_hash;
+
     llvm::SHA256 hasher;
 
     using u8 = std::uint8_t;
@@ -172,20 +176,35 @@ std::array<std::uint8_t, 32> FileIndex::hash() {
         hasher.update(llvm::ArrayRef(data, size));
     }
 
-    for(auto& [symbol_id, relations]: relations) {
+    // Sort keys to ensure deterministic iteration order over DenseMap.
+    llvm::SmallVector<SymbolHash> sorted_keys;
+    sorted_keys.reserve(relations.size());
+    for(auto& [symbol_id, _]: relations) {
+        sorted_keys.push_back(symbol_id);
+    }
+    std::ranges::sort(sorted_keys);
+
+    for(auto symbol_id: sorted_keys) {
         hasher.update(std::bit_cast<std::array<u8, sizeof(symbol_id)>>(symbol_id));
         static_assert(sizeof(Relation) ==
                       sizeof(RelationKind) + 4 + sizeof(Range) + sizeof(SymbolHash));
         static_assert(sizeof(Relation) % 8 == 0);
 
-        if(!relations.empty()) {
-            auto data = reinterpret_cast<u8*>(relations.data());
-            auto size = relations.size() * sizeof(Relation);
+        auto& rels = relations[symbol_id];
+        std::ranges::sort(rels, [](const Relation& lhs, const Relation& rhs) {
+            return std::tuple(lhs.kind.value(), lhs.range.begin, lhs.range.end, lhs.target_symbol) <
+                   std::tuple(rhs.kind.value(), rhs.range.begin, rhs.range.end, rhs.target_symbol);
+        });
+
+        if(!rels.empty()) {
+            auto data = reinterpret_cast<u8*>(rels.data());
+            auto size = rels.size() * sizeof(Relation);
             hasher.update(llvm::ArrayRef(data, size));
         }
     }
 
-    return hasher.final();
+    cached_hash = hasher.final();
+    return *cached_hash;
 }
 
 TUIndex TUIndex::build(CompilationUnitRef unit, bool interested_only) {
diff --git a/src/index/tu_index.h b/src/index/tu_index.h
index 65e905009..0666e4b9a 100644
--- a/src/index/tu_index.h
+++ b/src/index/tu_index.h
@@ -4,6 +4,7 @@
 #include <bit>
 #include <chrono>
 #include <cstdint>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -52,7 +53,12 @@ struct FileIndex {
 
     std::vector<Occurrence> occurrences;
 
+    /// Compute (and cache) SHA256 hash of this index's content.
+    /// Each FileIndex instance must only be hashed by one thread at a time.
     std::array<std::uint8_t, 32> hash();
+
+private:
+    std::optional<std::array<std::uint8_t, 32>> cached_hash;
 };
 
 struct Symbol {
diff --git a/src/support/bitmap.h b/src/support/bitmap.h
index c2209ad23..7e0a33ba2 100644
--- a/src/support/bitmap.h
+++ b/src/support/bitmap.h
@@ -1,6 +1,9 @@
 #pragma once
 
+#include <bit>
+#include <cstdint>
 #include <cstdlib>
+#include <utility>
 
 #define ROARING_EXCEPTIONS 0
 #define ROARING_TERMINATE(message) std::abort()
@@ -10,4 +13,163 @@ namespace clice {
 
 using Bitmap = roaring::Roaring;
 
+/// Compact bitmap optimized for small canonical-id sets.
+///
+/// Uses a tagged pointer scheme in a single uint64_t:
+///   - Low bit 1: inline mode, bits 1-63 store the bitmap (ids 0-62)
+///   - Low bit 0: heap mode, the value is a roaring::Roaring* (aligned, so low bit is 0)
+///
+/// Default state is inline-empty (data == 1).  Upgrades to heap when an id >= 63
+/// is added.  The common case (< 63 canonical ids per MergedIndex) stays entirely
+/// inline with zero heap allocation and single-instruction bitwise operations.
+class ContextBitmap {
+    constexpr static std::uint32_t inline_capacity = 63;
+    constexpr static std::uint64_t inline_tag = 1;
+
+    std::uint64_t data = inline_tag;
+
+    bool is_inline() const {
+        return data & inline_tag;
+    }
+
+    roaring::Roaring* as_heap() const {
+        return reinterpret_cast<roaring::Roaring*>(data);
+    }
+
+    std::uint64_t bits() const {
+        return data >> 1;
+    }
+
+    void free_heap() {
+        if(!is_inline())
+            delete as_heap();
+    }
+
+    void upgrade(std::uint32_t new_id) {
+        auto* r = new roaring::Roaring();
+        auto b = bits();
+        while(b) {
+            r->add(static_cast<std::uint32_t>(std::countr_zero(b)));
+            b &= b - 1;
+        }
+        r->add(new_id);
+        data = reinterpret_cast<std::uint64_t>(r);
+    }
+
+public:
+    ContextBitmap() = default;
+
+    ~ContextBitmap() {
+        free_heap();
+    }
+
+    ContextBitmap(ContextBitmap&& other) noexcept : data(other.data) {
+        other.data = inline_tag;
+    }
+
+    ContextBitmap& operator=(ContextBitmap&& other) noexcept {
+        if(this != &other) {
+            free_heap();
+            data = other.data;
+            other.data = inline_tag;
+        }
+        return *this;
+    }
+
+    ContextBitmap(const ContextBitmap&) = delete;
+    ContextBitmap& operator=(const ContextBitmap&) = delete;
+
+    void add(std::uint32_t id) {
+        if(is_inline()) {
+            if(id < inline_capacity) {
+                data |= (1ULL << (id + 1));
+            } else {
+                upgrade(id);
+            }
+        } else {
+            as_heap()->add(id);
+        }
+    }
+
+    void remove(std::uint32_t id) {
+        if(is_inline()) {
+            if(id < inline_capacity)
+                data &= ~(1ULL << (id + 1));
+        } else {
+            as_heap()->remove(id);
+        }
+    }
+
+    bool is_empty() const {
+        if(is_inline())
+            return bits() == 0;
+        return as_heap()->isEmpty();
+    }
+
+    /// Check if (this - other) has any bits set, without allocating in the common inline case.
+    bool any_not_in(const ContextBitmap& other) const {
+        if(is_inline() && other.is_inline())
+            return (bits() & ~other.bits()) != 0;
+
+        // Rare: at least one side is heap.  Use roaring operations.
+        if(is_inline()) {
+            auto b = bits();
+            while(b) {
+                auto i = static_cast<std::uint32_t>(std::countr_zero(b));
+                if(!other.as_heap()->contains(i))
+                    return true;
+                b &= b - 1;
+            }
+            return false;
+        }
+
+        if(other.is_inline()) {
+            for(auto v: *as_heap()) {
+                if(v >= inline_capacity || !(other.bits() & (1ULL << v)))
+                    return true;
+            }
+            return false;
+        }
+
+        return !(*as_heap() - *other.as_heap()).isEmpty();
+    }
+
+    friend bool operator==(const ContextBitmap& lhs, const ContextBitmap& rhs) {
+        if(lhs.is_inline() && rhs.is_inline())
+            return lhs.data == rhs.data;
+        if(lhs.is_inline() != rhs.is_inline())
+            return false;
+        return *lhs.as_heap() == *rhs.as_heap();
+    }
+
+    /// Convert to roaring::Roaring (for serialization).
+    roaring::Roaring to_roaring() const {
+        roaring::Roaring r;
+        if(is_inline()) {
+            auto b = bits();
+            while(b) {
+                r.add(static_cast<std::uint32_t>(std::countr_zero(b)));
+                b &= b - 1;
+            }
+        } else {
+            r = *as_heap();
+        }
+        return r;
+    }
+
+    /// Construct from roaring::Roaring (for deserialization).
+    static ContextBitmap from_roaring(const roaring::Roaring& r) {
+        ContextBitmap result;
+        if(r.isEmpty())
+            return result;
+        if(r.maximum() < inline_capacity) {
+            for(auto v: r)
+                result.data |= (1ULL << (v + 1));
+        } else {
+            result.data = reinterpret_cast<std::uint64_t>(new roaring::Roaring(r));
+        }
+        return result;
+    }
+};
+
 }  // namespace clice
diff --git a/tests/unit/index/merged_index_tests.cpp b/tests/unit/index/merged_index_tests.cpp
index 88bc1a5bb..2cf61fdaf 100644
--- a/tests/unit/index/merged_index_tests.cpp
+++ b/tests/unit/index/merged_index_tests.cpp
@@ -357,6 +357,45 @@ TEST_CASE(CacheInvalidatedAfterMerge) {
     ASSERT_TRUE(found_second);
 }
 
+TEST_CASE(MergeCacheHit) {
+    add_file("header.h", R"(
+            #pragma once
+            inline int shared() { return 1; }
+        )");
+    add_main("a.cpp", R"(
+            #include "header.h"
+            int use_a() { return shared(); }
+        )");
+    ASSERT_TRUE(compile());
+    auto tu_a = index::TUIndex::build(*unit);
+
+    add_file("header.h", R"(
+            #pragma once
+            inline int shared() { return 1; }
+        )");
+    add_main("b.cpp", R"(
+            #include "header.h"
+            int use_b() { return shared(); }
+        )");
+    ASSERT_TRUE(compile());
+    auto tu_b = index::TUIndex::build(*unit);
+
+    // First merge of a FileIndex should return false (new data).
+    index::MergedIndex merged;
+    bool first_hit = false;
+    for(auto& [fid, file_index]: tu_a.file_indices) {
+        first_hit = merged.merge(0, tu_a.graph.include_location_id(fid), file_index, {});
+    }
+    ASSERT_FALSE(first_hit);
+
+    // Second merge with identical content should return true (cache hit).
+    bool second_hit = false;
+    for(auto& [fid, file_index]: tu_b.file_indices) {
+        second_hit = merged.merge(1, tu_b.graph.include_location_id(fid), file_index, {});
+    }
+    ASSERT_TRUE(second_hit);
+}
+
 };  // TEST_SUITE(MergedIndex)
 }  // namespace
 }  // namespace clice::testing
diff --git a/tests/unit/index/project_index_tests.cpp b/tests/unit/index/project_index_tests.cpp
index eb9f36663..fd1b85cce 100644
--- a/tests/unit/index/project_index_tests.cpp
+++ b/tests/unit/index/project_index_tests.cpp
@@ -200,6 +200,58 @@ TEST_CASE(NameSurvivesRoundTrip) {
     }
 }
 
+TEST_CASE(SelectiveMergeFilters) {
+    add_file("header.h", R"(
+            #pragma once
+            inline int shared() { return 1; }
+        )");
+    add_main("main.cpp", R"(
+            #include "header.h"
+            int use() { return shared(); }
+        )");
+    ASSERT_TRUE(compile());
+    auto tu = index::TUIndex::build(*unit);
+
+    // Build a bitmap containing only the main file's path index.
+    auto main_path_id = static_cast<std::uint32_t>(tu.graph.paths.size() - 1);
+    Bitmap new_file_ids;
+    new_file_ids.add(main_path_id);
+
+    index::ProjectIndex project;
+    project.merge(tu, new_file_ids);
+
+    // Symbols that reference the main file should be present.
+    bool has_main_symbols = false;
+    for(auto& [hash, symbol]: tu.symbols) {
+        for(auto ref: symbol.reference_files) {
+            if(ref == main_path_id && project.symbols.contains(hash)) {
+                has_main_symbols = true;
+                // ALL reference_files should be added, not just the new ones.
+                auto& proj_sym = project.symbols[hash];
+                ASSERT_TRUE(proj_sym.reference_files.cardinality() >=
+                            symbol.reference_files.cardinality());
+                break;
+            }
+        }
+    }
+    ASSERT_TRUE(has_main_symbols);
+}
+
+TEST_CASE(SelectiveMergeEmpty) {
+    index::TUIndex tu;
+    ASSERT_TRUE(build_and_index(R"(
+            int foo() { return 42; }
+        )",
+                                tu));
+
+    Bitmap empty_bitmap;
+    index::ProjectIndex project;
+    project.merge(tu, empty_bitmap);
+
+    // No symbols should be added when the bitmap is empty.
+    ASSERT_TRUE(project.symbols.empty());
+}
+
 };  // TEST_SUITE(ProjectIndex)
 }  // namespace
 }  // namespace clice::testing
diff --git a/tests/unit/index/tu_index_tests.cpp b/tests/unit/index/tu_index_tests.cpp
index 8deadbc2f..d425892e2 100644
--- a/tests/unit/index/tu_index_tests.cpp
+++ b/tests/unit/index/tu_index_tests.cpp
@@ -500,6 +500,18 @@ TEST_CASE(SymbolKinds) {
     check_kind("ns", SymbolKind::Namespace);
 }
 
+TEST_CASE(HashCaching) {
+    build_index(R"(
+            int foo() { return 42; }
+            int bar() { return foo() + 1; }
+        )");
+
+    auto& index = tu_index.main_file_index;
+    auto hash1 = index.hash();
+    auto hash2 = index.hash();
+    ASSERT_EQ(hash1, hash2);
+}
+
 };  // TEST_SUITE(TUIndex)
 }  // namespace
 }  // namespace clice::testing
diff --git a/tests/unit/support/context_bitmap_tests.cpp b/tests/unit/support/context_bitmap_tests.cpp
new file mode 100644
index 000000000..1d3885629
--- /dev/null
+++ b/tests/unit/support/context_bitmap_tests.cpp
@@ -0,0 +1,254 @@
+#include "test/test.h"
+#include "support/bitmap.h"
+
+namespace clice::testing {
+namespace {
+
+TEST_SUITE(ContextBitmap) {
+
+TEST_CASE(EmptyByDefault) {
+    ContextBitmap bm;
+    EXPECT_TRUE(bm.is_empty());
+}
+
+TEST_CASE(AddInline) {
+    ContextBitmap bm;
+    bm.add(0);
+    EXPECT_FALSE(bm.is_empty());
+    bm.add(62);
+    EXPECT_FALSE(bm.is_empty());
+
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(0));
+    EXPECT_TRUE(r.contains(62));
+    EXPECT_FALSE(r.contains(1));
+    EXPECT_EQ(r.cardinality(), 2U);
+}
+
+TEST_CASE(RemoveInline) {
+    ContextBitmap bm;
+    bm.add(5);
+    bm.add(10);
+    bm.remove(5);
+
+    auto r = bm.to_roaring();
+    EXPECT_FALSE(r.contains(5));
+    EXPECT_TRUE(r.contains(10));
+    EXPECT_EQ(r.cardinality(), 1U);
+
+    bm.remove(10);
+    EXPECT_TRUE(bm.is_empty());
+}
+
+TEST_CASE(UpgradeToHeap) {
+    ContextBitmap bm;
+    bm.add(5);
+    bm.add(63);
+
+    EXPECT_FALSE(bm.is_empty());
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(5));
+    EXPECT_TRUE(r.contains(63));
+    EXPECT_EQ(r.cardinality(), 2U);
+}
+
+TEST_CASE(HeapAddRemove) {
+    ContextBitmap bm;
+    bm.add(100);
+    bm.add(200);
+    bm.remove(100);
+
+    auto r = bm.to_roaring();
+    EXPECT_FALSE(r.contains(100));
+    EXPECT_TRUE(r.contains(200));
+}
+
+TEST_CASE(AnyNotInBothInline) {
+    ContextBitmap a, b;
+    a.add(1);
+    a.add(2);
+    b.add(1);
+    b.add(2);
+    EXPECT_FALSE(a.any_not_in(b));
+
+    a.add(3);
+    EXPECT_TRUE(a.any_not_in(b));
+}
+
+TEST_CASE(AnyNotInInlineVsHeap) {
+    ContextBitmap a, b;
+    a.add(5);
+    b.add(5);
+    b.add(100);
+    EXPECT_FALSE(a.any_not_in(b));
+
+    a.add(10);
+    EXPECT_TRUE(a.any_not_in(b));
+}
+
+TEST_CASE(AnyNotInHeapVsInline) {
+    ContextBitmap a, b;
+    a.add(100);
+    b.add(5);
+    // 100 >= 63 not in inline b
+    EXPECT_TRUE(a.any_not_in(b));
+
+    ContextBitmap c, d;
+    c.add(5);
+    c.add(100);
+    d.add(5);
+    // c has 100 which d doesn't have
+    EXPECT_TRUE(c.any_not_in(d));
+}
+
+TEST_CASE(AnyNotInBothHeap) {
+    ContextBitmap a, b;
+    a.add(100);
+    a.add(200);
+    b.add(100);
+    b.add(200);
+    EXPECT_FALSE(a.any_not_in(b));
+
+    a.add(300);
+    EXPECT_TRUE(a.any_not_in(b));
+}
+
+TEST_CASE(MoveConstruct) {
+    ContextBitmap a;
+    a.add(5);
+    a.add(10);
+    ContextBitmap b(std::move(a));
+
+    EXPECT_TRUE(a.is_empty());
+    EXPECT_FALSE(b.is_empty());
+    auto r = b.to_roaring();
+    EXPECT_TRUE(r.contains(5));
+    EXPECT_TRUE(r.contains(10));
+}
+
+TEST_CASE(MoveConstructHeap) {
+    ContextBitmap a;
+    a.add(100);
+    ContextBitmap b(std::move(a));
+
+    EXPECT_TRUE(a.is_empty());
+    auto r = b.to_roaring();
+    EXPECT_TRUE(r.contains(100));
+}
+
+TEST_CASE(MoveAssign) {
+    ContextBitmap a, b;
+    a.add(3);
+    b.add(7);
+    b = std::move(a);
+
+    EXPECT_TRUE(a.is_empty());
+    auto r = b.to_roaring();
+    EXPECT_TRUE(r.contains(3));
+    EXPECT_FALSE(r.contains(7));
+}
+
+TEST_CASE(FromRoaringInline) {
+    roaring::Roaring r;
+    r.add(0);
+    r.add(30);
+    r.add(62);
+    auto bm = ContextBitmap::from_roaring(r);
+
+    EXPECT_FALSE(bm.is_empty());
+    auto out = bm.to_roaring();
+    EXPECT_EQ(out.cardinality(), 3U);
+    EXPECT_TRUE(out.contains(0));
+    EXPECT_TRUE(out.contains(30));
+    EXPECT_TRUE(out.contains(62));
+}
+
+TEST_CASE(FromRoaringHeap) {
+    roaring::Roaring r;
+    r.add(5);
+    r.add(100);
+    auto bm = ContextBitmap::from_roaring(r);
+
+    auto out = bm.to_roaring();
+    EXPECT_EQ(out.cardinality(), 2U);
+    EXPECT_TRUE(out.contains(5));
+    EXPECT_TRUE(out.contains(100));
+}
+
+TEST_CASE(FromRoaringEmpty) {
+    roaring::Roaring r;
+    auto bm = ContextBitmap::from_roaring(r);
+    EXPECT_TRUE(bm.is_empty());
+}
+
+TEST_CASE(Equality) {
+    ContextBitmap a, b;
+    a.add(1);
+    a.add(2);
+    b.add(1);
+    b.add(2);
+    EXPECT_TRUE(a == b);
+
+    b.add(3);
+    EXPECT_FALSE(a == b);
+}
+
+TEST_CASE(BoundaryId62) {
+    ContextBitmap bm;
+    bm.add(62);
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(62));
+    EXPECT_EQ(r.cardinality(), 1U);
+}
+
+TEST_CASE(BoundaryId63) {
+    ContextBitmap bm;
+    bm.add(63);
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(63));
+    EXPECT_EQ(r.cardinality(), 1U);
+}
+
+TEST_CASE(MoveAssignHeap) {
+    ContextBitmap a, b;
+    a.add(100);
+    a.add(200);
+    b.add(300);
+    b.add(400);
+    b = std::move(a);
+
+    EXPECT_TRUE(a.is_empty());
+    auto r = b.to_roaring();
+    EXPECT_TRUE(r.contains(100));
+    EXPECT_TRUE(r.contains(200));
+    EXPECT_FALSE(r.contains(300));
+    EXPECT_FALSE(r.contains(400));
+}
+
+TEST_CASE(EqualityHeap) {
+    ContextBitmap a, b;
+    a.add(100);
+    a.add(200);
+    b.add(100);
+    b.add(200);
+    EXPECT_TRUE(a == b);
+
+    b.add(300);
+    EXPECT_FALSE(a == b);
+}
+
+TEST_CASE(AddSmallAfterUpgrade) {
+    ContextBitmap bm;
+    bm.add(100);
+    bm.add(5);
+
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(100));
+    EXPECT_TRUE(r.contains(5));
+    EXPECT_EQ(r.cardinality(), 2U);
+}
+
+};  // TEST_SUITE(ContextBitmap)
+
+}  // namespace
+}  // namespace clice::testing

From e1dde9ee92697b03720f204a1de1ed274610932c Mon Sep 17 00:00:00 2001
From: ykiko <ykikoykikoykiko@gmail.com>
Date: Sat, 11 Apr 2026 15:00:07 +0800
Subject: [PATCH 2/2] fix(index): address pre-PR review findings

- Fix use-after-partial-move and division-by-zero in benchmark
- Use std::exchange in ContextBitmap move operations
- Add clarifying comments on operator== mixed mode and hash() sorting
- Add tests: compilation-context merge cache hit, hash determinism,
  any_not_in empty inputs, remove edge cases

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 benchmarks/index_benchmark.cpp              | 10 +++--
 src/index/tu_index.cpp                      |  3 +-
 src/support/bitmap.h                        |  9 ++---
 tests/unit/index/merged_index_tests.cpp     | 23 +++++++++++
 tests/unit/index/tu_index_tests.cpp         | 18 +++++++++
 tests/unit/support/context_bitmap_tests.cpp | 43 +++++++++++++++++++++
 6 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/benchmarks/index_benchmark.cpp b/benchmarks/index_benchmark.cpp
index 9c792c5c8..cd25e388a 100644
--- a/benchmarks/index_benchmark.cpp
+++ b/benchmarks/index_benchmark.cpp
@@ -235,8 +235,8 @@ int main(int argc, const char** argv) {
         total_build_ms += r->stats.build_index_ms;
         total_serialize_ms += r->stats.serialize_ms;
         total_serialized += r->stats.serialized_bytes;
+        all_stats.push_back(r->stats);
         tu_indices.emplace_back(std::move(r->stats.file), std::move(r->tu_index));
-        all_stats.push_back(std::move(r->stats));
     }
 
     std::println("=== Phase 1: Compile & Build TUIndex ({} threads) ===", num_jobs);
@@ -407,9 +407,11 @@ int main(int argc, const char** argv) {
                  cache_hits,
                  cache_misses);
     std::println("  ProjectIndex merge (selective):  {:.0f}ms", project_merge_selective_ms);
-    std::println("  Main-thread total:  {:.0f}ms ({:.1f}ms/TU)",
-                 merged_index_merge_ms + project_merge_selective_ms,
-                 (merged_index_merge_ms + project_merge_selective_ms) / tu_indices.size());
+    if(!tu_indices.empty()) {
+        std::println("  Main-thread total:  {:.0f}ms ({:.1f}ms/TU)",
+                     merged_index_merge_ms + project_merge_selective_ms,
+                     (merged_index_merge_ms + project_merge_selective_ms) / tu_indices.size());
+    }
     std::println("  Serialize time:     {:.0f}ms", merge_stats.serialize_ms);
     std::print("  Total size:       ");
     print_size(merge_stats.total_serialized_bytes);
diff --git a/src/index/tu_index.cpp b/src/index/tu_index.cpp
index 0049f133c..1e588dde1 100644
--- a/src/index/tu_index.cpp
+++ b/src/index/tu_index.cpp
@@ -176,7 +176,8 @@ std::array<std::uint8_t, 32> FileIndex::hash() {
         hasher.update(llvm::ArrayRef(data, size));
     }
 
-    // Sort keys to ensure deterministic iteration order over DenseMap.
+    // Sort keys and relations to ensure deterministic hashing regardless of
+    // DenseMap iteration order.  This mutates the relations in-place intentionally.
     llvm::SmallVector<SymbolHash> sorted_keys;
     sorted_keys.reserve(relations.size());
     for(auto& [symbol_id, _]: relations) {
diff --git a/src/support/bitmap.h b/src/support/bitmap.h
index 7e0a33ba2..c99f99cf5 100644
--- a/src/support/bitmap.h
+++ b/src/support/bitmap.h
@@ -63,15 +63,12 @@ class ContextBitmap {
         free_heap();
     }
 
-    ContextBitmap(ContextBitmap&& other) noexcept : data(other.data) {
-        other.data = inline_tag;
-    }
+    ContextBitmap(ContextBitmap&& other) noexcept : data(std::exchange(other.data, inline_tag)) {}
 
     ContextBitmap& operator=(ContextBitmap&& other) noexcept {
         if(this != &other) {
             free_heap();
-            data = other.data;
-            other.data = inline_tag;
+            data = std::exchange(other.data, inline_tag);
         }
         return *this;
     }
@@ -137,6 +134,8 @@ class ContextBitmap {
     friend bool operator==(const ContextBitmap& lhs, const ContextBitmap& rhs) {
         if(lhs.is_inline() && rhs.is_inline())
             return lhs.data == rhs.data;
+        // Different modes implies different sets: a heap bitmap always contains
+        // at least one id >= inline_capacity that an inline bitmap cannot hold.
         if(lhs.is_inline() != rhs.is_inline())
             return false;
         return *lhs.as_heap() == *rhs.as_heap();
diff --git a/tests/unit/index/merged_index_tests.cpp b/tests/unit/index/merged_index_tests.cpp
index 2cf61fdaf..3d85c4717 100644
--- a/tests/unit/index/merged_index_tests.cpp
+++ b/tests/unit/index/merged_index_tests.cpp
@@ -396,6 +396,29 @@ TEST_CASE(MergeCacheHit) {
     ASSERT_TRUE(second_hit);
 }
 
+TEST_CASE(MergeCacheHitCompilation) {
+    build_index(R"(
+            int foo() { return 42; }
+        )");
+    auto tu_a = tu_index;
+
+    build_index(R"(
+            int foo() { return 42; }
+        )");
+    auto tu_b = tu_index;
+
+    index::MergedIndex merged;
+    std::vector<index::IncludeLocation> locations_a;
+    bool first_hit =
+        merged.merge(0, tu_a.built_at, std::move(locations_a), tu_a.main_file_index, {});
+    ASSERT_FALSE(first_hit);
+
+    std::vector<index::IncludeLocation> locations_b;
+    bool second_hit =
+        merged.merge(1, tu_b.built_at, std::move(locations_b), tu_b.main_file_index, {});
+    ASSERT_TRUE(second_hit);
+}
+
 };  // TEST_SUITE(MergedIndex)
 }  // namespace
 }  // namespace clice::testing
diff --git a/tests/unit/index/tu_index_tests.cpp b/tests/unit/index/tu_index_tests.cpp
index d425892e2..fcc5f99fe 100644
--- a/tests/unit/index/tu_index_tests.cpp
+++ b/tests/unit/index/tu_index_tests.cpp
@@ -512,6 +512,24 @@ TEST_CASE(HashCaching) {
     ASSERT_EQ(hash1, hash2);
 }
 
+TEST_CASE(HashDeterminism) {
+    add_main("main.cpp", R"(
+            int foo() { return 42; }
+            int bar() { return foo() + 1; }
+        )");
+    ASSERT_TRUE(compile());
+    auto index_a = index::TUIndex::build(*unit);
+
+    add_main("main.cpp", R"(
+            int foo() { return 42; }
+            int bar() { return foo() + 1; }
+        )");
+    ASSERT_TRUE(compile());
+    auto index_b = index::TUIndex::build(*unit);
+
+    ASSERT_EQ(index_a.main_file_index.hash(), index_b.main_file_index.hash());
+}
+
 };  // TEST_SUITE(TUIndex)
 }  // namespace
 }  // namespace clice::testing
diff --git a/tests/unit/support/context_bitmap_tests.cpp b/tests/unit/support/context_bitmap_tests.cpp
index 1d3885629..255951333 100644
--- a/tests/unit/support/context_bitmap_tests.cpp
+++ b/tests/unit/support/context_bitmap_tests.cpp
@@ -248,6 +248,49 @@ TEST_CASE(AddSmallAfterUpgrade) {
     EXPECT_EQ(r.cardinality(), 2U);
 }
 
+TEST_CASE(AnyNotInEmpty) {
+    ContextBitmap empty_a, empty_b, non_empty;
+    non_empty.add(5);
+
+    EXPECT_FALSE(empty_a.any_not_in(non_empty));
+    EXPECT_TRUE(non_empty.any_not_in(empty_b));
+    EXPECT_FALSE(empty_a.any_not_in(empty_b));
+}
+
+TEST_CASE(RemoveNonexistent) {
+    // Inline mode: remove id that was never added.
+    ContextBitmap inline_bm;
+    inline_bm.add(3);
+    inline_bm.remove(10);
+    EXPECT_FALSE(inline_bm.is_empty());
+    auto r1 = inline_bm.to_roaring();
+    EXPECT_TRUE(r1.contains(3));
+    EXPECT_EQ(r1.cardinality(), 1U);
+
+    // Heap mode: remove id that was never added.
+    ContextBitmap heap_bm;
+    heap_bm.add(100);
+    heap_bm.remove(200);
+    EXPECT_FALSE(heap_bm.is_empty());
+    auto r2 = heap_bm.to_roaring();
+    EXPECT_TRUE(r2.contains(100));
+    EXPECT_EQ(r2.cardinality(), 1U);
+}
+
+TEST_CASE(RemoveLargeInInline) {
+    ContextBitmap bm;
+    bm.add(5);
+    bm.add(10);
+    // Remove id >= 63 while in inline mode; should be a no-op.
+    bm.remove(63);
+    bm.remove(100);
+    EXPECT_FALSE(bm.is_empty());
+    auto r = bm.to_roaring();
+    EXPECT_TRUE(r.contains(5));
+    EXPECT_TRUE(r.contains(10));
+    EXPECT_EQ(r.cardinality(), 2U);
+}
+
 };  // TEST_SUITE(ContextBitmap)
 
 }  // namespace