From 15f54ea18f701eade43c8000f67edefe487a69b4 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 00:20:52 +0800 Subject: [PATCH 01/17] feat(support): add unified CacheStore blob store Content-addressed blob store with two-phase atomic writes (tmp + fsync + rename), per-namespace lifecycle policies (LRU eviction, Persistent, per-instance Scratch), crash recovery via tmp sweep and directory scan, and a manifest checkpoint carrying last-accessed times across restarts. --- src/support/cache_store.cpp | 569 +++++++++++++++++++++++ src/support/cache_store.h | 149 ++++++ tests/unit/support/cache_store_tests.cpp | 379 +++++++++++++++ 3 files changed, 1097 insertions(+) create mode 100644 src/support/cache_store.cpp create mode 100644 src/support/cache_store.h create mode 100644 tests/unit/support/cache_store_tests.cpp diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp new file mode 100644 index 000000000..4579c3533 --- /dev/null +++ b/src/support/cache_store.cpp @@ -0,0 +1,569 @@ +#include "support/cache_store.h" + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#endif + +#include "support/filesystem.h" +#include "support/logging.h" + +#include "kota/codec/json/json.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" + +namespace clice { + +namespace { + +/// Manifest checkpoint is forced after this many commits/invalidates. +constexpr std::uint32_t checkpoint_interval = 16; + +/// JSON layout of manifest.json. Only an acceleration structure: blob +/// presence and size always come from the filesystem; the manifest merely +/// carries last-accessed times across restarts. +struct ManifestEntry { + std::string ns; + std::string key; + std::int64_t atime = 0; +}; + +struct ManifestData { + std::vector entries; +}; + +bool is_pid_alive(std::uint32_t pid) { +#ifdef _WIN32 + HANDLE handle = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid); + if(!handle) { + return GetLastError() == ERROR_ACCESS_DENIED; + } + DWORD exit_code = 0; + bool alive = GetExitCodeProcess(handle, &exit_code) && exit_code == STILL_ACTIVE; + CloseHandle(handle); + return alive; +#else + if(::kill(static_cast(pid), 0) == 0) { + return true; + } + return errno == EPERM; +#endif +} + +/// Flush a file's data to stable storage before the commit rename, so a +/// crash cannot leave a blob whose name promises content it doesn't have. +/// Opened via LLVM so UTF-8 paths work on Windows. +std::error_code sync_file(llvm::StringRef path) { + auto file = llvm::sys::fs::openNativeFile(path, + llvm::sys::fs::CD_OpenExisting, + llvm::sys::fs::FA_Write, + llvm::sys::fs::OF_None); + if(!file) { + return llvm::errorToErrorCode(file.takeError()); + } + + std::error_code ec; +#ifdef _WIN32 + if(!FlushFileBuffers(*file)) { + ec = std::error_code(GetLastError(), std::system_category()); + } +#else + if(::fsync(*file) != 0) { + ec = std::error_code(errno, std::generic_category()); + } +#endif + llvm::sys::fs::closeFile(*file); + return ec; +} + +/// Remove directory children whose name parses as a dead pid. +/// Used for both `tmp/{pid}` and Scratch `{ns}/{pid}` layouts. +void sweep_dead_pid_dirs(llvm::StringRef dir, std::uint32_t self_pid) { + std::error_code ec; + for(auto it = llvm::sys::fs::directory_iterator(dir, ec); + !ec && it != llvm::sys::fs::directory_iterator(); + it.increment(ec)) { + std::uint32_t pid = 0; + auto name = path::filename(it->path()); + if(name.getAsInteger(10, pid)) { + continue; + } + if(pid == self_pid || is_pid_alive(pid)) { + continue; + } + llvm::sys::fs::remove_directories(it->path()); + LOG_DEBUG("CacheStore: removed dead instance directory {}", it->path()); + } +} + +std::int64_t now_ms() { + auto now = std::chrono::system_clock::now().time_since_epoch(); + return std::chrono::duration_cast(now).count(); +} + +} // namespace + +struct CacheStore::State { + struct Entry { + std::uint64_t size = 0; + std::int64_t atime = 0; + }; + + struct Namespace { + CacheNamespace config; + /// Directory holding this namespace's blobs: `{base}/{name}` for + /// LRU/Persistent, `{base}/{name}/{pid}` for Scratch. + std::string dir; + llvm::StringMap entries; + std::uint64_t total_size = 0; + }; + + std::mutex mutex; + + /// `{root}/cache/v{version}` — everything the store manages lives here. + std::string base; + + /// `{base}/tmp/{pid}` — this instance's in-flight writes. + std::string tmp_dir; + + std::uint32_t self_pid = 0; + + llvm::StringMap namespaces; + + /// Last-accessed times loaded from the manifest, keyed by "{ns}/{key}". + /// Consumed when the corresponding namespace is registered. + llvm::StringMap manifest_atimes; + + std::uint64_t next_tmp_id = 0; + + std::uint32_t changes_since_checkpoint = 0; + bool dirty = false; + + /// Logical clock: strictly increasing per issued stamp so that LRU + /// ordering is deterministic even within one millisecond. + std::int64_t last_stamp = 0; + + std::int64_t next_stamp() { + last_stamp = std::max(now_ms(), last_stamp + 1); + return last_stamp; + } + + std::string blob_path(const Namespace& ns, llvm::StringRef key) const { + return path::join(ns.dir, key.str() + ns.config.extension); + } + + Namespace* find_namespace(llvm::StringRef name) { + auto it = namespaces.find(name); + return it != namespaces.end() ? &it->second : nullptr; + } + + void evict_locked(Namespace& ns, llvm::StringRef keep_key); + void checkpoint_locked(); +}; + +CacheStore::CacheStore(std::unique_ptr state) : state(std::move(state)) {} + +CacheStore::CacheStore(CacheStore&&) noexcept = default; + +CacheStore& CacheStore::operator=(CacheStore&&) noexcept = default; + +CacheStore::~CacheStore() = default; + +std::expected CacheStore::open(llvm::StringRef root, + std::uint32_t version) { + assert(!root.empty() && "cache root must not be empty"); + + auto state = std::make_unique(); + state->self_pid = static_cast(llvm::sys::Process::getProcessId()); + + auto parent = path::join(root, "cache"); + auto version_dir = std::format("v{}", version); + state->base = path::join(parent, version_dir); + + if(auto ec = llvm::sys::fs::create_directories(state->base)) { + return std::unexpected(ec); + } + + // Discard anything that isn't the current layout version: older version + // directories and the pre-versioning layout (pch/, pcm/, cache.json). + std::error_code ec; + for(auto it = llvm::sys::fs::directory_iterator(parent, ec); + !ec && it != llvm::sys::fs::directory_iterator(); + it.increment(ec)) { + if(path::filename(it->path()) == version_dir) { + continue; + } + LOG_INFO("CacheStore: discarding stale cache layout {}", it->path()); + if(llvm::sys::fs::is_directory(it->path())) { + llvm::sys::fs::remove_directories(it->path()); + } else { + llvm::sys::fs::remove(it->path()); + } + } + + // Load the manifest. Corrupt or missing is fine: registration falls + // back to a directory scan with mtimes as last-accessed times. + auto manifest_path = path::join(state->base, "manifest.json"); + if(auto content = fs::read(manifest_path)) { + ManifestData data; + if(kota::codec::json::from_json(*content, data)) { + for(auto& entry: data.entries) { + auto key = entry.ns + "/" + entry.key; + state->manifest_atimes[key] = entry.atime; + state->last_stamp = std::max(state->last_stamp, entry.atime); + } + } else { + LOG_WARN("CacheStore: corrupt manifest {}, rebuilding from directory scan", + manifest_path); + } + } + + // The only crash residue are in-flight tmp files; committed blobs are + // complete by construction (atomic rename). Sweep tmp directories of + // instances that no longer exist. + auto tmp_parent = path::join(state->base, "tmp"); + sweep_dead_pid_dirs(tmp_parent, state->self_pid); + + state->tmp_dir = path::join(tmp_parent, std::to_string(state->self_pid)); + llvm::sys::fs::remove_directories(state->tmp_dir); + if(auto ec2 = llvm::sys::fs::create_directories(state->tmp_dir)) { + return std::unexpected(ec2); + } + + return CacheStore(std::move(state)); +} + +void CacheStore::register_namespace(CacheNamespace ns) { + std::lock_guard guard(state->mutex); + + auto ns_dir = path::join(state->base, ns.name); + llvm::sys::fs::create_directories(ns_dir); + + auto [it, inserted] = state->namespaces.try_emplace(ns.name); + assert(inserted && "namespace registered twice"); + if(!inserted) { + return; + } + auto& ns_state = it->second; + ns_state.config = std::move(ns); + + if(ns_state.config.policy == CachePolicy::Scratch) { + // Scratch directories are per-instance; reclaim those left behind + // by crashed instances and start with a fresh one of our own. + sweep_dead_pid_dirs(ns_dir, state->self_pid); + ns_state.dir = path::join(ns_dir, std::to_string(state->self_pid)); + llvm::sys::fs::remove_directories(ns_state.dir); + llvm::sys::fs::create_directories(ns_state.dir); + return; + } + + ns_state.dir = std::move(ns_dir); + + // Adopt blobs already on disk. The directory scan, not the manifest, + // decides existence: this also picks up blobs committed after the last + // checkpoint of a crashed instance. + std::error_code ec; + for(auto iter = llvm::sys::fs::directory_iterator(ns_state.dir, ec); + !ec && iter != llvm::sys::fs::directory_iterator(); + iter.increment(ec)) { + auto filename = path::filename(iter->path()); + auto& ext = ns_state.config.extension; + if(!ext.empty() && !filename.consume_back(ext)) { + continue; + } + + llvm::sys::fs::file_status status; + if(llvm::sys::fs::status(iter->path(), status) || + status.type() != llvm::sys::fs::file_type::regular_file) { + continue; + } + + auto atime_it = state->manifest_atimes.find(ns_state.config.name + "/" + filename.str()); + auto atime = atime_it != state->manifest_atimes.end() + ? atime_it->second + : std::chrono::duration_cast( + status.getLastModificationTime().time_since_epoch()) + .count(); + + ns_state.entries[filename] = {status.getSize(), atime}; + ns_state.total_size += status.getSize(); + } + + // Enforce the budget immediately in case it shrank since the last run. + state->evict_locked(ns_state, ""); +} + +std::optional CacheStore::lookup(llvm::StringRef ns, llvm::StringRef key) { + std::lock_guard guard(state->mutex); + + auto* ns_state = state->find_namespace(ns); + if(!ns_state) { + return std::nullopt; + } + + auto it = ns_state->entries.find(key); + if(it == ns_state->entries.end()) { + return std::nullopt; + } + + it->second.atime = state->next_stamp(); + state->dirty = true; + return state->blob_path(*ns_state, key); +} + +CacheStore::PendingEntry CacheStore::begin_store(llvm::StringRef ns, llvm::StringRef key) { + std::lock_guard guard(state->mutex); + + auto* ns_state = state->find_namespace(ns); + assert(ns_state && "begin_store on unregistered namespace"); + if(!ns_state) { + LOG_ERROR("CacheStore: begin_store on unregistered namespace {}", ns); + return {}; + } + + auto tmp_name = std::format("{}{}", state->next_tmp_id++, ns_state->config.extension); + return PendingEntry{ns.str(), key.str(), path::join(state->tmp_dir, tmp_name)}; +} + +std::expected CacheStore::commit(PendingEntry pending) { + if(pending.tmp_path.empty()) { + return std::unexpected(std::make_error_code(std::errc::invalid_argument)); + } + + llvm::sys::fs::file_status status; + if(auto ec = llvm::sys::fs::status(pending.tmp_path, status)) { + return std::unexpected(ec); + } + + // Scratch blobs are cheap derivatives with no durability requirement; + // they skip the fsync. + bool durable; + { + std::lock_guard guard(state->mutex); + auto* ns_state = state->find_namespace(pending.ns); + if(!ns_state) { + return std::unexpected(std::make_error_code(std::errc::invalid_argument)); + } + durable = ns_state->config.policy != CachePolicy::Scratch; + } + + // fsync outside the lock so lookups are not blocked behind disk flushes. + if(durable) { + if(auto ec = sync_file(pending.tmp_path)) { + llvm::sys::fs::remove(pending.tmp_path); + return std::unexpected(ec); + } + } + + std::string final_path; + { + std::lock_guard guard(state->mutex); + + auto* ns_state = state->find_namespace(pending.ns); + if(!ns_state) { + return std::unexpected(std::make_error_code(std::errc::invalid_argument)); + } + + final_path = state->blob_path(*ns_state, pending.key); + if(auto result = fs::rename(pending.tmp_path, final_path); !result) { + // Content-addressed keys make a rename collision benign: an + // existing blob with the same name has the same content (this + // happens on Windows when the destination is currently open). + // Account for the file that survived on disk, not the tmp. + llvm::sys::fs::remove(pending.tmp_path); + if(llvm::sys::fs::status(final_path, status)) { + return std::unexpected(result.error()); + } + } + + auto& entry = ns_state->entries[pending.key]; + ns_state->total_size += status.getSize() - entry.size; + entry.size = status.getSize(); + entry.atime = state->next_stamp(); + + if(ns_state->config.policy == CachePolicy::LRU) { + state->evict_locked(*ns_state, pending.key); + } + + if(ns_state->config.policy != CachePolicy::Scratch) { + state->dirty = true; + state->changes_since_checkpoint += 1; + } + } + + maybe_checkpoint(); + return final_path; +} + +void CacheStore::abort(const PendingEntry& pending) { + if(!pending.tmp_path.empty()) { + llvm::sys::fs::remove(pending.tmp_path); + } +} + +void CacheStore::invalidate(llvm::StringRef ns, llvm::StringRef key) { + { + std::lock_guard guard(state->mutex); + + auto* ns_state = state->find_namespace(ns); + if(!ns_state) { + return; + } + + auto it = ns_state->entries.find(key); + if(it == ns_state->entries.end()) { + return; + } + + llvm::sys::fs::remove(state->blob_path(*ns_state, key)); + ns_state->total_size -= it->second.size; + ns_state->entries.erase(it); + + if(ns_state->config.policy != CachePolicy::Scratch) { + state->dirty = true; + state->changes_since_checkpoint += 1; + } + } + + maybe_checkpoint(); +} + +void CacheStore::for_each_key(llvm::StringRef ns, llvm::function_ref fn) { + llvm::SmallVector keys; + { + std::lock_guard guard(state->mutex); + auto* ns_state = state->find_namespace(ns); + if(!ns_state) { + return; + } + keys.reserve(ns_state->entries.size()); + for(auto& entry: ns_state->entries) { + keys.push_back(entry.first().str()); + } + } + + for(auto& key: keys) { + fn(key); + } +} + +llvm::StringRef CacheStore::base_dir() const { + return state->base; +} + +void CacheStore::State::evict_locked(Namespace& ns, llvm::StringRef keep_key) { + if(ns.config.policy != CachePolicy::LRU || ns.config.max_bytes == 0 || + ns.total_size <= ns.config.max_bytes) { + return; + } + + struct Candidate { + llvm::StringRef key; + std::int64_t atime; + std::uint64_t size; + }; + + llvm::SmallVector candidates; + candidates.reserve(ns.entries.size()); + for(auto& entry: ns.entries) { + if(entry.first() != keep_key) { + candidates.push_back({entry.first(), entry.second.atime, entry.second.size}); + } + } + std::ranges::sort(candidates, {}, &Candidate::atime); + + for(auto& candidate: candidates) { + if(ns.total_size <= ns.config.max_bytes) { + break; + } + // A failed delete (e.g. the file is open on Windows) keeps the + // entry so the next eviction round retries it. + if(llvm::sys::fs::remove(blob_path(ns, candidate.key))) { + LOG_DEBUG("CacheStore: cannot evict {} from {}, retrying later", + candidate.key, + ns.config.name); + continue; + } + LOG_DEBUG("CacheStore: evicted {} ({} bytes) from {}", + candidate.key, + candidate.size, + ns.config.name); + ns.total_size -= candidate.size; + ns.entries.erase(candidate.key); + dirty = true; + } +} + +void CacheStore::State::checkpoint_locked() { + if(!dirty) { + return; + } + + ManifestData data; + for(auto& [name, ns_state]: namespaces) { + if(ns_state.config.policy == CachePolicy::Scratch) { + continue; + } + for(auto& entry: ns_state.entries) { + data.entries.push_back({name.str(), entry.first().str(), entry.second.atime}); + } + } + + auto json = kota::codec::json::to_json(data); + if(!json) { + LOG_WARN("CacheStore: failed to serialize manifest"); + return; + } + + auto manifest_path = path::join(base, "manifest.json"); + auto tmp_path = path::join(tmp_dir, "manifest.json"); + if(auto result = fs::write(tmp_path, *json); !result) { + LOG_WARN("CacheStore: failed to write manifest: {}", result.error().message()); + return; + } + if(auto result = fs::rename(tmp_path, manifest_path); !result) { + LOG_WARN("CacheStore: failed to publish manifest: {}", result.error().message()); + return; + } + + dirty = false; + changes_since_checkpoint = 0; +} + +void CacheStore::checkpoint() { + std::lock_guard guard(state->mutex); + state->checkpoint_locked(); +} + +void CacheStore::maybe_checkpoint() { + std::lock_guard guard(state->mutex); + if(state->changes_since_checkpoint >= checkpoint_interval) { + state->checkpoint_locked(); + } +} + +void CacheStore::shutdown() { + std::lock_guard guard(state->mutex); + state->checkpoint_locked(); + + llvm::sys::fs::remove_directories(state->tmp_dir); + for(auto& [name, ns_state]: state->namespaces) { + if(ns_state.config.policy == CachePolicy::Scratch) { + llvm::sys::fs::remove_directories(ns_state.dir); + } + } +} + +} // namespace clice diff --git a/src/support/cache_store.h b/src/support/cache_store.h new file mode 100644 index 000000000..25057c654 --- /dev/null +++ b/src/support/cache_store.h @@ -0,0 +1,149 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/StringRef.h" + +namespace clice { + +/// Lifecycle policy for a cache namespace. +enum class CachePolicy : std::uint8_t { + /// Size-capped with least-recently-used eviction: rebuildable large + /// artifacts (PCH, PCM). + LRU, + + /// Never evicted automatically, only via explicit invalidate(): + /// data that is expensive to accumulate (index). + Persistent, + + /// Per-instance working files: not tracked in the manifest, not part + /// of LRU. Stored under a pid subdirectory; directories of dead pids + /// are cleaned up when the namespace is registered. + Scratch, +}; + +/// Static configuration of a cache namespace. +struct CacheNamespace { + /// Directory name under the store root, e.g. "pch". + std::string name; + + /// File extension appended to keys, including the dot, e.g. ".pch". + std::string extension; + + CachePolicy policy = CachePolicy::LRU; + + /// Size budget for LRU namespaces; 0 means unlimited. + /// Ignored for Persistent and Scratch. + std::uint64_t max_bytes = 0; +}; + +/// Content-addressed blob store with atomic writes, crash recovery and +/// per-namespace lifecycle policies. +/// +/// Responsibility split: the store only manages blob lifecycle — atomic +/// two-phase writes (begin_store/commit), LRU accounting and eviction, +/// orphan cleanup and the manifest checkpoint. Keys are opaque, +/// filename-safe strings constructed by the caller (project convention: +/// hex of llvm::xxh3_128bits, optionally with a readable prefix). +/// Dependency tracking and staleness decisions are the caller's job. +/// +/// On-disk layout under `{root}/cache/v{version}/`: +/// manifest.json last-accessed checkpoint (not a source of truth) +/// tmp/{pid}/ in-flight writes of one live instance +/// {ns}/{key}{ext} committed blobs (LRU / Persistent) +/// {ns}/{pid}/{key}{ext} Scratch blobs of one live instance +/// +/// A blob is complete iff it exists at its final path (atomic rename); the +/// only crash residue is tmp files, swept on open(). Opening a root whose +/// layout version differs discards the old directory entirely. +/// +/// The store is passive: it owns no timer and never blocks waiting for IO +/// completion beyond the call itself. Periodic checkpoint() scheduling is +/// the owner's responsibility. All methods are thread-safe so that heavy +/// calls (commit's fsync, checkpoint) can be offloaded to a worker thread +/// while lookups continue on the event loop. +class CacheStore { +public: + /// A two-phase write in progress. The caller (or a worker process on + /// its behalf) writes the blob to tmp_path, then either commits or + /// aborts. Dropping a PendingEntry without commit/abort leaks the tmp + /// file until the next open() sweep. + struct PendingEntry { + std::string ns; + std::string key; + std::string tmp_path; + }; + + /// Open (creating if necessary) the store under `root`. Any sibling + /// version directory other than v{version} is deleted. Loads the + /// manifest if present and sweeps tmp directories of dead instances. + static std::expected open(llvm::StringRef root, + std::uint32_t version); + + CacheStore(CacheStore&&) noexcept; + CacheStore& operator=(CacheStore&&) noexcept; + ~CacheStore(); + + /// Register a namespace and scan its directory to rebuild in-memory + /// state. Blobs already on disk are adopted; their last-accessed time + /// comes from the manifest, falling back to file mtime. For Scratch + /// namespaces this cleans dead-pid subdirectories instead. + void register_namespace(CacheNamespace ns); + + /// Return the absolute blob path on hit and refresh its in-memory + /// last-accessed time (persisted on the next checkpoint). No disk IO. + std::optional lookup(llvm::StringRef ns, llvm::StringRef key); + + /// Begin a two-phase write: returns a unique tmp path the blob must be + /// written to (safe to hand to a worker process). + PendingEntry begin_store(llvm::StringRef ns, llvm::StringRef key); + + /// Finish a two-phase write: fsync the tmp file and atomically rename + /// it to its final path. Triggers LRU eviction when the namespace + /// exceeds its budget. Returns the final blob path. + std::expected commit(PendingEntry pending); + + /// Cancel a two-phase write and delete the tmp file. + void abort(const PendingEntry& pending); + + /// Remove a blob. Primarily for Persistent namespaces, whose cleanup + /// is the caller's mark-and-sweep; LRU namespaces rarely need it. + void invalidate(llvm::StringRef ns, llvm::StringRef key); + + /// Enumerate all keys in a namespace (for caller-side mark-and-sweep). + /// Iterates over a snapshot, so fn may call back into the store. + void for_each_key(llvm::StringRef ns, llvm::function_ref fn); + + /// The versioned root directory, e.g. `{root}/cache/v1`. Callers may + /// place their own metadata files directly under it (the store only + /// manages namespace subdirectories); they die with the version. + llvm::StringRef base_dir() const; + + /// Atomically persist the manifest (key sizes and last-accessed times) + /// if anything changed. Also runs automatically every few commits; + /// the owner should additionally schedule it periodically and call + /// shutdown() on exit. + void checkpoint(); + + /// Final checkpoint plus removal of this instance's tmp and Scratch + /// directories. + void shutdown(); + +private: + struct State; + + explicit CacheStore(std::unique_ptr state); + + /// Checkpoint if enough changes accumulated since the last one. + void maybe_checkpoint(); + + std::unique_ptr state; +}; + +} // namespace clice diff --git a/tests/unit/support/cache_store_tests.cpp b/tests/unit/support/cache_store_tests.cpp new file mode 100644 index 000000000..ee2dd7e04 --- /dev/null +++ b/tests/unit/support/cache_store_tests.cpp @@ -0,0 +1,379 @@ +#include +#include +#include + +#include "test/temp_dir.h" +#include "test/test.h" +#include "support/cache_store.h" +#include "support/filesystem.h" + +#include "llvm/Support/Process.h" + +namespace clice::testing { + +namespace { + +constexpr std::uint32_t version = 1; + +/// A pid no real process can have (Linux pid_max is 4194304; Windows +/// pids are multiples of 4), so its directories always look dead. +constexpr const char* dead_pid = "999999999"; + +/// Helper precondition check that survives NDEBUG builds (the unit tests +/// run under RelWithDebInfo); failures abort with a location instead of +/// becoming UB on a bad expected/optional access. +void require(bool condition, const char* what) { + if(!condition) { + std::println(stderr, "cache_store_tests: requirement failed: {}", what); + std::abort(); + } +} + +CacheStore open_store(const TempDir& tmp, std::uint32_t ver = version) { + auto store = CacheStore::open(tmp.path("root"), ver); + require(store.has_value(), "CacheStore::open failed"); + return std::move(*store); +} + +void register_lru(CacheStore& store, std::uint64_t max_bytes = 0) { + store.register_namespace( + {.name = "pch", .extension = ".pch", .policy = CachePolicy::LRU, .max_bytes = max_bytes}); +} + +/// Run a full two-phase write with the given content. +std::string + put(CacheStore& store, llvm::StringRef ns, llvm::StringRef key, llvm::StringRef content) { + auto pending = store.begin_store(ns, key); + require(!pending.tmp_path.empty(), "begin_store returned no tmp path"); + require(fs::write(pending.tmp_path, content).has_value(), "tmp write failed"); + auto committed = store.commit(std::move(pending)); + require(committed.has_value(), "commit failed"); + return *committed; +} + +TEST_SUITE(CacheStore) { + +TEST_CASE(StoreAndLookup) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + ASSERT_FALSE(store.lookup("pch", "k1").has_value()); + + auto path = put(store, "pch", "k1", "blob content"); + + auto hit = store.lookup("pch", "k1"); + ASSERT_TRUE(hit.has_value()); + ASSERT_EQ(*hit, path); + ASSERT_EQ(fs::read(*hit).value_or(""), "blob content"); + + // The blob landed inside the versioned namespace directory. + ASSERT_TRUE(llvm::StringRef(path).contains("v1")); + ASSERT_TRUE(llvm::StringRef(path).ends_with("k1.pch")); +} + +TEST_CASE(AbortRemovesTmp) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + auto pending = store.begin_store("pch", "k1"); + ASSERT_TRUE(fs::write(pending.tmp_path, "junk").has_value()); + auto tmp_path = pending.tmp_path; + store.abort(pending); + + ASSERT_FALSE(llvm::sys::fs::exists(tmp_path)); + ASSERT_FALSE(store.lookup("pch", "k1").has_value()); +} + +TEST_CASE(CommitWithoutWriteFails) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + auto pending = store.begin_store("pch", "k1"); + ASSERT_FALSE(store.commit(std::move(pending)).has_value()); +} + +TEST_CASE(SurvivesReopen) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "k1", "persisted"); + store.shutdown(); + } + + auto store = open_store(tmp); + register_lru(store); + auto hit = store.lookup("pch", "k1"); + ASSERT_TRUE(hit.has_value()); + ASSERT_EQ(fs::read(*hit).value_or(""), "persisted"); +} + +TEST_CASE(VersionBumpDiscards) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "k1", "old version blob"); + store.shutdown(); + } + + auto store = open_store(tmp, version + 1); + register_lru(store); + ASSERT_FALSE(store.lookup("pch", "k1").has_value()); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/v1"))); +} + +TEST_CASE(LegacyLayoutDiscarded) { + TempDir tmp; + // Pre-versioning layout: blobs and metadata directly under cache/. + tmp.touch("root/cache/cache.json", "{}"); + tmp.touch("root/cache/pch/deadbeef.pch", "old"); + + auto store = open_store(tmp); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/cache.json"))); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/pch"))); +} + +TEST_CASE(LruEviction) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store, 25); // fits two 10-byte blobs, not three + + put(store, "pch", "a", "aaaaaaaaaa"); + put(store, "pch", "b", "bbbbbbbbbb"); + + // Touch "a" so "b" becomes the coldest entry. + ASSERT_TRUE(store.lookup("pch", "a").has_value()); + + put(store, "pch", "c", "cccccccccc"); + + ASSERT_TRUE(store.lookup("pch", "a").has_value()); + ASSERT_FALSE(store.lookup("pch", "b").has_value()); + ASSERT_TRUE(store.lookup("pch", "c").has_value()); +} + +TEST_CASE(FreshCommitNotEvicted) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store, 5); // smaller than a single blob + + auto path = put(store, "pch", "big", "0123456789"); + ASSERT_TRUE(llvm::sys::fs::exists(path)); + ASSERT_TRUE(store.lookup("pch", "big").has_value()); +} + +TEST_CASE(PersistentNeverEvicted) { + TempDir tmp; + auto store = open_store(tmp); + store.register_namespace( + {.name = "index", .extension = ".idx", .policy = CachePolicy::Persistent, .max_bytes = 1}); + + put(store, "index", "a", "aaaaaaaaaa"); + put(store, "index", "b", "bbbbbbbbbb"); + + ASSERT_TRUE(store.lookup("index", "a").has_value()); + ASSERT_TRUE(store.lookup("index", "b").has_value()); +} + +TEST_CASE(InvalidateRemovesBlob) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + auto path = put(store, "pch", "k1", "blob"); + store.invalidate("pch", "k1"); + + ASSERT_FALSE(store.lookup("pch", "k1").has_value()); + ASSERT_FALSE(llvm::sys::fs::exists(path)); +} + +TEST_CASE(ForEachKey) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + put(store, "pch", "a", "1"); + put(store, "pch", "b", "2"); + + std::vector keys; + store.for_each_key("pch", [&](llvm::StringRef key) { keys.push_back(key.str()); }); + std::ranges::sort(keys); + ASSERT_EQ(keys, (std::vector{"a", "b"})); + + // Snapshot semantics: invalidating from the callback must be safe. + store.for_each_key("pch", [&](llvm::StringRef key) { store.invalidate("pch", key); }); + ASSERT_FALSE(store.lookup("pch", "a").has_value()); + ASSERT_FALSE(store.lookup("pch", "b").has_value()); +} + +TEST_CASE(MissingManifestRescans) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "k1", "scanned blob"); + store.shutdown(); + } + + [[maybe_unused]] auto removed = llvm::sys::fs::remove(tmp.path("root/cache/v1/manifest.json")); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/v1/manifest.json"))); + + auto store = open_store(tmp); + register_lru(store); + auto hit = store.lookup("pch", "k1"); + ASSERT_TRUE(hit.has_value()); + ASSERT_EQ(fs::read(*hit).value_or(""), "scanned blob"); +} + +TEST_CASE(CorruptManifestRescans) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "k1", "scanned blob"); + store.shutdown(); + } + + tmp.touch("root/cache/v1/manifest.json", "this is not json {{{"); + + auto store = open_store(tmp); + register_lru(store); + ASSERT_TRUE(store.lookup("pch", "k1").has_value()); +} + +TEST_CASE(UncheckpointedBlobAdopted) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "k1", "checkpointed"); + store.shutdown(); + } + + // Simulate a crash after commit but before checkpoint: a blob exists + // on disk that the manifest has never heard of. + tmp.touch("root/cache/v1/pch/orphan.pch", "uncheckpointed"); + + auto store = open_store(tmp); + register_lru(store); + ASSERT_TRUE(store.lookup("pch", "k1").has_value()); + ASSERT_TRUE(store.lookup("pch", "orphan").has_value()); +} + +TEST_CASE(DeadInstanceTmpSwept) { + TempDir tmp; + tmp.touch(std::string("root/cache/v1/tmp/") + dead_pid + "/0.pch", "leftover"); + + auto store = open_store(tmp); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path(std::string("root/cache/v1/tmp/") + dead_pid))); +} + +TEST_CASE(ShutdownRemovesOwnTmp) { + TempDir tmp; + auto pid = std::to_string(llvm::sys::Process::getProcessId()); + { + auto store = open_store(tmp); + register_lru(store); + ASSERT_TRUE(llvm::sys::fs::exists(tmp.path("root/cache/v1/tmp/" + pid))); + store.shutdown(); + } + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/v1/tmp/" + pid))); +} + +TEST_CASE(ScratchBasics) { + TempDir tmp; + auto store = open_store(tmp); + store.register_namespace( + {.name = "header_context", .extension = ".h", .policy = CachePolicy::Scratch}); + + auto pid = std::to_string(llvm::sys::Process::getProcessId()); + auto path = put(store, "header_context", "k1", "preamble"); + + // Scratch blobs live under this instance's pid directory. + ASSERT_TRUE(llvm::StringRef(path).contains(pid)); + ASSERT_TRUE(store.lookup("header_context", "k1").has_value()); + + // Scratch entries never enter the manifest. + store.checkpoint(); + auto manifest = fs::read(tmp.path("root/cache/v1/manifest.json")); + if(manifest.has_value()) { + ASSERT_FALSE(llvm::StringRef(*manifest).contains("header_context")); + } + + // shutdown removes the whole instance directory. + store.shutdown(); + ASSERT_FALSE(llvm::sys::fs::exists(path)); +} + +TEST_CASE(ScratchDeadPidSwept) { + TempDir tmp; + tmp.touch(std::string("root/cache/v1/header_context/") + dead_pid + "/x.h", "stale"); + + auto store = open_store(tmp); + store.register_namespace( + {.name = "header_context", .extension = ".h", .policy = CachePolicy::Scratch}); + + ASSERT_FALSE( + llvm::sys::fs::exists(tmp.path(std::string("root/cache/v1/header_context/") + dead_pid))); +} + +TEST_CASE(CommitOverwriteSameKey) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store, 20); + + put(store, "pch", "k1", "first"); + auto path = put(store, "pch", "k1", "second"); + ASSERT_EQ(fs::read(path).value_or(""), "second"); + + // total_size must account for replacement, not accumulate: correct + // accounting gives 6 + 10 = 16 <= 20 (no eviction); accumulating the + // replaced 5 bytes would give 21 > 20 and evict k1. + put(store, "pch", "x", "xxxxxxxxxx"); + ASSERT_TRUE(store.lookup("pch", "k1").has_value()); + ASSERT_TRUE(store.lookup("pch", "x").has_value()); +} + +TEST_CASE(ManifestAtimePersisted) { + TempDir tmp; + { + auto store = open_store(tmp); + register_lru(store); + put(store, "pch", "a", "aaaaaaaaaa"); + put(store, "pch", "b", "bbbbbbbbbb"); + // Touch "a" after both writes so only the manifest knows it is the + // hotter entry — the mtime fallback would conclude the opposite. + ASSERT_TRUE(store.lookup("pch", "a").has_value()); + store.shutdown(); + } + + // Reopen with a budget that forces one eviction at registration. + auto store = open_store(tmp); + register_lru(store, 15); + ASSERT_TRUE(store.lookup("pch", "a").has_value()); + ASSERT_FALSE(store.lookup("pch", "b").has_value()); +} + +TEST_CASE(CheckpointAutoTriggers) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + // Enough commits to cross the internal change threshold; the manifest + // must appear without an explicit checkpoint() or shutdown(). + for(int i = 0; i < 16; ++i) { + put(store, "pch", std::format("k{}", i), "blob"); + } + auto manifest = fs::read(tmp.path("root/cache/v1/manifest.json")); + ASSERT_TRUE(manifest.has_value()); + ASSERT_TRUE(llvm::StringRef(*manifest).contains("k0")); +} + +}; // TEST_SUITE(CacheStore) + +} // namespace + +} // namespace clice::testing From ae0aa9ed3d43e5f9933b802f4650da442072fea0 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 00:20:52 +0800 Subject: [PATCH 02/17] feat(command): add canonicalize with argument profiles canonicalize(args, profile) renders the profile-selected subset of a compile command into a stable string for cache keys. Frontend drops codegen-only and clice-managed options; Preprocessing additionally drops diagnostics-presentation options. Filtering never reorders, unknown options are kept verbatim, and per-file identity (input path, -main-file-name) stays out so identical preambles share keys. --- src/command/argument_parser.cpp | 71 ++++++++++++++ src/command/argument_parser.h | 34 +++++++ tests/unit/command/canonicalize_tests.cpp | 110 ++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 tests/unit/command/canonicalize_tests.cpp diff --git a/src/command/argument_parser.cpp b/src/command/argument_parser.cpp index f5a9d361c..dbdcca157 100644 --- a/src/command/argument_parser.cpp +++ b/src/command/argument_parser.cpp @@ -121,9 +121,12 @@ using namespace option; bool is_discarded_option(unsigned id) { switch(id) { /// Input file, unknown args, and output — we manage these ourselves. + /// -main-file-name is per-file input identity injected by to_argv() + /// for cc1 commands; it only labels diagnostics/debug info. case OPT_INPUT: case OPT_UNKNOWN: case OPT__DASH_DASH: + case OPT_main_file_name: case OPT_c: case OPT_o: case OPT_dxc_Fc: @@ -259,6 +262,74 @@ bool is_codegen_option(unsigned id) { } } +bool is_diagnostics_option(unsigned id) { + if(id == OPT_w) { + return true; + } + if(auto opt = option::table().option(id)) { + return opt->matches(OPT_Diag_Group) || opt->matches(OPT_pedantic_Group); + } + return false; +} + +std::string canonicalize(llvm::ArrayRef args, ArgsProfile profile) { + std::string buf; + if(args.empty()) { + return buf; + } + + llvm::raw_string_ostream os(buf); + auto append = [&](std::string_view fragment) { + os << fragment << '\0'; + }; + + /// The driver affects language defaults (clang vs clang++ vs clang-cl). + append(args[0]); + + std::vector parse_args(args.begin() + 1, args.end()); + auto options = kota::option::ParseOptions{.dash_dash_parsing = true, + .visibility = default_visibility(args[0])}; + for(auto& result: option::table().parse(parse_args, options)) { + if(!result.has_value()) { + /// Unparseable arguments are kept verbatim: dropping an option + /// we don't understand could merge keys that must differ. + auto index = result.error().index; + if(index < parse_args.size()) { + append(parse_args[index]); + } + continue; + } + + auto& arg = *result; + + /// Unknown options are kept verbatim, same rationale as parse errors. + if(arg.id == OPT_UNKNOWN) { + if(arg.index < parse_args.size()) { + append(parse_args[arg.index]); + } + continue; + } + + bool keep = false; + switch(profile) { + case ArgsProfile::Full: keep = true; break; + case ArgsProfile::Frontend: + keep = !is_discarded_option(arg.id) && !is_codegen_option(arg.id); + break; + case ArgsProfile::Preprocessing: + keep = !is_discarded_option(arg.id) && !is_codegen_option(arg.id) && + !is_diagnostics_option(arg.id); + break; + } + + if(keep) { + option::table().render(arg, append); + } + } + + return buf; +} + std::string print_argv(llvm::ArrayRef args) { std::string buf; llvm::raw_string_ostream os(buf); diff --git a/src/command/argument_parser.h b/src/command/argument_parser.h index e7870096c..c9505275d 100644 --- a/src/command/argument_parser.h +++ b/src/command/argument_parser.h @@ -1,5 +1,8 @@ #pragma once +#include +#include + #include #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" @@ -70,6 +73,37 @@ bool is_include_path_option(unsigned id); /// Check if this is the -Xclang pass-through option. bool is_xclang_option(unsigned id); +/// Diagnostics-presentation options (-W*, -R*, -pedantic*, -w): they affect +/// which warnings are emitted but never the token stream produced by +/// preprocessing. +bool is_diagnostics_option(unsigned id); + +/// Which subset of a compile command matters when deriving a cache key. +enum class ArgsProfile : std::uint8_t { + /// Options that can influence preprocessing: include paths, macros, + /// language dialect and target (predefined macros). Computed + /// subtractively as Frontend minus diagnostics-presentation options, + /// because nearly every language option can define a feature macro — + /// over-inclusion only costs sharing, under-inclusion costs correctness. + Preprocessing, + + /// All frontend-semantic options: preprocessing + language + warnings. + /// Excludes codegen-only options and options clice manages itself. + Frontend, + + /// Every recognized option, including codegen and inputs. + Full, +}; + +/// Render the subset of `args` selected by `profile` into a stable string +/// suitable for hashing (fragments are NUL-separated). This is a pure +/// filter: the relative order of kept options is preserved because it is +/// semantically significant (-I/-D ordering). args[0] must be the driver; +/// it is always kept since it affects language defaults. Arguments that +/// fail to parse are kept verbatim — over-keying is safe, under-keying is +/// not. +std::string canonicalize(llvm::ArrayRef args, ArgsProfile profile); + /// Get the resource directory for clang builtin headers. Computed once /// from the current executable path using Driver::GetResourcesPath. llvm::StringRef resource_dir(); diff --git a/tests/unit/command/canonicalize_tests.cpp b/tests/unit/command/canonicalize_tests.cpp new file mode 100644 index 000000000..1ddf508fe --- /dev/null +++ b/tests/unit/command/canonicalize_tests.cpp @@ -0,0 +1,110 @@ +#include +#include + +#include "test/test.h" +#include "command/argument_parser.h" + +namespace clice::testing { + +namespace { + +std::string canon(std::vector args, ArgsProfile profile = ArgsProfile::Frontend) { + return canonicalize(args, profile); +} + +TEST_SUITE(Canonicalize) { + +TEST_CASE(StableForSameArgs) { + std::vector args = {"clang++", "-std=c++20", "-DFOO=1", "-I/usr/include"}; + ASSERT_EQ(canon(args), canon(args)); +} + +TEST_CASE(CodegenFlagsIgnored) { + // The point of the Frontend profile: pure codegen flags must not + // change the key. Note -O* is deliberately NOT here: it defines + // __OPTIMIZE__ and is kept by is_codegen_option() on purpose. + auto base = canon({"clang++", "-std=c++20", "-DFOO", "main.cpp"}); + ASSERT_EQ(base, canon({"clang++", "-g", "-std=c++20", "-DFOO", "main.cpp"})); + ASSERT_EQ(base, canon({"clang++", "-gdwarf-4", "-std=c++20", "-DFOO", "main.cpp"})); + ASSERT_EQ(base, canon({"clang++", "-std=c++20", "-fPIC", "-DFOO", "main.cpp"})); + ASSERT_EQ(base, canon({"clang++", "-std=c++20", "-DFOO", "-flto", "main.cpp"})); + ASSERT_EQ(base, canon({"clang++", "-std=c++20", "-ffunction-sections", "-DFOO", "main.cpp"})); +} + +TEST_CASE(SemanticFlagsChangeKey) { + // -D/-I/-std differences must produce different keys. + auto base = canon({"clang++", "-std=c++20", "-DFOO", "-I/a"}); + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-DBAR", "-I/a"})); + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-DFOO", "-I/b"})); + ASSERT_NE(base, canon({"clang++", "-std=c++17", "-DFOO", "-I/a"})); + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-DFOO", "-DBAR", "-I/a"})); + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-UFOO", "-I/a"})); + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-DFOO", "-isystem", "/sys", "-I/a"})); +} + +TEST_CASE(InputFileExcluded) { + // The source file must stay out of the key so identical preambles + // are shared across files. + ASSERT_EQ(canon({"clang++", "-std=c++20", "a.cpp"}), canon({"clang++", "-std=c++20", "b.cpp"})); + // Absolute paths too (the CDB always carries absolute source paths). + ASSERT_EQ(canon({"clang++", "-std=c++17", "-fsyntax-only", "/tmp/x/a.cpp"}), + canon({"clang++", "-std=c++17", "-fsyntax-only", "/tmp/x/b.cpp"})); + // cc1 commands carry per-file -main-file-name (injected by to_argv); + // it labels diagnostics only and must stay out of the key. + ASSERT_EQ(canon({"clang-20", "-cc1", "-main-file-name", "a.cpp", "-std=c++17", "/x/a.cpp"}), + canon({"clang-20", "-cc1", "-main-file-name", "b.cpp", "-std=c++17", "/x/b.cpp"})); +} + +TEST_CASE(OrderPreserved) { + // -I/-D relative order is semantically significant; canonicalize + // filters but never reorders. + ASSERT_NE(canon({"clang++", "-I/a", "-I/b"}), canon({"clang++", "-I/b", "-I/a"})); + ASSERT_NE(canon({"clang++", "-DFOO", "-UFOO"}), canon({"clang++", "-UFOO", "-DFOO"})); +} + +TEST_CASE(DriverIncluded) { + // clang vs clang++ changes language defaults. + ASSERT_NE(canon({"clang", "-DFOO"}), canon({"clang++", "-DFOO"})); +} + +TEST_CASE(SpellingNormalized) { + // Alias spellings render identically after parsing. + ASSERT_EQ(canon({"clang++", "-I", "/usr/include"}), canon({"clang++", "-I/usr/include"})); + ASSERT_EQ(canon({"clang++", "--include-directory=/usr/include"}), + canon({"clang++", "-I/usr/include"})); +} + +TEST_CASE(UnknownOptionKept) { + // Unknown options might be semantic; dropping them could merge keys + // that must differ. + ASSERT_NE(canon({"clang++", "-DFOO"}), canon({"clang++", "-DFOO", "-fplugin-arg-x-y"})); +} + +TEST_CASE(PreprocessingDropsWarnings) { + auto base = canon({"clang++", "-std=c++20", "-DFOO"}, ArgsProfile::Preprocessing); + ASSERT_EQ(base, canon({"clang++", "-std=c++20", "-Wall", "-DFOO"}, ArgsProfile::Preprocessing)); + ASSERT_EQ( + base, + canon({"clang++", "-std=c++20", "-DFOO", "-Werror", "-w"}, ArgsProfile::Preprocessing)); + ASSERT_EQ(base, + canon({"clang++", "-pedantic", "-std=c++20", "-DFOO"}, ArgsProfile::Preprocessing)); + // But preprocessing-relevant options still matter. + ASSERT_NE(base, canon({"clang++", "-std=c++20", "-DBAR"}, ArgsProfile::Preprocessing)); + ASSERT_NE(base, canon({"clang++", "-std=c++17", "-DFOO"}, ArgsProfile::Preprocessing)); +} + +TEST_CASE(FrontendKeepsWarnings) { + // Warnings affect diagnostics, which an LSP must reproduce. + ASSERT_NE(canon({"clang++", "-DFOO"}), canon({"clang++", "-Wall", "-DFOO"})); +} + +TEST_CASE(FullKeepsCodegen) { + ASSERT_NE(canon({"clang++", "-DFOO"}, ArgsProfile::Full), + canon({"clang++", "-O2", "-DFOO"}, ArgsProfile::Full)); +} + +}; // TEST_SUITE(Canonicalize) + +} // namespace + +} // namespace clice::testing From cb145d38e794c5429b04979a3646185e98f49075 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 00:20:52 +0800 Subject: [PATCH 03/17] feat(server): migrate PCH/PCM/index caches to CacheStore PCH cache keys now hash the preamble text together with the canonical frontend flags, the compile directories and the clang version (fixes the FIXME where -D/-I/-std variants wrongly shared one PCH); PCM keys likewise. Keys use xxh3_128bits. Workers write blobs to store-allocated tmp paths and the master commits them; evicted PCMs are re-validated before each compile. The index moved into the store as a Persistent namespace with mark-and-sweep cleanup; stale pre-framework layouts are discarded on startup. cache.json keeps only validity metadata and lives under the versioned store root. --- src/server/compiler/compiler.cpp | 158 +++++++++++----- src/server/compiler/indexer.cpp | 119 +++++++----- src/server/compiler/indexer.h | 10 +- src/server/service/master_server.cpp | 84 ++++++--- src/server/service/master_server.h | 12 ++ src/server/service/session.h | 2 +- src/server/worker/stateless_worker.cpp | 44 ++--- src/server/workspace/workspace.cpp | 71 +++----- src/server/workspace/workspace.h | 33 +++- .../compilation/test_persistent_cache.py | 171 ++++++++++++++---- .../integration/compilation/test_staleness.py | 31 ++++ tests/integration/utils/cache.py | 38 +++- 12 files changed, 511 insertions(+), 262 deletions(-) diff --git a/src/server/compiler/compiler.cpp b/src/server/compiler/compiler.cpp index 5f3075e2d..9a61e0056 100644 --- a/src/server/compiler/compiler.cpp +++ b/src/server/compiler/compiler.cpp @@ -4,6 +4,7 @@ #include #include +#include "command/argument_parser.h" #include "command/search_config.h" #include "index/tu_index.h" #include "server/protocol/worker.h" @@ -12,19 +13,35 @@ #include "syntax/include_resolver.h" #include "syntax/scan.h" +#include "kota/async/async.h" #include "kota/codec/json/json.h" #include "kota/ipc/lsp/position.h" #include "kota/ipc/lsp/uri.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" +#include "clang/Basic/Version.h" namespace clice { namespace lsp = kota::ipc::lsp; using serde_raw = kota::codec::RawValue; +/// Render hash-input fragments into an unambiguous byte stream (length- +/// prefixed, so embedded NULs cannot create colliding splits) and return +/// the 32-hex xxh3_128bits cache key. +static std::string cache_key(std::initializer_list parts) { + std::string input; + for(auto part: parts) { + input += std::format("{}:", part.size()); + input += part; + } + auto hash = llvm::xxh3_128bits(llvm::arrayRefFromStringRef(input)); + return std::format("{:016x}{:016x}", hash.high64, hash.low64); +} + /// Detect whether the cursor is inside a preamble directive (include/import). Compiler::Compiler(kota::event_loop& loop, @@ -95,21 +112,25 @@ void Compiler::init_compile_graph() { if(!fill_compile_args(file_path, bp.directory, bp.arguments)) co_return false; - // Compute deterministic content-addressed PCM path. + if(!workspace.store) { + LOG_WARN("BuildPCM skipped for module {}: cache store is unavailable", mod_it->second); + co_return false; + } + + // Deterministic content-addressed PCM key over the source path and + // the frontend-relevant subset of the compile flags. auto safe_module_name = mod_it->second; std::ranges::replace(safe_module_name, ':', '-'); - std::string hash_input = file_path; - for(auto& arg: bp.arguments) { - hash_input += arg; - } - auto args_hash = llvm::xxh3_64bits(llvm::StringRef(hash_input)); - auto pcm_filename = std::format("{}-{:016x}.pcm", safe_module_name, args_hash); - auto pcm_path = - path::join(workspace.config.project.cache_dir, "cache", "pcm", pcm_filename); + auto pcm_key = std::format("{}-{}", + safe_module_name, + cache_key({clang::getClangFullVersion(), + bp.directory, + file_path, + canonicalize(bp.arguments, ArgsProfile::Frontend)})); // Check if cached PCM is still valid. if(auto pcm_it = workspace.pcm_cache.find(path_id); pcm_it != workspace.pcm_cache.end()) { - if(!pcm_it->second.path.empty() && llvm::sys::fs::exists(pcm_it->second.path) && + if(pcm_it->second.key == pcm_key && workspace.store->lookup("pcm", pcm_key) && !deps_changed(workspace.path_pool, pcm_it->second.deps)) { workspace.pcm_paths[path_id] = pcm_it->second.path; co_return true; @@ -117,24 +138,36 @@ void Compiler::init_compile_graph() { } bp.module_name = mod_it->second; - bp.output_path = pcm_path; + auto pending = workspace.store->begin_store("pcm", pcm_key); + bp.output_path = pending.tmp_path; // Clang needs ALL transitive PCM deps, not just direct imports. workspace.fill_pcm_deps(bp.pcms); auto result = co_await pool.send_stateless(bp); if(!result.has_value() || !result.value().success) { + workspace.store->abort(pending); LOG_WARN("BuildPCM failed for module {}: {}", mod_it->second, result.has_value() ? result.value().error : result.error().message); co_return false; } - workspace.pcm_paths[path_id] = result.value().output_path; + // Commit on the thread pool: it fsyncs the freshly written PCM. + auto committed = + co_await kota::queue([&] { return workspace.store->commit(std::move(pending)); }); + if(!committed.has_value() || !committed.value().has_value()) { + LOG_WARN("Failed to commit PCM for module {}", mod_it->second); + co_return false; + } + + auto pcm_path = std::move(committed.value().value()); + workspace.pcm_paths[path_id] = pcm_path; workspace.pcm_cache[path_id] = { - result.value().output_path, + pcm_path, + pcm_key, capture_deps_snapshot(workspace.path_pool, result.value().deps)}; - LOG_INFO("Built PCM for module {}: {}", mod_it->second, result.value().output_path); + LOG_INFO("Built PCM for module {}: {}", mod_it->second, pcm_path); // Persist cache metadata after successful build. workspace.save_cache(); @@ -452,27 +485,29 @@ kota::task Compiler::ensure_pch(Session& session, co_return true; } - // FIXME: hash should also include compile flags that affect preprocessing - // (e.g. -D, -I, -isystem, -std) so that files with the same preamble text - // but different flags produce separate PCHs. Currently only the preamble - // text is hashed — the source file path must be excluded from the hash - // to allow sharing across files with identical preambles. + // Key the PCH by preamble text plus the frontend-relevant compile flags, + // so files with the same preamble text but different flags (-D, -I, -std) + // produce separate PCHs. The source file path stays out of the key so + // files with identical preambles share one PCH — but its DIRECTORY (and + // the working directory) must stay in: quote includes and relative paths + // resolve against them, so equal preamble text in different directories + // can mean different content. The clang version guards against reusing + // blobs a newer bundled clang would reject. auto preamble_text = llvm::StringRef(text).substr(0, bound); - auto preamble_hash = llvm::xxh3_64bits(preamble_text); - - // Deterministic content-addressed PCH path. - auto pch_path = path::join(workspace.config.project.cache_dir, - "cache", - "pch", - std::format("{:016x}.pch", preamble_hash)); - - // Reuse existing PCH if preamble content and deps haven't changed. + auto pch_key = cache_key({clang::getClangFullVersion(), + directory, + path::parent_path(path), + preamble_text, + canonicalize(arguments, ArgsProfile::Frontend)}); + + // Reuse existing PCH if the key and deps haven't changed. The store + // lookup refreshes the blob's LRU position and catches eviction. if(auto it = workspace.pch_cache.find(path_id); it != workspace.pch_cache.end()) { auto& st = it->second; - if(st.hash == preamble_hash && !st.path.empty() && - !deps_changed(workspace.path_pool, st.deps)) { + if(st.key == pch_key && !st.path.empty() && workspace.store && + workspace.store->lookup("pch", pch_key) && !deps_changed(workspace.path_pool, st.deps)) { st.bound = bound; - session.pch_ref = Session::PCHRef{path_id, preamble_hash, bound}; + session.pch_ref = Session::PCHRef{path_id, pch_key, bound}; co_return true; } } @@ -488,7 +523,7 @@ kota::task Compiler::ensure_pch(Session& session, it != workspace.pch_cache.end() && it->second.building) { co_await it->second.building->wait(); if(auto it2 = workspace.pch_cache.find(path_id); it2 != workspace.pch_cache.end()) { - session.pch_ref = Session::PCHRef{path_id, it2->second.hash, it2->second.bound}; + session.pch_ref = Session::PCHRef{path_id, it2->second.key, it2->second.bound}; } co_return workspace.pch_cache.count(path_id) && !workspace.pch_cache[path_id].path.empty(); } @@ -497,23 +532,17 @@ kota::task Compiler::ensure_pch(Session& session, auto completion = std::make_shared(); workspace.pch_cache[path_id].building = completion; - if(workspace.config.project.cache_dir.empty()) { - LOG_WARN("PCH build skipped: cache_dir is not configured"); + if(!workspace.store) { + LOG_WARN("PCH build skipped: cache store is unavailable"); workspace.pch_cache[path_id].building.reset(); completion->set(); co_return false; } - // Ensure the PCH cache directory exists. - auto pch_dir = path::join(workspace.config.project.cache_dir, "cache", "pch"); - if(auto ec = llvm::sys::fs::create_directories(pch_dir)) { - LOG_WARN("Cannot create PCH cache dir {}: {}", pch_dir, ec.message()); - workspace.pch_cache[path_id].building.reset(); - completion->set(); - co_return false; - } + // Build a new PCH via stateless worker: it writes the blob to the tmp + // path allocated here; the store commits (fsync + rename) on success. + auto pending = workspace.store->begin_store("pch", pch_key); - // Build a new PCH via stateless worker. worker::BuildParams bp; bp.kind = worker::BuildKind::BuildPCH; bp.file = std::string(path); @@ -521,13 +550,14 @@ kota::task Compiler::ensure_pch(Session& session, bp.arguments = arguments; bp.text = text; bp.preamble_bound = bound; - bp.output_path = pch_path; + bp.output_path = pending.tmp_path; - LOG_DEBUG("Building PCH for {}, bound={}, output={}", path, bound, pch_path); + LOG_DEBUG("Building PCH for {}, bound={}, key={}", path, bound, pch_key); auto result = co_await pool.send_stateless(bp); if(!result.has_value() || !result.value().success) { + workspace.store->abort(pending); LOG_WARN("PCH build failed for {}: {}", path, result.has_value() ? result.value().error : result.error().message); @@ -536,17 +566,27 @@ kota::task Compiler::ensure_pch(Session& session, co_return false; } + // Commit on the thread pool: it fsyncs the freshly written PCH. + auto committed = + co_await kota::queue([&] { return workspace.store->commit(std::move(pending)); }); + if(!committed.has_value() || !committed.value().has_value()) { + LOG_WARN("Failed to commit PCH for {}", path); + workspace.pch_cache[path_id].building.reset(); + completion->set(); + co_return false; + } + auto& st = workspace.pch_cache[path_id]; - st.path = result.value().output_path; + st.path = committed.value().value(); st.bound = bound; - st.hash = preamble_hash; + st.key = pch_key; st.deps = capture_deps_snapshot(workspace.path_pool, result.value().deps); st.document_links_json = std::move(result.value().pch_links_json); st.building.reset(); - session.pch_ref = Session::PCHRef{path_id, preamble_hash, bound}; + session.pch_ref = Session::PCHRef{path_id, pch_key, bound}; - LOG_INFO("PCH built for {}: {}", path, result.value().output_path); + LOG_INFO("PCH built for {}: {}", path, st.path); // Persist cache metadata after successful build. workspace.save_cache(); @@ -565,6 +605,26 @@ kota::task Compiler::ensure_deps(Session& session, std::unordered_map& pcms) { auto path_id = session.path_id; + // Re-validate cached PCM blobs before compiling: LRU eviction can remove + // one while its compile unit is still marked clean, so dirty those units + // to trigger a rebuild instead of handing clang a dangling path. + if(workspace.compile_graph) { + llvm::SmallVector evicted; + for(auto& [pid, pcm_path]: workspace.pcm_paths) { + if(!llvm::sys::fs::exists(pcm_path)) { + evicted.push_back(pid); + } + } + for(auto pid: evicted) { + for(auto id: workspace.compile_graph->update(pid)) { + workspace.pcm_paths.erase(id); + workspace.pcm_cache.erase(id); + } + workspace.pcm_paths.erase(pid); + workspace.pcm_cache.erase(pid); + } + } + // Compile C++20 module dependencies (PCMs). if(workspace.compile_graph && !co_await workspace.compile_graph->compile_deps(path_id)) { co_return false; diff --git a/src/server/compiler/indexer.cpp b/src/server/compiler/indexer.cpp index 4248574ef..2dd595775 100644 --- a/src/server/compiler/indexer.cpp +++ b/src/server/compiler/indexer.cpp @@ -94,74 +94,103 @@ void Indexer::merge(const void* tu_index_data, std::size_t size) { workspace.merged_indices.size()); } -void Indexer::save(llvm::StringRef index_dir) { - if(index_dir.empty()) - return; - - auto ec = llvm::sys::fs::create_directories(index_dir); - if(ec) { - LOG_WARN("Failed to create index directory {}: {}", std::string(index_dir), ec.message()); - return; - } - - auto project_path = path::join(index_dir, "project.idx"); +/// Serialize via a two-phase store write: serialize to the tmp path, then +/// commit (fsync + atomic rename). Returns false if any step fails. +static bool store_blob(CacheStore& store, + llvm::StringRef key, + llvm::function_ref serialize) { + auto pending = store.begin_store("index", key); { - std::error_code write_ec; - llvm::raw_fd_ostream os(project_path, write_ec); - if(!write_ec) { - workspace.project_index.serialize(os); - LOG_INFO("Saved ProjectIndex to {}", project_path); - } else { - LOG_WARN("Failed to save ProjectIndex: {}", write_ec.message()); + std::error_code ec; + llvm::raw_fd_ostream os(pending.tmp_path, ec); + if(ec) { + LOG_WARN("Failed to write index blob {}: {}", key, ec.message()); + store.abort(pending); + return false; + } + serialize(os); + os.flush(); + // A truncated blob (disk full) must never be committed: the index + // namespace is Persistent, so it would be served forever. + if(os.has_error()) { + LOG_WARN("Failed to write index blob {}: {}", key, os.error().message()); + os.clear_error(); + store.abort(pending); + return false; } } + if(auto result = store.commit(std::move(pending)); !result) { + LOG_WARN("Failed to commit index blob {}: {}", key, result.error().message()); + return false; + } + return true; +} - auto shards_dir = path::join(index_dir, "shards"); - ec = llvm::sys::fs::create_directories(shards_dir); - if(ec) { - LOG_WARN("Failed to create shards directory: {}", ec.message()); +void Indexer::save() { + if(!workspace.store) return; + + if(store_blob(*workspace.store, "project", [&](llvm::raw_ostream& os) { + workspace.project_index.serialize(os); + })) { + LOG_INFO("Saved ProjectIndex ({} symbols)", workspace.project_index.symbols.size()); } std::size_t saved = 0; for(auto& [path_id, shard]: workspace.merged_indices) { if(!shard.index.need_rewrite()) continue; - auto shard_path = path::join(shards_dir, std::to_string(path_id) + ".idx"); - std::error_code write_ec; - llvm::raw_fd_ostream os(shard_path, write_ec); - if(!write_ec) { - shard.index.serialize(os); + if(store_blob(*workspace.store, std::to_string(path_id), [&](llvm::raw_ostream& os) { + shard.index.serialize(os); + })) { ++saved; } } LOG_INFO("Saved {} MergedIndex shards (of {} total)", saved, workspace.merged_indices.size()); } -void Indexer::load(llvm::StringRef index_dir) { - if(index_dir.empty()) +void Indexer::load() { + if(!workspace.store) return; - auto project_path = path::join(index_dir, "project.idx"); - auto buf = llvm::MemoryBuffer::getFile(project_path); - if(buf) { + bool has_project = false; + auto project_path = workspace.store->lookup("index", "project"); + if(project_path) { + auto buf = llvm::MemoryBuffer::getFile(*project_path); + if(!buf) { + // Transient read failure — don't load shards (useless without + // the project index), but don't destroy them either. + LOG_WARN("Failed to read ProjectIndex blob: {}", buf.getError().message()); + return; + } workspace.project_index = index::ProjectIndex::from((*buf)->getBufferStart()); + has_project = true; LOG_INFO("Loaded ProjectIndex: {} symbols", workspace.project_index.symbols.size()); } - auto shards_dir = path::join(index_dir, "shards"); - std::error_code ec; - for(auto it = llvm::sys::fs::directory_iterator(shards_dir, ec); - !ec && it != llvm::sys::fs::directory_iterator(); - it.increment(ec)) { - auto filename = llvm::sys::path::filename(it->path()); - if(!filename.ends_with(".idx")) - continue; - auto stem = filename.drop_back(4); + // Load shards; sweep blobs that no longer correspond to anything — + // unparseable keys, or all shards when the project index itself is + // gone (Persistent namespace cleanup is the caller's mark-and-sweep). + llvm::SmallVector orphans; + workspace.store->for_each_key("index", [&](llvm::StringRef key) { + if(key == "project") + return; + std::uint32_t path_id = 0; - if(stem.getAsInteger(10, path_id)) - continue; - workspace.merged_indices[path_id] = MergedIndexShard{index::MergedIndex::load(it->path())}; + if(key.getAsInteger(10, path_id) || !has_project) { + orphans.push_back(key.str()); + return; + } + + auto shard_path = workspace.store->lookup("index", key); + if(shard_path) { + workspace.merged_indices[path_id] = + MergedIndexShard{index::MergedIndex::load(*shard_path)}; + } + }); + + for(auto& key: orphans) { + workspace.store->invalidate("index", key); } if(!workspace.merged_indices.empty()) { @@ -955,7 +984,7 @@ kota::task<> Indexer::run_background_indexing() { indexing_active = false; LOG_INFO("Background indexing complete: {} files dispatched", dispatched); - save(workspace.config.project.index_dir); + save(); } } // namespace clice diff --git a/src/server/compiler/indexer.h b/src/server/compiler/indexer.h index d1779cb26..3122b3e5b 100644 --- a/src/server/compiler/indexer.h +++ b/src/server/compiler/indexer.h @@ -114,11 +114,13 @@ class Indexer { /// Merge a TUIndex result into Workspace's ProjectIndex and MergedIndex shards. void merge(const void* tu_index_data, std::size_t size); - /// Save Workspace's ProjectIndex and MergedIndex shards to disk. - void save(llvm::StringRef index_dir); + /// Save Workspace's ProjectIndex and MergedIndex shards to the cache + /// store ("index" namespace, Persistent policy). + void save(); - /// Load Workspace's ProjectIndex and MergedIndex shards from disk. - void load(llvm::StringRef index_dir); + /// Load Workspace's ProjectIndex and MergedIndex shards from the cache + /// store, sweeping orphaned shard blobs. + void load(); /// Check whether a file needs re-indexing (stale or missing shard). bool need_update(llvm::StringRef file_path); diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index 4248f3126..e9add7b3d 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -37,7 +37,7 @@ namespace lsp = kota::ipc::lsp; namespace protocol = kota::ipc::protocol; MasterServer::MasterServer(kota::event_loop& loop, std::string self_path) : - loop(loop), pool(loop), compiler(loop, workspace, pool, sessions), + loop(loop), bg_tasks(loop), pool(loop), compiler(loop, workspace, pool, sessions), indexer(loop, workspace, sessions, @@ -206,38 +206,78 @@ void MasterServer::schedule_shutdown() { } kota::task<> MasterServer::shutdown_and_cleanup() { - indexer.save(workspace.config.project.index_dir); + bg_tasks.cancel(); + co_await bg_tasks.join(); + indexer.save(); workspace.save_cache(); co_await kota::when_all(indexer.stop(), compiler.stop()); co_await pool.stop(); + if(workspace.store) { + workspace.store->shutdown(); + } lifecycle = ServerLifecycle::Exited; } +kota::task<> MasterServer::cache_checkpoint_task() { + constexpr auto interval = std::chrono::minutes(5); + while(true) { + co_await kota::sleep(interval); + if(workspace.store) { + // Offload to the thread pool: checkpoint writes the manifest. + co_await kota::queue([this] { workspace.store->checkpoint(); }); + } + } +} + +void MasterServer::open_cache_store() { + auto& cfg = workspace.config.project; + if(workspace.store || cfg.cache_dir.empty()) + return; + + auto store = CacheStore::open(cfg.cache_dir, cache_format_version); + if(!store) { + LOG_WARN("Failed to open cache store at {}: {}", + std::string_view(cfg.cache_dir), + store.error().message()); + return; + } + + // Size budgets are deliberately generous: eviction exists to bound + // disk usage, not to keep the working set tight. + constexpr std::uint64_t GiB = 1ull << 30; + store->register_namespace( + {.name = "pch", .extension = ".pch", .policy = CachePolicy::LRU, .max_bytes = 8 * GiB}); + store->register_namespace( + {.name = "pcm", .extension = ".pcm", .policy = CachePolicy::LRU, .max_bytes = 8 * GiB}); + store->register_namespace( + {.name = "index", .extension = ".idx", .policy = CachePolicy::Persistent}); + store->register_namespace( + {.name = "header_context", .extension = ".h", .policy = CachePolicy::Scratch}); + workspace.store.emplace(std::move(*store)); + LOG_INFO("Cache store: {}", workspace.store->base_dir()); + + // The index now lives inside the store; the old default index dir is + // discarded (no migration), and a configured index_dir is ignored. + // header_context/ is still written directly by the legacy preamble + // path; drop this exemption once it moves to the Scratch namespace. + auto default_index_dir = path::join(cfg.cache_dir, "index"); + if(!cfg.index_dir.empty() && cfg.index_dir != default_index_dir) { + LOG_WARN("project.index_dir is no longer used; the index is stored in {}", + workspace.store->base_dir()); + } + llvm::sys::fs::remove_directories(default_index_dir); + + workspace.load_cache(); + bg_tasks.spawn(cache_checkpoint_task()); +} + void MasterServer::load_workspace() { if(workspace_root.empty()) return; auto& cfg = workspace.config.project; - if(!cfg.cache_dir.empty()) { - auto ec = llvm::sys::fs::create_directories(cfg.cache_dir); - if(ec) { - LOG_WARN("Failed to create cache directory {}: {}", - std::string_view(cfg.cache_dir), - ec.message()); - } else { - LOG_INFO("Cache directory: {}", std::string_view(cfg.cache_dir)); - } - - for(auto* subdir: {"cache/pch", "cache/pcm"}) { - auto dir = path::join(cfg.cache_dir, subdir); - if(auto ec2 = llvm::sys::fs::create_directories(dir)) - LOG_WARN("Failed to create {}: {}", dir, ec2.message()); - } - - workspace.cleanup_cache(); - workspace.load_cache(); - } + open_cache_store(); std::string cdb_path; for(auto& configured: cfg.compile_commands_paths) { @@ -317,7 +357,7 @@ void MasterServer::load_workspace() { LOG_WARN("{} unresolved includes", unresolved); workspace.build_module_map(); - indexer.load(cfg.index_dir); + indexer.load(); if(*cfg.enable_indexing) { for(auto& entry: workspace.cdb.get_entries()) { diff --git a/src/server/service/master_server.h b/src/server/service/master_server.h index e0987ad8b..a272b7d64 100644 --- a/src/server/service/master_server.h +++ b/src/server/service/master_server.h @@ -104,8 +104,20 @@ class MasterServer { kota::event shutdown_event; void load_workspace(); + /// Open the CacheStore under cache_dir and register the blob + /// namespaces. No-op if already open or caching is disabled. + void open_cache_store(); + + /// Periodically checkpoint the cache store manifest so last-accessed + /// times survive crashes (the store itself is passive by design). + kota::task<> cache_checkpoint_task(); + kota::event_loop& loop; + /// Server-owned background tasks (cache checkpoint); cancelled and + /// joined in shutdown_and_cleanup(). + kota::task_group<> bg_tasks; + Workspace workspace; llvm::DenseMap sessions; WorkerPool pool; diff --git a/src/server/service/session.h b/src/server/service/session.h index 0d66c688c..4192297d5 100644 --- a/src/server/service/session.h +++ b/src/server/service/session.h @@ -54,7 +54,7 @@ struct Session { /// Session only stores enough to locate and validate it. struct PCHRef { std::uint32_t path_id = 0; ///< Key into Workspace.pch_cache. - std::uint64_t hash = 0; ///< Preamble hash at build time. + std::string key; ///< CacheStore key at build time. std::uint32_t bound = 0; ///< Preamble byte boundary. }; diff --git a/src/server/worker/stateless_worker.cpp b/src/server/worker/stateless_worker.cpp index e5910a006..b0e0204f0 100644 --- a/src/server/worker/stateless_worker.cpp +++ b/src/server/worker/stateless_worker.cpp @@ -59,28 +59,6 @@ static std::string serialize_tu_index(CompilationUnit& unit, bool interested_onl return serialized; } -/// Write compilation output to disk, handling tmp+rename pattern. -/// Returns the final path on success, or empty string on failure. -static std::string finalize_output(const std::string& output_path, - const std::string& tmp_path, - const std::string& file, - const char* label) { - if(!output_path.empty()) { - auto ec = fs::rename(tmp_path, output_path); - if(ec) { - return output_path; - } else { - LOG_WARN("{}: rename {} -> {} failed: {}", - label, - tmp_path, - output_path, - ec.error().message()); - return tmp_path; - } - } - return tmp_path; -} - static worker::BuildResult handle_build_pch(const worker::BuildParams& params) { ScopedTimer timer; @@ -89,10 +67,12 @@ static worker::BuildResult handle_build_pch(const worker::BuildParams& params) { fill_args(cp, params.directory, params.arguments); cp.add_remapped_file(params.file, params.text, params.preamble_bound); + // When the master provides an output path it is already a tmp path + // allocated by its CacheStore: write directly, the master commits + // (fsync + atomic rename) after we report success. std::string tmp_path; - bool has_output = !params.output_path.empty(); - if(has_output) { - tmp_path = params.output_path + ".tmp"; + if(!params.output_path.empty()) { + tmp_path = params.output_path; } else { auto tmp = fs::createTemporaryFile("clice-pch", "pch"); if(!tmp) { @@ -124,11 +104,10 @@ static worker::BuildResult handle_build_pch(const worker::BuildParams& params) { unit = CompilationUnit(nullptr); if(success) { - auto final_path = finalize_output(params.output_path, tmp_path, params.file, "BuildPCH"); - LOG_INFO("BuildPCH done: file={}, output={}, {}ms", params.file, final_path, timer.ms()); + LOG_INFO("BuildPCH done: file={}, output={}, {}ms", params.file, tmp_path, timer.ms()); worker::BuildResult result; result.success = true; - result.output_path = std::move(final_path); + result.output_path = tmp_path; result.deps = pch_info.deps; result.tu_index_data = std::move(tu_index_data); result.pch_links_json = std::move(pch_links_json); @@ -150,10 +129,10 @@ static worker::BuildResult handle_build_pcm(const worker::BuildParams& params) { cp.pcms.try_emplace(name, path); } + // See handle_build_pch: a provided output path is the master's tmp path. std::string tmp_path; - bool has_output = !params.output_path.empty(); - if(has_output) { - tmp_path = params.output_path + ".tmp"; + if(!params.output_path.empty()) { + tmp_path = params.output_path; } else { auto tmp = fs::createTemporaryFile("clice-pcm", "pcm"); if(!tmp) { @@ -179,11 +158,10 @@ static worker::BuildResult handle_build_pcm(const worker::BuildParams& params) { unit = CompilationUnit(nullptr); if(success) { - auto final_path = finalize_output(params.output_path, tmp_path, params.file, "BuildPCM"); LOG_INFO("BuildPCM done: module={}, {}ms", params.module_name, timer.ms()); worker::BuildResult result; result.success = true; - result.output_path = std::move(final_path); + result.output_path = tmp_path; result.deps = pcm_info.deps; result.tu_index_data = std::move(tu_index_data); return result; diff --git a/src/server/workspace/workspace.cpp b/src/server/workspace/workspace.cpp index 5a9640595..ba993ba8d 100644 --- a/src/server/workspace/workspace.cpp +++ b/src/server/workspace/workspace.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/xxhash.h" namespace clice { @@ -158,16 +159,15 @@ struct CacheDepEntry { }; struct CachePCHEntry { - std::string filename; + std::string key; // CacheStore key in the "pch" namespace std::uint32_t source_file; // index into CacheData::paths - std::uint64_t hash; std::uint32_t bound; std::int64_t build_at; std::vector deps; }; struct CachePCMEntry { - std::string filename; + std::string key; // CacheStore key in the "pcm" namespace std::uint32_t source_file; std::string module_name; std::int64_t build_at; @@ -183,10 +183,10 @@ struct CacheData { } // namespace void Workspace::load_cache() { - if(config.project.cache_dir.empty()) + if(!store) return; - auto cache_path = path::join(config.project.cache_dir, "cache", "cache.json"); + auto cache_path = path::join(store->base_dir(), "cache.json"); auto content = fs::read(cache_path); if(!content) { LOG_DEBUG("No cache.json found at {}", cache_path); @@ -218,32 +218,32 @@ void Workspace::load_cache() { }; for(auto& entry: data.pch) { - auto pch_path = path::join(config.project.cache_dir, "cache", "pch", entry.filename); + auto pch_path = store->lookup("pch", entry.key); auto source = resolve(entry.source_file); - if(!llvm::sys::fs::exists(pch_path) || source.empty()) + if(!pch_path || source.empty()) continue; auto path_id = path_pool.intern(source); auto& st = pch_cache[path_id]; - st.path = pch_path; - st.hash = entry.hash; + st.path = *pch_path; + st.key = entry.key; st.bound = entry.bound; st.deps = load_deps(entry.build_at, entry.deps); - LOG_DEBUG("Loaded cached PCH: {} -> {}", source, pch_path); + LOG_DEBUG("Loaded cached PCH: {} -> {}", source, *pch_path); } for(auto& entry: data.pcm) { - auto pcm_path = path::join(config.project.cache_dir, "cache", "pcm", entry.filename); + auto pcm_path = store->lookup("pcm", entry.key); auto source = resolve(entry.source_file); - if(!llvm::sys::fs::exists(pcm_path) || source.empty()) + if(!pcm_path || source.empty()) continue; auto path_id = path_pool.intern(source); - pcm_cache[path_id] = {pcm_path, load_deps(entry.build_at, entry.deps)}; - pcm_paths[path_id] = pcm_path; + pcm_cache[path_id] = {*pcm_path, entry.key, load_deps(entry.build_at, entry.deps)}; + pcm_paths[path_id] = *pcm_path; - LOG_DEBUG("Loaded cached PCM: {} (module {}) -> {}", source, entry.module_name, pcm_path); + LOG_DEBUG("Loaded cached PCM: {} (module {}) -> {}", source, entry.module_name, *pcm_path); } LOG_INFO("Loaded cache.json: {} PCH entries, {} PCM entries", @@ -252,7 +252,7 @@ void Workspace::load_cache() { } void Workspace::save_cache() { - if(config.project.cache_dir.empty()) + if(!store) return; CacheData data; @@ -273,9 +273,8 @@ void Workspace::save_cache() { continue; CachePCHEntry entry; - entry.filename = std::string(path::filename(st.path)); + entry.key = st.key; entry.source_file = intern(path_id); - entry.hash = st.hash; entry.bound = st.bound; entry.build_at = st.deps.build_at; for(std::size_t i = 0; i < st.deps.path_ids.size(); ++i) { @@ -289,7 +288,7 @@ void Workspace::save_cache() { continue; CachePCMEntry entry; - entry.filename = std::string(path::filename(st.path)); + entry.key = st.key; entry.source_file = intern(path_id); auto mod_it = path_to_module.find(path_id); entry.module_name = mod_it != path_to_module.end() ? mod_it->second : ""; @@ -306,8 +305,11 @@ void Workspace::save_cache() { return; } - auto cache_path = path::join(config.project.cache_dir, "cache", "cache.json"); - auto tmp_path = cache_path + ".tmp"; + auto cache_path = path::join(store->base_dir(), "cache.json"); + // Stage inside this instance's store tmp directory: other instances of + // the same workspace must not clobber each other's half-written file. + auto pid = llvm::sys::Process::getProcessId(); + auto tmp_path = path::join(store->base_dir(), "tmp", std::to_string(pid), "cache.json"); auto write_result = fs::write(tmp_path, *json_str); if(!write_result) { LOG_WARN("Failed to write cache.json.tmp: {}", write_result.error().message()); @@ -320,33 +322,6 @@ void Workspace::save_cache() { } } -void Workspace::cleanup_cache(int max_age_days) { - if(config.project.cache_dir.empty()) - return; - - auto now = std::chrono::system_clock::now(); - auto max_age = std::chrono::hours(max_age_days * 24); - - for(auto* subdir: {"cache/pch", "cache/pcm"}) { - auto dir = path::join(config.project.cache_dir, subdir); - std::error_code ec; - for(auto it = llvm::sys::fs::directory_iterator(dir, ec); - !ec && it != llvm::sys::fs::directory_iterator(); - it.increment(ec)) { - llvm::sys::fs::file_status status; - if(auto stat_ec = llvm::sys::fs::status(it->path(), status)) - continue; - - auto mtime = status.getLastModificationTime(); - auto age = now - mtime; - if(age > max_age) { - llvm::sys::fs::remove(it->path()); - LOG_DEBUG("Cleaned up stale cache file: {}", it->path()); - } - } - } -} - void Workspace::build_module_map() { for(auto& [module_name, path_ids]: dep_graph.modules()) { for(auto path_id: path_ids) { diff --git a/src/server/workspace/workspace.h b/src/server/workspace/workspace.h index 32bba802a..59f91a2a7 100644 --- a/src/server/workspace/workspace.h +++ b/src/server/workspace/workspace.h @@ -14,6 +14,7 @@ #include "semantic/relation_kind.h" #include "server/compiler/compile_graph.h" #include "server/workspace/config.h" +#include "support/cache_store.h" #include "support/path_pool.h" #include "syntax/dependency_graph.h" @@ -29,6 +30,10 @@ namespace clice { namespace protocol = kota::ipc::protocol; namespace lsp = kota::ipc::lsp; +/// On-disk cache layout version (CacheStore root `cache/v{N}`). +/// Bump to discard all cached artifacts after incompatible format changes. +constexpr inline std::uint32_t cache_format_version = 1; + /// Two-layer staleness snapshot for compilation artifacts (PCH, AST, etc.). /// /// Layer 1 (fast): compare each file's current mtime against build_at. @@ -133,12 +138,13 @@ struct MergedIndexShard { } }; -/// Cached PCH state. Content-addressed by preamble hash — shared across all -/// files (open or on-disk) that have the same preamble content. +/// Cached PCH state. Content-addressed by preamble text + frontend compile +/// flags — shared across all files (open or on-disk) with the same key. struct PCHState { std::string path; std::uint32_t bound = 0; - std::uint64_t hash = 0; + /// CacheStore key: hex of xxh3_128bits(preamble text + canonical flags). + std::string key; DepsSnapshot deps; std::string document_links_json; ///< Pre-serialized DocumentLink[] from PCH build std::shared_ptr building; @@ -148,6 +154,8 @@ struct PCHState { /// import the same module. struct PCMState { std::string path; + /// CacheStore key: "{module}-{hash}" over source path + canonical flags. + std::string key; DepsSnapshot deps; }; @@ -177,6 +185,12 @@ struct Workspace { PathPool path_pool; + /// Unified on-disk blob store for PCH/PCM/index artifacts. Opened by + /// load_workspace() when cache_dir is configured; absent means caching + /// is disabled. Owns blob lifecycle (atomic writes, LRU, crash + /// recovery); validity metadata (deps snapshots) stays in cache.json. + std::optional store; + /// Include relationships between files on disk (#include edges). /// Built once at startup from CDB scan; updated incrementally on didSave. DependencyGraph dep_graph; @@ -190,9 +204,9 @@ struct Workspace { /// declarations change. llvm::DenseMap path_to_module; - /// PCH cache, keyed by file path_id. - /// TODO: re-key by preamble content hash to enable cross-file sharing and - /// add LRU eviction. Compile flags should also be part of the key. + /// PCH cache, keyed by file path_id. Hot-path mirror of CacheStore + /// state; blob paths come from the store. + /// TODO: re-key by content hash to enable cross-file sharing. llvm::DenseMap pch_cache; /// PCM cache, keyed by module source path_id. @@ -219,12 +233,11 @@ struct Workspace { /// is a module unit so dependents can be re-evaluated on next compile. void on_file_closed(std::uint32_t path_id); - /// Load PCH/PCM cache from cache.json on disk. + /// Load PCH/PCM validity metadata from cache.json (under the store's + /// versioned root); entries whose blob is gone from the store are dropped. void load_cache(); - /// Save PCH/PCM cache to cache.json on disk. + /// Save PCH/PCM validity metadata to cache.json. void save_cache(); - /// Remove stale PCH/PCM files older than max_age_days. - void cleanup_cache(int max_age_days = 7); /// Build path_to_module reverse mapping from dep_graph. void build_module_map(); /// Fill PCM paths for all built modules, excluding exclude_path_id. diff --git a/tests/integration/compilation/test_persistent_cache.py b/tests/integration/compilation/test_persistent_cache.py index fb7be2f7d..34a7c42a6 100644 --- a/tests/integration/compilation/test_persistent_cache.py +++ b/tests/integration/compilation/test_persistent_cache.py @@ -1,11 +1,12 @@ """Integration tests for persistent PCH/PCM cache. -Verifies that PCH/PCM artifacts are written to .clice/cache/pch/ and .clice/cache/pcm/ -with content-addressed filenames, survive server restarts via cache.json, -and are properly reused across sessions. +Verifies that PCH/PCM artifacts are written to the unified cache store +(.clice/cache/v1/{pch,pcm}/) with content-addressed filenames, survive +server restarts via cache.json, and are properly reused across sessions. """ import asyncio +import json import pytest from lsprotocol.types import ( @@ -17,24 +18,20 @@ from tests.conftest import make_client, shutdown_client from tests.integration.utils import write_cdb, doc from tests.integration.utils.cache import ( + cache_root, list_pch_files, list_pcm_files, + list_tmp_files, + pin_cache_to_workspace, read_cache_json, ) from tests.integration.utils.assertions import assert_clean_compile -def _pin_cache_to_workspace(tmp_path): - """Write a clice.toml that pins cache_dir to /.clice/.""" - (tmp_path / "clice.toml").write_text( - '[project]\ncache_dir = "${workspace}/.clice"\n' - ) - - async def test_pch_written_to_cache_dir(client, tmp_path): """After opening a file with #include, a .pch file should appear in .clice/cache/pch/ with a hex-hash filename.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nstruct Foo { int x; };\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { Foo f; return f.x; }\n' @@ -47,16 +44,16 @@ async def test_pch_written_to_cache_dir(client, tmp_path): # Verify PCH file exists in the cache directory. pch_files = list_pch_files(tmp_path) - assert len(pch_files) >= 1, "Expected at least one .pch file in .clice/cache/pch/" - # Filename should be a 16-char hex hash + .pch - assert pch_files[0].stem and len(pch_files[0].stem) == 16, ( - f"Expected 16-char hex filename, got: {pch_files[0].name}" + assert len(pch_files) >= 1, "Expected at least one .pch file in the store" + # Filename should be a 32-char hex hash (xxh3_128bits) + .pch + assert pch_files[0].stem and len(pch_files[0].stem) == 32, ( + f"Expected 32-char hex filename, got: {pch_files[0].name}" ) async def test_cache_json_persisted(client, tmp_path): """After a PCH build, cache.json should be written with the entry.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nint global_val = 42;\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { return global_val; }\n' @@ -74,7 +71,7 @@ async def test_cache_json_persisted(client, tmp_path): # Verify the entry has expected fields. entry = cache["pch"][0] - assert "hash" in entry + assert "key" in entry assert "build_at" in entry assert "deps" in entry assert "source_file" in entry @@ -83,7 +80,7 @@ async def test_cache_json_persisted(client, tmp_path): async def test_pch_reused_on_close_reopen(client, tmp_path): """Closing and reopening a file within the same session should reuse the cached PCH — no additional .pch files should be created.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nstruct Bar { int y; };\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { Bar b; return b.y; }\n' @@ -118,7 +115,7 @@ async def test_pch_reused_on_close_reopen(client, tmp_path): async def test_pch_survives_server_restart(executable, tmp_path): """PCH cache should survive a full server restart — cache.json is loaded on startup and the existing .pch file is reused.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nstruct Baz { int z; };\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { Baz b; return b.z; }\n' @@ -161,7 +158,7 @@ async def test_pch_survives_server_restart(executable, tmp_path): async def test_shared_preamble_shares_pch(client, tmp_path): """Two files with identical preambles should share the same PCH file (content-addressed by preamble hash).""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nint shared_val = 1;\n") (tmp_path / "a.cpp").write_text( '#include "header.h"\nint fa() { return shared_val; }\n' @@ -188,7 +185,7 @@ async def test_shared_preamble_shares_pch(client, tmp_path): async def test_different_preamble_different_pch(client, tmp_path): """Files with different preambles should produce different PCH files.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "a.h").write_text("#pragma once\nint val_a = 1;\n") (tmp_path / "b.h").write_text("#pragma once\nint val_b = 2;\n") (tmp_path / "a.cpp").write_text('#include "a.h"\nint fa() { return val_a; }\n') @@ -212,7 +209,7 @@ async def test_different_preamble_different_pch(client, tmp_path): async def test_pch_rebuilt_on_header_change(client, tmp_path): """When a preamble header changes, a new PCH should be built (different hash → different filename). The old one remains for cleanup.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nstruct V1 { int a; };\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { V1 v; return v.a; }\n' @@ -254,7 +251,7 @@ async def test_pch_rebuilt_on_header_change(client, tmp_path): async def test_no_tmp_files_after_build(client, tmp_path): """After a successful PCH build, no .tmp files should remain in the cache dir.""" - _pin_cache_to_workspace(tmp_path) + pin_cache_to_workspace(tmp_path) (tmp_path / "header.h").write_text("#pragma once\nint val = 1;\n") (tmp_path / "main.cpp").write_text( '#include "header.h"\nint main() { return val; }\n' @@ -265,22 +262,19 @@ async def test_no_tmp_files_after_build(client, tmp_path): uri, _ = await client.open_and_wait(tmp_path / "main.cpp") assert_clean_compile(client, uri) - # No .tmp files should linger. - pch_dir = tmp_path / ".clice" / "cache" / "pch" - if pch_dir.exists(): - tmp_files = list(pch_dir.glob("*.tmp")) - assert len(tmp_files) == 0, f"Stale .tmp files found: {tmp_files}" - - pcm_dir = tmp_path / ".clice" / "cache" / "pcm" - if pcm_dir.exists(): - tmp_files = list(pcm_dir.glob("*.tmp")) - assert len(tmp_files) == 0, f"Stale .tmp files found: {tmp_files}" + # No in-flight tmp files should linger after the build settles. + assert list_tmp_files(tmp_path) == [], "Stale tmp files found" + for subdir in ("pch", "pcm"): + blob_dir = cache_root(tmp_path) / subdir + if blob_dir.exists(): + stray = [p for p in blob_dir.iterdir() if not p.name.endswith(f".{subdir}")] + assert stray == [], f"Stray files in {subdir}/: {stray}" async def test_cache_dirs_created_on_startup(client, tmp_path): - """The .clice/cache/pch/ and .clice/cache/pcm/ directories should be created - when the server initializes a workspace.""" - _pin_cache_to_workspace(tmp_path) + """The versioned store directories should be created when the server + initializes a workspace.""" + pin_cache_to_workspace(tmp_path) (tmp_path / "main.cpp").write_text("int main() { return 0; }\n") write_cdb(tmp_path, ["main.cpp"]) await client.initialize(tmp_path) @@ -290,9 +284,106 @@ async def test_cache_dirs_created_on_startup(client, tmp_path): uri, _ = await client.open_and_wait(tmp_path / "main.cpp") assert_clean_compile(client, uri) - assert (tmp_path / ".clice" / "cache" / "pch").is_dir(), ( - ".clice/cache/pch/ should be created" + for subdir in ("pch", "pcm", "index"): + assert (cache_root(tmp_path) / subdir).is_dir(), f"{subdir}/ should be created" + + +async def test_different_flags_different_pch(client, tmp_path): + """Two files with identical preamble text but different -D flags must + not share a PCH.""" + pin_cache_to_workspace(tmp_path) + (tmp_path / "header.h").write_text( + "#pragma once\n" + "#ifdef MODE\nstruct Cfg { int mode; };\n#else\nstruct Cfg { int plain; };\n#endif\n" + ) + body = '#include "header.h"\nint use() { Cfg c; return 0; }\n' + (tmp_path / "a.cpp").write_text(body) + (tmp_path / "b.cpp").write_text(body) + + # Same preamble text, different macro definitions per file. + entries = [] + for name, extra in (("a.cpp", []), ("b.cpp", ["-DMODE=1"])): + entries.append( + { + "directory": str(tmp_path), + "file": str(tmp_path / name), + "arguments": ["clang++", "-std=c++17", "-fsyntax-only"] + + extra + + [str(tmp_path / name)], + } + ) + (tmp_path / "compile_commands.json").write_text(json.dumps(entries)) + await client.initialize(tmp_path) + + uri_a, _ = await client.open_and_wait(tmp_path / "a.cpp") + uri_b, _ = await client.open_and_wait(tmp_path / "b.cpp") + assert_clean_compile(client, uri_a) + assert_clean_compile(client, uri_b) + + pch_files = list_pch_files(tmp_path) + assert len(pch_files) == 2, ( + f"Same preamble with different -D flags must produce 2 PCHs, got " + f"{len(pch_files)}: {[f.name for f in pch_files]}" ) - assert (tmp_path / ".clice" / "cache" / "pcm").is_dir(), ( - ".clice/cache/pcm/ should be created" + + +async def _wait_residue_released(workspace, deadline: float = 20.0): + """Wait until orphaned workers of a killed server release their handles + on tmp residue (a rename probe fails on Windows while a file is open).""" + end = asyncio.get_event_loop().time() + deadline + while asyncio.get_event_loop().time() < end: + locked = False + for f in list_tmp_files(workspace): + probe = f.with_name(f.name + ".probe") + try: + f.rename(probe) + probe.rename(f) + except OSError: + locked = True + break + if not locked: + return + await asyncio.sleep(0.5) + + +async def test_kill9_recovery(executable, tmp_path): + """kill -9 during compilation must not corrupt the store: a restarted + server sweeps crash residue and serves the file normally.""" + pin_cache_to_workspace(tmp_path) + (tmp_path / "header.h").write_text("#pragma once\nstruct K { int x; };\n") + (tmp_path / "main.cpp").write_text( + '#include "header.h"\nint main() { K k; return k.x; }\n' ) + write_cdb(tmp_path, ["main.cpp"]) + + # Session 1: open the file and kill the server; the short delay makes it + # likely (not guaranteed) the first build is still in flight. + c1 = await make_client(executable, tmp_path) + c1.open(tmp_path / "main.cpp") + await asyncio.sleep(0.3) + server = getattr(c1, "_server", None) + assert server is not None + server.kill() + try: + c1._stop_event.set() + for task in c1._async_tasks: + task.cancel() + except Exception: + pass + await _wait_residue_released(tmp_path) + + # Session 2: its startup sweeps the dead instance's tmp directory, and + # the cache must be usable again. + c2 = await make_client(executable, tmp_path) + uri, _ = await c2.open_and_wait(tmp_path / "main.cpp") + assert_clean_compile(c2, uri) + pch_files = list_pch_files(tmp_path) + assert len(pch_files) >= 1, "PCH should be (re)built after crash" + # Blob directories contain only committed blobs, never partial writes. + stray = [p for p in (cache_root(tmp_path) / "pch").iterdir() if p.suffix != ".pch"] + assert stray == [], f"Crash residue in pch/: {stray}" + await shutdown_client(c2) + + # Clean shutdown removed session 2's own tmp; session 1's residue was + # swept at session 2 startup, so nothing may remain. + assert list_tmp_files(tmp_path) == [], "tmp residue should be swept" diff --git a/tests/integration/compilation/test_staleness.py b/tests/integration/compilation/test_staleness.py index 88df093b3..7044ef89b 100644 --- a/tests/integration/compilation/test_staleness.py +++ b/tests/integration/compilation/test_staleness.py @@ -20,7 +20,9 @@ VersionedTextDocumentIdentifier, ) +from tests.conftest import make_client, shutdown_client from tests.integration.utils import write_cdb, doc +from tests.integration.utils.cache import list_pch_files, pin_cache_to_workspace from tests.integration.utils.wait import wait_for_recompile from tests.integration.utils.assertions import assert_clean_compile, assert_has_errors @@ -396,3 +398,32 @@ async def test_didsave_with_module_deps(client, test_data_dir, tmp_path): await wait_for_recompile(client, mid_uri) assert_clean_compile(client, mid_uri) + + +async def test_flag_change_invalidates_pch(executable, tmp_path): + """Changing a -D flag in the CDB must produce a new PCH on the next + session even though the preamble text is unchanged (flags are part of + the cache key).""" + pin_cache_to_workspace(tmp_path) + (tmp_path / "header.h").write_text("#pragma once\nstruct F { int x; };\n") + (tmp_path / "main.cpp").write_text( + '#include "header.h"\nint main() { F f; return f.x; }\n' + ) + + # Session 1: build with -DFOO=1. + write_cdb(tmp_path, ["main.cpp"], extra_args=["-DFOO=1"]) + c1 = await make_client(executable, tmp_path) + uri, _ = await c1.open_and_wait(tmp_path / "main.cpp") + assert_clean_compile(c1, uri) + assert len(list_pch_files(tmp_path)) == 1 + await shutdown_client(c1) + + # Session 2: same preamble text, different flag — must not reuse. + write_cdb(tmp_path, ["main.cpp"], extra_args=["-DFOO=2"]) + c2 = await make_client(executable, tmp_path) + uri2, _ = await c2.open_and_wait(tmp_path / "main.cpp") + assert_clean_compile(c2, uri2) + assert len(list_pch_files(tmp_path)) == 2, ( + "A flag change must produce a second, separately keyed PCH" + ) + await shutdown_client(c2) diff --git a/tests/integration/utils/cache.py b/tests/integration/utils/cache.py index d6769cfab..bf9137ce0 100644 --- a/tests/integration/utils/cache.py +++ b/tests/integration/utils/cache.py @@ -3,10 +3,26 @@ import json from pathlib import Path +# Versioned root of the unified cache store; bump together with +# cache_format_version in src/server/workspace/workspace.h. +CACHE_ROOT = Path(".clice") / "cache" / "v1" + + +def cache_root(workspace: Path) -> Path: + """Return the versioned cache store root for a workspace.""" + return workspace / CACHE_ROOT + + +def pin_cache_to_workspace(workspace: Path) -> None: + """Write a clice.toml that pins cache_dir to /.clice/.""" + (workspace / "clice.toml").write_text( + '[project]\ncache_dir = "${workspace}/.clice"\n' + ) + def list_pch_files(workspace: Path) -> list[Path]: """Return all .pch files in the cache directory, sorted.""" - pch_dir = workspace / ".clice" / "cache" / "pch" + pch_dir = cache_root(workspace) / "pch" if not pch_dir.exists(): return [] return sorted(pch_dir.glob("*.pch")) @@ -14,7 +30,7 @@ def list_pch_files(workspace: Path) -> list[Path]: def list_pcm_files(workspace: Path) -> list[Path]: """Return all .pcm files in the cache directory, sorted.""" - pcm_dir = workspace / ".clice" / "cache" / "pcm" + pcm_dir = cache_root(workspace) / "pcm" if not pcm_dir.exists(): return [] return sorted(pcm_dir.glob("*.pcm")) @@ -22,17 +38,19 @@ def list_pcm_files(workspace: Path) -> list[Path]: def read_cache_json(workspace: Path) -> dict | None: """Read and parse cache.json, or return None if absent.""" - path = workspace / ".clice" / "cache" / "cache.json" + path = cache_root(workspace) / "cache.json" if not path.exists(): return None return json.loads(path.read_text()) def list_tmp_files(workspace: Path) -> list[Path]: - """Return stale .tmp files in pch and pcm cache directories.""" - tmp_files = [] - for subdir in ("pch", "pcm"): - d = workspace / ".clice" / "cache" / subdir - if d.exists(): - tmp_files.extend(d.glob("*.tmp")) - return tmp_files + """Return in-flight tmp files of all store instances. + + Committed blobs appear atomically, so anything under tmp/ is either an + in-flight write of a live server or crash residue awaiting cleanup. + """ + tmp_dir = cache_root(workspace) / "tmp" + if not tmp_dir.exists(): + return [] + return sorted(p for p in tmp_dir.rglob("*") if p.is_file()) From dd36c1dfc270815449bf0aa5e3991b3909da8b84 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 01:21:32 +0800 Subject: [PATCH 04/17] fix(support): shield LLVM headers from windows.h min/max macros windows.h was included before the LLVM headers without NOMINMAX, so its min/max function-like macros broke SmallVector.h/MathExtras.h macro expansion on the MSVC CI jobs. Also define WIN32_LEAN_AND_MEAN; this is the only direct windows.h include in clice, and the comment documents that any future include needs the same guards. --- src/support/cache_store.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp index 4579c3533..4bbbec2b7 100644 --- a/src/support/cache_store.cpp +++ b/src/support/cache_store.cpp @@ -8,6 +8,11 @@ #include #ifdef _WIN32 +// The only direct windows.h include in clice. The defines keep it from +// spilling the min/max macros (and other clutter) that break LLVM and +// standard headers; any future include of windows.h needs the same guards. +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX #include #else #include From 5c363b891497616d8aeb1ee4361f4238d5d1b326 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 01:21:40 +0800 Subject: [PATCH 05/17] fix(support): do not drop mutable-key blobs on rename conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The benign rename-collision fallback in CacheStore::commit assumed content-addressed keys, which only holds for LRU namespaces. Persistent and Scratch keys are mutable (the index rewrites "project"/"{path_id}" with new content), so on Windows a rename onto an open blob was silently treated as success and the new data discarded — stale index served forever. Now only LRU keeps the fallback (and requires a regular file); Persistent/Scratch remove the stale destination and retry, surfacing an error if the blob still cannot be published, and dropping the in-memory entry when the old blob is gone too. --- src/support/cache_store.cpp | 36 ++++++++++++++++---- src/support/cache_store.h | 6 ++++ tests/unit/support/cache_store_tests.cpp | 42 ++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp index 4bbbec2b7..99b80f03d 100644 --- a/src/support/cache_store.cpp +++ b/src/support/cache_store.cpp @@ -383,13 +383,35 @@ std::expected CacheStore::commit(PendingEntry pend final_path = state->blob_path(*ns_state, pending.key); if(auto result = fs::rename(pending.tmp_path, final_path); !result) { - // Content-addressed keys make a rename collision benign: an - // existing blob with the same name has the same content (this - // happens on Windows when the destination is currently open). - // Account for the file that survived on disk, not the tmp. - llvm::sys::fs::remove(pending.tmp_path); - if(llvm::sys::fs::status(final_path, status)) { - return std::unexpected(result.error()); + if(ns_state->config.policy == CachePolicy::LRU) { + // LRU namespaces hold content-addressed blobs, which makes + // a rename collision benign: an existing blob with the same + // name has the same content (this happens on Windows when + // the destination is currently open). Account for the file + // that survived on disk, not the tmp. + llvm::sys::fs::remove(pending.tmp_path); + if(llvm::sys::fs::status(final_path, status) || + status.type() != llvm::sys::fs::file_type::regular_file) { + return std::unexpected(result.error()); + } + } else { + // Persistent and Scratch keys are mutable — the same key is + // rewritten with new content — so the blob on disk is stale. + // Remove it and retry; if the rename still fails, report the + // error instead of silently dropping the new data. + llvm::sys::fs::remove(final_path); + if(auto retry = fs::rename(pending.tmp_path, final_path); !retry) { + llvm::sys::fs::remove(pending.tmp_path); + if(auto it = ns_state->entries.find(pending.key); + it != ns_state->entries.end() && llvm::sys::fs::status(final_path, status)) { + // The old blob is gone as well: drop its entry so + // lookups don't hand out a dangling path. + ns_state->total_size -= it->second.size; + ns_state->entries.erase(it); + state->dirty = true; + } + return std::unexpected(retry.error()); + } } } diff --git a/src/support/cache_store.h b/src/support/cache_store.h index 25057c654..0462ecec7 100644 --- a/src/support/cache_store.h +++ b/src/support/cache_store.h @@ -107,6 +107,12 @@ class CacheStore { /// Finish a two-phase write: fsync the tmp file and atomically rename /// it to its final path. Triggers LRU eviction when the namespace /// exceeds its budget. Returns the final blob path. + /// + /// A rename collision with an existing blob (Windows, destination open) + /// is benign for LRU namespaces — content-addressed keys imply the same + /// content. Persistent and Scratch keys are mutable, so the stale + /// destination is removed and the rename retried; if the new blob still + /// cannot be published, an error is returned. std::expected commit(PendingEntry pending); /// Cancel a two-phase write and delete the tmp file. diff --git a/tests/unit/support/cache_store_tests.cpp b/tests/unit/support/cache_store_tests.cpp index ee2dd7e04..dfd6fe177 100644 --- a/tests/unit/support/cache_store_tests.cpp +++ b/tests/unit/support/cache_store_tests.cpp @@ -178,6 +178,48 @@ TEST_CASE(PersistentNeverEvicted) { ASSERT_TRUE(store.lookup("index", "b").has_value()); } +TEST_CASE(PersistentRewriteWins) { + TempDir tmp; + auto store = open_store(tmp); + store.register_namespace( + {.name = "index", .extension = ".idx", .policy = CachePolicy::Persistent}); + + // Persistent keys are mutable: a rewrite of the same key must serve + // the new content, never the old blob. + put(store, "index", "project", "first snapshot"); + auto path = put(store, "index", "project", "second"); + ASSERT_EQ(fs::read(path).value_or(""), "second"); +} + +TEST_CASE(PersistentRenameRetry) { + TempDir tmp; + auto store = open_store(tmp); + store.register_namespace( + {.name = "index", .extension = ".idx", .policy = CachePolicy::Persistent}); + + // Squat the blob path with an empty directory: the first rename fails, + // and the store must remove the obstacle and publish the new blob. + tmp.mkdir("root/cache/v1/index/project.idx"); + auto path = put(store, "index", "project", "fresh"); + ASSERT_EQ(fs::read(path).value_or(""), "fresh"); + ASSERT_TRUE(store.lookup("index", "project").has_value()); +} + +TEST_CASE(PersistentCommitFailureSurfaces) { + TempDir tmp; + auto store = open_store(tmp); + store.register_namespace( + {.name = "index", .extension = ".idx", .policy = CachePolicy::Persistent}); + + // A non-empty directory can neither be renamed over nor removed: the + // commit must report the failure, not silently claim the data is stored. + tmp.touch("root/cache/v1/index/project.idx/squatter", "x"); + auto pending = store.begin_store("index", "project"); + ASSERT_TRUE(fs::write(pending.tmp_path, "dropped").has_value()); + ASSERT_FALSE(store.commit(std::move(pending)).has_value()); + ASSERT_FALSE(store.lookup("index", "project").has_value()); +} + TEST_CASE(InvalidateRemovesBlob) { TempDir tmp; auto store = open_store(tmp); From 1ef168881dd4d344e21b0fe906c9d2182f35d284 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 01:21:49 +0800 Subject: [PATCH 06/17] fix(server): offload index blob commit fsync to thread pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Indexer::save() ran store.commit() — an fsync per blob — synchronously on the event loop, unlike the PCH/PCM paths which already offload via kota::queue. save() is now a task: phase one serializes the ProjectIndex and dirty shards to tmp files with no suspension point, so the batch stays a consistent snapshot even if a merge runs before the commits land; phase two commits each blob on the kota thread pool. --- src/server/compiler/indexer.cpp | 87 ++++++++++++++++------------ src/server/compiler/indexer.h | 6 +- src/server/service/master_server.cpp | 2 +- 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/src/server/compiler/indexer.cpp b/src/server/compiler/indexer.cpp index 2dd595775..1ea1ece73 100644 --- a/src/server/compiler/indexer.cpp +++ b/src/server/compiler/indexer.cpp @@ -94,59 +94,74 @@ void Indexer::merge(const void* tu_index_data, std::size_t size) { workspace.merged_indices.size()); } -/// Serialize via a two-phase store write: serialize to the tmp path, then -/// commit (fsync + atomic rename). Returns false if any step fails. -static bool store_blob(CacheStore& store, - llvm::StringRef key, - llvm::function_ref serialize) { +/// Begin a two-phase store write and serialize the blob to its tmp path. +/// Returns the entry to commit, or nullopt if serialization failed. +static std::optional + serialize_blob(CacheStore& store, + llvm::StringRef key, + llvm::function_ref serialize) { auto pending = store.begin_store("index", key); - { - std::error_code ec; - llvm::raw_fd_ostream os(pending.tmp_path, ec); - if(ec) { - LOG_WARN("Failed to write index blob {}: {}", key, ec.message()); - store.abort(pending); - return false; - } - serialize(os); - os.flush(); - // A truncated blob (disk full) must never be committed: the index - // namespace is Persistent, so it would be served forever. - if(os.has_error()) { - LOG_WARN("Failed to write index blob {}: {}", key, os.error().message()); - os.clear_error(); - store.abort(pending); - return false; - } + std::error_code ec; + llvm::raw_fd_ostream os(pending.tmp_path, ec); + if(ec) { + LOG_WARN("Failed to write index blob {}: {}", key, ec.message()); + store.abort(pending); + return std::nullopt; } - if(auto result = store.commit(std::move(pending)); !result) { - LOG_WARN("Failed to commit index blob {}: {}", key, result.error().message()); - return false; + serialize(os); + os.flush(); + // A truncated blob (disk full) must never be committed: the index + // namespace is Persistent, so it would be served forever. + if(os.has_error()) { + LOG_WARN("Failed to write index blob {}: {}", key, os.error().message()); + os.clear_error(); + store.abort(pending); + return std::nullopt; } - return true; + return pending; } -void Indexer::save() { +kota::task<> Indexer::save() { if(!workspace.store) - return; - - if(store_blob(*workspace.store, "project", [&](llvm::raw_ostream& os) { + co_return; + auto& store = *workspace.store; + + // Phase 1, synchronous: serialize the ProjectIndex and every dirty + // shard to tmp files. No suspension point in between, so the batch is + // a consistent snapshot even if a merge runs before the commits below + // are done. + llvm::SmallVector batch; + if(auto pending = serialize_blob(store, "project", [&](llvm::raw_ostream& os) { workspace.project_index.serialize(os); })) { + batch.push_back(std::move(*pending)); LOG_INFO("Saved ProjectIndex ({} symbols)", workspace.project_index.symbols.size()); } std::size_t saved = 0; + std::size_t total = workspace.merged_indices.size(); for(auto& [path_id, shard]: workspace.merged_indices) { if(!shard.index.need_rewrite()) continue; - if(store_blob(*workspace.store, std::to_string(path_id), [&](llvm::raw_ostream& os) { - shard.index.serialize(os); - })) { + if(auto pending = + serialize_blob(store, std::to_string(path_id), [&](llvm::raw_ostream& os) { + shard.index.serialize(os); + })) { + batch.push_back(std::move(*pending)); ++saved; } } - LOG_INFO("Saved {} MergedIndex shards (of {} total)", saved, workspace.merged_indices.size()); + LOG_INFO("Saved {} MergedIndex shards (of {} total)", saved, total); + + // Phase 2: commit each blob (fsync + atomic rename) on the kota thread + // pool, keeping the heavy IO off the event loop. + for(auto& pending: batch) { + auto key = pending.key; + auto committed = co_await kota::queue([&] { return store.commit(std::move(pending)); }); + if(!committed.has_value() || !committed.value().has_value()) { + LOG_WARN("Failed to commit index blob {}", key); + } + } } void Indexer::load() { @@ -984,7 +999,7 @@ kota::task<> Indexer::run_background_indexing() { indexing_active = false; LOG_INFO("Background indexing complete: {} files dispatched", dispatched); - save(); + co_await save(); } } // namespace clice diff --git a/src/server/compiler/indexer.h b/src/server/compiler/indexer.h index 3122b3e5b..6e3398c01 100644 --- a/src/server/compiler/indexer.h +++ b/src/server/compiler/indexer.h @@ -115,8 +115,10 @@ class Indexer { void merge(const void* tu_index_data, std::size_t size); /// Save Workspace's ProjectIndex and MergedIndex shards to the cache - /// store ("index" namespace, Persistent policy). - void save(); + /// store ("index" namespace, Persistent policy). Serialization runs + /// on the event loop; each blob's commit (fsync + rename) is offloaded + /// to the kota thread pool. + kota::task<> save(); /// Load Workspace's ProjectIndex and MergedIndex shards from the cache /// store, sweeping orphaned shard blobs. diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index e9add7b3d..5265357f5 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -208,7 +208,7 @@ void MasterServer::schedule_shutdown() { kota::task<> MasterServer::shutdown_and_cleanup() { bg_tasks.cancel(); co_await bg_tasks.join(); - indexer.save(); + co_await indexer.save(); workspace.save_cache(); co_await kota::when_all(indexer.stop(), compiler.stop()); co_await pool.stop(); From 0d0bbd3293a7b01b2cc3be8f1132d939c0f61e5d Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 01:21:57 +0800 Subject: [PATCH 07/17] refactor(server): remove project.index_dir config field The index lives inside the unified cache store under cache_dir, so the field no longer points at anything. Drop it from the config struct, its default/substitution logic and the docs; the TOML codec skips unknown keys, so configs still setting index_dir keep parsing (covered by a new test). The legacy index/ directory is still cleaned up at store open. --- docs/clice.toml | 4 +--- docs/en/guide/configuration.md | 9 +-------- docs/zh/guide/configuration.md | 9 +-------- src/server/service/master_server.cpp | 16 ++++++---------- src/server/workspace/config.cpp | 3 --- src/server/workspace/config.h | 1 - tests/unit/server/config_tests.cpp | 20 ++++++++++++++------ 7 files changed, 23 insertions(+), 39 deletions(-) diff --git a/docs/clice.toml b/docs/clice.toml index ccf06bd5a..940330867 100644 --- a/docs/clice.toml +++ b/docs/clice.toml @@ -16,10 +16,8 @@ clang_tidy = false # exceeds this limit, the least recently used files will be removed. # The default value is 8. Whatever the number you set, the minimum is 1, the maximum is 512. max_active_file = 8 -# Directory for storing PCH and PCM files. +# Directory for storing on-disk caches (PCH, PCM and index files). cache_dir = "${workspace}/.clice/cache" -# Directory for storing index files. -index_dir = "${workspace}/.clice/index" logging_dir = "${workspace}/.clice/logs" # Compile commands files or directories to search for compile_commands.json files. compile_commands_paths = ["${workspace}/build"] diff --git a/docs/en/guide/configuration.md b/docs/en/guide/configuration.md index 7c784aa13..270a2cec2 100644 --- a/docs/en/guide/configuration.md +++ b/docs/en/guide/configuration.md @@ -8,14 +8,7 @@ This is the documentation for `clice.toml`. | ------------------- | -------- | ----------------------------- | | `project.cache_dir` | `string` | `"${workspace}/.clice/cache"` | -Folder for storing PCH and PCM caches. -
- -| Name | Type | Default | -| ------------------- | -------- | ----------------------------- | -| `project.index_dir` | `string` | `"${workspace}/.clice/index"` | - -Folder for storing index files. +Folder for storing on-disk caches (PCH, PCM and index files).
## Rule diff --git a/docs/zh/guide/configuration.md b/docs/zh/guide/configuration.md index 8683a3b0b..ac24277a8 100644 --- a/docs/zh/guide/configuration.md +++ b/docs/zh/guide/configuration.md @@ -8,14 +8,7 @@ | ------------------- | -------- | ----------------------------- | | `project.cache_dir` | `string` | `"${workspace}/.clice/cache"` | -用于储存 PCH 和 PCM 缓存的文件夹。 -
- -| 名称 | 类型 | 默认值 | -| ------------------- | -------- | ----------------------------- | -| `project.index_dir` | `string` | `"${workspace}/.clice/index"` | - -用于储存索引文件的文件夹。 +用于储存磁盘缓存(PCH、PCM 和索引文件)的文件夹。
## Rule diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index 5265357f5..611a97cba 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -256,16 +256,12 @@ void MasterServer::open_cache_store() { workspace.store.emplace(std::move(*store)); LOG_INFO("Cache store: {}", workspace.store->base_dir()); - // The index now lives inside the store; the old default index dir is - // discarded (no migration), and a configured index_dir is ignored. - // header_context/ is still written directly by the legacy preamble - // path; drop this exemption once it moves to the Scratch namespace. - auto default_index_dir = path::join(cfg.cache_dir, "index"); - if(!cfg.index_dir.empty() && cfg.index_dir != default_index_dir) { - LOG_WARN("project.index_dir is no longer used; the index is stored in {}", - workspace.store->base_dir()); - } - llvm::sys::fs::remove_directories(default_index_dir); + // The index lives inside the store; the legacy index/ directory of + // pre-store layouts (the removed project.index_dir default) is + // discarded, no migration. header_context/ is still written directly + // by the legacy preamble path; drop this exemption once it moves to + // the Scratch namespace. + llvm::sys::fs::remove_directories(path::join(cfg.cache_dir, "index")); workspace.load_cache(); bg_tasks.spawn(cache_checkpoint_task()); diff --git a/src/server/workspace/config.cpp b/src/server/workspace/config.cpp index 0bd567c42..0a88e4858 100644 --- a/src/server/workspace/config.cpp +++ b/src/server/workspace/config.cpp @@ -78,14 +78,11 @@ void Config::apply_defaults(llvm::StringRef workspace_root) { if(p.cache_dir.empty()) p.cache_dir = path::join(workspace_root, ".clice"); } - if(p.index_dir.empty() && !p.cache_dir.empty()) - p.index_dir = path::join(p.cache_dir, "index"); if(p.logging_dir.empty() && !p.cache_dir.empty()) p.logging_dir = path::join(p.cache_dir, "logs"); // Variable substitution on string fields. substitute_workspace(p.cache_dir, workspace_root); - substitute_workspace(p.index_dir, workspace_root); substitute_workspace(p.logging_dir, workspace_root); for(auto& entry: p.compile_commands_paths) substitute_workspace(entry, workspace_root); diff --git a/src/server/workspace/config.h b/src/server/workspace/config.h index 6d398491c..b2df0c298 100644 --- a/src/server/workspace/config.h +++ b/src/server/workspace/config.h @@ -28,7 +28,6 @@ struct ProjectConfig { defaulted max_active_file = {}; defaulted cache_dir; - defaulted index_dir; defaulted logging_dir; defaulted> compile_commands_paths; diff --git a/tests/unit/server/config_tests.cpp b/tests/unit/server/config_tests.cpp index 8fd9adcae..90d2a1f7b 100644 --- a/tests/unit/server/config_tests.cpp +++ b/tests/unit/server/config_tests.cpp @@ -150,7 +150,6 @@ TEST_CASE(ApplyDefaults) { EXPECT_EQ(config.project.stateful_worker_count.value, 2u); EXPECT_GE(config.project.stateless_worker_count.value, 2u); EXPECT_FALSE(config.project.cache_dir.empty()); - EXPECT_FALSE(config.project.index_dir.empty()); EXPECT_FALSE(config.project.logging_dir.empty()); } @@ -158,7 +157,6 @@ TEST_CASE(ApplyDefaultsEmptyWorkspace) { Config config; config.apply_defaults(""); EXPECT_TRUE(config.project.cache_dir.empty()); - EXPECT_TRUE(config.project.index_dir.empty()); EXPECT_TRUE(config.project.logging_dir.empty()); } @@ -203,6 +201,20 @@ TEST_CASE(LoadMalformedToml) { EXPECT_FALSE(result.has_value()); } +TEST_CASE(LegacyIndexDirIgnored) { + // Configs written for older clice may still set the removed + // project.index_dir key; unknown keys must not fail the parse. + TempDir tmp; + tmp.touch("clice.toml", R"( +[project] +cache_dir = "/opt/cache" +index_dir = "/opt/index" +)"); + auto result = Config::load(tmp.path("clice.toml"), tmp.root.str().str()); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(std::string_view(result->project.cache_dir), "/opt/cache"); +} + TEST_CASE(LoadMissingFile) { auto result = Config::load("/nonexistent/clice.toml", "/workspace"); EXPECT_FALSE(result.has_value()); @@ -211,12 +223,10 @@ TEST_CASE(LoadMissingFile) { TEST_CASE(WorkspaceVarSubst) { Config config; config.project.cache_dir = "${workspace}/cache"; - config.project.index_dir = "${workspace}/idx"; config.project.logging_dir = "${workspace}/logs"; config.project.compile_commands_paths = {"${workspace}/build"}; config.apply_defaults("/my/ws"); EXPECT_EQ(std::string_view(config.project.cache_dir), "/my/ws/cache"); - EXPECT_EQ(std::string_view(config.project.index_dir), "/my/ws/idx"); EXPECT_EQ(std::string_view(config.project.logging_dir), "/my/ws/logs"); EXPECT_EQ(config.project.compile_commands_paths[0], "/my/ws/build"); } @@ -324,8 +334,6 @@ TEST_CASE(WorkspaceCacheFallback) { EXPECT_EQ(path::convert_to_slash(std::string_view(config.project.cache_dir)), "/ws/root/.clice"); - EXPECT_EQ(path::convert_to_slash(std::string_view(config.project.index_dir)), - "/ws/root/.clice/index"); EXPECT_EQ(path::convert_to_slash(std::string_view(config.project.logging_dir)), "/ws/root/.clice/logs"); } From ec80822213d5d7c705411707d3c88bfc1019191a Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 01:22:06 +0800 Subject: [PATCH 08/17] test(integration): add formal client process-control API CliceClient now exposes server/kill_server()/stop_io(), giving the pygls internals (_server, _stop_event, _async_tasks) a single home. The kill -9 recovery test, conftest shutdown and the agentic shutdown tests use the new API instead of poking at private members. --- tests/conftest.py | 8 ++------ tests/integration/agentic/test_agentic.py | 16 ++++++--------- .../compilation/test_persistent_cache.py | 11 ++-------- tests/integration/utils/client.py | 20 +++++++++++++++++++ 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 79da8a7f5..4f53fa73b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -251,8 +251,6 @@ async def assert_server_exited_cleanly(server, timeout: float = 10.0) -> None: async def _shutdown_client(c: CliceClient) -> None: """Gracefully shut down a client, force-kill if needed.""" - server = getattr(c, "_server", None) - try: await asyncio.wait_for(c.shutdown_async(None), timeout=10.0) except Exception: @@ -264,12 +262,10 @@ async def _shutdown_client(c: CliceClient) -> None: pass try: - await assert_server_exited_cleanly(server) + await assert_server_exited_cleanly(c.server) finally: try: - c._stop_event.set() - for task in c._async_tasks: - task.cancel() + await c.stop_io() await asyncio.sleep(0.1) except Exception: pass diff --git a/tests/integration/agentic/test_agentic.py b/tests/integration/agentic/test_agentic.py index 5e47487f7..ea80581b1 100644 --- a/tests/integration/agentic/test_agentic.py +++ b/tests/integration/agentic/test_agentic.py @@ -444,10 +444,8 @@ async def test_rpc_shutdown(executable, workspace): pass rpc.sock.close() - await assert_server_exited_cleanly(c._server) - c._stop_event.set() - for task in c._async_tasks: - task.cancel() + await assert_server_exited_cleanly(c.server) + await c.stop_io() @pytest.mark.workspace("index_features") @@ -566,8 +564,8 @@ async def test_shutdown_during_indexing(executable, tmp_path): try: await c.initialize(workspace, initialization_options=init_options) except Exception: - if c._server.returncode is not None: - await assert_server_exited_cleanly(c._server, timeout=15.0) + if c.server.returncode is not None: + await assert_server_exited_cleanly(c.server, timeout=15.0) raise # Give indexing a moment to start, then send shutdown @@ -585,9 +583,7 @@ async def test_shutdown_during_indexing(executable, tmp_path): pass rpc.sock.close() - await assert_server_exited_cleanly(c._server, timeout=15.0) + await assert_server_exited_cleanly(c.server, timeout=15.0) finally: - c._stop_event.set() - for task in c._async_tasks: - task.cancel() + await c.stop_io() await asyncio.sleep(0.1) diff --git a/tests/integration/compilation/test_persistent_cache.py b/tests/integration/compilation/test_persistent_cache.py index 34a7c42a6..7d2378603 100644 --- a/tests/integration/compilation/test_persistent_cache.py +++ b/tests/integration/compilation/test_persistent_cache.py @@ -361,15 +361,8 @@ async def test_kill9_recovery(executable, tmp_path): c1 = await make_client(executable, tmp_path) c1.open(tmp_path / "main.cpp") await asyncio.sleep(0.3) - server = getattr(c1, "_server", None) - assert server is not None - server.kill() - try: - c1._stop_event.set() - for task in c1._async_tasks: - task.cancel() - except Exception: - pass + c1.kill_server() + await c1.stop_io() await _wait_residue_released(tmp_path) # Session 2: its startup sweeps the dead instance's tmp directory, and diff --git a/tests/integration/utils/client.py b/tests/integration/utils/client.py index 8a6882b99..10ea768cd 100644 --- a/tests/integration/utils/client.py +++ b/tests/integration/utils/client.py @@ -112,6 +112,26 @@ async def initialize( self.init_result = result return result + # ── Server process control ─────────────────────────────────────── + # Single home for the pygls internals these wrap; tests must not poke + # at _server/_stop_event/_async_tasks directly. + + @property + def server(self) -> asyncio.subprocess.Process | None: + """The spawned server process, if started via start_io.""" + return self._server + + def kill_server(self) -> None: + """Force-kill the server process, simulating a crash.""" + assert self._server is not None, "no server process to kill" + self._server.kill() + + async def stop_io(self) -> None: + """Tear down client-side IO tasks without contacting the server.""" + self._stop_event.set() + for task in self._async_tasks: + task.cancel() + # ── Document operations ────────────────────────────────────────── def open(self, filepath: Path, version: int = 0) -> tuple[str, str]: From 3307a6bd47fecab1e8f27d7ee00845819ea2573d Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 02:48:23 +0800 Subject: [PATCH 09/17] fix(support): verify blob content on rename collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The collision fallback assumed LRU blobs colliding by name share the same content, but even LRU keys are not fully content-addressed: a dependency edit changes PCH content without changing the key input, so on Windows a rename onto an open stale blob was kept while the caller recorded fresh deps metadata — stale PCH served with no rebuild trigger. Keep the survivor only when verified byte-identical; otherwise remove and retry like mutable keys. Also document that the unsigned wraparound in the size accounting is intentional and exact. --- src/support/cache_store.cpp | 49 ++++++++++++++++++------ src/support/cache_store.h | 9 ++--- tests/unit/support/cache_store_tests.cpp | 15 ++++++++ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp index 99b80f03d..8fdd18976 100644 --- a/src/support/cache_store.cpp +++ b/src/support/cache_store.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" namespace clice { @@ -93,6 +94,26 @@ std::error_code sync_file(llvm::StringRef path) { return ec; } +/// On a rename collision, check whether the existing destination blob is +/// byte-identical to the one we tried to publish. Only called on the +/// (Windows-only) collision path, so the extra reads stay off hot paths. +bool same_content(llvm::StringRef tmp_path, llvm::StringRef final_path) { + llvm::sys::fs::file_status tmp_status, final_status; + if(llvm::sys::fs::status(tmp_path, tmp_status) || + llvm::sys::fs::status(final_path, final_status) || + final_status.type() != llvm::sys::fs::file_type::regular_file || + tmp_status.getSize() != final_status.getSize()) { + return false; + } + + auto tmp_buf = llvm::MemoryBuffer::getFile(tmp_path); + auto final_buf = llvm::MemoryBuffer::getFile(final_path); + if(!tmp_buf || !final_buf) { + return false; + } + return (*tmp_buf)->getBuffer() == (*final_buf)->getBuffer(); +} + /// Remove directory children whose name parses as a dead pid. /// Used for both `tmp/{pid}` and Scratch `{ns}/{pid}` layouts. void sweep_dead_pid_dirs(llvm::StringRef dir, std::uint32_t self_pid) { @@ -383,22 +404,23 @@ std::expected CacheStore::commit(PendingEntry pend final_path = state->blob_path(*ns_state, pending.key); if(auto result = fs::rename(pending.tmp_path, final_path); !result) { - if(ns_state->config.policy == CachePolicy::LRU) { - // LRU namespaces hold content-addressed blobs, which makes - // a rename collision benign: an existing blob with the same - // name has the same content (this happens on Windows when - // the destination is currently open). Account for the file - // that survived on disk, not the tmp. + if(same_content(pending.tmp_path, final_path)) { + // Benign collision: an identical blob is already published + // (Windows, destination currently open). Keep the survivor + // and account for it. This is verified by comparison, not + // assumed from the key: even LRU keys are not fully + // content-addressed (a dependency edit changes the PCH + // content without changing its key input). llvm::sys::fs::remove(pending.tmp_path); - if(llvm::sys::fs::status(final_path, status) || - status.type() != llvm::sys::fs::file_type::regular_file) { + if(llvm::sys::fs::status(final_path, status)) { return std::unexpected(result.error()); } } else { - // Persistent and Scratch keys are mutable — the same key is - // rewritten with new content — so the blob on disk is stale. - // Remove it and retry; if the rename still fails, report the - // error instead of silently dropping the new data. + // The destination is stale — a rewritten mutable key + // (Persistent/Scratch) or an LRU blob whose content drifted + // from its key. Remove it and retry; if the rename still + // fails, report the error instead of silently dropping the + // new data. llvm::sys::fs::remove(final_path); if(auto retry = fs::rename(pending.tmp_path, final_path); !retry) { llvm::sys::fs::remove(pending.tmp_path); @@ -416,6 +438,9 @@ std::expected CacheStore::commit(PendingEntry pend } auto& entry = ns_state->entries[pending.key]; + // Unsigned wraparound is intentional and exact here: entry.size is + // already included in total_size, so total + new - old stays correct + // even when the replacement blob is smaller. ns_state->total_size += status.getSize() - entry.size; entry.size = status.getSize(); entry.atime = state->next_stamp(); diff --git a/src/support/cache_store.h b/src/support/cache_store.h index 0462ecec7..8acd7646b 100644 --- a/src/support/cache_store.h +++ b/src/support/cache_store.h @@ -108,11 +108,10 @@ class CacheStore { /// it to its final path. Triggers LRU eviction when the namespace /// exceeds its budget. Returns the final blob path. /// - /// A rename collision with an existing blob (Windows, destination open) - /// is benign for LRU namespaces — content-addressed keys imply the same - /// content. Persistent and Scratch keys are mutable, so the stale - /// destination is removed and the rename retried; if the new blob still - /// cannot be published, an error is returned. + /// On a rename collision (Windows, destination open) the existing blob + /// is kept only when verified byte-identical to the new one; otherwise + /// the stale destination is removed and the rename retried, and if the + /// new blob still cannot be published an error is returned. std::expected commit(PendingEntry pending); /// Cancel a two-phase write and delete the tmp file. diff --git a/tests/unit/support/cache_store_tests.cpp b/tests/unit/support/cache_store_tests.cpp index dfd6fe177..46a9f0069 100644 --- a/tests/unit/support/cache_store_tests.cpp +++ b/tests/unit/support/cache_store_tests.cpp @@ -205,6 +205,21 @@ TEST_CASE(PersistentRenameRetry) { ASSERT_TRUE(store.lookup("index", "project").has_value()); } +TEST_CASE(LruStaleBlobReplaced) { + TempDir tmp; + auto store = open_store(tmp); + register_lru(store); + + // Even LRU keys are not fully content-addressed (dependency edits + // change PCH content without changing the key input): on a rename + // collision with DIFFERENT content the stale blob must be replaced, + // never kept. Squat the path to force the collision portably. + tmp.mkdir("root/cache/v1/pch/k1.pch"); + auto path = put(store, "pch", "k1", "fresh"); + ASSERT_EQ(fs::read(path).value_or(""), "fresh"); + ASSERT_TRUE(store.lookup("pch", "k1").has_value()); +} + TEST_CASE(PersistentCommitFailureSurfaces) { TempDir tmp; auto store = open_store(tmp); From 87a495f413b2895f7cd5fde02f603971dd0f9d36 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 02:48:23 +0800 Subject: [PATCH 10/17] fix(server): keep persisted index snapshots consistent Publish shard blobs only together with the ProjectIndex they were built against: bail out when its serialization fails and drop the staged shards when its commit fails, so a restart never pairs new shards with an old project blob. Quiesce compiler/indexer before the shutdown save so the snapshot covers everything that completed. --- src/server/compiler/indexer.cpp | 43 ++++++++++++++++++---------- src/server/service/master_server.cpp | 4 ++- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/server/compiler/indexer.cpp b/src/server/compiler/indexer.cpp index 1ea1ece73..d78224ac4 100644 --- a/src/server/compiler/indexer.cpp +++ b/src/server/compiler/indexer.cpp @@ -129,16 +129,19 @@ kota::task<> Indexer::save() { // Phase 1, synchronous: serialize the ProjectIndex and every dirty // shard to tmp files. No suspension point in between, so the batch is // a consistent snapshot even if a merge runs before the commits below - // are done. - llvm::SmallVector batch; - if(auto pending = serialize_blob(store, "project", [&](llvm::raw_ostream& os) { - workspace.project_index.serialize(os); - })) { - batch.push_back(std::move(*pending)); - LOG_INFO("Saved ProjectIndex ({} symbols)", workspace.project_index.symbols.size()); + // are done. Shards are only published together with the ProjectIndex + // they were built against: pairing new shards with an old project blob + // (or vice versa) would serve a mixed snapshot after restart. + auto project_pending = serialize_blob(store, "project", [&](llvm::raw_ostream& os) { + workspace.project_index.serialize(os); + }); + if(!project_pending) { + LOG_WARN("Skipping index save: ProjectIndex serialization failed"); + co_return; } + LOG_INFO("Saved ProjectIndex ({} symbols)", workspace.project_index.symbols.size()); - std::size_t saved = 0; + llvm::SmallVector shards; std::size_t total = workspace.merged_indices.size(); for(auto& [path_id, shard]: workspace.merged_indices) { if(!shard.index.need_rewrite()) @@ -147,18 +150,28 @@ kota::task<> Indexer::save() { serialize_blob(store, std::to_string(path_id), [&](llvm::raw_ostream& os) { shard.index.serialize(os); })) { - batch.push_back(std::move(*pending)); - ++saved; + shards.push_back(std::move(*pending)); } } - LOG_INFO("Saved {} MergedIndex shards (of {} total)", saved, total); + LOG_INFO("Saved {} MergedIndex shards (of {} total)", shards.size(), total); // Phase 2: commit each blob (fsync + atomic rename) on the kota thread - // pool, keeping the heavy IO off the event loop. - for(auto& pending: batch) { + // pool, keeping the heavy IO off the event loop. The project blob goes + // first; if it cannot be published, drop the shards of this snapshot. + auto committed = + co_await kota::queue([&] { return store.commit(std::move(*project_pending)); }); + if(!committed.has_value() || !committed.value().has_value()) { + LOG_WARN("Failed to commit ProjectIndex blob, dropping {} shard blobs", shards.size()); + for(auto& pending: shards) { + store.abort(pending); + } + co_return; + } + + for(auto& pending: shards) { auto key = pending.key; - auto committed = co_await kota::queue([&] { return store.commit(std::move(pending)); }); - if(!committed.has_value() || !committed.value().has_value()) { + auto result = co_await kota::queue([&] { return store.commit(std::move(pending)); }); + if(!result.has_value() || !result.value().has_value()) { LOG_WARN("Failed to commit index blob {}", key); } } diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index 611a97cba..b8a4e4be7 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -208,9 +208,11 @@ void MasterServer::schedule_shutdown() { kota::task<> MasterServer::shutdown_and_cleanup() { bg_tasks.cancel(); co_await bg_tasks.join(); + // Quiesce in-flight compilation and indexing first so the persisted + // snapshot below covers everything that actually completed. + co_await kota::when_all(indexer.stop(), compiler.stop()); co_await indexer.save(); workspace.save_cache(); - co_await kota::when_all(indexer.stop(), compiler.stop()); co_await pool.stop(); if(workspace.store) { workspace.store->shutdown(); From ade90171463dedd13e9a0c8746beea64717a2678 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 02:48:23 +0800 Subject: [PATCH 11/17] fix(tests): harden client IO teardown stop_io now awaits the cancelled IO tasks so none outlive the test, and test_rpc_shutdown reaches it even when the exit assertion fails. --- tests/integration/agentic/test_agentic.py | 6 ++++-- tests/integration/utils/client.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/agentic/test_agentic.py b/tests/integration/agentic/test_agentic.py index ea80581b1..6d430448c 100644 --- a/tests/integration/agentic/test_agentic.py +++ b/tests/integration/agentic/test_agentic.py @@ -444,8 +444,10 @@ async def test_rpc_shutdown(executable, workspace): pass rpc.sock.close() - await assert_server_exited_cleanly(c.server) - await c.stop_io() + try: + await assert_server_exited_cleanly(c.server) + finally: + await c.stop_io() @pytest.mark.workspace("index_features") diff --git a/tests/integration/utils/client.py b/tests/integration/utils/client.py index 10ea768cd..9be922e22 100644 --- a/tests/integration/utils/client.py +++ b/tests/integration/utils/client.py @@ -131,6 +131,8 @@ async def stop_io(self) -> None: self._stop_event.set() for task in self._async_tasks: task.cancel() + # Wait the cancellations out so no task outlives the test teardown. + await asyncio.gather(*self._async_tasks, return_exceptions=True) # ── Document operations ────────────────────────────────────────── From 322dcddbbcab388b11859dd80123de80cc5f0409 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 17:00:18 +0800 Subject: [PATCH 12/17] fix(server): make PCH build registration cancellation-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The building-event cleanup (wake waiters, clear the marker) ran inline on each exit path of ensure_pch, so a coroutine cancelled at any of its suspension points unwound without ever signalling the event and later requests for the same file would wait on it forever. Today that only happens at shutdown where the waiters are cancelled too, but the invariant is fragile — any future per-compile cancellation would trip it. An RAII guard now owns the cleanup, covering cancellation as well as the normal paths, and only clears the marker if it still points at its own registration (the entry can be erased by didClose and re- registered by a newer build mid-flight). --- src/server/compiler/compiler.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/server/compiler/compiler.cpp b/src/server/compiler/compiler.cpp index 9a61e0056..b1621e8e0 100644 --- a/src/server/compiler/compiler.cpp +++ b/src/server/compiler/compiler.cpp @@ -42,6 +42,26 @@ static std::string cache_key(std::initializer_list parts) { return std::format("{:016x}{:016x}", hash.high64, hash.low64); } +/// RAII completion of an in-flight PCH build registration: wakes waiters +/// and clears the building marker on every exit path — crucially also when +/// the coroutine is cancelled and its frame unwinds at a suspension point, +/// which would otherwise leave waiters suspended on the event forever. +struct BuildingGuard { + Workspace& workspace; + std::uint32_t path_id; + std::shared_ptr completion; + + ~BuildingGuard() { + // Reset only our own registration: the entry may have been erased + // (didClose) and re-registered by a newer build in the meantime. + if(auto it = workspace.pch_cache.find(path_id); + it != workspace.pch_cache.end() && it->second.building == completion) { + it->second.building.reset(); + } + completion->set(); + } +}; + /// Detect whether the cursor is inside a preamble directive (include/import). Compiler::Compiler(kota::event_loop& loop, @@ -528,14 +548,14 @@ kota::task Compiler::ensure_pch(Session& session, co_return workspace.pch_cache.count(path_id) && !workspace.pch_cache[path_id].path.empty(); } - // Register in-flight build so concurrent requests wait on us. + // Register in-flight build so concurrent requests wait on us. The + // guard wakes them on every exit, including cancellation mid-await. auto completion = std::make_shared(); workspace.pch_cache[path_id].building = completion; + BuildingGuard guard{workspace, path_id, completion}; if(!workspace.store) { LOG_WARN("PCH build skipped: cache store is unavailable"); - workspace.pch_cache[path_id].building.reset(); - completion->set(); co_return false; } @@ -561,8 +581,6 @@ kota::task Compiler::ensure_pch(Session& session, LOG_WARN("PCH build failed for {}: {}", path, result.has_value() ? result.value().error : result.error().message); - workspace.pch_cache[path_id].building.reset(); - completion->set(); co_return false; } @@ -571,8 +589,6 @@ kota::task Compiler::ensure_pch(Session& session, co_await kota::queue([&] { return workspace.store->commit(std::move(pending)); }); if(!committed.has_value() || !committed.value().has_value()) { LOG_WARN("Failed to commit PCH for {}", path); - workspace.pch_cache[path_id].building.reset(); - completion->set(); co_return false; } @@ -582,7 +598,6 @@ kota::task Compiler::ensure_pch(Session& session, st.key = pch_key; st.deps = capture_deps_snapshot(workspace.path_pool, result.value().deps); st.document_links_json = std::move(result.value().pch_links_json); - st.building.reset(); session.pch_ref = Session::PCHRef{path_id, pch_key, bound}; @@ -591,7 +606,6 @@ kota::task Compiler::ensure_pch(Session& session, // Persist cache metadata after successful build. workspace.save_cache(); - completion->set(); co_return true; } From 52ed28fd2089ef3d478a6ab3bcac8ad5a231ac81 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 17:00:18 +0800 Subject: [PATCH 13/17] chore(support): document sync-op atomicity and loop-confined TODO Record why the store is synchronous-plus-mutex rather than coroutines over kota's async fs: a started operation always runs to completion, so cancellation never observes a torn mid-operation state. Leave a TODO to re-evaluate the loop-confined coroutine design (no mutex, cancellation points inside operations) once real-world usage settles. --- src/support/cache_store.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/support/cache_store.h b/src/support/cache_store.h index 8acd7646b..659c80a55 100644 --- a/src/support/cache_store.h +++ b/src/support/cache_store.h @@ -67,7 +67,14 @@ struct CacheNamespace { /// completion beyond the call itself. Periodic checkpoint() scheduling is /// the owner's responsibility. All methods are thread-safe so that heavy /// calls (commit's fsync, checkpoint) can be offloaded to a worker thread -/// while lookups continue on the event loop. +/// while lookups continue on the event loop. A synchronous operation runs +/// to completion once started, so cancellation can never observe a torn +/// mid-operation state. +/// +/// TODO: once usage settles, evaluate making commit/checkpoint coroutines +/// over kota's async fs (kota::fsync/rename) with all state confined to +/// the event loop — that removes the mutex entirely, but moves the burden +/// from lock discipline to cancellation points inside each operation. class CacheStore { public: /// A two-phase write in progress. The caller (or a worker process on From eff8fcf93037723b2092c3455c5cf005724c6f53 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 17:23:26 +0800 Subject: [PATCH 14/17] refactor(support): avoid shell-COM remove_directories on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit llvm::sys::fs::remove_directories is implemented over shell COM on Windows (CoInitializeEx + IFileOperation) with no fallback, and with the default IgnoreErrors=true a COM failure (e.g. the thread already holds a different apartment model) is a silent no-op — unsuitable for the server event loop, which uses it for the version sweep, dead-pid reclamation and shutdown cleanup. Add fs::remove_all, the same plain recursive removal LLVM's Unix implementation uses (directory_iterator + remove, no COM), and use it everywhere. Also document that the commit rename collision is rarer than it looks: sys::fs::rename already moves aside destinations whose holders granted delete sharing, which LLVM-opened files always do. --- src/server/service/master_server.cpp | 2 +- src/support/cache_store.cpp | 19 ++++++++++------- src/support/filesystem.h | 32 ++++++++++++++++++++++++++++ tests/unit/test/temp_dir.h | 4 +++- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index b8a4e4be7..100f6d57d 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -263,7 +263,7 @@ void MasterServer::open_cache_store() { // discarded, no migration. header_context/ is still written directly // by the legacy preamble path; drop this exemption once it moves to // the Scratch namespace. - llvm::sys::fs::remove_directories(path::join(cfg.cache_dir, "index")); + fs::remove_all(path::join(cfg.cache_dir, "index")); workspace.load_cache(); bg_tasks.spawn(cache_checkpoint_task()); diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp index 8fdd18976..9ac6b9d1c 100644 --- a/src/support/cache_store.cpp +++ b/src/support/cache_store.cpp @@ -95,8 +95,11 @@ std::error_code sync_file(llvm::StringRef path) { } /// On a rename collision, check whether the existing destination blob is -/// byte-identical to the one we tried to publish. Only called on the -/// (Windows-only) collision path, so the extra reads stay off hot paths. +/// byte-identical to the one we tried to publish. This path is rare even +/// on Windows: llvm::sys::fs::rename already moves an open destination +/// aside when its holder granted delete sharing (LLVM-opened files always +/// do), so a collision means a non-cooperating process (AV scanner, +/// indexing service) holds the blob. The extra reads stay off hot paths. bool same_content(llvm::StringRef tmp_path, llvm::StringRef final_path) { llvm::sys::fs::file_status tmp_status, final_status; if(llvm::sys::fs::status(tmp_path, tmp_status) || @@ -129,7 +132,7 @@ void sweep_dead_pid_dirs(llvm::StringRef dir, std::uint32_t self_pid) { if(pid == self_pid || is_pid_alive(pid)) { continue; } - llvm::sys::fs::remove_directories(it->path()); + fs::remove_all(it->path()); LOG_DEBUG("CacheStore: removed dead instance directory {}", it->path()); } } @@ -233,7 +236,7 @@ std::expected CacheStore::open(llvm::StringRef root } LOG_INFO("CacheStore: discarding stale cache layout {}", it->path()); if(llvm::sys::fs::is_directory(it->path())) { - llvm::sys::fs::remove_directories(it->path()); + fs::remove_all(it->path()); } else { llvm::sys::fs::remove(it->path()); } @@ -263,7 +266,7 @@ std::expected CacheStore::open(llvm::StringRef root sweep_dead_pid_dirs(tmp_parent, state->self_pid); state->tmp_dir = path::join(tmp_parent, std::to_string(state->self_pid)); - llvm::sys::fs::remove_directories(state->tmp_dir); + fs::remove_all(state->tmp_dir); if(auto ec2 = llvm::sys::fs::create_directories(state->tmp_dir)) { return std::unexpected(ec2); } @@ -290,7 +293,7 @@ void CacheStore::register_namespace(CacheNamespace ns) { // by crashed instances and start with a fresh one of our own. sweep_dead_pid_dirs(ns_dir, state->self_pid); ns_state.dir = path::join(ns_dir, std::to_string(state->self_pid)); - llvm::sys::fs::remove_directories(ns_state.dir); + fs::remove_all(ns_state.dir); llvm::sys::fs::create_directories(ns_state.dir); return; } @@ -610,10 +613,10 @@ void CacheStore::shutdown() { std::lock_guard guard(state->mutex); state->checkpoint_locked(); - llvm::sys::fs::remove_directories(state->tmp_dir); + fs::remove_all(state->tmp_dir); for(auto& [name, ns_state]: state->namespaces) { if(ns_state.config.policy == CachePolicy::Scratch) { - llvm::sys::fs::remove_directories(ns_state.dir); + fs::remove_all(ns_state.dir); } } } diff --git a/src/support/filesystem.h b/src/support/filesystem.h index 4a99d67b2..4ddcf02b8 100644 --- a/src/support/filesystem.h +++ b/src/support/filesystem.h @@ -90,6 +90,38 @@ inline std::expected rename(llvm::StringRef from, llvm::S return std::expected(); } +/// Recursively remove a directory tree using plain filesystem primitives. +/// Use this instead of llvm::sys::fs::remove_directories: on Windows that +/// is implemented over shell COM (CoInitializeEx + IFileOperation), which +/// initializes apartment COM on the calling thread and silently no-ops +/// when that fails — unsuitable for server threads. This mirrors the +/// recursion LLVM's own Unix implementation uses. Symlinks are removed +/// without following them. Returns the first error; a missing path is +/// not an error. +inline std::error_code remove_all(llvm::StringRef target) { + std::error_code ec; + for(llvm::sys::fs::directory_iterator it(target, ec, /*follow_symlinks=*/false), end; + !ec && it != end; + it.increment(ec)) { + auto status = it->status(); + if(!status) { + return status.getError(); + } + if(llvm::sys::fs::is_directory(*status)) { + if(auto sub_ec = remove_all(it->path())) { + return sub_ec; + } + } else if(auto remove_ec = llvm::sys::fs::remove(it->path(), /*IgnoreNonExisting=*/true)) { + return remove_ec; + } + } + if(ec) { + // A missing root is fine: there is simply nothing to remove. + return ec == std::errc::no_such_file_or_directory ? std::error_code() : ec; + } + return llvm::sys::fs::remove(target, /*IgnoreNonExisting=*/true); +} + } // namespace fs namespace vfs = llvm::vfs; diff --git a/tests/unit/test/temp_dir.h b/tests/unit/test/temp_dir.h index b9eca86b5..42b80e46e 100644 --- a/tests/unit/test/temp_dir.h +++ b/tests/unit/test/temp_dir.h @@ -3,6 +3,8 @@ #include #include +#include "support/filesystem.h" + #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FileSystem.h" @@ -28,7 +30,7 @@ struct TempDir { } ~TempDir() { - llvm::sys::fs::remove_directories(root); + fs::remove_all(root.str()); } TempDir(const TempDir&) = delete; From 0b9bfcd37680a407e771f0a01479a883ca80d2e6 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 19:56:41 +0800 Subject: [PATCH 15/17] chore(server): drop pre-release legacy cache cleanup Nothing has shipped yet, so there is no old on-disk layout to clean up after: remove the startup deletion of the pre-store index/ directory and the stale-layout wording around the version sweep (which keeps discarding anything under cache/ that isn't the current version dir). --- src/server/service/master_server.cpp | 7 ------- src/support/cache_store.cpp | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index 100f6d57d..0f8edb845 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -258,13 +258,6 @@ void MasterServer::open_cache_store() { workspace.store.emplace(std::move(*store)); LOG_INFO("Cache store: {}", workspace.store->base_dir()); - // The index lives inside the store; the legacy index/ directory of - // pre-store layouts (the removed project.index_dir default) is - // discarded, no migration. header_context/ is still written directly - // by the legacy preamble path; drop this exemption once it moves to - // the Scratch namespace. - fs::remove_all(path::join(cfg.cache_dir, "index")); - workspace.load_cache(); bg_tasks.spawn(cache_checkpoint_task()); } diff --git a/src/support/cache_store.cpp b/src/support/cache_store.cpp index 9ac6b9d1c..608b8c164 100644 --- a/src/support/cache_store.cpp +++ b/src/support/cache_store.cpp @@ -226,7 +226,7 @@ std::expected CacheStore::open(llvm::StringRef root } // Discard anything that isn't the current layout version: older version - // directories and the pre-versioning layout (pch/, pcm/, cache.json). + // directories and any stray files. std::error_code ec; for(auto it = llvm::sys::fs::directory_iterator(parent, ec); !ec && it != llvm::sys::fs::directory_iterator(); From 2294252c112b84cb6182c54a8feeb871a393c1d0 Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 20:29:23 +0800 Subject: [PATCH 16/17] fix(support): never follow a symlinked root in remove_all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A stale entry under cache/ that is a symlink to a directory outside the cache root would be recursed into by the version sweep, deleting files the store does not own. lstat the root first: anything that is not a real directory — including symlinks to directories — is unlinked without being followed (children were already handled no-follow). --- src/support/filesystem.h | 15 ++++++++++++--- tests/unit/support/cache_store_tests.cpp | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/support/filesystem.h b/src/support/filesystem.h index 4ddcf02b8..2b9eae7f3 100644 --- a/src/support/filesystem.h +++ b/src/support/filesystem.h @@ -95,10 +95,19 @@ inline std::expected rename(llvm::StringRef from, llvm::S /// is implemented over shell COM (CoInitializeEx + IFileOperation), which /// initializes apartment COM on the calling thread and silently no-ops /// when that fails — unsuitable for server threads. This mirrors the -/// recursion LLVM's own Unix implementation uses. Symlinks are removed -/// without following them. Returns the first error; a missing path is -/// not an error. +/// recursion LLVM's own Unix implementation uses. Symlinks — including a +/// symlinked root — are removed without following them, so the recursion +/// can never escape into the link's target. Returns the first error; a +/// missing path is not an error. inline std::error_code remove_all(llvm::StringRef target) { + llvm::sys::fs::file_status target_status; + if(auto status_ec = llvm::sys::fs::status(target, target_status, /*follow=*/false)) { + return status_ec == std::errc::no_such_file_or_directory ? std::error_code() : status_ec; + } + if(target_status.type() != llvm::sys::fs::file_type::directory_file) { + return llvm::sys::fs::remove(target, /*IgnoreNonExisting=*/true); + } + std::error_code ec; for(llvm::sys::fs::directory_iterator it(target, ec, /*follow_symlinks=*/false), end; !ec && it != end; diff --git a/tests/unit/support/cache_store_tests.cpp b/tests/unit/support/cache_store_tests.cpp index 46a9f0069..f2ad33915 100644 --- a/tests/unit/support/cache_store_tests.cpp +++ b/tests/unit/support/cache_store_tests.cpp @@ -2,6 +2,10 @@ #include #include +#ifndef _WIN32 +#include +#endif + #include "test/temp_dir.h" #include "test/test.h" #include "support/cache_store.h" @@ -137,6 +141,22 @@ TEST_CASE(LegacyLayoutDiscarded) { ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/pch"))); } +#ifndef _WIN32 +TEST_CASE(SymlinkNotFollowed) { + TempDir tmp; + tmp.touch("outside/keep.txt", "data"); + tmp.mkdir("root/cache"); + // A stale entry that is a symlink to data outside the cache root: the + // version sweep must unlink it without recursing into the target. + [[maybe_unused]] auto linked = + ::symlink(tmp.path("outside").c_str(), tmp.path("root/cache/v0").c_str()); + + auto store = open_store(tmp); + ASSERT_FALSE(llvm::sys::fs::exists(tmp.path("root/cache/v0"))); + ASSERT_TRUE(llvm::sys::fs::exists(tmp.path("outside/keep.txt"))); +} +#endif + TEST_CASE(LruEviction) { TempDir tmp; auto store = open_store(tmp); From 4f6d17dfe6b4a5d729db386c97ec453451a86b8f Mon Sep 17 00:00:00 2001 From: ykiko Date: Fri, 12 Jun 2026 20:29:23 +0800 Subject: [PATCH 17/17] fix(server): re-validate PCMs until the set is stable The eviction preflight ran only before compile_deps, but building dependencies can itself evict another clean module's PCM under LRU budget pressure, reopening the window just closed and handing clang a dangling path via fill_pcm_deps. Scan-dirty-compile now loops until a scan finds nothing evicted (bounded retries). --- src/server/compiler/compiler.cpp | 46 +++++++++++++++++++------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/server/compiler/compiler.cpp b/src/server/compiler/compiler.cpp index b1621e8e0..6986a2c60 100644 --- a/src/server/compiler/compiler.cpp +++ b/src/server/compiler/compiler.cpp @@ -619,29 +619,37 @@ kota::task Compiler::ensure_deps(Session& session, std::unordered_map& pcms) { auto path_id = session.path_id; - // Re-validate cached PCM blobs before compiling: LRU eviction can remove - // one while its compile unit is still marked clean, so dirty those units - // to trigger a rebuild instead of handing clang a dangling path. + // Re-validate cached PCM blobs and compile module dependencies. LRU + // eviction can remove a blob while its compile unit is still marked + // clean, so dirty those units instead of handing clang a dangling + // path. Building dependencies can itself evict another clean + // module's PCM under budget pressure, which reopens the window the + // scan just closed — hence the bounded retry until the set is stable. if(workspace.compile_graph) { - llvm::SmallVector evicted; - for(auto& [pid, pcm_path]: workspace.pcm_paths) { - if(!llvm::sys::fs::exists(pcm_path)) { - evicted.push_back(pid); + for(int attempt = 0; attempt < 3; ++attempt) { + llvm::SmallVector evicted; + for(auto& [pid, pcm_path]: workspace.pcm_paths) { + if(!llvm::sys::fs::exists(pcm_path)) { + evicted.push_back(pid); + } } - } - for(auto pid: evicted) { - for(auto id: workspace.compile_graph->update(pid)) { - workspace.pcm_paths.erase(id); - workspace.pcm_cache.erase(id); + if(attempt > 0 && evicted.empty()) { + break; } - workspace.pcm_paths.erase(pid); - workspace.pcm_cache.erase(pid); - } - } - // Compile C++20 module dependencies (PCMs). - if(workspace.compile_graph && !co_await workspace.compile_graph->compile_deps(path_id)) { - co_return false; + for(auto pid: evicted) { + for(auto id: workspace.compile_graph->update(pid)) { + workspace.pcm_paths.erase(id); + workspace.pcm_cache.erase(id); + } + workspace.pcm_paths.erase(pid); + workspace.pcm_cache.erase(pid); + } + + if(!co_await workspace.compile_graph->compile_deps(path_id)) { + co_return false; + } + } } // Scan buffer text for module imports that might not be in compile_graph yet.