diff --git a/cmake/package.cmake b/cmake/package.cmake index eddadea52..6049f2846 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -41,7 +41,7 @@ set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "" FORCE) FetchContent_Declare( kotatsu GIT_REPOSITORY https://github.com/clice-io/kotatsu - GIT_TAG b4bcd3c7e011812a345b4c520ccc77a6ed09174b + GIT_TAG 980c178 ) set(KOTA_ENABLE_ZEST ON) diff --git a/src/compile/compilation_unit.cpp b/src/compile/compilation_unit.cpp index 03d56576e..fc3ba0a25 100644 --- a/src/compile/compilation_unit.cpp +++ b/src/compile/compilation_unit.cpp @@ -125,6 +125,19 @@ auto CompilationUnitRef::interested_content() -> llvm::StringRef { return file_content(interested_file()); } +auto CompilationUnitRef::line_starts() -> std::span { + if(self->line_starts_cache.empty()) { + auto content = interested_content(); + self->line_starts_cache.push_back(0); + for(std::uint32_t i = 0; i < content.size(); ++i) { + if(content[i] == '\n') { + self->line_starts_cache.push_back(i + 1); + } + } + } + return self->line_starts_cache; +} + bool CompilationUnitRef::is_builtin_file(clang::FileID fid) { // No FileEntryRef => built-in/command line/scratch. if(!self->SM().getFileEntryRefForID(fid)) { diff --git a/src/compile/compilation_unit.h b/src/compile/compilation_unit.h index 5a36722f6..2f1828dcc 100644 --- a/src/compile/compilation_unit.h +++ b/src/compile/compilation_unit.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -138,6 +139,10 @@ class CompilationUnitRef { /// Get the content of interested file. auto interested_content() -> llvm::StringRef; + /// Get the byte offsets of each line start in the interested file. + /// Lazily computed and cached. + auto line_starts() -> std::span; + /// Check if a file is a builtin file. bool is_builtin_file(clang::FileID fid); diff --git a/src/compile/implement.h b/src/compile/implement.h index 72b346a35..7f1a3d099 100644 --- a/src/compile/implement.h +++ b/src/compile/implement.h @@ -85,6 +85,9 @@ struct CompilationUnitRef::Self { /// Cache for symbol id. llvm::DenseMap symbol_hash_cache; + /// Cache for line starts of the interested file. + std::vector line_starts_cache; + llvm::BumpPtrAllocator path_storage; std::vector diagnostics; diff --git a/src/feature/code_completion.cpp b/src/feature/code_completion.cpp index b883d5ab7..b21facbc6 100644 --- a/src/feature/code_completion.cpp +++ b/src/feature/code_completion.cpp @@ -255,11 +255,8 @@ class CodeCompletionCollector final : public clang::CodeCompleteConsumer { auto prefix = CompletionPrefix::from(content, offset); FuzzyMatcher matcher(prefix.spelling); - PositionMapper converter(content, encoding); - auto replace_range = protocol::Range{ - .start = *converter.to_position(prefix.range.begin), - .end = *converter.to_position(prefix.range.end), - }; + auto line_starts = lsp::build_line_starts(content); + auto replace_range = to_range(content, line_starts, encoding, prefix.range); std::vector collected; collected.reserve(candidate_count); diff --git a/src/feature/diagnostics.cpp b/src/feature/diagnostics.cpp index 7eb5cbf75..4b3de5def 100644 --- a/src/feature/diagnostics.cpp +++ b/src/feature/diagnostics.cpp @@ -10,8 +10,6 @@ namespace clice::feature { namespace { -namespace lsp = kota::ipc::lsp; - auto to_uri(llvm::StringRef file) -> std::string { const auto file_view = std::string_view(file.data(), file.size()); @@ -49,13 +47,13 @@ void add_related(protocol::Diagnostic& diagnostic, } auto content = unit.file_content(raw.fid); - PositionMapper converter(content, encoding); + auto line_starts = lsp::build_line_starts(content); protocol::DiagnosticRelatedInformation related{ .location = protocol::Location{ .uri = to_uri(unit.file_path(raw.fid)), - .range = to_range(converter, raw.range), + .range = to_range(content, line_starts, encoding, raw.range), }, .message = raw.message, }; @@ -80,8 +78,6 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding) } }; - PositionMapper main_converter(unit.interested_content(), encoding); - for(const auto& raw: unit.diagnostics()) { auto level = raw.id.level; @@ -136,7 +132,7 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding) } if(raw.fid == unit.interested_file()) { - diagnostic.range = to_range(main_converter, raw.range); + diagnostic.range = to_range(unit, encoding, raw.range); current = std::move(diagnostic); continue; } @@ -152,11 +148,9 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding) } auto offset = unit.file_offset(include_location); - auto end_offset = offset + unit.token_spelling(include_location).size(); - diagnostic.range = protocol::Range{ - .start = *main_converter.to_position(offset), - .end = *main_converter.to_position(end_offset), - }; + auto end_offset = + static_cast(offset + unit.token_spelling(include_location).size()); + diagnostic.range = to_range(unit, encoding, {offset, end_offset}); current = std::move(diagnostic); } diff --git a/src/feature/document_links.cpp b/src/feature/document_links.cpp index 56320ec86..1d17f4e28 100644 --- a/src/feature/document_links.cpp +++ b/src/feature/document_links.cpp @@ -18,7 +18,6 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding) } auto content = unit.interested_content(); - PositionMapper converter(content, encoding); auto& directives = directives_it->second; auto* lang_opts = &unit.lang_options(); @@ -29,7 +28,7 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding) auto range = find_directive_argument(content, offset, lang_opts); if(!range) return; - protocol::DocumentLink link{.range = to_range(converter, *range)}; + protocol::DocumentLink link{.range = to_range(unit, encoding, *range)}; link.target = target.str(); links.push_back(std::move(link)); }; diff --git a/src/feature/document_symbols.cpp b/src/feature/document_symbols.cpp index 70ddba713..a9b96c8d1 100644 --- a/src/feature/document_symbols.cpp +++ b/src/feature/document_symbols.cpp @@ -178,13 +178,14 @@ void sort_symbols(std::vector& symbols) { } } -auto to_protocol_symbol(const DocumentSymbol& symbol, const PositionMapper& converter) - -> protocol::DocumentSymbol { +auto to_protocol_symbol(const DocumentSymbol& symbol, + CompilationUnitRef unit, + PositionEncoding encoding) -> protocol::DocumentSymbol { protocol::DocumentSymbol result{ .name = symbol.name, .kind = to_protocol_symbol_kind(symbol.kind), - .range = to_range(converter, symbol.range), - .selection_range = to_range(converter, symbol.selection_range), + .range = to_range(unit, encoding, symbol.range), + .selection_range = to_range(unit, encoding, symbol.selection_range), }; if(!symbol.detail.empty()) { @@ -195,8 +196,8 @@ auto to_protocol_symbol(const DocumentSymbol& symbol, const PositionMapper& conv std::vector> children; children.reserve(symbol.children.size()); for(const auto& child: symbol.children) { - children.push_back( - std::make_shared(to_protocol_symbol(child, converter))); + children.push_back(std::make_shared( + to_protocol_symbol(child, unit, encoding))); } result.children = std::move(children); } @@ -216,12 +217,11 @@ auto document_symbols(CompilationUnitRef unit, PositionEncoding encoding) -> std::vector { auto internal = document_symbols(unit); - PositionMapper converter(unit.interested_content(), encoding); std::vector symbols; symbols.reserve(internal.size()); for(const auto& symbol: internal) { - symbols.push_back(to_protocol_symbol(symbol, converter)); + symbols.push_back(to_protocol_symbol(symbol, unit, encoding)); } return symbols; diff --git a/src/feature/feature.h b/src/feature/feature.h index 86eda11e2..113c16522 100644 --- a/src/feature/feature.h +++ b/src/feature/feature.h @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include "compile/compilation.h" @@ -15,19 +17,34 @@ namespace clice::feature { +namespace lsp = kota::ipc::lsp; namespace protocol = kota::ipc::protocol; using kota::ipc::lsp::PositionEncoding; -using kota::ipc::lsp::PositionMapper; using kota::ipc::lsp::parse_position_encoding; -inline auto to_range(const PositionMapper& converter, LocalSourceRange range) -> protocol::Range { +inline auto to_range(std::string_view content, + std::span line_starts, + lsp::PositionEncoding encoding, + LocalSourceRange range) -> protocol::Range { return protocol::Range{ - .start = *converter.to_position(range.begin), - .end = *converter.to_position(range.end), + .start = *lsp::to_position(content, line_starts, encoding, range.begin), + .end = *lsp::to_position(content, line_starts, encoding, range.end), }; } +inline auto to_position(CompilationUnitRef unit, + lsp::PositionEncoding encoding, + std::uint32_t offset) -> std::optional { + return lsp::to_position(unit.interested_content(), unit.line_starts(), encoding, offset); +} + +inline auto to_range(CompilationUnitRef unit, + lsp::PositionEncoding encoding, + LocalSourceRange range) -> protocol::Range { + return to_range(unit.interested_content(), unit.line_starts(), encoding, range); +} + struct CodeCompletionOptions { bool enable_keyword_snippet = false; bool enable_function_arguments_snippet = false; diff --git a/src/feature/folding_ranges.cpp b/src/feature/folding_ranges.cpp index 6f53fb991..3835b5303 100644 --- a/src/feature/folding_ranges.cpp +++ b/src/feature/folding_ranges.cpp @@ -349,14 +349,12 @@ auto folding_ranges(CompilationUnitRef unit) -> std::vector { auto folding_ranges(CompilationUnitRef unit, PositionEncoding encoding) -> std::vector { auto collected = folding_ranges(unit); - PositionMapper converter(unit.interested_content(), encoding); std::vector result; result.reserve(collected.size()); for(const auto& item: collected) { - auto start = *converter.to_position(item.range.begin); - auto end = *converter.to_position(item.range.end); + auto [start, end] = to_range(unit, encoding, item.range); protocol::FoldingRange range{ .start_line = start.line, diff --git a/src/feature/formatting.cpp b/src/feature/formatting.cpp index 0be637028..dffa30f4e 100644 --- a/src/feature/formatting.cpp +++ b/src/feature/formatting.cpp @@ -53,13 +53,15 @@ auto document_format(llvm::StringRef file, return edits; } - PositionMapper converter(content, encoding); + auto line_starts = lsp::build_line_starts(content); for(const auto& replacement: *replacements) { - protocol::TextEdit edit; - edit.range.start = *converter.to_position(replacement.getOffset()); - edit.range.end = *converter.to_position(replacement.getOffset() + replacement.getLength()); - edit.new_text = replacement.getReplacementText().str(); + auto begin = replacement.getOffset(); + auto end = begin + replacement.getLength(); + protocol::TextEdit edit{ + .range = to_range(content, line_starts, encoding, {begin, end}), + .new_text = replacement.getReplacementText().str(), + }; edits.push_back(std::move(edit)); } diff --git a/src/feature/hover.cpp b/src/feature/hover.cpp index b602fe6a9..2649df965 100644 --- a/src/feature/hover.cpp +++ b/src/feature/hover.cpp @@ -1310,8 +1310,7 @@ auto to_protocol_hover(CompilationUnitRef unit, }; if(info.symbol_range) { - PositionMapper converter(unit.interested_content(), encoding); - result.range = to_range(converter, *info.symbol_range); + result.range = to_range(unit, encoding, *info.symbol_range); } return result; diff --git a/src/feature/inlay_hints.cpp b/src/feature/inlay_hints.cpp index 2e21901b9..fc68a38cc 100644 --- a/src/feature/inlay_hints.cpp +++ b/src/feature/inlay_hints.cpp @@ -929,13 +929,12 @@ auto inlay_hints(CompilationUnitRef unit, PositionEncoding encoding) -> std::vector { auto collected = inlay_hints(unit, target, options); - PositionMapper converter(unit.interested_content(), encoding); std::vector hints; hints.reserve(collected.size()); for(const auto& hint: collected) { protocol::InlayHint out{ - .position = *converter.to_position(hint.offset), + .position = *to_position(unit, encoding, hint.offset), .label = hint.label, }; diff --git a/src/feature/semantic_tokens.cpp b/src/feature/semantic_tokens.cpp index 9c4db1e01..b3de1ad40 100644 --- a/src/feature/semantic_tokens.cpp +++ b/src/feature/semantic_tokens.cpp @@ -479,10 +479,11 @@ class SemanticTokensCollector : public SemanticVisitor class SemanticTokenEncoder { public: - SemanticTokenEncoder(llvm::StringRef content, + SemanticTokenEncoder(CompilationUnitRef unit, PositionEncoding encoding, protocol::SemanticTokens& output) : - content(content), converter(content, encoding), output(output) {} + content(unit.interested_content()), line_starts(unit.line_starts()), encoding(encoding), + output(output) {} void append(const SemanticToken& token) { if(!token.range.valid() || token.range.end <= token.range.begin || @@ -492,8 +493,8 @@ class SemanticTokenEncoder { auto begin = token.range.begin; auto end = token.range.end; - auto begin_position = *converter.to_position(begin); - auto end_position = *converter.to_position(end); + auto begin_position = *lsp::to_position(content, line_starts, encoding, begin); + auto end_position = *lsp::to_position(content, line_starts, encoding, end); auto begin_line = static_cast(begin_position.line); auto begin_char = static_cast(begin_position.character); auto end_line = static_cast(end_position.line); @@ -524,7 +525,7 @@ class SemanticTokenEncoder { first_piece = false; } - auto length = converter.measure(chunk.substr(chunk_offset, piece_size)); + auto length = lsp::encoded_length(chunk.substr(chunk_offset, piece_size), encoding); emit_relative(delta_line, delta_start, length, token.kind, token.modifiers); chunk_offset += piece_size; @@ -532,7 +533,7 @@ class SemanticTokenEncoder { } if(piece_size > 0) { - auto length = converter.measure(chunk.substr(chunk_offset)); + auto length = lsp::encoded_length(chunk.substr(chunk_offset), encoding); emit_relative(1, 0, length, token.kind, token.modifiers); } } @@ -560,7 +561,8 @@ class SemanticTokenEncoder { private: llvm::StringRef content; - PositionMapper converter; + std::span line_starts; + PositionEncoding encoding; protocol::SemanticTokens& output; std::uint32_t last_line = 0; std::uint32_t last_start_character = 0; @@ -580,7 +582,7 @@ auto semantic_tokens(CompilationUnitRef unit, PositionEncoding encoding) protocol::SemanticTokens result; result.data.reserve(tokens.size() * 5); - SemanticTokenEncoder encoder(unit.interested_content(), encoding, result); + SemanticTokenEncoder encoder(unit, encoding, result); for(const auto& token: tokens) { encoder.append(token); } diff --git a/src/index/merged_index.cpp b/src/index/merged_index.cpp index b8c2dbbf2..61bd0a309 100644 --- a/src/index/merged_index.cpp +++ b/src/index/merged_index.cpp @@ -6,6 +6,7 @@ #include "index/serialization.h" #include "support/filesystem.h" +#include "kota/ipc/lsp/position.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/raw_os_ostream.h" @@ -108,6 +109,9 @@ struct MergedIndex::Impl { /// The content of corresponding source file. std::string content; + /// Line start offsets for position mapping. + std::vector line_starts; + /// If this file is included by other source file, then it has header contexts. /// The key represents the source file id, value represents the context in the /// source file. @@ -257,6 +261,13 @@ void MergedIndex::load_in_memory(this Self& self) { index.content = root->content()->str(); } + if(root->line_starts() && root->line_starts()->size() > 0) { + auto* ls = root->line_starts(); + index.line_starts.assign(ls->begin(), ls->end()); + } else if(!index.content.empty()) { + index.line_starts = kota::ipc::lsp::build_line_starts(index.content); + } + self.buffer.reset(); } @@ -264,9 +275,8 @@ MergedIndex MergedIndex::load(llvm::StringRef path) { auto buffer = llvm::MemoryBuffer::getFile(path); if(!buffer) { return MergedIndex(); - } else { - return MergedIndex(std::move(*buffer), nullptr); } + return MergedIndex(std::move(*buffer), nullptr); } void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) { @@ -360,6 +370,7 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) { auto removed = CreateVector(builder, buffer); auto content_offset = CreateString(builder, index->content); + auto line_starts_offset = builder.CreateVector(index->line_starts); auto merged_index = binary::CreateMergedIndex(builder, index->max_canonical_id, @@ -369,7 +380,8 @@ void MergedIndex::serialize(this const Self& self, llvm::raw_ostream& out) { CreateVector(builder, occurrences), CreateVector(builder, relations), removed, - content_offset); + content_offset, + line_starts_offset); builder.Finish(merged_index); out.write(safe_cast(builder.GetBufferPointer()), builder.GetSize()); @@ -587,6 +599,7 @@ void MergedIndex::merge(this Self& self, llvm::StringRef content) { self.load_in_memory(); self.impl->content = content.str(); + self.impl->line_starts = kota::ipc::lsp::build_line_starts(self.impl->content); self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) { auto& context = self.compilation_contexts[path_id]; context.canonical_id = canonical_id; @@ -604,6 +617,7 @@ void MergedIndex::merge(this Self& self, self.load_in_memory(); if(self.impl->content.empty() && !content.empty()) { self.impl->content = content.str(); + self.impl->line_starts = kota::ipc::lsp::build_line_starts(self.impl->content); } self.impl->merge(path_id, index, [&](Impl& self, std::uint32_t canonical_id) { auto& context = self.header_contexts[path_id]; @@ -624,6 +638,18 @@ llvm::StringRef MergedIndex::content(this const Self& self) { return {}; } +std::span MergedIndex::line_starts(this const Self& self) { + if(self.impl) { + return self.impl->line_starts; + } else if(self.buffer) { + auto root = fbs::GetRoot(self.buffer->getBufferStart()); + if(root->line_starts() && root->line_starts()->size() > 0) { + return {root->line_starts()->data(), root->line_starts()->size()}; + } + } + return {}; +} + bool operator==(MergedIndex& lhs, MergedIndex& rhs) { lhs.load_in_memory(); rhs.load_in_memory(); diff --git a/src/index/merged_index.h b/src/index/merged_index.h index 2e4bc375e..4d6f4edac 100644 --- a/src/index/merged_index.h +++ b/src/index/merged_index.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "index/tu_index.h" @@ -67,6 +68,9 @@ class MergedIndex { /// Get the stored source content for position mapping. llvm::StringRef content(this const Self& self); + /// Get line starts for position mapping. + std::span line_starts(this const Self& self); + /// Merge the index with given compilation context. void merge(this Self& self, std::uint32_t path_id, diff --git a/src/index/schema.fbs b/src/index/schema.fbs index e25e1f293..29db9ace7 100644 --- a/src/index/schema.fbs +++ b/src/index/schema.fbs @@ -118,6 +118,9 @@ removed: content: string; + +line_starts: + [uint]; } table TUFileRelationsEntry { diff --git a/src/server/compiler/compiler.cpp b/src/server/compiler/compiler.cpp index 5f75fb865..e1f0db995 100644 --- a/src/server/compiler/compiler.cpp +++ b/src/server/compiler/compiler.cpp @@ -726,12 +726,8 @@ kota::task<> Compiler::run_compile(std::shared_ptr session) { if(!result.value().tu_index_data.empty()) { auto tu_index = index::TUIndex::from(result.value().tu_index_data.data()); - OpenFileIndex ofi; - ofi.file_index = std::move(tu_index.main_file_index); - ofi.symbols = std::move(tu_index.symbols); - ofi.content = params.text; - ofi.mapper.emplace(ofi.content, lsp::PositionEncoding::UTF16); - session->file_index = std::move(ofi); + session->file_index = std::move(tu_index.main_file_index); + session->symbols = std::move(tu_index.symbols); } auto version = session->version; @@ -839,6 +835,7 @@ Compiler::RawResult Compiler::forward_query(worker::QueryKind kind, auto path = std::string(workspace.path_pool.resolve(path_id)); auto gen = session->generation; auto text = session->text; + auto line_starts = session->line_starts; if(!co_await ensure_compiled(session)) { co_return serde_raw{"null"}; @@ -852,18 +849,16 @@ Compiler::RawResult Compiler::forward_query(worker::QueryKind kind, wp.kind = kind; wp.path = path; - lsp::PositionMapper mapper(text, lsp::PositionEncoding::UTF16); - if(position) { - auto offset = mapper.to_offset(*position); + auto offset = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, *position); if(!offset) co_return serde_raw{"null"}; wp.offset = *offset; } if(range) { - auto start = mapper.to_offset(range->start); - auto end = mapper.to_offset(range->end); + auto start = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, range->start); + auto end = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, range->end); if(start && end) { wp.range = {*start, *end}; } @@ -882,6 +877,7 @@ Compiler::RawResult Compiler::forward_build(worker::BuildKind kind, auto path_id = session->path_id; auto path = std::string(workspace.path_pool.resolve(path_id)); auto gen = session->generation; + auto line_starts = session->line_starts; worker::BuildParams wp; wp.kind = kind; @@ -900,8 +896,7 @@ Compiler::RawResult Compiler::forward_build(worker::BuildKind kind, co_return serde_raw{}; } - lsp::PositionMapper mapper(wp.text, lsp::PositionEncoding::UTF16); - auto offset = mapper.to_offset(position); + auto offset = lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, position); if(!offset) co_return serde_raw{"null"}; wp.offset = *offset; @@ -924,9 +919,10 @@ Compiler::RawResult Compiler::forward_format(std::shared_ptr session, wp.text = session->text; if(range) { - lsp::PositionMapper mapper(wp.text, lsp::PositionEncoding::UTF16); - auto begin = mapper.to_offset(range->start); - auto end = mapper.to_offset(range->end); + auto line_starts = lsp::build_line_starts(wp.text); + auto begin = + lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, range->start); + auto end = lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, range->end); if(!begin || !end) co_return serde_raw{"null"}; wp.format_range = {*begin, *end}; @@ -944,8 +940,8 @@ Compiler::RawResult Compiler::handle_completion(const protocol::Position& positi auto path_id = session->path_id; auto path = std::string(workspace.path_pool.resolve(path_id)); - lsp::PositionMapper mapper(session->text, lsp::PositionEncoding::UTF16); - auto offset = mapper.to_offset(position); + auto offset = + lsp::to_offset(session->text, session->line_starts, lsp::PositionEncoding::UTF16, position); if(offset) { auto pctx = detect_completion_context(session->text, *offset); if(pctx.kind == CompletionContext::IncludeQuoted || diff --git a/src/server/compiler/indexer.cpp b/src/server/compiler/indexer.cpp index 7a5911ca3..f04b9f16f 100644 --- a/src/server/compiler/indexer.cpp +++ b/src/server/compiler/indexer.cpp @@ -26,6 +26,107 @@ namespace clice { namespace lsp = kota::ipc::lsp; +static auto to_position(const Session& session, std::uint32_t offset) { + return lsp::to_position(session.text, + session.line_starts, + lsp::PositionEncoding::UTF16, + offset); +} + +static auto to_offset(const Session& session, const protocol::Position& position) { + return lsp::to_offset(session.text, + session.line_starts, + lsp::PositionEncoding::UTF16, + position); +} + +const static index::Occurrence* lookup_occurrence(const std::vector& occs, + std::uint32_t offset) { + auto it = std::ranges::lower_bound(occs, offset, {}, [](const index::Occurrence& o) { + return o.range.end; + }); + const index::Occurrence* best = nullptr; + while(it != occs.end() && it->range.contains(offset)) { + if(!best || (it->range.end - it->range.begin) < (best->range.end - best->range.begin)) + best = &*it; + ++it; + } + return best; +} + +static std::optional> + find_occurrence(const Session& session, std::uint32_t offset) { + auto* occ = lookup_occurrence(session.file_index->occurrences, offset); + if(!occ) + return std::nullopt; + auto start = to_position(session, occ->range.begin); + auto end = to_position(session, occ->range.end); + if(!start || !end) + return std::nullopt; + return std::pair{ + occ->target, + protocol::Range{*start, *end} + }; +} + +template +static void + find_relations(const Session& session, index::SymbolHash hash, RelationKind kind, Fn&& fn) { + auto it = session.file_index->relations.find(hash); + if(it == session.file_index->relations.end()) + return; + for(auto& r: it->second) { + if(r.kind & kind) { + auto start = to_position(session, r.range.begin); + auto end = to_position(session, r.range.end); + if(start && end) { + if(!fn(r, protocol::Range{*start, *end})) + return; + } + } + } +} + +static std::optional> + find_occurrence(const index::MergedIndex& index, std::uint32_t offset) { + auto ls = index.line_starts(); + auto c = index.content(); + if(ls.empty()) + return std::nullopt; + std::optional> result; + index.lookup(offset, [&](const index::Occurrence& o) { + auto start = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, o.range.begin); + auto end = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, o.range.end); + if(start && end) { + result = { + o.target, + protocol::Range{*start, *end} + }; + } + return false; + }); + return result; +} + +template +static void find_relations(const index::MergedIndex& index, + index::SymbolHash hash, + RelationKind kind, + Fn&& fn) { + auto ls = index.line_starts(); + auto c = index.content(); + if(ls.empty()) + return; + index.lookup(hash, kind, [&](const index::Relation& r) { + auto start = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, r.range.begin); + auto end = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, r.range.end); + if(start && end) { + return fn(r, protocol::Range{*start, *end}); + } + return true; + }); +} + void Indexer::merge(const void* tu_index_data, std::size_t size) { auto tu_index = index::TUIndex::from(tu_index_data); if(tu_index.graph.paths.empty()) { @@ -54,11 +155,11 @@ void Indexer::merge(const void* tu_index_data, std::size_t size) { file_content_storage = (*buf)->getBuffer().str(); file_content = file_content_storage; } - shard.index.merge(global_path_id, - tu_index.built_at, - std::move(include_locs), - file_idx, - file_content); + shard.merge(global_path_id, + tu_index.built_at, + std::move(include_locs), + file_idx, + file_content); } else { std::optional include_id; for(std::uint32_t i = 0; i < tu_index.graph.locations.size(); ++i) { @@ -79,9 +180,8 @@ void Indexer::merge(const void* tu_index_data, std::size_t size) { header_content_storage = (*header_buf)->getBuffer().str(); header_content = header_content_storage; } - shard.index.merge(global_path_id, *include_id, file_idx, header_content); + shard.merge(global_path_id, *include_id, file_idx, header_content); } - shard.invalidate_mapper(); }; for(auto& [tu_path_id, file_idx]: tu_index.path_file_indices) { @@ -126,13 +226,13 @@ void Indexer::save(llvm::StringRef index_dir) { std::size_t saved = 0; for(auto& [path_id, shard]: workspace.merged_indices) { - if(!shard.index.need_rewrite()) + if(!shard.need_rewrite()) continue; auto shard_path = path::join(shards_dir, std::to_string(path_id) + ".idx"); std::error_code write_ec; llvm::raw_fd_ostream os(shard_path, write_ec); if(!write_ec) { - shard.index.serialize(os); + shard.serialize(os); ++saved; } } @@ -162,7 +262,7 @@ void Indexer::load(llvm::StringRef index_dir) { std::uint32_t path_id = 0; if(stem.getAsInteger(10, path_id)) continue; - workspace.merged_indices[path_id] = MergedIndexShard{index::MergedIndex::load(it->path())}; + workspace.merged_indices[path_id] = index::MergedIndex::load(it->path()); } if(!workspace.merged_indices.empty()) { @@ -183,14 +283,14 @@ bool Indexer::need_update(llvm::StringRef file_path) { for(auto& p: workspace.project_index.path_pool.paths) { path_mapping.push_back(p); } - return merged_it->second.index.need_update(path_mapping); + return merged_it->second.need_update(path_mapping); } bool Indexer::find_symbol_info(index::SymbolHash hash, std::string& name, SymbolKind& kind) const { bool found = false; - for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { - auto it = ofi.symbols.find(hash); - if(it != ofi.symbols.end()) { + for_each_session([&](std::uint32_t, const Session& session) -> bool { + auto it = session.symbols->find(hash); + if(it != session.symbols->end()) { name = it->second.name; kind = it->second.kind; found = true; @@ -214,23 +314,18 @@ Indexer::CursorHit Indexer::resolve_cursor(llvm::StringRef path, Session* session) { // Try the session's open file index first. if(session && session->file_index) { - auto& index = *session->file_index; - if(!index.mapper) - return {}; - auto offset = index.mapper->to_offset(position); + auto offset = to_offset(*session, position); if(!offset) return {}; - if(auto found = index.find_occurrence(*offset)) + if(auto found = find_occurrence(*session, *offset)) return {found->first, found->second}; return {}; } - // Fallback to MergedIndex, using session text (or reading from disk) for position -> offset. - const std::string* doc_text = session ? &session->text : nullptr; - if(!doc_text) + // Fallback to MergedIndex, using session text for position -> offset. + if(!session) return {}; - lsp::PositionMapper doc_mapper(*doc_text, lsp::PositionEncoding::UTF16); - auto offset = doc_mapper.to_offset(position); + auto offset = to_offset(*session, position); if(!offset) return {}; @@ -241,7 +336,7 @@ Indexer::CursorHit Indexer::resolve_cursor(llvm::StringRef path, if(shard_it == workspace.merged_indices.end()) return {}; - if(auto found = shard_it->second.find_occurrence(*offset)) + if(auto found = find_occurrence(shard_it->second, *offset)) return {found->first, found->second}; return {}; } @@ -267,20 +362,21 @@ std::vector Indexer::query_relations(llvm::StringRef path, auto uri = lsp::URI::from_file_path(workspace.project_index.path_pool.path(file_id)); if(!uri) continue; - shard_it->second.find_relations(hit.hash, - kind, - [&](const auto&, protocol::Range range) { - locations.push_back({uri->str(), range}); - return true; - }); + find_relations(shard_it->second, + hit.hash, + kind, + [&](const auto&, protocol::Range range) { + locations.push_back({uri->str(), range}); + return true; + }); } } - for_each_overlay([&](std::uint32_t id, const OpenFileIndex& ofi) -> bool { + for_each_session([&](std::uint32_t id, const Session& session) -> bool { auto uri = lsp::URI::from_file_path(std::string(workspace.path_pool.resolve(id))); if(!uri) return true; - ofi.find_relations(hit.hash, kind, [&](const auto&, protocol::Range range) { + find_relations(session, hit.hash, kind, [&](const auto&, protocol::Range range) { locations.push_back({uri->str(), range}); return true; }); @@ -307,19 +403,22 @@ std::optional Indexer::lookup_symbol(const std::string& uri, } std::optional Indexer::find_definition_location(index::SymbolHash hash) { - std::optional overlay_result; - for_each_overlay([&](std::uint32_t id, const OpenFileIndex& ofi) -> bool { + std::optional session_result; + for_each_session([&](std::uint32_t id, const Session& session) -> bool { auto uri = lsp::URI::from_file_path(std::string(workspace.path_pool.resolve(id))); if(!uri) return true; - ofi.find_relations(hash, RelationKind::Definition, [&](const auto&, protocol::Range range) { - overlay_result = protocol::Location{uri->str(), range}; - return false; - }); - return !overlay_result.has_value(); + find_relations(session, + hash, + RelationKind::Definition, + [&](const auto&, protocol::Range range) { + session_result = protocol::Location{uri->str(), range}; + return false; + }); + return !session_result.has_value(); }); - if(overlay_result) - return overlay_result; + if(session_result) + return session_result; // Fall back to ProjectIndex reference files. auto sym_it = workspace.project_index.symbols.find(hash); @@ -336,12 +435,13 @@ std::optional Indexer::find_definition_location(index::Symbo if(!uri) continue; std::optional result; - shard_it->second.find_relations(hash, - RelationKind::Definition, - [&](const auto&, protocol::Range range) { - result = protocol::Location{uri->str(), range}; - return false; - }); + find_relations(shard_it->second, + hash, + RelationKind::Definition, + [&](const auto&, protocol::Range range) { + result = protocol::Location{uri->str(), range}; + return false; + }); if(result) return result; } @@ -380,14 +480,14 @@ void Indexer::collect_grouped_relations( auto shard_it = workspace.merged_indices.find(file_id); if(shard_it == workspace.merged_indices.end()) continue; - shard_it->second.find_relations(hash, kind, [&](const auto& r, protocol::Range range) { + find_relations(shard_it->second, hash, kind, [&](const auto& r, protocol::Range range) { target_ranges[r.target_symbol].push_back(range); return true; }); } } - for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { - ofi.find_relations(hash, kind, [&](const auto& r, protocol::Range range) { + for_each_session([&](std::uint32_t, const Session& session) -> bool { + find_relations(session, hash, kind, [&](const auto& r, protocol::Range range) { target_ranges[r.target_symbol].push_back(range); return true; }); @@ -407,8 +507,7 @@ void Indexer::collect_unique_targets(index::SymbolHash hash, auto shard_it = workspace.merged_indices.find(file_id); if(shard_it == workspace.merged_indices.end()) continue; - /// No position conversion needed -- just collect target symbol hashes. - shard_it->second.index.lookup(hash, kind, [&](const index::Relation& r) { + shard_it->second.lookup(hash, kind, [&](const index::Relation& r) { if(seen.insert(r.target_symbol).second) { targets.push_back(r.target_symbol); } @@ -416,9 +515,9 @@ void Indexer::collect_unique_targets(index::SymbolHash hash, }); } } - for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { - auto rel_it = ofi.file_index.relations.find(hash); - if(rel_it == ofi.file_index.relations.end()) + for_each_session([&](std::uint32_t, const Session& session) -> bool { + auto rel_it = session.file_index->relations.find(hash); + if(rel_it == session.file_index->relations.end()) return true; for(auto& r: rel_it->second) { if(r.kind & kind) { @@ -460,12 +559,10 @@ static std::string extract_line(llvm::StringRef content, std::uint32_t offset) { } std::optional Indexer::get_definition_text(index::SymbolHash hash) { - std::optional overlay_result; - for_each_overlay([&](std::uint32_t id, const OpenFileIndex& ofi) -> bool { - if(!ofi.mapper) - return true; - auto it = ofi.file_index.relations.find(hash); - if(it == ofi.file_index.relations.end()) + std::optional session_result; + for_each_session([&](std::uint32_t id, const Session& session) -> bool { + auto it = session.file_index->relations.find(hash); + if(it == session.file_index->relations.end()) return true; for(auto& rel: it->second) { if(rel.kind.value() != RelationKind::Definition) @@ -473,26 +570,25 @@ std::optional Indexer::get_definition_text(index::Symbo auto def_range = std::bit_cast(rel.target_symbol); if(def_range.begin >= def_range.end) continue; - llvm::StringRef content = ofi.content; - if(def_range.end > content.size()) + if(def_range.end > session.text.size()) continue; - auto start = ofi.mapper->to_position(def_range.begin); - auto end = ofi.mapper->to_position(def_range.end); + auto start = to_position(session, def_range.begin); + auto end = to_position(session, def_range.end); if(!start || !end) continue; - overlay_result = DefinitionText{ + session_result = DefinitionText{ .file = std::string(workspace.path_pool.resolve(id)), .start_line = static_cast(start->line) + 1, .end_line = static_cast(end->line) + 1, - .text = - std::string(content.substr(def_range.begin, def_range.end - def_range.begin)), + .text = std::string( + session.text.substr(def_range.begin, def_range.end - def_range.begin)), }; return false; } return true; }); - if(overlay_result) - return overlay_result; + if(session_result) + return session_result; auto sym_it = workspace.project_index.symbols.find(hash); if(sym_it == workspace.project_index.symbols.end()) @@ -504,32 +600,31 @@ std::optional Indexer::get_definition_text(index::Symbo auto shard_it = workspace.merged_indices.find(file_id); if(shard_it == workspace.merged_indices.end()) continue; - auto* m = shard_it->second.mapper(); - if(!m) + auto& mi = shard_it->second; + auto ls = mi.line_starts(); + if(ls.empty()) continue; - auto content = shard_it->second.index.content(); + auto content = mi.content(); std::optional result; - shard_it->second.index.lookup( - hash, - RelationKind::Definition, - [&](const index::Relation& r) { - auto def_range = std::bit_cast(r.target_symbol); - if(def_range.begin >= def_range.end || def_range.end > content.size()) - return true; - auto start = m->to_position(def_range.begin); - auto end = m->to_position(def_range.end); - if(!start || !end) - return true; - result = DefinitionText{ - .file = workspace.project_index.path_pool.path(file_id).str(), - .start_line = static_cast(start->line) + 1, - .end_line = static_cast(end->line) + 1, - .text = std::string( - content.substr(def_range.begin, def_range.end - def_range.begin)), - }; - return false; - }); + mi.lookup(hash, RelationKind::Definition, [&](const index::Relation& r) { + auto def_range = std::bit_cast(r.target_symbol); + if(def_range.begin >= def_range.end || def_range.end > content.size()) + return true; + auto start = + lsp::to_position(content, ls, lsp::PositionEncoding::UTF16, def_range.begin); + auto end = lsp::to_position(content, ls, lsp::PositionEncoding::UTF16, def_range.end); + if(!start || !end) + return true; + result = DefinitionText{ + .file = workspace.project_index.path_pool.path(file_id).str(), + .start_line = static_cast(start->line) + 1, + .end_line = static_cast(end->line) + 1, + .text = + std::string(content.substr(def_range.begin, def_range.end - def_range.begin)), + }; + return false; + }); if(result) return result; } @@ -549,14 +644,16 @@ std::vector Indexer::collect_references(index::Sy auto shard_it = workspace.merged_indices.find(file_id); if(shard_it == workspace.merged_indices.end()) continue; - auto* m = shard_it->second.mapper(); - if(!m) + auto& mi = shard_it->second; + auto ls = mi.line_starts(); + if(ls.empty()) continue; - auto content = shard_it->second.index.content(); + auto content = mi.content(); auto file_path = workspace.project_index.path_pool.path(file_id); - shard_it->second.index.lookup(hash, kind, [&](const index::Relation& r) { - auto start = m->to_position(r.range.begin); + mi.lookup(hash, kind, [&](const index::Relation& r) { + auto start = + lsp::to_position(content, ls, lsp::PositionEncoding::UTF16, r.range.begin); if(!start) return true; results.push_back(ReferenceWithContext{ @@ -569,25 +666,22 @@ std::vector Indexer::collect_references(index::Sy } } - for_each_overlay([&](std::uint32_t id, const OpenFileIndex& ofi) -> bool { - if(!ofi.mapper) - return true; - auto it = ofi.file_index.relations.find(hash); - if(it == ofi.file_index.relations.end()) + for_each_session([&](std::uint32_t id, const Session& session) -> bool { + auto it = session.file_index->relations.find(hash); + if(it == session.file_index->relations.end()) return true; auto file_path = workspace.path_pool.resolve(id); - llvm::StringRef content = ofi.content; for(auto& rel: it->second) { if(rel.kind != kind) continue; - auto start = ofi.mapper->to_position(rel.range.begin); + auto start = to_position(session, rel.range.begin); if(!start) continue; results.push_back(ReferenceWithContext{ .file = file_path.str(), .line = static_cast(start->line) + 1, - .context = extract_line(content, rel.range.begin), + .context = extract_line(session.text, rel.range.begin), }); } return true; @@ -697,10 +791,10 @@ std::vector Indexer::search_symbols(llvm::StringRef seen.insert(hash); } - for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { + for_each_session([&](std::uint32_t, const Session& session) -> bool { if(results.size() >= max_results) return false; - for(auto& [hash, symbol]: ofi.symbols) { + for(auto& [hash, symbol]: *session.symbols) { if(results.size() >= max_results) return false; if(seen.contains(hash)) diff --git a/src/server/compiler/indexer.h b/src/server/compiler/indexer.h index d2994d1e9..db33fe5ac 100644 --- a/src/server/compiler/indexer.h +++ b/src/server/compiler/indexer.h @@ -42,7 +42,7 @@ struct SymbolInfo { /// /// Indexer holds no index data of its own. All persistent data lives in /// Workspace (disk-derived ProjectIndex + MergedIndex shards) and per-file -/// data lives in Session (OpenFileIndex from unsaved buffers). +/// data lives in Session (file index from unsaved buffers). /// /// Responsibilities: /// - Cross-file navigation queries (definition, references, hierarchy) @@ -55,9 +55,9 @@ struct SymbolInfo { /// - Document lifecycle — handled by MasterServer class Indexer { public: - /// Visitor for iterating open-file overlays. Returns false to stop early. + /// Visitor for iterating open Sessions. Returns false to stop early. using OverlayVisitor = - std::function; + std::function; Indexer(kota::event_loop& loop, Workspace& workspace, @@ -193,23 +193,23 @@ class Indexer { /// Cancel background indexing and wait for all tasks to settle. kota::task<> stop(); - /// Iterate all open-file overlays via the callback set at construction. - void for_each_overlay(OverlayVisitor visitor) const { + /// Iterate all open Sessions via the callback set at construction. + void for_each_session(OverlayVisitor visitor) const { if(each_overlay) each_overlay(std::move(visitor)); } - /// Get the overlay for a specific server-level path_id (nullptr if not open). - const OpenFileIndex* get_overlay(std::uint32_t server_path_id) const { - const OpenFileIndex* result = nullptr; - for_each_overlay([&](std::uint32_t id, const OpenFileIndex& ofi) -> bool { + /// Invoke a callback with the Session for a specific server-level path_id. + /// The callback is not invoked if no Session exists for that path_id. + template + void with_session(std::uint32_t server_path_id, Fn&& fn) const { + for_each_session([&](std::uint32_t id, const Session& session) -> bool { if(id == server_path_id) { - result = &ofi; + fn(session); return false; } return true; }); - return result; } /// Whether background indexing is currently idle (no active or queued work). @@ -282,7 +282,7 @@ class Indexer { /// Checks if a server-level path_id has an open Session. std::function is_open; - /// Iterates all open-file overlays (OpenFileIndex from live compilations). + /// Iterates all open Sessions with valid file indices. std::function each_overlay; /// LSP peer for progress reporting (optional, not owned). diff --git a/src/server/service/agent_client.cpp b/src/server/service/agent_client.cpp index 0ddaf1fba..eade0a8a9 100644 --- a/src/server/service/agent_client.cpp +++ b/src/server/service/agent_client.cpp @@ -11,6 +11,7 @@ #include "support/filesystem.h" #include "support/logging.h" +#include "kota/ipc/lsp/position.h" #include "kota/ipc/lsp/uri.h" #include "kota/meta/enum.h" #include "llvm/ADT/DenseSet.h" @@ -23,6 +24,25 @@ using RequestContext = kota::ipc::JsonPeer::RequestContext; namespace lsp = kota::ipc::lsp; namespace protocol = kota::ipc::protocol; +template +static void find_relations(const index::MergedIndex& index, + index::SymbolHash hash, + RelationKind kind, + Fn&& fn) { + auto ls = index.line_starts(); + auto c = index.content(); + if(ls.empty()) + return; + index.lookup(hash, kind, [&](const index::Relation& r) { + auto start = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, r.range.begin); + auto end = lsp::to_position(c, ls, lsp::PositionEncoding::UTF16, r.range.end); + if(start && end) { + return fn(r, protocol::Range{*start, *end}); + } + return true; + }); +} + static std::string_view symbol_kind_name(SymbolKind kind) { constexpr auto names = kota::meta::reflection::member_names; auto idx = static_cast(kind.value()); @@ -101,8 +121,8 @@ static std::vector resolve_locator(const agentic::ReadSymbolPara for(auto& [hash, symbol]: workspace.project_index.symbols) try_symbol(hash, symbol); - indexer.for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { - for(auto& [hash, symbol]: ofi.symbols) + indexer.for_each_session([&](std::uint32_t, const Session& session) -> bool { + for(auto& [hash, symbol]: *session.symbols) try_symbol(hash, symbol); return true; }); @@ -119,24 +139,31 @@ static std::vector resolve_locator(const agentic::ReadSymbolPara auto pool_it = workspace.path_pool.cache.find(path_str); auto server_id = pool_it != workspace.path_pool.cache.end() ? pool_it->second : ~0u; if(server_id != ~0u) { - auto* ofi = indexer.get_overlay(server_id); - if(ofi && ofi->mapper) { - for(auto& [hash, rels]: ofi->file_index.relations) { + std::vector session_result; + indexer.with_session(server_id, [&](const Session& session) { + for(auto& [hash, rels]: session.file_index->relations) { for(auto& rel: rels) { if(rel.kind.value() != RelationKind::Definition) continue; - auto start = ofi->mapper->to_position(rel.range.begin); + auto start = lsp::to_position(session.text, + session.line_starts, + lsp::PositionEncoding::UTF16, + rel.range.begin); if(start && start->line == target_line) { std::string name; SymbolKind kind; - if(indexer.find_symbol_info(hash, name, kind)) - return { - {hash, std::move(name), kind, path_str, *loc.line} - }; + if(!indexer.find_symbol_info(hash, name, kind)) + continue; + if(kind == SymbolKind::Parameter || kind == SymbolKind::Label) + continue; + session_result.push_back( + {hash, std::move(name), kind, path_str, *loc.line}); } } } - } + }); + if(!session_result.empty()) + return session_result; } auto it = workspace.project_index.path_pool.find(path_str); @@ -152,15 +179,16 @@ static std::vector resolve_locator(const agentic::ReadSymbolPara if(!symbol.reference_files.contains(proj_id)) continue; bool found = false; - shard_it->second.find_relations(hash, - RelationKind::Definition, - [&](const index::Relation&, protocol::Range range) { - if(range.start.line == target_line) { - found = true; - return false; - } - return true; - }); + find_relations(shard_it->second, + hash, + RelationKind::Definition, + [&](const index::Relation&, protocol::Range range) { + if(range.start.line == target_line) { + found = true; + return false; + } + return true; + }); if(found) return { {hash, symbol.name, symbol.kind, path_str, *loc.line} @@ -429,8 +457,8 @@ AgentClient::AgentClient(MasterServer& server, kota::ipc::JsonPeer& peer) : for(auto& [hash, symbol]: srv.workspace.project_index.symbols) try_symbol(hash, symbol); - srv.indexer.for_each_overlay([&](std::uint32_t, const OpenFileIndex& ofi) -> bool { - for(auto& [hash, symbol]: ofi.symbols) + srv.indexer.for_each_session([&](std::uint32_t, const Session& session) -> bool { + for(auto& [hash, symbol]: *session.symbols) try_symbol(hash, symbol); return true; }); @@ -485,10 +513,10 @@ AgentClient::AgentClient(MasterServer& server, kota::ipc::JsonPeer& peer) : if(pool_it == srv.workspace.path_pool.cache.end()) co_return result; auto server_id = pool_it->second; - auto* ofi = srv.indexer.get_overlay(server_id); - if(ofi) { - auto& fi = *ofi; - for(auto& [hash, rels]: fi.file_index.relations) { + bool found_session = false; + srv.indexer.with_session(server_id, [&](const Session& session) { + found_session = true; + for(auto& [hash, rels]: session.file_index->relations) { for(auto& rel: rels) { if(rel.kind.value() != RelationKind::Definition) continue; @@ -498,24 +526,29 @@ AgentClient::AgentClient(MasterServer& server, kota::ipc::JsonPeer& peer) : continue; if(!is_document_level(kind)) continue; - if(fi.mapper) { - auto start = fi.mapper->to_position(rel.range.begin); - auto end = fi.mapper->to_position(rel.range.end); - if(start && end) { - result.symbols.push_back(DocumentSymbolEntry{ - .name = std::move(name), - .kind = std::string(symbol_kind_name(kind)), - .start_line = static_cast(start->line) + 1, - .end_line = static_cast(end->line) + 1, - .symbol_id = hash, - }); - break; - } + auto start = lsp::to_position(session.text, + session.line_starts, + lsp::PositionEncoding::UTF16, + rel.range.begin); + auto end = lsp::to_position(session.text, + session.line_starts, + lsp::PositionEncoding::UTF16, + rel.range.end); + if(start && end) { + result.symbols.push_back(DocumentSymbolEntry{ + .name = std::move(name), + .kind = std::string(symbol_kind_name(kind)), + .start_line = static_cast(start->line) + 1, + .end_line = static_cast(end->line) + 1, + .symbol_id = hash, + }); + break; } } } + }); + if(found_session) co_return result; - } auto it = srv.workspace.project_index.path_pool.find(params.path); if(it == srv.workspace.project_index.path_pool.cache.end()) @@ -534,19 +567,19 @@ AgentClient::AgentClient(MasterServer& server, kota::ipc::JsonPeer& peer) : if(!symbol.reference_files.contains(proj_id)) continue; - shard_it->second.find_relations( - hash, - RelationKind::Definition, - [&](const index::Relation&, protocol::Range range) { - result.symbols.push_back(DocumentSymbolEntry{ - .name = symbol.name, - .kind = std::string(symbol_kind_name(symbol.kind)), - .start_line = static_cast(range.start.line) + 1, - .end_line = static_cast(range.end.line) + 1, - .symbol_id = hash, - }); - return true; - }); + find_relations(shard_it->second, + hash, + RelationKind::Definition, + [&](const index::Relation&, protocol::Range range) { + result.symbols.push_back(DocumentSymbolEntry{ + .name = symbol.name, + .kind = std::string(symbol_kind_name(symbol.kind)), + .start_line = static_cast(range.start.line) + 1, + .end_line = static_cast(range.end.line) + 1, + .symbol_id = hash, + }); + return true; + }); } co_return result; diff --git a/src/server/service/lsp_client.cpp b/src/server/service/lsp_client.cpp index b5ca8b64b..ce544c733 100644 --- a/src/server/service/lsp_client.cpp +++ b/src/server/service/lsp_client.cpp @@ -169,6 +169,7 @@ LSPClient::LSPClient(MasterServer& server, kota::ipc::JsonPeer& peer) : server(s auto session = srv.open_session(path_id); session->version = params.text_document.version; session->text = params.text_document.text; + session->line_starts = lsp::build_line_starts(session->text); session->generation++; LOG_DEBUG("didOpen: {} (v{})", path, params.text_document.version); @@ -197,13 +198,19 @@ LSPClient::LSPClient(MasterServer& server, kota::ipc::JsonPeer& peer) : server(s session->text = c.text; } else { auto& range = c.range; - lsp::PositionMapper mapper(session->text, lsp::PositionEncoding::UTF16); - auto start = mapper.to_offset(range.start); - auto end = mapper.to_offset(range.end); + auto start = lsp::to_offset(session->text, + session->line_starts, + lsp::PositionEncoding::UTF16, + range.start); + auto end = lsp::to_offset(session->text, + session->line_starts, + lsp::PositionEncoding::UTF16, + range.end); if(start && end && *start <= *end) { session->text.replace(*start, *end - *start, c.text); } } + session->line_starts = lsp::build_line_starts(session->text); }, change); } diff --git a/src/server/service/master_server.cpp b/src/server/service/master_server.cpp index 80c451d88..c5b49011e 100644 --- a/src/server/service/master_server.cpp +++ b/src/server/service/master_server.cpp @@ -46,8 +46,8 @@ MasterServer::MasterServer(kota::event_loop& loop, std::string self_path) : [this](uint32_t server_path_id) { return sessions.contains(server_path_id); }, [this](Indexer::OverlayVisitor visitor) { for(auto& [path_id, session]: sessions) { - if(session && session->file_index) { - if(!visitor(path_id, *session->file_index)) + if(session && session->file_index && session->symbols) { + if(!visitor(path_id, *session)) break; } } diff --git a/src/server/service/session.h b/src/server/service/session.h index eb95aab0b..0a11aba10 100644 --- a/src/server/service/session.h +++ b/src/server/service/session.h @@ -4,7 +4,9 @@ #include #include #include +#include +#include "index/tu_index.h" #include "server/workspace/workspace.h" #include "kota/async/async.h" @@ -32,6 +34,10 @@ struct Session { /// Current buffer content (may differ from disk until saved). std::string text; + /// Byte offsets of each line start in `text`, built by `build_line_starts`. + /// Updated on didOpen and after every didChange. + std::vector line_starts; + /// Monotonic generation counter, incremented on every didChange and on close. /// Used to detect stale compilation results (ABA prevention). std::uint64_t generation = 0; @@ -89,7 +95,11 @@ struct Session { /// Used for queries (hover, goto, references) on this file. /// NOT merged into Workspace.project_index — that only gets disk-derived /// data from background indexing. - std::optional file_index; + std::optional file_index; + + /// Symbol table from the latest compilation, mapping symbol hashes to + /// names and kinds. + std::optional symbols; }; } // namespace clice diff --git a/src/server/workspace/workspace.cpp b/src/server/workspace/workspace.cpp index 5a9640595..9b27a5759 100644 --- a/src/server/workspace/workspace.cpp +++ b/src/server/workspace/workspace.cpp @@ -1,6 +1,5 @@ #include "server/workspace/workspace.h" -#include #include #include "support/filesystem.h" @@ -8,8 +7,6 @@ #include "syntax/scan.h" #include "kota/codec/json/json.h" -#include "kota/ipc/lsp/position.h" -#include "kota/ipc/lsp/protocol.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -18,61 +15,6 @@ namespace clice { -namespace lsp = kota::ipc::lsp; - -/// Find the tightest (innermost) occurrence containing `offset` via binary search. -const static index::Occurrence* lookup_occurrence(const std::vector& occs, - std::uint32_t offset) { - auto it = std::ranges::lower_bound(occs, offset, {}, [](const index::Occurrence& o) { - return o.range.end; - }); - const index::Occurrence* best = nullptr; - while(it != occs.end() && it->range.contains(offset)) { - if(!best || (it->range.end - it->range.begin) < (best->range.end - best->range.begin)) { - best = &*it; - } - ++it; - } - return best; -} - -std::optional> - OpenFileIndex::find_occurrence(std::uint32_t offset) const { - if(!mapper) - return std::nullopt; - auto* occ = lookup_occurrence(file_index.occurrences, offset); - if(!occ) - return std::nullopt; - auto start = mapper->to_position(occ->range.begin); - auto end = mapper->to_position(occ->range.end); - if(!start || !end) - return std::nullopt; - return std::pair{ - occ->target, - protocol::Range{*start, *end} - }; -} - -std::optional> - MergedIndexShard::find_occurrence(std::uint32_t offset) const { - auto* m = mapper(); - if(!m) - return std::nullopt; - std::optional> result; - index.lookup(offset, [&](const index::Occurrence& o) { - auto start = m->to_position(o.range.begin); - auto end = m->to_position(o.range.end); - if(start && end) { - result = { - o.target, - protocol::Range{*start, *end} - }; - } - return false; - }); - return result; -} - llvm::SmallVector Workspace::on_file_saved(std::uint32_t path_id) { llvm::SmallVector dirtied; diff --git a/src/server/workspace/workspace.h b/src/server/workspace/workspace.h index 32bba802a..46eba389c 100644 --- a/src/server/workspace/workspace.h +++ b/src/server/workspace/workspace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "command/command.h" #include "command/toolchain.h" @@ -17,8 +18,6 @@ #include "support/path_pool.h" #include "syntax/dependency_graph.h" -#include "kota/ipc/lsp/position.h" -#include "kota/ipc/lsp/protocol.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -26,9 +25,6 @@ namespace clice { -namespace protocol = kota::ipc::protocol; -namespace lsp = kota::ipc::lsp; - /// Two-layer staleness snapshot for compilation artifacts (PCH, AST, etc.). /// /// Layer 1 (fast): compare each file's current mtime against build_at. @@ -50,89 +46,6 @@ struct HeaderFileContext { std::uint64_t preamble_hash; ///< Hash of preamble content for staleness. }; -/// In-memory index for an open file. Kept separate from MergedIndex because -/// open files change frequently, are based on unsaved buffer content, and only -/// need to track the main file (headers are covered by PCH/PCM indexing). -struct OpenFileIndex { - index::FileIndex file_index; - index::SymbolTable symbols; - std::string content; ///< Buffer text at index time (for position mapping). - - /// Cached PositionMapper built from `content`. Avoids re-scanning line - /// offsets on every query. Initialized by Indexer::set_open_file(). - std::optional mapper; - - /// Find the tightest occurrence containing `offset`. - /// Returns (symbol_hash, LSP range) with positions already converted. - std::optional> - find_occurrence(std::uint32_t offset) const; - - /// Iterate relations matching `kind`, calling back with pre-converted ranges. - /// Callback: (const index::Relation&, protocol::Range) -> bool (true = continue). - template - void find_relations(index::SymbolHash hash, RelationKind kind, Fn&& fn) const { - if(!mapper) - return; - auto it = file_index.relations.find(hash); - if(it == file_index.relations.end()) - return; - for(auto& r: it->second) { - if(r.kind & kind) { - auto start = mapper->to_position(r.range.begin); - auto end = mapper->to_position(r.range.end); - if(start && end) { - if(!fn(r, protocol::Range{*start, *end})) - return; - } - } - } - } -}; - -/// Wraps index::MergedIndex with a lazily-cached PositionMapper. -struct MergedIndexShard { - index::MergedIndex index; - mutable std::optional cached_mapper; - - /// Get or lazily build a PositionMapper from the index's stored content. - const lsp::PositionMapper* mapper() const { - if(!cached_mapper) { - auto c = index.content(); - if(!c.empty()) { - cached_mapper.emplace(c, lsp::PositionEncoding::UTF16); - } - } - return cached_mapper ? &*cached_mapper : nullptr; - } - - /// Invalidate the cached mapper (call after merge changes content). - void invalidate_mapper() { - cached_mapper.reset(); - } - - /// Find occurrence at byte offset. - /// Returns (symbol_hash, LSP range) with positions already converted. - std::optional> - find_occurrence(std::uint32_t offset) const; - - /// Iterate relations matching `kind`, calling back with pre-converted ranges. - /// Callback: (const index::Relation&, protocol::Range) -> bool (true = continue). - template - void find_relations(index::SymbolHash hash, RelationKind kind, Fn&& fn) const { - auto* m = mapper(); - if(!m) - return; - index.lookup(hash, kind, [&](const index::Relation& r) { - auto start = m->to_position(r.range.begin); - auto end = m->to_position(r.range.end); - if(start && end) { - return fn(r, protocol::Range{*start, *end}); - } - return true; - }); - } -}; - /// Cached PCH state. Content-addressed by preamble hash — shared across all /// files (open or on-disk) that have the same preamble content. struct PCHState { @@ -208,7 +121,7 @@ struct Workspace { /// Per-file index shards from background indexing, keyed by project-level /// path_id. Contains symbol occurrences, relations, and stored content /// for position mapping. - llvm::DenseMap merged_indices; + llvm::DenseMap merged_indices; /// Called when a file is saved to disk. Cascades invalidation through /// compile_graph and clears affected PCM caches. diff --git a/tests/unit/feature/document_link_tests.cpp b/tests/unit/feature/document_link_tests.cpp index 9da700530..5f9650698 100644 --- a/tests/unit/feature/document_link_tests.cpp +++ b/tests/unit/feature/document_link_tests.cpp @@ -22,8 +22,14 @@ void run(llvm::StringRef source, llvm::StringRef standard = "-std=c++17") { } auto to_local_range(const protocol::Range& range) -> LocalSourceRange { - feature::PositionMapper converter(unit->interested_content(), feature::PositionEncoding::UTF8); - return LocalSourceRange(*converter.to_offset(range.start), *converter.to_offset(range.end)); + auto content = unit->interested_content(); + auto line_starts = unit->line_starts(); + return LocalSourceRange( + *feature::lsp::to_offset(content, + line_starts, + feature::PositionEncoding::UTF8, + range.start), + *feature::lsp::to_offset(content, line_starts, feature::PositionEncoding::UTF8, range.end)); } void EXPECT_LINK(std::size_t index, llvm::StringRef name, llvm::StringRef path) { diff --git a/tests/unit/feature/document_symbol_tests.cpp b/tests/unit/feature/document_symbol_tests.cpp index 603edc1f8..8e68a76d3 100644 --- a/tests/unit/feature/document_symbol_tests.cpp +++ b/tests/unit/feature/document_symbol_tests.cpp @@ -1,7 +1,10 @@ #include +#include #include #include #include +#include +#include #include #include "test/test.h" @@ -184,18 +187,22 @@ VAR(test) } void format_document_symbols(std::string& out, - const feature::PositionMapper& mapper, + std::string_view content, + std::span line_starts, + feature::PositionEncoding encoding, llvm::ArrayRef nodes, int depth) { auto pad = std::string(depth * 2, ' '); for(auto& node: nodes) { auto kind = kota::meta::enum_name(static_cast(node.kind), "Unknown"); - auto start = mapper.to_position(node.range.begin); - auto end = mapper.to_position(node.range.end); + auto start = feature::lsp::to_position(content, line_starts, encoding, node.range.begin); + auto end = feature::lsp::to_position(content, line_starts, encoding, node.range.end); if(!start || !end) continue; - auto sel_start = mapper.to_position(node.selection_range.begin); - auto sel_end = mapper.to_position(node.selection_range.end); + auto sel_start = + feature::lsp::to_position(content, line_starts, encoding, node.selection_range.begin); + auto sel_end = + feature::lsp::to_position(content, line_starts, encoding, node.selection_range.end); out += std::format("- {}{{ name: {}, kind: {}, range: \"{}:{}-{}:{}\"", pad, yaml_str(node.name), @@ -216,7 +223,7 @@ void format_document_symbols(std::string& out, } out += " }\n"; if(!node.children.empty()) { - format_document_symbols(out, mapper, node.children, depth + 1); + format_document_symbols(out, content, line_starts, encoding, node.children, depth + 1); } } } @@ -226,9 +233,14 @@ TEST_CASE(snapshot) { if(!compile_file(path)) return "COMPILE_ERROR"; auto content = unit->interested_content(); - feature::PositionMapper mapper(content, feature::PositionEncoding::UTF8); + auto line_starts = unit->line_starts(); std::string result; - format_document_symbols(result, mapper, feature::document_symbols(*unit), 0); + format_document_symbols(result, + content, + line_starts, + feature::PositionEncoding::UTF8, + feature::document_symbols(*unit), + 0); return result; }); } diff --git a/tests/unit/feature/folding_range_tests.cpp b/tests/unit/feature/folding_range_tests.cpp index b7a75a554..fb30a2653 100644 --- a/tests/unit/feature/folding_range_tests.cpp +++ b/tests/unit/feature/folding_range_tests.cpp @@ -37,7 +37,8 @@ void run(llvm::StringRef code) { } auto to_local_range(const protocol::FoldingRange& range) -> LocalSourceRange { - feature::PositionMapper converter(unit->interested_content(), feature::PositionEncoding::UTF8); + auto content = unit->interested_content(); + auto line_starts = unit->line_starts(); auto start = protocol::Position{ .line = range.start_line, @@ -49,7 +50,9 @@ auto to_local_range(const protocol::FoldingRange& range) -> LocalSourceRange { .character = range.end_character.value_or(0), }; - return LocalSourceRange(*converter.to_offset(start), *converter.to_offset(end)); + return LocalSourceRange( + *feature::lsp::to_offset(content, line_starts, feature::PositionEncoding::UTF8, start), + *feature::lsp::to_offset(content, line_starts, feature::PositionEncoding::UTF8, end)); } void EXPECT_FOLDING(std::uint32_t index, @@ -434,11 +437,18 @@ TEST_CASE(snapshot) { if(!compile_file(path)) return "COMPILE_ERROR"; auto ranges = feature::folding_ranges(*unit); - feature::PositionMapper mapper(unit->interested_content(), feature::PositionEncoding::UTF8); + auto content = unit->interested_content(); + auto line_starts = unit->line_starts(); std::string result; for(auto& r: ranges) { - auto start = mapper.to_position(r.range.begin); - auto end = mapper.to_position(r.range.end); + auto start = feature::lsp::to_position(content, + line_starts, + feature::PositionEncoding::UTF8, + r.range.begin); + auto end = feature::lsp::to_position(content, + line_starts, + feature::PositionEncoding::UTF8, + r.range.end); if(!start || !end) continue; result += std::format("- {{ range: \"{}:{}-{}:{}\"", diff --git a/tests/unit/feature/inlay_hint_tests.cpp b/tests/unit/feature/inlay_hint_tests.cpp index f6eb89df6..bd7fb460f 100644 --- a/tests/unit/feature/inlay_hint_tests.cpp +++ b/tests/unit/feature/inlay_hint_tests.cpp @@ -26,9 +26,13 @@ void run(llvm::StringRef code, std::source_location location = std::source_locat hints = feature::inlay_hints(*unit, range, {}, feature::PositionEncoding::UTF8); hints_map.clear(); - feature::PositionMapper converter(unit->interested_content(), feature::PositionEncoding::UTF8); + auto content = unit->interested_content(); + auto line_starts = unit->line_starts(); for(auto& hint: hints) { - hints_map[*converter.to_offset(hint.position)] = hint; + hints_map[*feature::lsp::to_offset(content, + line_starts, + feature::PositionEncoding::UTF8, + hint.position)] = hint; } if(!unit->diagnostics().empty()) { @@ -1539,10 +1543,13 @@ TEST_CASE(snapshot) { auto content = unit->interested_content(); LocalSourceRange range(0, content.size()); auto hints = feature::inlay_hints(*unit, range); - feature::PositionMapper mapper(content, feature::PositionEncoding::UTF8); + auto line_starts = unit->line_starts(); std::string result; for(auto& hint: hints) { - auto pos = mapper.to_position(hint.offset); + auto pos = feature::lsp::to_position(content, + line_starts, + feature::PositionEncoding::UTF8, + hint.offset); if(!pos) continue; auto kind = kota::meta::enum_name(hint.kind, "Unknown"); diff --git a/tests/unit/feature/semantic_tokens_tests.cpp b/tests/unit/feature/semantic_tokens_tests.cpp index e5e613992..cb930a482 100644 --- a/tests/unit/feature/semantic_tokens_tests.cpp +++ b/tests/unit/feature/semantic_tokens_tests.cpp @@ -589,14 +589,17 @@ TEST_CASE(snapshot) { return "COMPILE_ERROR"; auto content = unit->interested_content(); auto tokens = feature::semantic_tokens(*unit); - feature::PositionMapper mapper(content, feature::PositionEncoding::UTF8); + auto line_starts = unit->line_starts(); std::string result; for(auto& token: tokens) { if(!token.range.valid() || token.range.end <= token.range.begin || token.range.end > content.size()) continue; - auto pos = mapper.to_position(token.range.begin); + auto pos = feature::lsp::to_position(content, + line_starts, + feature::PositionEncoding::UTF8, + token.range.begin); if(!pos) continue; diff --git a/tests/unit/index/tu_index_tests.cpp b/tests/unit/index/tu_index_tests.cpp index 01b624d45..f0c98170e 100644 --- a/tests/unit/index/tu_index_tests.cpp +++ b/tests/unit/index/tu_index_tests.cpp @@ -511,7 +511,7 @@ TEST_CASE(snapshot) { return "COMPILE_ERROR"; auto idx = index::TUIndex::build(*unit); auto content = unit->interested_content(); - feature::PositionMapper mapper(content, feature::PositionEncoding::UTF8); + auto line_starts = unit->line_starts(); std::string result; auto sorted = idx.main_file_index.occurrences; @@ -522,7 +522,10 @@ TEST_CASE(snapshot) { for(auto& occ: sorted) { auto text = content.substr(occ.range.begin, occ.range.end - occ.range.begin); - auto pos = mapper.to_position(occ.range.begin); + auto pos = feature::lsp::to_position(content, + line_starts, + feature::PositionEncoding::UTF8, + occ.range.begin); if(!pos) continue;