Skip to content
2 changes: 1 addition & 1 deletion cmake/package.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "" FORCE)
FetchContent_Declare(
kotatsu
GIT_REPOSITORY https://github.com/clice-io/kotatsu
GIT_TAG b4bcd3c7e011812a345b4c520ccc77a6ed09174b
GIT_TAG 980c178
)

set(KOTA_ENABLE_ZEST ON)
Expand Down
6 changes: 3 additions & 3 deletions src/feature/code_completion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,10 @@ class CodeCompletionCollector final : public clang::CodeCompleteConsumer {
auto prefix = CompletionPrefix::from(content, offset);
FuzzyMatcher matcher(prefix.spelling);

PositionMapper converter(content, encoding);
auto line_starts = lsp::build_line_starts(content);
auto replace_range = protocol::Range{
.start = *converter.to_position(prefix.range.begin),
.end = *converter.to_position(prefix.range.end),
.start = *lsp::to_position(content, line_starts, encoding, prefix.range.begin),
.end = *lsp::to_position(content, line_starts, encoding, prefix.range.end),
};
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

std::vector<protocol::CompletionItem> collected;
Expand Down
15 changes: 7 additions & 8 deletions src/feature/diagnostics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ namespace clice::feature {

namespace {

namespace lsp = kota::ipc::lsp;

auto to_uri(llvm::StringRef file) -> std::string {
const auto file_view = std::string_view(file.data(), file.size());

Expand Down Expand Up @@ -49,13 +47,13 @@ void add_related(protocol::Diagnostic& diagnostic,
}

auto content = unit.file_content(raw.fid);
PositionMapper converter(content, encoding);
auto line_starts = lsp::build_line_starts(content);

protocol::DiagnosticRelatedInformation related{
.location =
protocol::Location{
.uri = to_uri(unit.file_path(raw.fid)),
.range = to_range(converter, raw.range),
.range = to_range(content, line_starts, encoding, raw.range),
},
.message = raw.message,
};
Expand All @@ -80,7 +78,8 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding)
}
};

PositionMapper main_converter(unit.interested_content(), encoding);
auto main_content = unit.interested_content();
auto main_line_starts = lsp::build_line_starts(main_content);

for(const auto& raw: unit.diagnostics()) {
auto level = raw.id.level;
Expand Down Expand Up @@ -136,7 +135,7 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding)
}

if(raw.fid == unit.interested_file()) {
diagnostic.range = to_range(main_converter, raw.range);
diagnostic.range = to_range(main_content, main_line_starts, encoding, raw.range);
current = std::move(diagnostic);
continue;
}
Expand All @@ -154,8 +153,8 @@ auto diagnostics(CompilationUnitRef unit, PositionEncoding encoding)
auto offset = unit.file_offset(include_location);
auto end_offset = offset + unit.token_spelling(include_location).size();
diagnostic.range = protocol::Range{
.start = *main_converter.to_position(offset),
.end = *main_converter.to_position(end_offset),
.start = *lsp::to_position(main_content, main_line_starts, encoding, offset),
.end = *lsp::to_position(main_content, main_line_starts, encoding, end_offset),
};

current = std::move(diagnostic);
Expand Down
4 changes: 2 additions & 2 deletions src/feature/document_links.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
}

auto content = unit.interested_content();
PositionMapper converter(content, encoding);
auto line_starts = lsp::build_line_starts(content);
auto& directives = directives_it->second;
auto* lang_opts = &unit.lang_options();

Expand All @@ -29,7 +29,7 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
auto range = find_directive_argument(content, offset, lang_opts);
if(!range)
return;
protocol::DocumentLink link{.range = to_range(converter, *range)};
protocol::DocumentLink link{.range = to_range(content, line_starts, encoding, *range)};
link.target = target.str();
links.push_back(std::move(link));
};
Expand Down
19 changes: 11 additions & 8 deletions src/feature/document_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,15 @@ void sort_symbols(std::vector<DocumentSymbol>& symbols) {
}
}

auto to_protocol_symbol(const DocumentSymbol& symbol, const PositionMapper& converter)
-> protocol::DocumentSymbol {
auto to_protocol_symbol(const DocumentSymbol& symbol,
std::string_view content,
std::span<const std::uint32_t> line_starts,
lsp::PositionEncoding encoding) -> protocol::DocumentSymbol {
protocol::DocumentSymbol result{
.name = symbol.name,
.kind = to_protocol_symbol_kind(symbol.kind),
.range = to_range(converter, symbol.range),
.selection_range = to_range(converter, symbol.selection_range),
.range = to_range(content, line_starts, encoding, symbol.range),
.selection_range = to_range(content, line_starts, encoding, symbol.selection_range),
};

if(!symbol.detail.empty()) {
Expand All @@ -195,8 +197,8 @@ auto to_protocol_symbol(const DocumentSymbol& symbol, const PositionMapper& conv
std::vector<std::shared_ptr<protocol::DocumentSymbol>> children;
children.reserve(symbol.children.size());
for(const auto& child: symbol.children) {
children.push_back(
std::make_shared<protocol::DocumentSymbol>(to_protocol_symbol(child, converter)));
children.push_back(std::make_shared<protocol::DocumentSymbol>(
to_protocol_symbol(child, content, line_starts, encoding)));
}
result.children = std::move(children);
}
Expand All @@ -216,12 +218,13 @@ auto document_symbols(CompilationUnitRef unit, PositionEncoding encoding)
-> std::vector<protocol::DocumentSymbol> {
auto internal = document_symbols(unit);

PositionMapper converter(unit.interested_content(), encoding);
auto content = unit.interested_content();
auto line_starts = lsp::build_line_starts(content);
std::vector<protocol::DocumentSymbol> symbols;
symbols.reserve(internal.size());

for(const auto& symbol: internal) {
symbols.push_back(to_protocol_symbol(symbol, converter));
symbols.push_back(to_protocol_symbol(symbol, content, line_starts, encoding));
}

return symbols;
Expand Down
13 changes: 9 additions & 4 deletions src/feature/feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

#include <cstdint>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <vector>

#include "compile/compilation.h"
Expand All @@ -15,16 +17,19 @@

namespace clice::feature {

namespace lsp = kota::ipc::lsp;
namespace protocol = kota::ipc::protocol;

using kota::ipc::lsp::PositionEncoding;
using kota::ipc::lsp::PositionMapper;
using kota::ipc::lsp::parse_position_encoding;

inline auto to_range(const PositionMapper& converter, LocalSourceRange range) -> protocol::Range {
inline auto to_range(std::string_view content,
std::span<const std::uint32_t> line_starts,
lsp::PositionEncoding encoding,
LocalSourceRange range) -> protocol::Range {
return protocol::Range{
.start = *converter.to_position(range.begin),
.end = *converter.to_position(range.end),
.start = *lsp::to_position(content, line_starts, encoding, range.begin),
.end = *lsp::to_position(content, line_starts, encoding, range.end),
};
Comment on lines +26 to 33

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Unchecked optional dereference may cause undefined behavior.

lsp::to_position returns std::optional<protocol::Position>. Lines 31-32 dereference the result without checking validity. If an offset is out of bounds or line_starts is malformed, this will cause undefined behavior.

Consider either:

  1. Adding a precondition assert to document the caller's responsibility
  2. Returning std::optional<protocol::Range> for safe handling
🛡️ Option 1: Add defensive assert
 inline auto to_range(std::string_view content,
                      std::span<const std::uint32_t> line_starts,
                      lsp::PositionEncoding encoding,
                      LocalSourceRange range) -> protocol::Range {
+    assert(range.end <= content.size() && "range extends beyond content");
+    auto start = lsp::to_position(content, line_starts, encoding, range.begin);
+    auto end = lsp::to_position(content, line_starts, encoding, range.end);
+    assert(start && end && "to_position failed for valid range");
     return protocol::Range{
-        .start = *lsp::to_position(content, line_starts, encoding, range.begin),
-        .end = *lsp::to_position(content, line_starts, encoding, range.end),
+        .start = *start,
+        .end = *end,
     };
 }
🛡️ Option 2: Return optional for caller handling
 inline auto to_range(std::string_view content,
                      std::span<const std::uint32_t> line_starts,
                      lsp::PositionEncoding encoding,
-                     LocalSourceRange range) -> protocol::Range {
+                     LocalSourceRange range) -> std::optional<protocol::Range> {
+    auto start = lsp::to_position(content, line_starts, encoding, range.begin);
+    auto end = lsp::to_position(content, line_starts, encoding, range.end);
+    if (!start || !end)
+        return std::nullopt;
     return protocol::Range{
-        .start = *lsp::to_position(content, line_starts, encoding, range.begin),
-        .end = *lsp::to_position(content, line_starts, encoding, range.end),
+        .start = *start,
+        .end = *end,
     };
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
inline auto to_range(std::string_view content,
std::span<const std::uint32_t> line_starts,
lsp::PositionEncoding encoding,
LocalSourceRange range) -> protocol::Range {
return protocol::Range{
.start = *converter.to_position(range.begin),
.end = *converter.to_position(range.end),
.start = *lsp::to_position(content, line_starts, encoding, range.begin),
.end = *lsp::to_position(content, line_starts, encoding, range.end),
};
inline auto to_range(std::string_view content,
std::span<const std::uint32_t> line_starts,
lsp::PositionEncoding encoding,
LocalSourceRange range) -> protocol::Range {
assert(range.end <= content.size() && "range extends beyond content");
auto start = lsp::to_position(content, line_starts, encoding, range.begin);
auto end = lsp::to_position(content, line_starts, encoding, range.end);
assert(start && end && "to_position failed for valid range");
return protocol::Range{
.start = *start,
.end = *end,
};
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/feature/feature.h` around lines 26 - 33, The to_range function
dereferences the results of lsp::to_position() calls without validating that the
returned optional values are valid, which can cause undefined behavior if the
optional is empty. Fix this by either adding assert statements to validate both
optional results before dereferencing them (documenting the precondition), or
change the return type from protocol::Range to std::optional<protocol::Range>
and check both lsp::to_position() results, returning an empty optional if either
call fails.

}

Expand Down
7 changes: 4 additions & 3 deletions src/feature/folding_ranges.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,14 +349,15 @@ auto folding_ranges(CompilationUnitRef unit) -> std::vector<FoldingRange> {
auto folding_ranges(CompilationUnitRef unit, PositionEncoding encoding)
-> std::vector<protocol::FoldingRange> {
auto collected = folding_ranges(unit);
PositionMapper converter(unit.interested_content(), encoding);
auto content = unit.interested_content();
auto line_starts = lsp::build_line_starts(content);

std::vector<protocol::FoldingRange> result;
result.reserve(collected.size());

for(const auto& item: collected) {
auto start = *converter.to_position(item.range.begin);
auto end = *converter.to_position(item.range.end);
auto start = *lsp::to_position(content, line_starts, encoding, item.range.begin);
auto end = *lsp::to_position(content, line_starts, encoding, item.range.end);

protocol::FoldingRange range{
.start_line = start.line,
Expand Down
10 changes: 7 additions & 3 deletions src/feature/formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,16 @@ auto document_format(llvm::StringRef file,
return edits;
}

PositionMapper converter(content, encoding);
auto line_starts = lsp::build_line_starts(content);

for(const auto& replacement: *replacements) {
protocol::TextEdit edit;
edit.range.start = *converter.to_position(replacement.getOffset());
edit.range.end = *converter.to_position(replacement.getOffset() + replacement.getLength());
edit.range.start =
*lsp::to_position(content, line_starts, encoding, replacement.getOffset());
edit.range.end = *lsp::to_position(content,
line_starts,
encoding,
replacement.getOffset() + replacement.getLength());
edit.new_text = replacement.getReplacementText().str();
edits.push_back(std::move(edit));
}
Expand Down
5 changes: 3 additions & 2 deletions src/feature/hover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1310,8 +1310,9 @@ auto to_protocol_hover(CompilationUnitRef unit,
};

if(info.symbol_range) {
PositionMapper converter(unit.interested_content(), encoding);
result.range = to_range(converter, *info.symbol_range);
auto content = unit.interested_content();
auto line_starts = lsp::build_line_starts(content);
result.range = to_range(content, line_starts, encoding, *info.symbol_range);
}

return result;
Expand Down
5 changes: 3 additions & 2 deletions src/feature/inlay_hints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -929,13 +929,14 @@ auto inlay_hints(CompilationUnitRef unit,
PositionEncoding encoding) -> std::vector<protocol::InlayHint> {
auto collected = inlay_hints(unit, target, options);

PositionMapper converter(unit.interested_content(), encoding);
auto content = unit.interested_content();
auto line_starts = lsp::build_line_starts(content);
std::vector<protocol::InlayHint> hints;
hints.reserve(collected.size());

for(const auto& hint: collected) {
protocol::InlayHint out{
.position = *converter.to_position(hint.offset),
.position = *lsp::to_position(content, line_starts, encoding, hint.offset),
.label = hint.label,
};
Comment on lines 935 to 939

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Unchecked optional dereference could cause undefined behavior.

to_position returns std::optional<protocol::Position>. While the hint collection filters to valid source ranges, unconditionally dereferencing with * is unsafe if the conversion ever fails (e.g., corrupted line_starts or offset mismatch).

Consider adding a guard:

🛡️ Proposed defensive fix
     for(const auto& hint: collected) {
+        auto pos = to_position(unit, encoding, hint.offset);
+        if(!pos) {
+            continue;
+        }
         protocol::InlayHint out{
-            .position = *to_position(unit, encoding, hint.offset),
+            .position = *pos,
             .label = hint.label,
         };
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for(const auto& hint: collected) {
protocol::InlayHint out{
.position = *converter.to_position(hint.offset),
.position = *to_position(unit, encoding, hint.offset),
.label = hint.label,
};
for(const auto& hint: collected) {
auto pos = to_position(unit, encoding, hint.offset);
if(!pos) {
continue;
}
protocol::InlayHint out{
.position = *pos,
.label = hint.label,
};
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/feature/inlay_hints.cpp` around lines 935 - 939, The call to
`to_position` returns a `std::optional<protocol::Position>`, but the code
unconditionally dereferences it with `*` in the loop over `collected` without
checking if the optional contains a value. Add a guard before dereferencing:
check if the optional returned by `to_position(unit, encoding, hint.offset)` has
a value using a conditional statement, and only construct the
`protocol::InlayHint` out object and proceed with the position assignment if the
optional is valid. If the conversion fails, skip adding that particular hint to
the output.


Expand Down
14 changes: 8 additions & 6 deletions src/feature/semantic_tokens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ class SemanticTokenEncoder {
SemanticTokenEncoder(llvm::StringRef content,
PositionEncoding encoding,
protocol::SemanticTokens& output) :
content(content), converter(content, encoding), output(output) {}
content(content), line_starts(lsp::build_line_starts(content)), encoding(encoding),
output(output) {}

void append(const SemanticToken& token) {
if(!token.range.valid() || token.range.end <= token.range.begin ||
Expand All @@ -492,8 +493,8 @@ class SemanticTokenEncoder {

auto begin = token.range.begin;
auto end = token.range.end;
auto begin_position = *converter.to_position(begin);
auto end_position = *converter.to_position(end);
auto begin_position = *lsp::to_position(content, line_starts, encoding, begin);
auto end_position = *lsp::to_position(content, line_starts, encoding, end);
auto begin_line = static_cast<std::uint32_t>(begin_position.line);
auto begin_char = static_cast<std::uint32_t>(begin_position.character);
auto end_line = static_cast<std::uint32_t>(end_position.line);
Expand Down Expand Up @@ -524,15 +525,15 @@ class SemanticTokenEncoder {
first_piece = false;
}

auto length = converter.measure(chunk.substr(chunk_offset, piece_size));
auto length = lsp::encoded_length(chunk.substr(chunk_offset, piece_size), encoding);
emit_relative(delta_line, delta_start, length, token.kind, token.modifiers);

chunk_offset += piece_size;
piece_size = 0;
}

if(piece_size > 0) {
auto length = converter.measure(chunk.substr(chunk_offset));
auto length = lsp::encoded_length(chunk.substr(chunk_offset), encoding);
emit_relative(1, 0, length, token.kind, token.modifiers);
}
}
Expand Down Expand Up @@ -560,7 +561,8 @@ class SemanticTokenEncoder {

private:
llvm::StringRef content;
PositionMapper converter;
std::vector<std::uint32_t> line_starts;
PositionEncoding encoding;
protocol::SemanticTokens& output;
std::uint32_t last_line = 0;
std::uint32_t last_start_character = 0;
Expand Down
32 changes: 14 additions & 18 deletions src/server/compiler/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -726,12 +726,8 @@ kota::task<> Compiler::run_compile(std::shared_ptr<Session> session) {

if(!result.value().tu_index_data.empty()) {
auto tu_index = index::TUIndex::from(result.value().tu_index_data.data());
OpenFileIndex ofi;
ofi.file_index = std::move(tu_index.main_file_index);
ofi.symbols = std::move(tu_index.symbols);
ofi.content = params.text;
ofi.mapper.emplace(ofi.content, lsp::PositionEncoding::UTF16);
session->file_index = std::move(ofi);
session->file_index = std::move(tu_index.main_file_index);
session->symbols = std::move(tu_index.symbols);
}

auto version = session->version;
Expand Down Expand Up @@ -839,6 +835,7 @@ Compiler::RawResult Compiler::forward_query(worker::QueryKind kind,
auto path = std::string(workspace.path_pool.resolve(path_id));
auto gen = session->generation;
auto text = session->text;
auto line_starts = session->line_starts;

if(!co_await ensure_compiled(session)) {
co_return serde_raw{"null"};
Expand All @@ -852,18 +849,16 @@ Compiler::RawResult Compiler::forward_query(worker::QueryKind kind,
wp.kind = kind;
wp.path = path;

lsp::PositionMapper mapper(text, lsp::PositionEncoding::UTF16);

if(position) {
auto offset = mapper.to_offset(*position);
auto offset = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, *position);
if(!offset)
co_return serde_raw{"null"};
wp.offset = *offset;
}

if(range) {
auto start = mapper.to_offset(range->start);
auto end = mapper.to_offset(range->end);
auto start = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, range->start);
auto end = lsp::to_offset(text, line_starts, lsp::PositionEncoding::UTF16, range->end);
if(start && end) {
wp.range = {*start, *end};
}
Expand All @@ -882,6 +877,7 @@ Compiler::RawResult Compiler::forward_build(worker::BuildKind kind,
auto path_id = session->path_id;
auto path = std::string(workspace.path_pool.resolve(path_id));
auto gen = session->generation;
auto line_starts = session->line_starts;

worker::BuildParams wp;
wp.kind = kind;
Expand All @@ -900,8 +896,7 @@ Compiler::RawResult Compiler::forward_build(worker::BuildKind kind,
co_return serde_raw{};
}

lsp::PositionMapper mapper(wp.text, lsp::PositionEncoding::UTF16);
auto offset = mapper.to_offset(position);
auto offset = lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, position);
if(!offset)
co_return serde_raw{"null"};
wp.offset = *offset;
Expand All @@ -924,9 +919,10 @@ Compiler::RawResult Compiler::forward_format(std::shared_ptr<Session> session,
wp.text = session->text;

if(range) {
lsp::PositionMapper mapper(wp.text, lsp::PositionEncoding::UTF16);
auto begin = mapper.to_offset(range->start);
auto end = mapper.to_offset(range->end);
auto line_starts = lsp::build_line_starts(wp.text);
auto begin =
lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, range->start);
auto end = lsp::to_offset(wp.text, line_starts, lsp::PositionEncoding::UTF16, range->end);
if(!begin || !end)
co_return serde_raw{"null"};
wp.format_range = {*begin, *end};
Expand All @@ -944,8 +940,8 @@ Compiler::RawResult Compiler::handle_completion(const protocol::Position& positi
auto path_id = session->path_id;
auto path = std::string(workspace.path_pool.resolve(path_id));

lsp::PositionMapper mapper(session->text, lsp::PositionEncoding::UTF16);
auto offset = mapper.to_offset(position);
auto offset =
lsp::to_offset(session->text, session->line_starts, lsp::PositionEncoding::UTF16, position);
if(offset) {
auto pctx = detect_completion_context(session->text, *offset);
if(pctx.kind == CompletionContext::IncludeQuoted ||
Expand Down
Loading
Loading