Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 14 additions & 10 deletions packages/builder/src/ast/adapters/swc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Implements parser-specific logic using the SWC parser.
*/

import { createCanonicalId, createCanonicalTracker, type ScopeHandle } from "@soda-gql/common";
import { createCanonicalId, createCanonicalTracker, createSwcSpanConverter, type ScopeHandle, type SwcSpanConverter } from "@soda-gql/common";
import { parseSync } from "@swc/core";
import type { CallExpression, ImportDeclaration, Module } from "@swc/types";
import type { GraphqlSystemIdentifyHelper } from "../../internal/graphql-system";
Expand All @@ -17,6 +17,8 @@ type SwcModule = Module & {
__filePath: string;
/** Offset to subtract from spans to normalize to 0-based source indices */
__spanOffset: number;
/** Converter for UTF-8 byte offsets to UTF-16 char indices */
__spanConverter: SwcSpanConverter;
};

import { createStandardDiagnostic } from "../common/detection";
Expand Down Expand Up @@ -362,10 +364,11 @@ const collectAllDefinitions = ({
};

const expressionFromCall = (call: CallExpression): string => {
// Normalize span by subtracting the module's span offset
// Normalize span by subtracting the module's span offset, then convert byte→char
const spanOffset = module.__spanOffset;
let start = call.span.start - spanOffset;
const end = call.span.end - spanOffset;
const converter = module.__spanConverter;
let start = converter.byteOffsetToCharIndex(call.span.start - spanOffset);
const end = converter.byteOffsetToCharIndex(call.span.end - spanOffset);

// Adjust when span starts one character after the leading "g"
if (start > 0 && source[start] === "q" && source[start - 1] === "g" && source.slice(start, start + 3) === "ql.") {
Expand Down Expand Up @@ -587,8 +590,9 @@ const collectAllDefinitions = ({
* Get location from an SWC node span
*/
const getLocation = (module: SwcModule, span: { start: number; end: number }): DiagnosticLocation => {
const start = span.start - module.__spanOffset;
const end = span.end - module.__spanOffset;
const converter = module.__spanConverter;
const start = converter.byteOffsetToCharIndex(span.start - module.__spanOffset);
const end = converter.byteOffsetToCharIndex(span.end - module.__spanOffset);
return { start, end };
};

Expand Down Expand Up @@ -916,15 +920,15 @@ export const swcAdapter: AnalyzerAdapter = {
}

// SWC's BytePos counter accumulates across parseSync calls within the same process.
// To convert span positions to 0-indexed source positions, we compute the accumulated
// offset from previous parses: (program.span.end - source.length) gives us the total
// bytes from previously parsed files, and we add 1 because spans are 1-indexed.
const spanOffset = program.span.end - input.source.length + 1;
// Use UTF-8 byte length (not source.length which is UTF-16 code units) for correct offset.
const converter = createSwcSpanConverter(input.source);
const spanOffset = program.span.end - converter.byteLength + 1;

// Attach filePath to module (similar to ts.SourceFile.fileName)
const swcModule = program as SwcModule;
swcModule.__filePath = input.filePath;
swcModule.__spanOffset = spanOffset;
swcModule.__spanConverter = converter;

// Collect all data in one pass
const gqlIdentifiers = collectGqlIdentifiers(swcModule, helper);
Expand Down
3 changes: 2 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"zod": "^4.1.11"
},
"optionalDependencies": {
"@soda-gql/formatter": "workspace:*"
"@soda-gql/formatter": "workspace:*",
"@soda-gql/lsp": "workspace:*"
}
}
31 changes: 31 additions & 0 deletions packages/cli/src/commands/lsp.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Usage text for the `lsp` subcommand; written to stdout when `--help` or `-h` is passed.
const LSP_HELP = `Usage: soda-gql lsp [options]

Start the GraphQL Language Server Protocol server.

The LSP server communicates over stdio and provides:
- Diagnostics (validation errors in GraphQL templates)
- Autocompletion (field, argument, type suggestions)
- Hover information (type details on hover)

Options:
--help, -h Show this help message

The server is typically started by an editor extension, not directly by users.
Configure your editor to use 'soda-gql lsp' as the GraphQL language server command.`;

/**
 * Entry point for the `soda-gql lsp` subcommand.
 *
 * With `--help` / `-h` anywhere in `argv`, prints the usage text and exits
 * the process with status 0. Otherwise loads `@soda-gql/lsp`, creates a
 * server, starts it, and then stays pending forever.
 *
 * @param argv - Arguments following the `lsp` command name.
 * @returns A promise that never settles.
 */
export const lspCommand = async (argv: readonly string[]): Promise<never> => {
  const helpRequested = argv.some((arg) => arg === "--help" || arg === "-h");
  if (helpRequested) {
    process.stdout.write(`${LSP_HELP}\n`);
    process.exit(0);
  }

  // Imported lazily so other commands do not pay for loading the LSP package
  const lsp = await import("@soda-gql/lsp");
  lsp.createLspServer().start();

  // Hand back a promise that never settles: the command stays pending while
  // the server runs, which also satisfies the `never` return type.
  return new Promise<never>(() => {});
};
7 changes: 7 additions & 0 deletions packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { codegenCommand } from "./commands/codegen/index";
import { doctorCommand } from "./commands/doctor";
import { formatCommand } from "./commands/format";
import { initCommand } from "./commands/init";
import { lspCommand } from "./commands/lsp";
import { typegenCommand } from "./commands/typegen";
import { cliErrors } from "./errors";
import type { CommandResult, CommandSuccess, OutputFormat } from "./types";
Expand All @@ -18,6 +19,7 @@ Commands:
format Format soda-gql field selections
artifact Manage soda-gql artifacts
doctor Run diagnostic checks
lsp Start the GraphQL language server

Run 'soda-gql <command> --help' for more information on a specific command.
`;
Expand Down Expand Up @@ -73,6 +75,11 @@ const dispatch = async (argv: readonly string[]): Promise<DispatchResult> => {
return artifactCommand(rest);
}

if (command === "lsp") {
await lspCommand(rest);
return ok({ message: "" }); // unreachable, lsp runs forever
}

if (command === "doctor") {
const result = doctorCommand(rest);
if (result.isOk()) {
Expand Down
1 change: 1 addition & 0 deletions packages/common/src/utils/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export * from "./alias-resolver";
export * from "./cached-fn";
export * from "./path";
export * from "./swc-span";
export * from "./tsconfig";
105 changes: 105 additions & 0 deletions packages/common/src/utils/swc-span.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { describe, expect, test } from "bun:test";
import { createSwcSpanConverter } from "./swc-span";

describe("createSwcSpanConverter", () => {
  type OffsetPair = readonly [number, number];

  // Builds a converter for `text`, checks its UTF-8 byte length, then checks
  // each [byteOffset, expectedCharIndex] pair in order.
  const verify = (text: string, expectedByteLength: number, pairs: readonly OffsetPair[]): void => {
    const spanConverter = createSwcSpanConverter(text);
    expect(spanConverter.byteLength).toBe(expectedByteLength);
    for (const [byteOffset, charIndex] of pairs) {
      expect(spanConverter.byteOffsetToCharIndex(byteOffset)).toBe(charIndex);
    }
  };

  test("ASCII-only: byteLength equals string length", () => {
    const text = "const x = 42;";
    verify(text, text.length, []);
  });

  test("ASCII-only: identity conversion", () => {
    const text = "hello world";
    const spanConverter = createSwcSpanConverter(text);
    // Every offset from 0 through text.length (inclusive) maps to itself
    const offsets = Array.from({ length: text.length + 1 }, (_, offset) => offset);
    for (const offset of offsets) {
      expect(spanConverter.byteOffsetToCharIndex(offset)).toBe(offset);
    }
  });

  test("empty string", () => {
    verify("", 0, [[0, 0]]);
  });

  test("2-byte UTF-8 characters (accented)", () => {
    // e-acute (U+00E9): 2 UTF-8 bytes, 1 UTF-16 code unit.
    // "caf" (3 bytes) + e-acute (2 bytes) = 5 bytes.
    verify("caf\u00E9", 5, [
      [0, 0], // 'c'
      [3, 3], // start of e-acute
      [5, 4], // end sentinel
    ]);
  });

  test("3-byte UTF-8 characters (CJK)", () => {
    // "konnichiwa": five characters, each 3 UTF-8 bytes and 1 UTF-16 code unit
    verify("\u3053\u3093\u306B\u3061\u306F", 15, [
      [0, 0],
      [3, 1],
      [6, 2],
      [15, 5], // end sentinel
    ]);
  });

  test("4-byte UTF-8 / surrogate pair (emoji)", () => {
    // Grinning face (U+1F600): 4 UTF-8 bytes, 2 UTF-16 code units.
    // 'a' (1) + emoji (4) + 'b' (1) = 6 bytes.
    verify("a\u{1F600}b", 6, [
      [0, 0], // 'a'
      [1, 1], // first surrogate of the emoji
      [5, 3], // 'b', two code units after the emoji started
      [6, 4], // end sentinel
    ]);
  });

  test("mixed ASCII and multi-byte", () => {
    // "hello " (6 bytes) + 5 CJK chars (15 bytes) + " world" (6 bytes) = 27 bytes
    verify("hello \u3053\u3093\u306B\u3061\u306F world", 27, [
      [0, 0], // start of "hello "
      [5, 5], // last byte of "hello "
      [6, 6], // first CJK character
      [21, 11], // start of " world"
      [27, 17], // end sentinel
    ]);
  });

  test("end sentinel: byteOffsetToCharIndex(byteLength) === source.length", () => {
    const samples = [
      "",
      "ascii",
      "caf\u00E9",
      "\u3053\u3093\u306B\u3061\u306F",
      "a\u{1F600}b",
      "hello \u3053\u3093\u306B\u3061\u306F world",
    ];
    samples.forEach((sample) => {
      const spanConverter = createSwcSpanConverter(sample);
      expect(spanConverter.byteOffsetToCharIndex(spanConverter.byteLength)).toBe(sample.length);
    });
  });
});
67 changes: 67 additions & 0 deletions packages/common/src/utils/swc-span.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* SWC span position converter: UTF-8 byte offsets → UTF-16 code unit indices.
*
* SWC (Rust-based) returns span positions as UTF-8 byte offsets.
* JavaScript strings use UTF-16 code units for indexing.
* For ASCII-only content these are identical, but for multi-byte
* characters the positions diverge.
*/

export type SwcSpanConverter = {
  /** UTF-8 byte length of the source string */
  readonly byteLength: number;
  /**
   * Convert a UTF-8 byte offset (within the source) to a UTF-16 code unit index.
   *
   * Offsets are clamped to `[0, byteLength]` (fractional values are truncated,
   * NaN is treated as 0), so the result is always a valid index into the source.
   * An offset that falls inside a multi-byte sequence maps to the index of the
   * character that sequence encodes.
   */
  readonly byteOffsetToCharIndex: (byteOffset: number) => number;
};

/**
 * Create a converter that maps UTF-8 byte offsets to UTF-16 code unit indices
 * for the given source string.
 *
 * ASCII-only sources take a fast path that skips building the lookup table,
 * because byte offsets and code unit indices coincide. Otherwise a
 * `Uint32Array` with one entry per byte (plus an end sentinel) is built once
 * up front, making each conversion a single array read.
 *
 * @param source - The exact text that was handed to the parser.
 * @returns The source's UTF-8 byte length and the offset conversion function.
 */
export const createSwcSpanConverter = (source: string): SwcSpanConverter => {
  const byteLength = Buffer.byteLength(source, "utf8");

  // Normalize any input to an integer offset within [0, byteLength]. Without
  // this, an out-of-range lookup in the table would yield `undefined` and
  // leak into callers that expect a number.
  const clampOffset = (byteOffset: number): number => {
    if (!(byteOffset > 0)) {
      return 0; // negative, zero, or NaN
    }
    return byteOffset >= byteLength ? byteLength : Math.trunc(byteOffset);
  };

  // Fast path: ASCII-only — byte offsets equal char indices
  if (byteLength === source.length) {
    return { byteLength, byteOffsetToCharIndex: clampOffset };
  }

  // Lookup table: byteToChar[byteOffset] is the index of the UTF-16 code unit
  // at which the character containing that byte starts.
  const byteToChar = new Uint32Array(byteLength + 1);
  let bytePos = 0;
  let charIdx = 0;

  while (charIdx < source.length) {
    const codePoint = source.codePointAt(charIdx) ?? 0;
    // Astral code points take 4 UTF-8 bytes and a surrogate pair (2 code units)
    const isAstral = codePoint > 0xffff;
    const bytesForCodePoint = codePoint <= 0x7f ? 1 : codePoint <= 0x7ff ? 2 : isAstral ? 4 : 3;

    // Every byte of the encoded sequence points back at the character's first code unit
    byteToChar.fill(charIdx, bytePos, bytePos + bytesForCodePoint);
    bytePos += bytesForCodePoint;
    charIdx += isAstral ? 2 : 1;
  }

  // Sentinel: the offset just past the last byte maps to end-of-string
  byteToChar[byteLength] = source.length;

  return {
    byteLength,
    byteOffsetToCharIndex: (byteOffset: number) => byteToChar[clampOffset(byteOffset)] ?? source.length,
  };
};
1 change: 1 addition & 0 deletions packages/lsp/@x-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./src/index";
Loading