whatasoda · whatasoda · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026
diff --git a/bun.lock b/bun.lock
diff --git a/packages/builder/src/ast/adapters/swc.ts b/packages/builder/src/ast/adapters/swc.ts
@@ -3,7 +3,7 @@
  * Implements parser-specific logic using the SWC parser.
  */
 
-import { createCanonicalId, createCanonicalTracker, type ScopeHandle } from "@soda-gql/common";
+import { createCanonicalId, createCanonicalTracker, createSwcSpanConverter, type ScopeHandle, type SwcSpanConverter } from "@soda-gql/common";
 import { parseSync } from "@swc/core";
 import type { CallExpression, ImportDeclaration, Module } from "@swc/types";
 import type { GraphqlSystemIdentifyHelper } from "../../internal/graphql-system";
@@ -17,6 +17,8 @@ type SwcModule = Module & {
   __filePath: string;
   /** Offset to subtract from spans to normalize to 0-based source indices */
   __spanOffset: number;
+  /** Converter for UTF-8 byte offsets to UTF-16 char indices */
+  __spanConverter: SwcSpanConverter;
 };
 
 import { createStandardDiagnostic } from "../common/detection";
@@ -362,10 +364,11 @@ const collectAllDefinitions = ({
   };
 
   const expressionFromCall = (call: CallExpression): string => {
-    // Normalize span by subtracting the module's span offset
+    // Normalize span by subtracting the module's span offset, then convert byte→char
     const spanOffset = module.__spanOffset;
-    let start = call.span.start - spanOffset;
-    const end = call.span.end - spanOffset;
+    const converter = module.__spanConverter;
+    let start = converter.byteOffsetToCharIndex(call.span.start - spanOffset);
+    const end = converter.byteOffsetToCharIndex(call.span.end - spanOffset);
 
     // Adjust when span starts one character after the leading "g"
     if (start > 0 && source[start] === "q" && source[start - 1] === "g" && source.slice(start, start + 3) === "ql.") {
@@ -587,8 +590,9 @@ const collectAllDefinitions = ({
  * Get location from an SWC node span
  */
 const getLocation = (module: SwcModule, span: { start: number; end: number }): DiagnosticLocation => {
-  const start = span.start - module.__spanOffset;
-  const end = span.end - module.__spanOffset;
+  const converter = module.__spanConverter;
+  const start = converter.byteOffsetToCharIndex(span.start - module.__spanOffset);
+  const end = converter.byteOffsetToCharIndex(span.end - module.__spanOffset);
   return { start, end };
 };
 
@@ -916,15 +920,15 @@ export const swcAdapter: AnalyzerAdapter = {
     }
 
     // SWC's BytePos counter accumulates across parseSync calls within the same process.
-    // To convert span positions to 0-indexed source positions, we compute the accumulated
-    // offset from previous parses: (program.span.end - source.length) gives us the total
-    // bytes from previously parsed files, and we add 1 because spans are 1-indexed.
-    const spanOffset = program.span.end - input.source.length + 1;
+    // This parse's offset is program.span.end minus the source's UTF-8 byte length (not source.length, which counts UTF-16 code units), plus 1 because spans are 1-indexed.
+    const converter = createSwcSpanConverter(input.source);
+    const spanOffset = program.span.end - converter.byteLength + 1;
 
     // Attach filePath to module (similar to ts.SourceFile.fileName)
     const swcModule = program as SwcModule;
     swcModule.__filePath = input.filePath;
     swcModule.__spanOffset = spanOffset;
+    swcModule.__spanConverter = converter;
 
     // Collect all data in one pass
     const gqlIdentifiers = collectGqlIdentifiers(swcModule, helper);

diff --git a/packages/cli/package.json b/packages/cli/package.json
@@ -60,6 +60,7 @@
     "zod": "^4.1.11"
   },
   "optionalDependencies": {
-    "@soda-gql/formatter": "workspace:*"
+    "@soda-gql/formatter": "workspace:*",
+    "@soda-gql/lsp": "workspace:*"
   }
 }
diff --git a/packages/cli/src/commands/lsp.ts b/packages/cli/src/commands/lsp.ts
@@ -0,0 +1,31 @@
+const LSP_HELP = `Usage: soda-gql lsp [options]
+
+Start the GraphQL Language Server Protocol server.
+
+The LSP server communicates over stdio and provides:
+  - Diagnostics (validation errors in GraphQL templates)
+  - Autocompletion (field, argument, type suggestions)
+  - Hover information (type details on hover)
+
+Options:
+  --help, -h    Show this help message
+
+The server is typically started by an editor extension, not directly by users.
+Configure your editor to use 'soda-gql lsp' as the GraphQL language server command.`;
+
+export const lspCommand = async (argv: readonly string[]): Promise<never> => { // `soda-gql lsp` entry point: prints help and exits, or starts the server and never returns
+  if (argv.includes("--help") || argv.includes("-h")) {
+    process.stdout.write(`${LSP_HELP}\n`);
+    process.exit(0);
+  }
+
+  // Load @soda-gql/lsp lazily (dynamic import) so other commands do not load the LSP dependencies
+  const { createLspServer } = await import("@soda-gql/lsp");
+  const server = createLspServer();
+  server.start();
+
+  // Keep this async function pending forever; the server is expected to keep running over stdio
+  await new Promise(() => {});
+  // Never reached at runtime; the throw lets TypeScript accept the `never` return type
+  throw new Error("unreachable");
+};
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
@@ -4,6 +4,7 @@ import { codegenCommand } from "./commands/codegen/index";
 import { doctorCommand } from "./commands/doctor";
 import { formatCommand } from "./commands/format";
 import { initCommand } from "./commands/init";
+import { lspCommand } from "./commands/lsp";
 import { typegenCommand } from "./commands/typegen";
 import { cliErrors } from "./errors";
 import type { CommandResult, CommandSuccess, OutputFormat } from "./types";
@@ -18,6 +19,7 @@ Commands:
   format     Format soda-gql field selections
   artifact   Manage soda-gql artifacts
   doctor     Run diagnostic checks
+  lsp        Start the GraphQL language server
 
 Run 'soda-gql <command> --help' for more information on a specific command.
 `;
@@ -73,6 +75,11 @@ const dispatch = async (argv: readonly string[]): Promise<DispatchResult> => {
     return artifactCommand(rest);
   }
 
+  if (command === "lsp") {
+    await lspCommand(rest);
+    return ok({ message: "" }); // unreachable, lsp runs forever
+  }
+
   if (command === "doctor") {
     const result = doctorCommand(rest);
     if (result.isOk()) {

diff --git a/packages/common/src/utils/index.ts b/packages/common/src/utils/index.ts
@@ -1,4 +1,5 @@
 export * from "./alias-resolver";
 export * from "./cached-fn";
 export * from "./path";
+export * from "./swc-span";
 export * from "./tsconfig";
diff --git a/packages/common/src/utils/swc-span.test.ts b/packages/common/src/utils/swc-span.test.ts
@@ -0,0 +1,105 @@
+import { describe, expect, test } from "bun:test";
+import { createSwcSpanConverter } from "./swc-span";
+
+describe("createSwcSpanConverter", () => {
+  test("ASCII-only: byteLength equals string length", () => {
+    const source = "const x = 42;";
+    const converter = createSwcSpanConverter(source);
+    expect(converter.byteLength).toBe(source.length);
+  });
+
+  test("ASCII-only: identity conversion", () => {
+    const source = "hello world";
+    const converter = createSwcSpanConverter(source);
+    for (let i = 0; i <= source.length; i++) {
+      expect(converter.byteOffsetToCharIndex(i)).toBe(i);
+    }
+  });
+
+  test("empty string", () => {
+    const converter = createSwcSpanConverter("");
+    expect(converter.byteLength).toBe(0);
+    expect(converter.byteOffsetToCharIndex(0)).toBe(0);
+  });
+
+  test("2-byte UTF-8 characters (accented)", () => {
+    // "\u00E9" = e-acute, 2 bytes in UTF-8, 1 code unit in UTF-16
+    const source = "caf\u00E9";
+    const converter = createSwcSpanConverter(source);
+    // "caf" = 3 bytes, "\u00E9" = 2 bytes → total 5 bytes
+    expect(converter.byteLength).toBe(5);
+    // byte 0 → char 0 ('c')
+    expect(converter.byteOffsetToCharIndex(0)).toBe(0);
+    // byte 3 → char 3 (first byte of '\u00E9')
+    expect(converter.byteOffsetToCharIndex(3)).toBe(3);
+    // byte 5 → char 4 (end sentinel: one past the last byte → source.length)
+    expect(converter.byteOffsetToCharIndex(5)).toBe(4);
+  });
+
+  test("3-byte UTF-8 characters (CJK)", () => {
+    // Each of these hiragana characters is 3 bytes in UTF-8, 1 code unit in UTF-16
+    const source = "\u3053\u3093\u306B\u3061\u306F"; // "konnichiwa" (5 characters)
+    const converter = createSwcSpanConverter(source);
+    expect(converter.byteLength).toBe(15); // 5 chars * 3 bytes
+    // byte 0 → char 0
+    expect(converter.byteOffsetToCharIndex(0)).toBe(0);
+    // byte 3 → char 1
+    expect(converter.byteOffsetToCharIndex(3)).toBe(1);
+    // byte 6 → char 2
+    expect(converter.byteOffsetToCharIndex(6)).toBe(2);
+    // byte 15 → char 5 (end sentinel)
+    expect(converter.byteOffsetToCharIndex(15)).toBe(5);
+  });
+
+  test("4-byte UTF-8 / surrogate pair (emoji)", () => {
+    // "\u{1F600}" = grinning face, 4 bytes UTF-8, 2 code units UTF-16
+    const source = "a\u{1F600}b";
+    const converter = createSwcSpanConverter(source);
+    // 'a' = 1 byte, emoji = 4 bytes, 'b' = 1 byte → 6 bytes
+    expect(converter.byteLength).toBe(6);
+    // byte 0 → char 0 ('a')
+    expect(converter.byteOffsetToCharIndex(0)).toBe(0);
+    // byte 1 → char 1 (start of emoji, i.e. its high surrogate)
+    expect(converter.byteOffsetToCharIndex(1)).toBe(1);
+    // byte 5 → char 3 ('b', after 2 code units for surrogate pair)
+    expect(converter.byteOffsetToCharIndex(5)).toBe(3);
+    // byte 6 → char 4 (end sentinel)
+    expect(converter.byteOffsetToCharIndex(6)).toBe(4);
+  });
+
+  test("mixed ASCII and multi-byte", () => {
+    // ASCII text on both sides of five 3-byte characters
+    const source = "hello \u3053\u3093\u306B\u3061\u306F world";
+    const converter = createSwcSpanConverter(source);
+    // "hello " = 6 bytes, 5 CJK chars = 15 bytes, " world" = 6 bytes → 27 bytes
+    expect(converter.byteLength).toBe(27);
+
+    // "hello " → bytes 0-5, chars 0-5
+    expect(converter.byteOffsetToCharIndex(0)).toBe(0);
+    expect(converter.byteOffsetToCharIndex(5)).toBe(5);
+
+    // First CJK char starts at byte 6 → char 6
+    expect(converter.byteOffsetToCharIndex(6)).toBe(6);
+
+    // " world" starts at byte 21 → char 11
+    expect(converter.byteOffsetToCharIndex(21)).toBe(11);
+
+    // End sentinel: byte 27 → char 17 (6 + 5 + 6 code units)
+    expect(converter.byteOffsetToCharIndex(27)).toBe(17);
+  });
+
+  test("end sentinel: byteOffsetToCharIndex(byteLength) === source.length", () => {
+    const sources = [
+      "",
+      "ascii",
+      "caf\u00E9",
+      "\u3053\u3093\u306B\u3061\u306F",
+      "a\u{1F600}b",
+      "hello \u3053\u3093\u306B\u3061\u306F world",
+    ];
+    for (const source of sources) {
+      const converter = createSwcSpanConverter(source);
+      expect(converter.byteOffsetToCharIndex(converter.byteLength)).toBe(source.length);
+    }
+  });
+});
diff --git a/packages/common/src/utils/swc-span.ts b/packages/common/src/utils/swc-span.ts
@@ -0,0 +1,67 @@
+/**
+ * SWC span position converter: UTF-8 byte offsets → UTF-16 code unit indices.
+ *
+ * SWC (Rust-based) returns span positions as UTF-8 byte offsets.
+ * JavaScript strings use UTF-16 code units for indexing.
+ * For ASCII-only content these are identical, but for multi-byte
+ * characters the positions diverge.
+ */
+
+export type SwcSpanConverter = {
+  /** Length of the source string in bytes when encoded as UTF-8 */
+  readonly byteLength: number;
+  /** Map a 0-based UTF-8 byte offset within the source to the matching UTF-16 code unit index (an index into the JS string) */
+  readonly byteOffsetToCharIndex: (byteOffset: number) => number;
+};
+
+/**
+ * Create a converter that maps UTF-8 byte offsets to UTF-16 code unit indices for `source`.
+ * ASCII-only sources use an identity mapping and build no lookup table. Otherwise a table with
+ * byteLength + 1 entries is built: every byte of a code point maps to that code point's first code unit.
+ * Offsets outside 0..byteLength are not validated (table path: undefined at runtime; fast path: echoed back).
+ */
+export const createSwcSpanConverter = (source: string): SwcSpanConverter => {
+  const byteLength = Buffer.byteLength(source, "utf8");
+
+  // Fast path: equal byte and code-unit lengths mean the source is ASCII-only, so offsets map 1:1
+  if (byteLength === source.length) {
+    return {
+      byteLength,
+      byteOffsetToCharIndex: (byteOffset: number) => byteOffset,
+    };
+  }
+
+  // Lookup table indexed by byte offset; the extra final slot holds the end-of-string sentinel
+  const byteToChar = new Uint32Array(byteLength + 1);
+  let bytePos = 0;
+
+  for (let charIdx = 0; charIdx < source.length; charIdx++) {
+    const codePoint = source.codePointAt(charIdx)!;
+    const bytesForCodePoint =
+      codePoint <= 0x7f
+        ? 1
+        : codePoint <= 0x7ff
+          ? 2
+          : codePoint <= 0xffff
+            ? 3
+            : 4;
+
+    for (let b = 0; b < bytesForCodePoint; b++) {
+      byteToChar[bytePos + b] = charIdx;
+    }
+    bytePos += bytesForCodePoint;
+
+    // Astral code points occupy two UTF-16 code units; skip over the low surrogate
+    if (codePoint > 0xffff) {
+      charIdx++;
+    }
+  }
+
+  // Sentinel: the offset one past the last byte maps to source.length
+  byteToChar[byteLength] = source.length;
+
+  return {
+    byteLength,
+    byteOffsetToCharIndex: (byteOffset: number) => byteToChar[byteOffset]!,
+  };
+};
diff --git a/packages/lsp/@x-index.ts b/packages/lsp/@x-index.ts
@@ -0,0 +1 @@
+export * from "./src/index";