From 32ed23ccf5522854c7b94e07582811159640523e Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Tue, 14 Apr 2026 18:05:18 +0700 Subject: [PATCH 01/10] wip full text search --- package-lock.json | 28 ++++---- package.json | 1 + src/logic/Keybinds.ts | 5 +- src/logic/Settings.ts | 1 + src/ui/FullTextSearchModal.tsx | 50 ++++++++++++++ src/ui/Modals.tsx | 2 + src/ui/SettingsModal.tsx | 13 ++-- src/workers/full-text-search/client.ts | 49 ++++++++++++++ src/workers/full-text-search/worker.ts | 90 ++++++++++++++++++++++++++ 9 files changed, 214 insertions(+), 25 deletions(-) create mode 100644 src/ui/FullTextSearchModal.tsx create mode 100644 src/workers/full-text-search/client.ts create mode 100644 src/workers/full-text-search/worker.ts diff --git a/package-lock.json b/package-lock.json index c4bf322..9a7489d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", + "@sqlite.org/sqlite-wasm": "^3.51.2-build8", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", @@ -162,7 +163,6 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -3086,6 +3086,15 @@ "dev": true, "license": "CC0-1.0" }, + "node_modules/@sqlite.org/sqlite-wasm": { + "version": "3.51.2-build8", + "resolved": "https://registry.npmjs.org/@sqlite.org/sqlite-wasm/-/sqlite-wasm-3.51.2-build8.tgz", + "integrity": "sha512-NvWLTgbqGu1XOLKxePE5Jvc5bzy6QACAQLURQnumSFm0hQms09r2rjMp5i1rvh1DXCe5Rx1FEGcEeQ9BnBB2+Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=22" + } + }, "node_modules/@standard-schema/spec": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz", @@ -3262,7 +3271,6 @@ "integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/core": "^7.21.3", "@svgr/babel-preset": "8.1.0", @@ -3417,7 +3425,6 @@ "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.18.0" } @@ -3428,7 +3435,6 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -3772,7 +3778,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3979,7 +3984,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -4042,8 +4046,7 @@ "version": "1.11.20", "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.20.tgz", "integrity": "sha512-YbwwqR/uYpeoP4pu043q+LTDLFBLApUP6VxRihdfNTqu4ubqMlGDLd6ErXhEgsyvY0K6nCs7nggYumAN+9uEuQ==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/debug": { "version": "4.4.3", @@ -4153,7 +4156,6 @@ "dev": true, "hasInstallScript": true, "license": "MIT", - "peer": true, "bin": { "esbuild": "bin/esbuild" }, @@ -4715,7 +4717,6 @@ "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.55.1.tgz", "integrity": "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A==", "license": "MIT", - "peer": true, "dependencies": { "dompurify": "3.2.7", "marked": "14.0.0" @@ -4827,7 +4828,6 @@ "integrity": "sha512-4RuJK2jP08XwqtUu+5yhCbxEauCm6tv2MFHKEMsjbosK2+vy5us82oI3VLuHwbNyZG7ekZA26U2LLHnGR4frIA==", "dev": true, "license": "MIT", - "peer": true, "bin": { "tsgolint": "bin/tsgolint.js" }, @@ -4982,7 +4982,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -4992,7 +4991,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", - "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -5294,7 +5292,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5326,7 +5323,6 @@ "integrity": "sha512-i7qRCmY42zmCwnYlh9H2SvLEypEFGye5iRmEMKjcGi7zk9UquigRjFtTLz0TYqr0ZGLZhaMHl/foy1bZR+Cwlw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "pathe": "^2.0.3" } @@ -5377,7 +5373,6 @@ "integrity": "sha512-fPGaRNj9Zytaf8LEiBhY7Z6ijnFKdzU/+mL8EFBaKr7Vw1/FWcTBAMW0wLPJAGMPX38ZPVCVgLceWiEqeoqL2Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@oxc-project/runtime": "0.115.0", "lightningcss": "^1.32.0", @@ -5597,7 +5592,6 @@ "dev": true, "hasInstallScript": true, "license": "Apache-2.0", - "peer": true, "bin": { "workerd": "bin/workerd" }, diff --git a/package.json b/package.json index b45ecf3..7300c1d 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", + "@sqlite.org/sqlite-wasm": "^3.51.2-build8", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", diff --git a/src/logic/Keybinds.ts b/src/logic/Keybinds.ts index 2488596..1fa4e83 100644 --- a/src/logic/Keybinds.ts +++ b/src/logic/Keybinds.ts @@ -1,5 +1,5 @@ import { BehaviorSubject, filter, fromEvent, Observable, tap } from "rxjs"; -import { focusSearch, showStructure, type KeybindSetting } from "./Settings"; +import { focusSearch, fullTextSearchBind, showStructure, type KeybindSetting } from "./Settings"; // Set to true when the user is currently capturing a keybind export const capturingKeybind = new BehaviorSubject(null); @@ -19,4 +19,5 @@ function keyBindEvent(setting: KeybindSetting): Observable { } export const focusSearchEvent = keyBindEvent(focusSearch); -export const showStructureEvent = keyBindEvent(showStructure); \ No newline at end of file +export const showStructureEvent = keyBindEvent(showStructure); +export const fullTextSearchEvent = keyBindEvent(fullTextSearchBind); diff --git a/src/logic/Settings.ts b/src/logic/Settings.ts index 8561eb5..224fd3b 100644 --- a/src/logic/Settings.ts +++ b/src/logic/Settings.ts @@ -127,6 +127,7 @@ export const bytecode = new BooleanSetting('bytecode', false); export const unifiedDiff = new BooleanSetting('unified_diff', false); export const focusSearch = new KeybindSetting('focus_search', 'Ctrl+ '); export const showStructure = new KeybindSetting('show_structure', 'Ctrl+F12'); +export const fullTextSearchBind = new KeybindSetting('full_text_search', 'Ctrl+Shift+f'); export const preferWasmDecompiler = new BooleanSetting('prefer_wasm_decompiler', true); preferWasmDecompiler.observable diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx new file mode 100644 index 0000000..a863994 --- /dev/null +++ b/src/ui/FullTextSearchModal.tsx @@ -0,0 +1,50 @@ +import { useEffect, useState } from "react"; +import { fullTextSearchEvent } from "../logic/Keybinds"; +import { useObservable } from "../utils/UseObservable"; +import { Input, Modal } from "antd"; +import { BehaviorSubject, combineLatest, switchMap } from "rxjs"; +import { fullTextSearch } from "../workers/full-text-search/client"; +import { currentResult } from "../logic/Decompiler"; + +const query = new BehaviorSubject(""); +const resultsObs = combineLatest([fullTextSearch, query, currentResult]).pipe( + switchMap(async ([fts, query, currentResult]) => { + if (query.length < 3) return []; + + await fts.index(currentResult.className, currentResult.source); + const res = await fts.find(query); + return res; + })); + +const FullTextSearchModal = () => { + const showEvent = useObservable(fullTextSearchEvent); + const results = useObservable(resultsObs) ?? []; + const [open, setOpen] = useState(false); + + useEffect(() => { + if (showEvent) { + setOpen(true); + } + }, [showEvent]); + + return ( + setOpen(false)} + footer={null} + > + query.next(q)} + /> + {results.map(r => ( +
+
{r.key}
+
{r.snippet}
+
+ ))} +
+ ); +}; +export default FullTextSearchModal; diff --git a/src/ui/Modals.tsx b/src/ui/Modals.tsx index 296246e..ea9496e 100644 --- a/src/ui/Modals.tsx +++ b/src/ui/Modals.tsx @@ -7,6 +7,7 @@ import SettingsModal from "./SettingsModal"; import StructureModal from "./StructureModal"; import { JarDecompilerModal, JarDecompilerProgressModal } from "./JarDecompilerModal"; import IndexProgressNotification from "./IndexProgressNotification"; +import FullTextSearchModal from "./FullTextSearchModal"; const Modals = () => { return ( @@ -21,6 +22,7 @@ const Modals = () => { + ); }; diff --git a/src/ui/SettingsModal.tsx b/src/ui/SettingsModal.tsx index a0ba5e2..c6aa272 100644 --- a/src/ui/SettingsModal.tsx +++ b/src/ui/SettingsModal.tsx @@ -2,7 +2,7 @@ import { Button, Modal, type CheckboxProps, Form, Tooltip, InputNumber, type Inp import { SettingOutlined } from '@ant-design/icons'; import { Checkbox } from 'antd'; import { useObservable } from "../utils/UseObservable"; -import { BooleanSetting, enableTabs, displayLambdas, focusSearch, KeybindSetting, type KeybindValue, bytecode, showStructure, NumberSetting, preferWasmDecompiler, compactPackages } from "../logic/Settings"; +import { BooleanSetting, enableTabs, displayLambdas, focusSearch, KeybindSetting, type KeybindValue, bytecode, showStructure, NumberSetting, preferWasmDecompiler, compactPackages, fullTextSearchBind } from "../logic/Settings"; import { capturingKeybind, rawKeydownEvent } from "../logic/Keybinds"; import { BehaviorSubject } from "rxjs"; import type React from "react"; @@ -34,9 +34,10 @@ const SettingsModal = () => { - + + ); @@ -72,18 +73,18 @@ export interface NumberOptionProps { testid?: string; } -export const NumberOption: React.FC = ({ setting, title, min, max, testid}) => { +export const NumberOption: React.FC = ({ setting, title, min, max, testid }) => { const value = useObservable(setting.observable); const onChange: InputNumberProps["onChange"] = (e) => { setting.value = e ?? setting.defaultValue; - } + }; return ( - + ); -} +}; interface KeybindOptionProps { setting: KeybindSetting; diff --git a/src/workers/full-text-search/client.ts b/src/workers/full-text-search/client.ts new file mode 100644 index 0000000..0db59ca --- /dev/null +++ b/src/workers/full-text-search/client.ts @@ -0,0 +1,49 @@ +import * as Comlink from "comlink"; +import { minecraftJar, type MinecraftJar } from "../../logic/MinecraftApi"; +import { distinctUntilChanged, mergeMap, shareReplay } from "rxjs"; +import type { FullTextSearchResult, FullTextSearchWorker } from "./worker"; + +let currentInstance: FullTextSearch | undefined; +export const fullTextSearch = minecraftJar.pipe( + distinctUntilChanged(), + mergeMap(async jar => { + if (currentInstance) { + await currentInstance.destroy(); + } + + const newInstance = new FullTextSearch(jar); + currentInstance = newInstance; + return newInstance; + }), + shareReplay({ bufferSize: 1, refCount: false }) +); + +export class FullTextSearch { + readonly #jar: MinecraftJar; + constructor(jar: MinecraftJar) { + this.#jar = jar; + } + + #_worker?: Comlink.Remote; + async #worker(): Promise> { + if (this.#_worker) return this.#_worker; + const worker = new Worker(new URL("./worker.ts", import.meta.url), { type: "module", name: "full-text-search" }); + this.#_worker = Comlink.wrap(worker); + await this.#_worker.init(this.#jar.jar.name); + return this.#_worker; + }; + + async destroy() { + await this.#_worker?.destroy(); + } + + async index(key: string, source: string) { + const worker = await this.#worker(); + await worker.index(key, source); + } + + async find(query: string): Promise { + const worker = await this.#worker(); + return await worker.find(query); + } +} diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts new file mode 100644 index 0000000..7da6db3 --- /dev/null +++ b/src/workers/full-text-search/worker.ts @@ -0,0 +1,90 @@ +import * as Comlink from "comlink"; +import sqlite3InitModule, { type OpfsDatabase } from "@sqlite.org/sqlite-wasm"; + +export interface FullTextSearchRegion { + start: number; + end: number; +} + +export interface FullTextSearchResult { + key: string; + snippet: string; +} + +export class FullTextSearchWorker { + #db?: OpfsDatabase; + + async init(name: string): Promise { + try { + console.log("Loading SQLite3 Module..."); + const sqlite3 = await sqlite3InitModule(); + console.log("Loading SQLite3 Module... Done."); + + // TODO: change the db name + this.#db = new sqlite3.oo1.OpfsDb(`/fts-test.${name}.sqlite3`); + this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source);"); + return undefined; + } catch (err: any) { + console.error(err); + return String(err); + } + } + + destroy() { + this.#db?.close(); + close(); + } + + index(key: string, source: string) { + if (!this.#db) { + console.error("DB not initialized"); + return; + } + + this.#db.exec({ + sql: "INSERT INTO sources(key, source) VALUES(?, ?)", + bind: [key, source] + }); + } + + find(query: string): FullTextSearchResult[] { + if (!this.#db) { + console.error("DB not initialized"); + return []; + } + + const res = this.#db.selectObjects(` + SELECT + key, + snippet(sources, -1, '[', ']', '...', 10) AS snippet + FROM sources + WHERE source MATCH ?; + `, [query]); + + return res.map((r: any) => ({ + key: r["key"] as string, + snippet: r["snippet"] as string + })); + } + + // TODO: figure out how to get offsets in FTS5 + // require creating SQLite extension. + #parseOffsets(s: string): FullTextSearchRegion[] { + if (!s) return []; + + const parts = s.trim().split(/\s+/).map(Number); + const regions: FullTextSearchRegion[] = []; + + // [col] [startToken] [endToken] [termIndex] ... + for (let i = 0; i + 3 < parts.length; i += 4) { + const startToken = parts[i + 1]; + const endToken = parts[i + 2]; + if (Number.isFinite(startToken) && Number.isFinite(endToken)) { + regions.push({ start: startToken, end: endToken }); + } + } + + return regions; + } +} +Comlink.expose(new FullTextSearchWorker()); From 918bb01e8191dcf584faf5d75786797302d5be8c Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Tue, 14 Apr 2026 21:16:56 +0700 Subject: [PATCH 02/10] index all decompiled classes --- src/ui/FullTextSearchModal.tsx | 7 ++----- src/workers/decompile/client.ts | 8 ++++++++ src/workers/decompile/worker.ts | 22 ++++++++++++++++++++++ src/workers/full-text-search/client.ts | 13 ++++++++----- src/workers/full-text-search/worker.ts | 6 +++--- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index a863994..fefdcc9 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -4,14 +4,11 @@ import { useObservable } from "../utils/UseObservable"; import { Input, Modal } from "antd"; import { BehaviorSubject, combineLatest, switchMap } from "rxjs"; import { fullTextSearch } from "../workers/full-text-search/client"; -import { currentResult } from "../logic/Decompiler"; const query = new BehaviorSubject(""); -const resultsObs = combineLatest([fullTextSearch, query, currentResult]).pipe( - switchMap(async ([fts, query, currentResult]) => { +const resultsObs = combineLatest([fullTextSearch, query]).pipe( + switchMap(async ([fts, query]) => { if (query.length < 3) return []; - - await fts.index(currentResult.className, currentResult.source); const res = await fts.find(query); return res; })); diff --git a/src/workers/decompile/client.ts b/src/workers/decompile/client.ts index 4c63b74..f330326 100644 --- a/src/workers/decompile/client.ts +++ b/src/workers/decompile/client.ts @@ -61,6 +61,14 @@ export async function deleteCache(): Promise { return await worker.clear(); } +export async function onDecompiledSources( + jar: Jar, + callback: (className: string, source: string) => Promise | void +) { + const worker = await findWorker(); + await worker.onDecompiledSources(jar.name, jar.blob, Comlink.proxy(callback)); +} + export type DecompileEntireJarOptions = { threads?: number, splits?: number, diff --git a/src/workers/decompile/worker.ts b/src/workers/decompile/worker.ts index cd9c5c3..5c41e7c 100644 --- a/src/workers/decompile/worker.ts +++ b/src/workers/decompile/worker.ts @@ -83,6 +83,28 @@ export class DecompileWorker { return count; }); + onDecompiledSources = ( + jarName: string, + jarBlob: Blob, + callback: (className: string, source: string) => Promise | void + ) => this.schedule(async () => { + const jar = new DecompileJar(await openJar(jarName, jarBlob)); + const classNames = jar.classes.filter(n => !n.includes("$")); + + const promises: Promise[] = []; + for (const className of classNames) { + const data = jar.proxy[className]; + if (!data) continue; + + promises.push((async () => { + const result = await this.db.results3.get([className, data.checksum, "java"]); + if (result) await callback(result.className, result.source); + })()); + } + + await Promise.all(promises); + }); + decompileMany = ( jarName: string, jarBlob: Blob, diff --git a/src/workers/full-text-search/client.ts b/src/workers/full-text-search/client.ts index 0db59ca..82d4f3d 100644 --- a/src/workers/full-text-search/client.ts +++ b/src/workers/full-text-search/client.ts @@ -2,6 +2,7 @@ import * as Comlink from "comlink"; import { minecraftJar, type MinecraftJar } from "../../logic/MinecraftApi"; import { distinctUntilChanged, mergeMap, shareReplay } from "rxjs"; import type { FullTextSearchResult, FullTextSearchWorker } from "./worker"; +import { onDecompiledSources } from "../decompile/client"; let currentInstance: FullTextSearch | undefined; export const fullTextSearch = minecraftJar.pipe( @@ -27,9 +28,16 @@ export class FullTextSearch { #_worker?: Comlink.Remote; async #worker(): Promise> { if (this.#_worker) return this.#_worker; + const worker = new Worker(new URL("./worker.ts", import.meta.url), { type: "module", name: "full-text-search" }); this.#_worker = Comlink.wrap(worker); await this.#_worker.init(this.#jar.jar.name); + + await onDecompiledSources(this.#jar.jar, async (className, source) => { + console.log("fts", className); + this.#_worker!.index(className, source); + }); + return this.#_worker; }; @@ -37,11 +45,6 @@ export class FullTextSearch { await this.#_worker?.destroy(); } - async index(key: string, source: string) { - const worker = await this.#worker(); - await worker.index(key, source); - } - async find(query: string): Promise { const worker = await this.#worker(); return await worker.find(query); diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index 7da6db3..f956b39 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -1,5 +1,5 @@ import * as Comlink from "comlink"; -import sqlite3InitModule, { type OpfsDatabase } from "@sqlite.org/sqlite-wasm"; +import sqlite3InitModule, { type Database } from "@sqlite.org/sqlite-wasm"; export interface FullTextSearchRegion { start: number; @@ -12,7 +12,7 @@ export interface FullTextSearchResult { } export class FullTextSearchWorker { - #db?: OpfsDatabase; + #db?: Database; async init(name: string): Promise { try { @@ -21,7 +21,7 @@ export class FullTextSearchWorker { console.log("Loading SQLite3 Module... Done."); // TODO: change the db name - this.#db = new sqlite3.oo1.OpfsDb(`/fts-test.${name}.sqlite3`); + this.#db = new sqlite3.oo1.DB(`/fts.${name}.sqlite3`); this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source);"); return undefined; } catch (err: any) { From 3b9fe4f02fcee94ee2ea5ece3e473de432ecd88e Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Thu, 16 Apr 2026 02:28:05 +0700 Subject: [PATCH 03/10] proper search modal --- src/index.css | 10 ++- src/ui/FullTextSearchModal.tsx | 97 +++++++++++++++++++++----- src/utils/UseObservable.ts | 4 +- src/workers/full-text-search/client.ts | 6 +- src/workers/full-text-search/worker.ts | 21 ++++-- 5 files changed, 109 insertions(+), 29 deletions(-) diff --git a/src/index.css b/src/index.css index 3052186..badb105 100644 --- a/src/index.css +++ b/src/index.css @@ -108,5 +108,13 @@ html, body, #root { /* Don't have text overflow in the structure dialog */ .structure-tree { - overflow: auto; + overflow: auto; +} + +.full-text-search-item { + cursor: pointer; +} + +.full-text-search-item:hover { + background-color: rgba(255, 255, 255, 0.1); } diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index fefdcc9..6941091 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -1,22 +1,38 @@ -import { useEffect, useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { fullTextSearchEvent } from "../logic/Keybinds"; import { useObservable } from "../utils/UseObservable"; -import { Input, Modal } from "antd"; -import { BehaviorSubject, combineLatest, switchMap } from "rxjs"; +import { Flex, Input, List, Modal, type InputRef } from "antd"; +import { BehaviorSubject, catchError, combineLatest, distinctUntilChanged, from, map, Observable, of, startWith, switchMap } from "rxjs"; import { fullTextSearch } from "../workers/full-text-search/client"; +import type { FullTextSearchResult } from "../workers/full-text-search/worker"; +import { openCodeTab } from "../logic/Tabs"; + +const SearchState = (r: SearchState) => r; +type SearchState = + | { state: "loading"; } + | { state: "ok"; results: FullTextSearchResult[]; } + | { state: "error"; error: string; }; const query = new BehaviorSubject(""); -const resultsObs = combineLatest([fullTextSearch, query]).pipe( - switchMap(async ([fts, query]) => { - if (query.length < 3) return []; - const res = await fts.find(query); - return res; +const search$ = combineLatest([fullTextSearch, query]).pipe( + distinctUntilChanged(), + switchMap(([fts, query]) => { + if (query.length < 3) return of(SearchState({ + state: "error", + error: "Query must be at least 3 characters" + })); + + return from(fts.find(query, { maxTokens: 8 })).pipe( + map(results => SearchState({ state: "ok", results })), + startWith(SearchState({ state: "loading" })), + catchError(error => of(SearchState({ state: "error", error: String(error) })))); })); const FullTextSearchModal = () => { const showEvent = useObservable(fullTextSearchEvent); - const results = useObservable(resultsObs) ?? []; + const search = useObservable(search$) ?? { state: "ok", results: [] }; const [open, setOpen] = useState(false); + const inputRef = useRef(null); useEffect(() => { if (showEvent) { @@ -24,23 +40,68 @@ const FullTextSearchModal = () => { } }, [showEvent]); + function openResult(result: FullTextSearchResult) { + setOpen(false); + openCodeTab(result.key); + } + + let resultsElement; + if (search.state === "loading") { + resultsElement = (
Loading...
); + } else if (search.state === "error") { + resultsElement = (
{search.error}
); + } else if (search.results.length === 0) { + resultsElement = (
No results
); + } else { + resultsElement = ( + ( + openResult(result)} + className="full-text-search-item" + > + + {result.snippet} + + )} + /> + + )} + /> + ); + } + return ( setOpen(false)} + afterOpenChange={open => open && inputRef.current?.focus()} footer={null} + width="50%" > - query.next(q)} - /> - {results.map(r => ( -
-
{r.key}
-
{r.snippet}
+ + query.next(q.trim())} + /> +
+ {resultsElement}
- ))} +
); }; diff --git a/src/utils/UseObservable.ts b/src/utils/UseObservable.ts index b8fd44e..7995230 100644 --- a/src/utils/UseObservable.ts +++ b/src/utils/UseObservable.ts @@ -2,8 +2,8 @@ import { useState, useEffect } from 'react'; import { Observable, BehaviorSubject } from 'rxjs'; export function useObservable(observable: Observable) { - const [state, setState] = useState(() => - observable instanceof BehaviorSubject ? observable.getValue() : undefined as T + const [state, setState] = useState(() => + observable instanceof BehaviorSubject ? observable.getValue() : undefined ); useEffect(() => { diff --git a/src/workers/full-text-search/client.ts b/src/workers/full-text-search/client.ts index 82d4f3d..253cb4f 100644 --- a/src/workers/full-text-search/client.ts +++ b/src/workers/full-text-search/client.ts @@ -1,7 +1,7 @@ import * as Comlink from "comlink"; import { minecraftJar, type MinecraftJar } from "../../logic/MinecraftApi"; import { distinctUntilChanged, mergeMap, shareReplay } from "rxjs"; -import type { FullTextSearchResult, FullTextSearchWorker } from "./worker"; +import type { FullTextSearchOptions, FullTextSearchResult, FullTextSearchWorker } from "./worker"; import { onDecompiledSources } from "../decompile/client"; let currentInstance: FullTextSearch | undefined; @@ -45,8 +45,8 @@ export class FullTextSearch { await this.#_worker?.destroy(); } - async find(query: string): Promise { + async find(query: string, options?: FullTextSearchOptions): Promise { const worker = await this.#worker(); - return await worker.find(query); + return await worker.find(query, options); } } diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index f956b39..0a14f77 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -1,6 +1,14 @@ import * as Comlink from "comlink"; import sqlite3InitModule, { type Database } from "@sqlite.org/sqlite-wasm"; +/** https://www.sqlite.org/fts5.html#the_snippet_function */ +export interface FullTextSearchOptions { + pre?: string; + post?: string; + ellipsis?: string; + maxTokens?: number; +} + export interface FullTextSearchRegion { start: number; end: number; @@ -20,9 +28,8 @@ export class FullTextSearchWorker { const sqlite3 = await sqlite3InitModule(); console.log("Loading SQLite3 Module... Done."); - // TODO: change the db name this.#db = new sqlite3.oo1.DB(`/fts.${name}.sqlite3`); - this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source);"); + this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source, tokenize='porter');"); return undefined; } catch (err: any) { console.error(err); @@ -47,19 +54,23 @@ export class FullTextSearchWorker { }); } - find(query: string): FullTextSearchResult[] { + find(query: string, options?: FullTextSearchOptions): FullTextSearchResult[] { if (!this.#db) { console.error("DB not initialized"); return []; } + console.log("Starting full text search..."); + const startTime = performance.now(); const res = this.#db.selectObjects(` SELECT key, - snippet(sources, -1, '[', ']', '...', 10) AS snippet + snippet(sources, -1, ?, ?, ?, ?) AS snippet FROM sources WHERE source MATCH ?; - `, [query]); + `, [options?.pre ?? "[", options?.post ?? "]", options?.ellipsis ?? "…", options?.maxTokens ?? 10, query]); + const elapsedMs = performance.now() - startTime; + console.log(`Finished in ${elapsedMs} ms`); return res.map((r: any) => ({ key: r["key"] as string, From b16714fde447ee78b8d3e2ca0405f9f44f3dd67f Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Fri, 17 Apr 2026 01:06:54 +0700 Subject: [PATCH 04/10] preprocess source to remove package and import lines --- src/workers/full-text-search/worker.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index 0a14f77..32e6f55 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -48,6 +48,11 @@ export class FullTextSearchWorker { return; } + source = source + .replace(/^\s*package\s+[^\r\n;]+;\s*\r?\n?/m, "") + .replace(/^\s*import\s+[^\r\n;]+;\s*\r?\n?/gm, "") + .trim(); + this.#db.exec({ sql: "INSERT INTO sources(key, source) VALUES(?, ?)", bind: [key, source] From 252995cb090c10cae0edbe7ca18b8baf9b00e844 Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Fri, 17 Apr 2026 01:19:55 +0700 Subject: [PATCH 05/10] merge fixes --- src/ui/FullTextSearchModal.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index 6941091..31e69a7 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -2,10 +2,10 @@ import { useEffect, useRef, useState } from "react"; import { fullTextSearchEvent } from "../logic/Keybinds"; import { useObservable } from "../utils/UseObservable"; import { Flex, Input, List, Modal, type InputRef } from "antd"; -import { BehaviorSubject, catchError, combineLatest, distinctUntilChanged, from, map, Observable, of, startWith, switchMap } from "rxjs"; +import { BehaviorSubject, catchError, combineLatest, distinctUntilChanged, from, map, of, startWith, switchMap } from "rxjs"; import { fullTextSearch } from "../workers/full-text-search/client"; import type { FullTextSearchResult } from "../workers/full-text-search/worker"; -import { openCodeTab } from "../logic/Tabs"; +import { openCodeTab } from "../logic/tabs"; const SearchState = (r: SearchState) => r; type SearchState = From 882b94178b8949fa333143bf84e5fef8efaf7ca3 Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Sun, 26 Apr 2026 10:29:41 +0700 Subject: [PATCH 06/10] use custom sqlite extension --- package-lock.json | 12 +++---- package.json | 2 +- src/ui/FullTextSearchModal.tsx | 21 +++++++----- src/workers/full-text-search/worker.ts | 47 ++++++++++++++++---------- 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/package-lock.json b/package-lock.json index 9a7489d..30f2512 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", - "@sqlite.org/sqlite-wasm": "^3.51.2-build8", + "@sqlite.org/sqlite-wasm": "github:deirn/sqlite-wasm#mcsrc-dist", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", @@ -3087,13 +3087,9 @@ "license": "CC0-1.0" }, "node_modules/@sqlite.org/sqlite-wasm": { - "version": "3.51.2-build8", - "resolved": "https://registry.npmjs.org/@sqlite.org/sqlite-wasm/-/sqlite-wasm-3.51.2-build8.tgz", - "integrity": "sha512-NvWLTgbqGu1XOLKxePE5Jvc5bzy6QACAQLURQnumSFm0hQms09r2rjMp5i1rvh1DXCe5Rx1FEGcEeQ9BnBB2+Q==", - "license": "Apache-2.0", - "engines": { - "node": ">=22" - } + "version": "3.53.0-build1", + "resolved": "git+ssh://git@github.com/deirn/sqlite-wasm.git#4ba3a494aed505a38d9c2598db2c3962268cf050", + "license": "Apache-2.0" }, "node_modules/@standard-schema/spec": { "version": "1.1.0", diff --git a/package.json b/package.json index 7300c1d..ee7f8af 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,7 @@ "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", - "@sqlite.org/sqlite-wasm": "^3.51.2-build8", + "@sqlite.org/sqlite-wasm": "github:deirn/sqlite-wasm#mcsrc-dist", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index 31e69a7..f17fe5e 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -63,16 +63,19 @@ const FullTextSearchModal = () => { > - {result.snippet} + description={result.regions.map((r, i) => ( +
+ {r.snippet}
- )} + ))} /> )} diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index 32e6f55..49e284d 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -12,15 +12,18 @@ export interface FullTextSearchOptions { export interface FullTextSearchRegion { start: number; end: number; + snippet: string; } export interface FullTextSearchResult { key: string; - snippet: string; + regions: FullTextSearchRegion[] } export class FullTextSearchWorker { #db?: Database; + #enc = new TextEncoder(); + #dec = new TextDecoder(); async init(name: string): Promise { try { @@ -29,7 +32,7 @@ export class FullTextSearchWorker { console.log("Loading SQLite3 Module... Done."); this.#db = new sqlite3.oo1.DB(`/fts.${name}.sqlite3`); - this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source, tokenize='porter');"); + this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source, tokenize='porter mcsrc_tokenizer');"); return undefined; } catch (err: any) { console.error(err); @@ -70,34 +73,42 @@ export class FullTextSearchWorker { const res = this.#db.selectObjects(` SELECT key, - snippet(sources, -1, ?, ?, ?, ?) AS snippet + mcsrc_offsets(sources, ?, ?, ?, ?) AS offsets FROM sources WHERE source MATCH ?; `, [options?.pre ?? "[", options?.post ?? "]", options?.ellipsis ?? "…", options?.maxTokens ?? 10, query]); - const elapsedMs = performance.now() - startTime; - console.log(`Finished in ${elapsedMs} ms`); - return res.map((r: any) => ({ + const out = res.map((r: any) => ({ key: r["key"] as string, - snippet: r["snippet"] as string + regions: this.#parseOffsets(r["offsets"] as string), })); + + const elapsedMs = performance.now() - startTime; + console.log(`Finished in ${elapsedMs} ms`); + return out; } - // TODO: figure out how to get offsets in FTS5 - // require creating SQLite extension. #parseOffsets(s: string): FullTextSearchRegion[] { if (!s) return []; - const parts = s.trim().split(/\s+/).map(Number); + const bytes = this.#enc.encode(s); const regions: FullTextSearchRegion[] = []; - - // [col] [startToken] [endToken] [termIndex] ... - for (let i = 0; i + 3 < parts.length; i += 4) { - const startToken = parts[i + 1]; - const endToken = parts[i + 2]; - if (Number.isFinite(startToken) && Number.isFinite(endToken)) { - regions.push({ start: startToken, end: endToken }); - } + let pos = 0; + + while (pos < bytes.length) { + const newline = bytes.indexOf(10, pos); // '\n' + const header = this.#dec.decode(bytes.slice(pos, newline)); + const [_col, _phrase, byteOffset, byteSize, snippetByteLen] = header.trim().split(/\s+/).map(Number); + pos = newline + 1; + + const snippetBytes = bytes.slice(pos, pos + snippetByteLen); + const snippet = this.#dec.decode(snippetBytes); + regions.push({ + start: byteOffset, + end: byteOffset + byteSize, + snippet + }); + pos += snippetByteLen; } return regions; From 67d0d1c4ac4e2a798f6664d75bc71e9a5ad9d06e Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Sun, 26 Apr 2026 22:09:11 +0700 Subject: [PATCH 07/10] collapsible regions --- src/ui/FullTextSearchModal.tsx | 65 ++++++++++++++++++-------- src/workers/full-text-search/client.ts | 6 ++- src/workers/full-text-search/worker.ts | 5 +- 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index f17fe5e..a34378a 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -1,7 +1,7 @@ -import { useEffect, useRef, useState } from "react"; +import React, { useEffect, useRef, useState } from "react"; import { fullTextSearchEvent } from "../logic/Keybinds"; import { useObservable } from "../utils/UseObservable"; -import { Flex, Input, List, Modal, type InputRef } from "antd"; +import { Button, Flex, Input, List, Modal, type InputRef } from "antd"; import { BehaviorSubject, catchError, combineLatest, distinctUntilChanged, from, map, of, startWith, switchMap } from "rxjs"; import { fullTextSearch } from "../workers/full-text-search/client"; import type { FullTextSearchResult } from "../workers/full-text-search/worker"; @@ -22,12 +22,54 @@ const search$ = combineLatest([fullTextSearch, query]).pipe( error: "Query must be at least 3 characters" })); - return from(fts.find(query, { maxTokens: 8 })).pipe( + return from(fts.find(query, { maxTokens: 11 })).pipe( map(results => SearchState({ state: "ok", results })), startWith(SearchState({ state: "loading" })), catchError(error => of(SearchState({ state: "error", error: String(error) })))); })); +type FullTextSearchResultElementProps = { + result: FullTextSearchResult; +}; + +const FullTextSearchResultElement: React.FC = ({ result }) => { + const [expand, setExpand] = useState(false); + + const sliced = expand ? result.regions : result.regions.slice(0, 5); + const canToggleExpand = expand || sliced.length < result.regions.length; + + return ( + + {canToggleExpand && ( + + )} + {sliced.map((r, i) => ( +
+ {r.snippet} +
+ ))} + + )} + /> + ); +}; + const FullTextSearchModal = () => { const showEvent = useObservable(fullTextSearchEvent); const search = useObservable(search$) ?? { state: "ok", results: [] }; @@ -61,22 +103,7 @@ const FullTextSearchModal = () => { onClick={() => openResult(result)} className="full-text-search-item" > - ( -
- {r.snippet} -
- ))} - /> + )} /> diff --git a/src/workers/full-text-search/client.ts b/src/workers/full-text-search/client.ts index 253cb4f..ee8ee9c 100644 --- a/src/workers/full-text-search/client.ts +++ b/src/workers/full-text-search/client.ts @@ -33,10 +33,14 @@ export class FullTextSearch { this.#_worker = Comlink.wrap(worker); await this.#_worker.init(this.#jar.jar.name); + console.log("Indexing decompiled sources..."); + const startTime = performance.now(); await onDecompiledSources(this.#jar.jar, async (className, source) => { - console.log("fts", className); + // console.log("fts", className); this.#_worker!.index(className, source); }); + const elapsedMs = performance.now() - startTime; + console.log(`Finished in ${elapsedMs.toFixed(3)} ms`); return this.#_worker; }; diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index 49e284d..ff95748 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -75,7 +75,8 @@ export class FullTextSearchWorker { key, mcsrc_offsets(sources, ?, ?, ?, ?) AS offsets FROM sources - WHERE source MATCH ?; + WHERE source MATCH ? + ORDER BY rank; `, [options?.pre ?? "[", options?.post ?? "]", options?.ellipsis ?? "…", options?.maxTokens ?? 10, query]); const out = res.map((r: any) => ({ @@ -84,7 +85,7 @@ export class FullTextSearchWorker { })); const elapsedMs = performance.now() - startTime; - console.log(`Finished in ${elapsedMs} ms`); + console.log(`Finished in ${elapsedMs.toFixed(3)} ms`); return out; } From b679c6bf3b37d5cfba6e101da84d6f085fd9c267 Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Sun, 26 Apr 2026 22:41:46 +0700 Subject: [PATCH 08/10] vendor sqlite extension code --- package-lock.json | 32 +- package.json | 6 +- packages/mcsrc-sqlite/.gitignore | 1 + packages/mcsrc-sqlite/Dockerfile | 36 ++ packages/mcsrc-sqlite/index.d.ts | 2 + packages/mcsrc-sqlite/index.js | 1 + packages/mcsrc-sqlite/package.json | 13 + .../mcsrc-sqlite/sqlite3_wasm_extra_init.c | 454 ++++++++++++++++++ src/workers/full-text-search/worker.ts | 2 +- 9 files changed, 542 insertions(+), 5 deletions(-) create mode 100644 packages/mcsrc-sqlite/.gitignore create mode 100644 packages/mcsrc-sqlite/Dockerfile create mode 100644 packages/mcsrc-sqlite/index.d.ts create mode 100644 packages/mcsrc-sqlite/index.js create mode 100644 packages/mcsrc-sqlite/package.json create mode 100644 packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c diff --git a/package-lock.json b/package-lock.json index 30f2512..54ab947 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,16 +7,20 @@ "": { "name": "mcsrc", "version": "0.0.0", + "workspaces": [ + ".", + "packages/*" + ], "dependencies": { "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", - "@sqlite.org/sqlite-wasm": "github:deirn/sqlite-wasm#mcsrc-dist", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", "dagre": "^0.8.5", "dexie": "^4.3.0", + "mcsrc-sqlite": "workspace:*", "monaco-editor": "^0.55.1", "react": "^19.2.4", "react-dom": "^19.2.4", @@ -3088,8 +3092,16 @@ }, "node_modules/@sqlite.org/sqlite-wasm": { "version": "3.53.0-build1", - "resolved": "git+ssh://git@github.com/deirn/sqlite-wasm.git#4ba3a494aed505a38d9c2598db2c3962268cf050", - "license": "Apache-2.0" + "resolved": "https://registry.npmjs.org/@sqlite.org/sqlite-wasm/-/sqlite-wasm-3.53.0-build1.tgz", + "integrity": "sha512-PfWPWN2n+/37doa8oh2/oUXk4OOsRYZsxc1W1sDXIGb/Pu5Yrb+f2eyYpgQMGITVX7HVgxhs9P18Rc6I97ym/g==", + "dev": true, + "license": "Apache-2.0", + "workspaces": [ + "demos/*" + ], + "engines": { + "node": ">=22" + } }, "node_modules/@standard-schema/spec": { "version": "1.1.0", @@ -4687,6 +4699,14 @@ "node": ">= 18" } }, + "node_modules/mcsrc": { + "resolved": "", + "link": true + }, + "node_modules/mcsrc-sqlite": { + "resolved": "packages/mcsrc-sqlite", + "link": true + }, "node_modules/miniflare": { "version": "4.20260312.0", "resolved": "https://registry.npmjs.org/miniflare/-/miniflare-4.20260312.0.tgz", @@ -5718,6 +5738,12 @@ "optional": true } } + }, + "packages/mcsrc-sqlite": { + "version": "0.0.0", + "devDependencies": { + "@sqlite.org/sqlite-wasm": "^3.53.0-build1" + } } } } diff --git a/package.json b/package.json index ee7f8af..72fe10e 100644 --- a/package.json +++ b/package.json @@ -3,6 +3,10 @@ "private": true, "version": "0.0.0", "type": "module", + "workspaces": [ + ".", + "packages/*" + ], "scripts": { "dev": "vite", "dev:javadoc": "vite --mode javadoc", @@ -20,12 +24,12 @@ "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", "@run-slicer/vf": "^0.5.0-1.11.2", - "@sqlite.org/sqlite-wasm": "github:deirn/sqlite-wasm#mcsrc-dist", "@xyflow/react": "^12.10.1", "antd": "^6.3.2", "comlink": "^4.4.2", "dagre": "^0.8.5", "dexie": "^4.3.0", + "mcsrc-sqlite": "workspace:*", "monaco-editor": "^0.55.1", "react": "^19.2.4", "react-dom": "^19.2.4", diff --git a/packages/mcsrc-sqlite/.gitignore b/packages/mcsrc-sqlite/.gitignore new file mode 100644 index 0000000..7773828 --- /dev/null +++ b/packages/mcsrc-sqlite/.gitignore @@ -0,0 +1 @@ +dist/ \ No newline at end of file diff --git a/packages/mcsrc-sqlite/Dockerfile b/packages/mcsrc-sqlite/Dockerfile new file mode 100644 index 0000000..356943c --- /dev/null +++ b/packages/mcsrc-sqlite/Dockerfile @@ -0,0 +1,36 @@ +FROM emscripten/emsdk:5.0.6 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + tcl \ + wabt \ + zip \ + curl \ + unzip \ + openssl \ + && rm -rf /var/lib/apt/lists/* + +USER ubuntu +WORKDIR /home/ubuntu/build + +ARG SQLITE_NAME=sqlite-src-3530000 +ARG SQLITE_URL=https://www.sqlite.org/2026/${SQLITE_NAME}.zip +ARG SQLITE_SHA3=4ffbd00ba8db1e1172dbc69a5203a2c185556a32543e58585ba3713abf676fe5 + +RUN curl -L -o sqlite-src.zip ${SQLITE_URL} && \ + actual=$(openssl dgst -sha3-256 sqlite-src.zip | awk '{print $2}') && \ + if [ "$actual" != "${SQLITE_SHA3}" ]; then \ + echo "SHA3 verification failed: $actual"; \ + rm sqlite-src.zip; \ + exit 1; \ + fi && \ + unzip -o sqlite-src.zip && \ + mv ${SQLITE_NAME} sqlite-src + +RUN cd sqlite-src && ./configure && make sqlite3.c + +COPY sqlite3_wasm_extra_init.c sqlite-src/ext/wasm/sqlite3_wasm_extra_init.c + +RUN make -C sqlite-src/ext/wasm npm + +FROM scratch +COPY --from=0 /home/ubuntu/build/sqlite-src/ext/wasm/jswasm/ / diff --git a/packages/mcsrc-sqlite/index.d.ts b/packages/mcsrc-sqlite/index.d.ts new file mode 100644 index 0000000..101884c --- /dev/null +++ b/packages/mcsrc-sqlite/index.d.ts @@ -0,0 +1,2 @@ +export { default } from "@sqlite.org/sqlite-wasm"; +export * from "@sqlite.org/sqlite-wasm"; diff --git a/packages/mcsrc-sqlite/index.js b/packages/mcsrc-sqlite/index.js new file mode 100644 index 0000000..f03e578 --- /dev/null +++ b/packages/mcsrc-sqlite/index.js @@ -0,0 +1 @@ +export { default } from "./dist/sqlite3-bundler-friendly.mjs"; diff --git a/packages/mcsrc-sqlite/package.json b/packages/mcsrc-sqlite/package.json new file mode 100644 index 0000000..4f47c13 --- /dev/null +++ b/packages/mcsrc-sqlite/package.json @@ -0,0 +1,13 @@ +{ + "name": "mcsrc-sqlite", + "private": true, + "version": "0.0.0", + "type": "module", + "main": "index.js", + "scripts": { + "build": "docker build --output dist/ ." + }, + "devDependencies": { + "@sqlite.org/sqlite-wasm": "^3.53.0-build1" + } +} diff --git a/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c b/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c new file mode 100644 index 0000000..233c01e --- /dev/null +++ b/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c @@ -0,0 +1,454 @@ +#include "sqlite3.h" +#include + +// -------------------------------------------------- +// TOKENIZER + +#define MCSRC_TOK_BUF 512 + +typedef struct McsrcTokenizer { + int iVersion; +} McsrcTokenizer; + +static int mcsrc_lower(unsigned char c) { + return (c >= 'A' && c <= 'Z') ? c + 32 : c; +} + +static int mcsrc_is_ident_start(unsigned char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c > 127; +} + +static int mcsrc_is_ident_cont(unsigned char c) { + return mcsrc_is_ident_start(c) || (c >= '0' && c <= '9'); +} + +/* +** Emit a Java identifier as: +** 1. full token lowercased (position N) +** 2. camelCase/underscore parts lowercased, colocated at position N +** +** Examples: +** getUserName -> "getusername", "get", "user", "name" +** parseXMLDoc -> "parsexmldoc", "parse", "xml", "doc" +** MAX_VALUE -> "max_value", "max", "value" +** HTML5Parser -> "html5parser", "html5", "parser" +*/ +static int mcsrc_emit_ident(const char *pText, int iStart, int iEnd, + void *pCtx, + int (*xToken)(void*, int, const char*, int, int, int)) +{ + char aBuf[MCSRC_TOK_BUF]; + const char *p = pText + iStart; + int n = iEnd - iStart; + int i, segStart, segEnd, rc; + + /* Emit full identifier lowercased */ + int nFull = n < MCSRC_TOK_BUF ? n : MCSRC_TOK_BUF - 1; + for (i = 0; i < nFull; i++) aBuf[i] = (char)mcsrc_lower((unsigned char)p[i]); + rc = xToken(pCtx, 0, aBuf, nFull, iStart, iEnd); + if (rc != SQLITE_OK) return rc; + + /* Emit split parts as colocated tokens */ + segStart = 0; + while (segStart < nFull) { + /* Skip leading underscores between segments */ + while (segStart < nFull && p[segStart] == '_') segStart++; + if (segStart >= nFull) break; + + segEnd = segStart + 1; + while (segEnd < nFull) { + unsigned char cur = (unsigned char)p[segEnd]; + unsigned char prev = (unsigned char)p[segEnd - 1]; + + if (cur == '_') break; + + /* letter -> digit: split (item3d -> item|3d) */ + if ((cur >= '0' && cur <= '9') && + ((prev >= 'a' && prev <= 'z') || (prev >= 'A' && prev <= 'Z'))) break; + + if (cur >= 'A' && cur <= 'Z') { + /* lower/digit -> upper: new word starts here */ + if ((prev >= 'a' && prev <= 'z') || (prev >= '0' && prev <= '9')) break; + /* upper run -> upper+lower: split before last upper (XMLDoc -> XML|Doc) */ + if ((prev >= 'A' && prev <= 'Z') && segEnd + 1 < nFull) { + unsigned char next = (unsigned char)p[segEnd + 1]; + if (next >= 'a' && next <= 'z') break; + } + } + segEnd++; + } + + int segLen = segEnd - segStart; + if (segLen > 0 && segLen < nFull) { + int tokLen = segLen < MCSRC_TOK_BUF ? segLen : MCSRC_TOK_BUF - 1; + for (i = 0; i < tokLen; i++) { + aBuf[i] = (char)mcsrc_lower((unsigned char)p[segStart + i]); + } + rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, aBuf, tokLen, iStart, iEnd); + if (rc != SQLITE_OK) return rc; + } + + segStart = segEnd; + } + + return SQLITE_OK; +} + +int mcsrc_tokenizer_create(void *z, const char **argv, int argc, Fts5Tokenizer **out) +{ + McsrcTokenizer *p = (McsrcTokenizer*)sqlite3_malloc(sizeof(McsrcTokenizer)); + if (!p) return SQLITE_NOMEM; + p->iVersion = 1; + *out = (Fts5Tokenizer*)p; + return SQLITE_OK; +} + +void mcsrc_tokenizer_delete(Fts5Tokenizer *self) { + sqlite3_free(self); +} + +int mcsrc_tokenizer_tokenize(Fts5Tokenizer *self, + void *pCtx, + int flags, + const char *pText, int nText, + const char *pLocale, int nLocale, + int (*xToken)(void *pCtx, + int tflags, + const char *pToken, + int nToken, + int iStart, + int iEnd)) +{ + int i = 0; + int rc = SQLITE_OK; + + while (i < nText && rc == SQLITE_OK) { + unsigned char c = (unsigned char)pText[i]; + + /* Whitespace */ + if (c <= ' ') { i++; continue; } + + /* Line comment: skip to end of line */ + if (c == '/' && i + 1 < nText && pText[i+1] == '/') { + i += 2; + while (i < nText && pText[i] != '\n') i++; + continue; + } + + /* Block / doc comment: tokenize words inside for Javadoc search */ + if (c == '/' && i + 1 < nText && pText[i+1] == '*') { + i += 2; + while (i + 1 < nText && !(pText[i] == '*' && pText[i+1] == '/')) { + unsigned char cc = (unsigned char)pText[i]; + if (mcsrc_is_ident_start(cc)) { + int wStart = i; + while (i < nText && + !(pText[i] == '*' && i + 1 < nText && pText[i+1] == '/') && + mcsrc_is_ident_cont((unsigned char)pText[i])) i++; + rc = mcsrc_emit_ident(pText, wStart, i, pCtx, xToken); + } else { + i++; + } + } + if (i + 1 < nText) i += 2; + continue; + } + + /* String literal: tokenize identifier-like words inside */ + if (c == '"') { + i++; + while (i < nText && pText[i] != '"') { + if (pText[i] == '\\') { i += 2; continue; } + unsigned char sc = (unsigned char)pText[i]; + if (mcsrc_is_ident_start(sc)) { + int wStart = i; + while (i < nText && pText[i] != '"' && pText[i] != '\\' && + mcsrc_is_ident_cont((unsigned char)pText[i])) i++; + rc = mcsrc_emit_ident(pText, wStart, i, pCtx, xToken); + } else { + i++; + } + } + if (i < nText) i++; + continue; + } + + /* Char literal: skip */ + if (c == '\'') { + i++; + while (i < nText && pText[i] != '\'') { + if (pText[i] == '\\') { i += 2; continue; } + i++; + } + if (i < nText) i++; + continue; + } + + /* Identifier (including keywords and annotations after @) */ + if (mcsrc_is_ident_start(c)) { + int idStart = i; + while (i < nText && mcsrc_is_ident_cont((unsigned char)pText[i])) i++; + rc = mcsrc_emit_ident(pText, idStart, i, pCtx, xToken); + continue; + } + + /* Number literal (decimal, hex, float) */ + if (c >= '0' && c <= '9') { + char aBuf[MCSRC_TOK_BUF]; + int numStart = i; + unsigned char nc; + + if (c == '0' && i + 1 < nText && (pText[i+1] == 'x' || pText[i+1] == 'X')) { + /* Hexadecimal */ + i += 2; + while (i < nText) { + nc = (unsigned char)pText[i]; + if ((nc >= '0' && nc <= '9') || + (nc >= 'a' && nc <= 'f') || + (nc >= 'A' && nc <= 'F') || nc == '_') i++; + else break; + } + } else { + /* Decimal / float */ + while (i < nText) { + nc = (unsigned char)pText[i]; + if ((nc >= '0' && nc <= '9') || nc == '_' || nc == '.') { + i++; + } else if (nc == 'e' || nc == 'E') { + i++; + if (i < nText && (pText[i] == '+' || pText[i] == '-')) i++; + } else break; + } + } + /* Numeric suffix: L l F f D d */ + while (i < nText) { + nc = (unsigned char)pText[i]; + if (nc=='L'||nc=='l'||nc=='F'||nc=='f'||nc=='D'||nc=='d') i++; + else break; + } + + int numLen = i - numStart; + int tokLen = numLen < MCSRC_TOK_BUF ? numLen : MCSRC_TOK_BUF - 1; + int j; + for (j = 0; j < tokLen; j++) { + aBuf[j] = (char)mcsrc_lower((unsigned char)pText[numStart + j]); + } + rc = xToken(pCtx, 0, aBuf, tokLen, numStart, i); + continue; + } + + /* Skip @ (annotation prefix), operators, punctuation */ + i++; + } + + return (rc == SQLITE_DONE) ? SQLITE_OK : rc; +} + +struct fts5_tokenizer_v2 mcsrc_tokenizer = { + .iVersion = 2, + .xCreate = mcsrc_tokenizer_create, + .xDelete = mcsrc_tokenizer_delete, + .xTokenize = mcsrc_tokenizer_tokenize +}; + +// -------------------------------------------------- +// OFFSET FUNCTION + +typedef struct { + int iStart; + int iEnd; +} TokenPos; + +typedef struct { + TokenPos *aPos; + int nPos; + int nAlloc; + int bTokenized; + const char *pText; /* NOT owned; valid for duration of aux function call */ + int nText; +} ColTokens; + +static int mcsrc_offsets_token_cb(void *pCtx, int tflags, const char *pToken, + int nToken, int iStart, int iEnd) { + ColTokens *p = (ColTokens*)pCtx; + if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK; + if (p->nPos >= p->nAlloc) { + int nNew = p->nAlloc ? p->nAlloc * 2 : 64; + TokenPos *aNew = (TokenPos*)sqlite3_realloc(p->aPos, nNew * sizeof(TokenPos)); + if (!aNew) return SQLITE_NOMEM; + p->aPos = aNew; + p->nAlloc = nNew; + } + p->aPos[p->nPos].iStart = iStart; + p->aPos[p->nPos].iEnd = iEnd; + p->nPos++; + return SQLITE_OK; +} + +/* Returns records of "col phrase byteoffset bytesize snippet_length\nsnippet" +** for each phrase match in the current row. +** Parameters (all optional): +** pre - string inserted before the matched token in the snippet (default "") +** post - string inserted after the matched token in the snippet (default "") +** ellipsis - prefix/suffix when snippet does not reach the text boundary (default "...") +** maxToken - total tokens in context window around the match (default 11) +*/ +static void mcsrc_offsets(const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + sqlite3_context *pCtx, + int nVal, + sqlite3_value **apVal) +{ + const char *zPre = (nVal > 0) ? (const char*)sqlite3_value_text(apVal[0]) : ""; + const char *zPost= (nVal > 1) ? (const char*)sqlite3_value_text(apVal[1]) : ""; + const char *zEll = (nVal > 2) ? (const char*)sqlite3_value_text(apVal[2]) : "..."; + int maxToken = (nVal > 3) ? sqlite3_value_int(apVal[3]) : 11; + if (!zPre) zPre = ""; + if (!zPost) zPost = ""; + if (!zEll) zEll = "..."; + if (maxToken <= 0) maxToken = 11; + int ctxPre = (maxToken - 1) / 2; + int ctxPost = maxToken - 1 - ctxPre; + + int rc = SQLITE_OK; + int nInst = 0; + int nCol = 0; + int i; + ColTokens *aCols = 0; + sqlite3_str *pStr = 0; + + pStr = sqlite3_str_new(0); + if (!pStr) { sqlite3_result_error_nomem(pCtx); return; } + + rc = pApi->xInstCount(pFts, &nInst); + if (rc != SQLITE_OK) goto done; + + if (nInst == 0) goto done; + + nCol = pApi->xColumnCount(pFts); + aCols = (ColTokens*)sqlite3_malloc(nCol * sizeof(ColTokens)); + if (!aCols) { rc = SQLITE_NOMEM; goto done; } + memset(aCols, 0, nCol * sizeof(ColTokens)); + + for (i = 0; i < nInst; i++) { + int iPhrase, iCol, iOff; + int byteStart, byteSize; + int rs, re; + + rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff); + if (rc != SQLITE_OK) goto done; + + if (!aCols[iCol].bTokenized) { + const char *pText = 0; + int nText = 0; + rc = pApi->xColumnText(pFts, iCol, &pText, &nText); + if (rc != SQLITE_OK) goto done; + if (pText && nText > 0) { + aCols[iCol].pText = pText; + aCols[iCol].nText = nText; + rc = pApi->xTokenize(pFts, pText, nText, &aCols[iCol], + mcsrc_offsets_token_cb); + if (rc != SQLITE_OK && rc != SQLITE_DONE) goto done; + rc = SQLITE_OK; + } + aCols[iCol].bTokenized = 1; + } + + if (iOff < 0 || iOff >= aCols[iCol].nPos) continue; + + byteStart = aCols[iCol].aPos[iOff].iStart; + byteSize = aCols[iCol].aPos[iOff].iEnd - aCols[iCol].aPos[iOff].iStart; + + /* Compute snippet window [rs, re) in token space around match at iOff */ + rs = iOff - ctxPre; if (rs < 0) rs = 0; + re = iOff + ctxPost + 1; if (re > aCols[iCol].nPos) re = aCols[iCol].nPos; + + /* Build snippet into a temp str, then emit header + snippet */ + { + sqlite3_str *pSnip = sqlite3_str_new(0); + if (!pSnip) { rc = SQLITE_NOMEM; goto done; } + if (aCols[iCol].pText && re > rs) { + const char *pTxt = aCols[iCol].pText; + TokenPos *aPos = aCols[iCol].aPos; + /* leading ellipsis */ + if (rs > 0) sqlite3_str_appendall(pSnip, zEll); + /* context before match */ + if (rs < iOff) + sqlite3_str_append(pSnip, pTxt + aPos[rs].iStart, + aPos[iOff].iStart - aPos[rs].iStart); + /* matched token wrapped with pre/post */ + sqlite3_str_appendall(pSnip, zPre); + sqlite3_str_append(pSnip, pTxt + aPos[iOff].iStart, + aPos[iOff].iEnd - aPos[iOff].iStart); + sqlite3_str_appendall(pSnip, zPost); + /* context after match */ + if (iOff + 1 < re) + sqlite3_str_append(pSnip, pTxt + aPos[iOff].iEnd, + aPos[re - 1].iEnd - aPos[iOff].iEnd); + /* trailing ellipsis */ + if (re < aCols[iCol].nPos) sqlite3_str_appendall(pSnip, zEll); + } + { + int nSnip = sqlite3_str_length(pSnip); + char *zSnip = sqlite3_str_finish(pSnip); + if (!zSnip) { rc = SQLITE_NOMEM; goto done; } + sqlite3_str_appendf(pStr, "%d %d %d %d %d\n", + iCol, iPhrase, byteStart, byteSize, nSnip); + sqlite3_str_append(pStr, zSnip, nSnip); + sqlite3_free(zSnip); + } + } + } + +done: + if (aCols) { + for (i = 0; i < nCol; i++) sqlite3_free(aCols[i].aPos); + sqlite3_free(aCols); + } + if (rc != SQLITE_OK) { + sqlite3_free(sqlite3_str_finish(pStr)); + sqlite3_result_error_code(pCtx, rc); + } else { + char *zStr = sqlite3_str_finish(pStr); + if (!zStr) { + sqlite3_result_error_nomem(pCtx); + } else { + sqlite3_result_text(pCtx, zStr, -1, sqlite3_free); + } + } +} + +// -------------------------------------------------- +// ENTRYPOINT + +int mcsrc_entry_point(sqlite3 *db, char **pzErrMsg, const struct sqlite3_api_routines *pThunk) +{ + int res; + + fts5_api *fts5 = 0; + { + sqlite3_stmt *stmt = 0; + res = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &stmt, 0); + if (res != SQLITE_OK) return res; + + sqlite3_bind_pointer(stmt, 1, (void*) &fts5, "fts5_api_ptr", 0); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + } + + if (!fts5) return SQLITE_ERROR; + + res = fts5->xCreateTokenizer_v2(fts5, "mcsrc_tokenizer", 0, &mcsrc_tokenizer, 0); + if (res != SQLITE_OK) return res; + + res = fts5->xCreateFunction(fts5, "mcsrc_offsets", 0, &mcsrc_offsets, 0); + if (res != SQLITE_OK) return res; + + return SQLITE_OK; +} + +int sqlite3_wasm_extra_init(const char *z) +{ + sqlite3_auto_extension((void*) &mcsrc_entry_point); + return 0; +} diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index ff95748..5b8b110 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -1,5 +1,5 @@ import * as Comlink from "comlink"; -import sqlite3InitModule, { type Database } from "@sqlite.org/sqlite-wasm"; +import sqlite3InitModule, { type Database } from "mcsrc-sqlite"; /** https://www.sqlite.org/fts5.html#the_snippet_function */ export interface FullTextSearchOptions { From a944f27727364d9a7a1056ec552d4c8cb79d963f Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Mon, 4 May 2026 00:51:24 +0700 Subject: [PATCH 09/10] regex --- package-lock.json | 27 +- package.json | 5 - packages/mcsrc-sqlite/.gitignore | 1 - packages/mcsrc-sqlite/Dockerfile | 36 -- packages/mcsrc-sqlite/index.d.ts | 2 - packages/mcsrc-sqlite/index.js | 1 - packages/mcsrc-sqlite/package.json | 13 - .../mcsrc-sqlite/sqlite3_wasm_extra_init.c | 454 ------------------ src/ui/FullTextSearchModal.tsx | 31 +- src/workers/full-text-search/client.ts | 17 +- src/workers/full-text-search/worker.ts | 123 ++--- 11 files changed, 90 insertions(+), 620 deletions(-) delete mode 100644 packages/mcsrc-sqlite/.gitignore delete mode 100644 packages/mcsrc-sqlite/Dockerfile delete mode 100644 packages/mcsrc-sqlite/index.d.ts delete mode 100644 packages/mcsrc-sqlite/index.js delete mode 100644 packages/mcsrc-sqlite/package.json delete mode 100644 packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c diff --git a/package-lock.json b/package-lock.json index 54ab947..1632351 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,10 +7,6 @@ "": { "name": "mcsrc", "version": "0.0.0", - "workspaces": [ - ".", - "packages/*" - ], "dependencies": { "@katana-project/zip": "^0.7.1", "@monaco-editor/react": "^4.7.0", @@ -20,7 +16,6 @@ "comlink": "^4.4.2", "dagre": "^0.8.5", "dexie": "^4.3.0", - "mcsrc-sqlite": "workspace:*", "monaco-editor": "^0.55.1", "react": "^19.2.4", "react-dom": "^19.2.4", @@ -3090,19 +3085,6 @@ "dev": true, "license": "CC0-1.0" }, - "node_modules/@sqlite.org/sqlite-wasm": { - "version": "3.53.0-build1", - "resolved": "https://registry.npmjs.org/@sqlite.org/sqlite-wasm/-/sqlite-wasm-3.53.0-build1.tgz", - "integrity": "sha512-PfWPWN2n+/37doa8oh2/oUXk4OOsRYZsxc1W1sDXIGb/Pu5Yrb+f2eyYpgQMGITVX7HVgxhs9P18Rc6I97ym/g==", - "dev": true, - "license": "Apache-2.0", - "workspaces": [ - "demos/*" - ], - "engines": { - "node": ">=22" - } - }, "node_modules/@standard-schema/spec": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz", @@ -4699,14 +4681,6 @@ "node": ">= 18" } }, - "node_modules/mcsrc": { - "resolved": "", - "link": true - }, - "node_modules/mcsrc-sqlite": { - "resolved": "packages/mcsrc-sqlite", - "link": true - }, "node_modules/miniflare": { "version": "4.20260312.0", "resolved": "https://registry.npmjs.org/miniflare/-/miniflare-4.20260312.0.tgz", @@ -5741,6 +5715,7 @@ }, "packages/mcsrc-sqlite": { "version": "0.0.0", + "extraneous": true, "devDependencies": { "@sqlite.org/sqlite-wasm": "^3.53.0-build1" } diff --git a/package.json b/package.json index 72fe10e..b45ecf3 100644 --- a/package.json +++ b/package.json @@ -3,10 +3,6 @@ "private": true, "version": "0.0.0", "type": "module", - "workspaces": [ - ".", - "packages/*" - ], "scripts": { "dev": "vite", "dev:javadoc": "vite --mode javadoc", @@ -29,7 +25,6 @@ "comlink": "^4.4.2", "dagre": "^0.8.5", "dexie": "^4.3.0", - "mcsrc-sqlite": "workspace:*", "monaco-editor": "^0.55.1", "react": "^19.2.4", "react-dom": "^19.2.4", diff --git a/packages/mcsrc-sqlite/.gitignore b/packages/mcsrc-sqlite/.gitignore deleted file mode 100644 index 7773828..0000000 --- a/packages/mcsrc-sqlite/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dist/ \ No newline at end of file diff --git a/packages/mcsrc-sqlite/Dockerfile b/packages/mcsrc-sqlite/Dockerfile deleted file mode 100644 index 356943c..0000000 --- a/packages/mcsrc-sqlite/Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -FROM emscripten/emsdk:5.0.6 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - tcl \ - wabt \ - zip \ - curl \ - unzip \ - openssl \ - && rm -rf /var/lib/apt/lists/* - -USER ubuntu -WORKDIR /home/ubuntu/build - -ARG SQLITE_NAME=sqlite-src-3530000 -ARG SQLITE_URL=https://www.sqlite.org/2026/${SQLITE_NAME}.zip -ARG SQLITE_SHA3=4ffbd00ba8db1e1172dbc69a5203a2c185556a32543e58585ba3713abf676fe5 - -RUN curl -L -o sqlite-src.zip ${SQLITE_URL} && \ - actual=$(openssl dgst -sha3-256 sqlite-src.zip | awk '{print $2}') && \ - if [ "$actual" != "${SQLITE_SHA3}" ]; then \ - echo "SHA3 verification failed: $actual"; \ - rm sqlite-src.zip; \ - exit 1; \ - fi && \ - unzip -o sqlite-src.zip && \ - mv ${SQLITE_NAME} sqlite-src - -RUN cd sqlite-src && ./configure && make sqlite3.c - -COPY sqlite3_wasm_extra_init.c sqlite-src/ext/wasm/sqlite3_wasm_extra_init.c - -RUN make -C sqlite-src/ext/wasm npm - -FROM scratch -COPY --from=0 /home/ubuntu/build/sqlite-src/ext/wasm/jswasm/ / diff --git a/packages/mcsrc-sqlite/index.d.ts b/packages/mcsrc-sqlite/index.d.ts deleted file mode 100644 index 101884c..0000000 --- a/packages/mcsrc-sqlite/index.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { default } from "@sqlite.org/sqlite-wasm"; -export * from "@sqlite.org/sqlite-wasm"; diff --git a/packages/mcsrc-sqlite/index.js b/packages/mcsrc-sqlite/index.js deleted file mode 100644 index f03e578..0000000 --- a/packages/mcsrc-sqlite/index.js +++ /dev/null @@ -1 +0,0 @@ -export { default } from "./dist/sqlite3-bundler-friendly.mjs"; diff --git a/packages/mcsrc-sqlite/package.json b/packages/mcsrc-sqlite/package.json deleted file mode 100644 index 4f47c13..0000000 --- a/packages/mcsrc-sqlite/package.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "mcsrc-sqlite", - "private": true, - "version": "0.0.0", - "type": "module", - "main": "index.js", - "scripts": { - "build": "docker build --output dist/ ." - }, - "devDependencies": { - "@sqlite.org/sqlite-wasm": "^3.53.0-build1" - } -} diff --git a/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c b/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c deleted file mode 100644 index 233c01e..0000000 --- a/packages/mcsrc-sqlite/sqlite3_wasm_extra_init.c +++ /dev/null @@ -1,454 +0,0 @@ -#include "sqlite3.h" -#include - -// -------------------------------------------------- -// TOKENIZER - -#define MCSRC_TOK_BUF 512 - -typedef struct McsrcTokenizer { - int iVersion; -} McsrcTokenizer; - -static int mcsrc_lower(unsigned char c) { - return (c >= 'A' && c <= 'Z') ? c + 32 : c; -} - -static int mcsrc_is_ident_start(unsigned char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c > 127; -} - -static int mcsrc_is_ident_cont(unsigned char c) { - return mcsrc_is_ident_start(c) || (c >= '0' && c <= '9'); -} - -/* -** Emit a Java identifier as: -** 1. full token lowercased (position N) -** 2. camelCase/underscore parts lowercased, colocated at position N -** -** Examples: -** getUserName -> "getusername", "get", "user", "name" -** parseXMLDoc -> "parsexmldoc", "parse", "xml", "doc" -** MAX_VALUE -> "max_value", "max", "value" -** HTML5Parser -> "html5parser", "html5", "parser" -*/ -static int mcsrc_emit_ident(const char *pText, int iStart, int iEnd, - void *pCtx, - int (*xToken)(void*, int, const char*, int, int, int)) -{ - char aBuf[MCSRC_TOK_BUF]; - const char *p = pText + iStart; - int n = iEnd - iStart; - int i, segStart, segEnd, rc; - - /* Emit full identifier lowercased */ - int nFull = n < MCSRC_TOK_BUF ? n : MCSRC_TOK_BUF - 1; - for (i = 0; i < nFull; i++) aBuf[i] = (char)mcsrc_lower((unsigned char)p[i]); - rc = xToken(pCtx, 0, aBuf, nFull, iStart, iEnd); - if (rc != SQLITE_OK) return rc; - - /* Emit split parts as colocated tokens */ - segStart = 0; - while (segStart < nFull) { - /* Skip leading underscores between segments */ - while (segStart < nFull && p[segStart] == '_') segStart++; - if (segStart >= nFull) break; - - segEnd = segStart + 1; - while (segEnd < nFull) { - unsigned char cur = (unsigned char)p[segEnd]; - unsigned char prev = (unsigned char)p[segEnd - 1]; - - if (cur == '_') break; - - /* letter -> digit: split (item3d -> item|3d) */ - if ((cur >= '0' && cur <= '9') && - ((prev >= 'a' && prev <= 'z') || (prev >= 'A' && prev <= 'Z'))) break; - - if (cur >= 'A' && cur <= 'Z') { - /* lower/digit -> upper: new word starts here */ - if ((prev >= 'a' && prev <= 'z') || (prev >= '0' && prev <= '9')) break; - /* upper run -> upper+lower: split before last upper (XMLDoc -> XML|Doc) */ - if ((prev >= 'A' && prev <= 'Z') && segEnd + 1 < nFull) { - unsigned char next = (unsigned char)p[segEnd + 1]; - if (next >= 'a' && next <= 'z') break; - } - } - segEnd++; - } - - int segLen = segEnd - segStart; - if (segLen > 0 && segLen < nFull) { - int tokLen = segLen < MCSRC_TOK_BUF ? segLen : MCSRC_TOK_BUF - 1; - for (i = 0; i < tokLen; i++) { - aBuf[i] = (char)mcsrc_lower((unsigned char)p[segStart + i]); - } - rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, aBuf, tokLen, iStart, iEnd); - if (rc != SQLITE_OK) return rc; - } - - segStart = segEnd; - } - - return SQLITE_OK; -} - -int mcsrc_tokenizer_create(void *z, const char **argv, int argc, Fts5Tokenizer **out) -{ - McsrcTokenizer *p = (McsrcTokenizer*)sqlite3_malloc(sizeof(McsrcTokenizer)); - if (!p) return SQLITE_NOMEM; - p->iVersion = 1; - *out = (Fts5Tokenizer*)p; - return SQLITE_OK; -} - -void mcsrc_tokenizer_delete(Fts5Tokenizer *self) { - sqlite3_free(self); -} - -int mcsrc_tokenizer_tokenize(Fts5Tokenizer *self, - void *pCtx, - int flags, - const char *pText, int nText, - const char *pLocale, int nLocale, - int (*xToken)(void *pCtx, - int tflags, - const char *pToken, - int nToken, - int iStart, - int iEnd)) -{ - int i = 0; - int rc = SQLITE_OK; - - while (i < nText && rc == SQLITE_OK) { - unsigned char c = (unsigned char)pText[i]; - - /* Whitespace */ - if (c <= ' ') { i++; continue; } - - /* Line comment: skip to end of line */ - if (c == '/' && i + 1 < nText && pText[i+1] == '/') { - i += 2; - while (i < nText && pText[i] != '\n') i++; - continue; - } - - /* Block / doc comment: tokenize words inside for Javadoc search */ - if (c == '/' && i + 1 < nText && pText[i+1] == '*') { - i += 2; - while (i + 1 < nText && !(pText[i] == '*' && pText[i+1] == '/')) { - unsigned char cc = (unsigned char)pText[i]; - if (mcsrc_is_ident_start(cc)) { - int wStart = i; - while (i < nText && - !(pText[i] == '*' && i + 1 < nText && pText[i+1] == '/') && - mcsrc_is_ident_cont((unsigned char)pText[i])) i++; - rc = mcsrc_emit_ident(pText, wStart, i, pCtx, xToken); - } else { - i++; - } - } - if (i + 1 < nText) i += 2; - continue; - } - - /* String literal: tokenize identifier-like words inside */ - if (c == '"') { - i++; - while (i < nText && pText[i] != '"') { - if (pText[i] == '\\') { i += 2; continue; } - unsigned char sc = (unsigned char)pText[i]; - if (mcsrc_is_ident_start(sc)) { - int wStart = i; - while (i < nText && pText[i] != '"' && pText[i] != '\\' && - mcsrc_is_ident_cont((unsigned char)pText[i])) i++; - rc = mcsrc_emit_ident(pText, wStart, i, pCtx, xToken); - } else { - i++; - } - } - if (i < nText) i++; - continue; - } - - /* Char literal: skip */ - if (c == '\'') { - i++; - while (i < nText && pText[i] != '\'') { - if (pText[i] == '\\') { i += 2; continue; } - i++; - } - if (i < nText) i++; - continue; - } - - /* Identifier (including keywords and annotations after @) */ - if (mcsrc_is_ident_start(c)) { - int idStart = i; - while (i < nText && mcsrc_is_ident_cont((unsigned char)pText[i])) i++; - rc = mcsrc_emit_ident(pText, idStart, i, pCtx, xToken); - continue; - } - - /* Number literal (decimal, hex, float) */ - if (c >= '0' && c <= '9') { - char aBuf[MCSRC_TOK_BUF]; - int numStart = i; - unsigned char nc; - - if (c == '0' && i + 1 < nText && (pText[i+1] == 'x' || pText[i+1] == 'X')) { - /* Hexadecimal */ - i += 2; - while (i < nText) { - nc = (unsigned char)pText[i]; - if ((nc >= '0' && nc <= '9') || - (nc >= 'a' && nc <= 'f') || - (nc >= 'A' && nc <= 'F') || nc == '_') i++; - else break; - } - } else { - /* Decimal / float */ - while (i < nText) { - nc = (unsigned char)pText[i]; - if ((nc >= '0' && nc <= '9') || nc == '_' || nc == '.') { - i++; - } else if (nc == 'e' || nc == 'E') { - i++; - if (i < nText && (pText[i] == '+' || pText[i] == '-')) i++; - } else break; - } - } - /* Numeric suffix: L l F f D d */ - while (i < nText) { - nc = (unsigned char)pText[i]; - if (nc=='L'||nc=='l'||nc=='F'||nc=='f'||nc=='D'||nc=='d') i++; - else break; - } - - int numLen = i - numStart; - int tokLen = numLen < MCSRC_TOK_BUF ? numLen : MCSRC_TOK_BUF - 1; - int j; - for (j = 0; j < tokLen; j++) { - aBuf[j] = (char)mcsrc_lower((unsigned char)pText[numStart + j]); - } - rc = xToken(pCtx, 0, aBuf, tokLen, numStart, i); - continue; - } - - /* Skip @ (annotation prefix), operators, punctuation */ - i++; - } - - return (rc == SQLITE_DONE) ? SQLITE_OK : rc; -} - -struct fts5_tokenizer_v2 mcsrc_tokenizer = { - .iVersion = 2, - .xCreate = mcsrc_tokenizer_create, - .xDelete = mcsrc_tokenizer_delete, - .xTokenize = mcsrc_tokenizer_tokenize -}; - -// -------------------------------------------------- -// OFFSET FUNCTION - -typedef struct { - int iStart; - int iEnd; -} TokenPos; - -typedef struct { - TokenPos *aPos; - int nPos; - int nAlloc; - int bTokenized; - const char *pText; /* NOT owned; valid for duration of aux function call */ - int nText; -} ColTokens; - -static int mcsrc_offsets_token_cb(void *pCtx, int tflags, const char *pToken, - int nToken, int iStart, int iEnd) { - ColTokens *p = (ColTokens*)pCtx; - if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK; - if (p->nPos >= p->nAlloc) { - int nNew = p->nAlloc ? p->nAlloc * 2 : 64; - TokenPos *aNew = (TokenPos*)sqlite3_realloc(p->aPos, nNew * sizeof(TokenPos)); - if (!aNew) return SQLITE_NOMEM; - p->aPos = aNew; - p->nAlloc = nNew; - } - p->aPos[p->nPos].iStart = iStart; - p->aPos[p->nPos].iEnd = iEnd; - p->nPos++; - return SQLITE_OK; -} - -/* Returns records of "col phrase byteoffset bytesize snippet_length\nsnippet" -** for each phrase match in the current row. -** Parameters (all optional): -** pre - string inserted before the matched token in the snippet (default "") -** post - string inserted after the matched token in the snippet (default "") -** ellipsis - prefix/suffix when snippet does not reach the text boundary (default "...") -** maxToken - total tokens in context window around the match (default 11) -*/ -static void mcsrc_offsets(const Fts5ExtensionApi *pApi, - Fts5Context *pFts, - sqlite3_context *pCtx, - int nVal, - sqlite3_value **apVal) -{ - const char *zPre = (nVal > 0) ? (const char*)sqlite3_value_text(apVal[0]) : ""; - const char *zPost= (nVal > 1) ? (const char*)sqlite3_value_text(apVal[1]) : ""; - const char *zEll = (nVal > 2) ? (const char*)sqlite3_value_text(apVal[2]) : "..."; - int maxToken = (nVal > 3) ? sqlite3_value_int(apVal[3]) : 11; - if (!zPre) zPre = ""; - if (!zPost) zPost = ""; - if (!zEll) zEll = "..."; - if (maxToken <= 0) maxToken = 11; - int ctxPre = (maxToken - 1) / 2; - int ctxPost = maxToken - 1 - ctxPre; - - int rc = SQLITE_OK; - int nInst = 0; - int nCol = 0; - int i; - ColTokens *aCols = 0; - sqlite3_str *pStr = 0; - - pStr = sqlite3_str_new(0); - if (!pStr) { sqlite3_result_error_nomem(pCtx); return; } - - rc = pApi->xInstCount(pFts, &nInst); - if (rc != SQLITE_OK) goto done; - - if (nInst == 0) goto done; - - nCol = pApi->xColumnCount(pFts); - aCols = (ColTokens*)sqlite3_malloc(nCol * sizeof(ColTokens)); - if (!aCols) { rc = SQLITE_NOMEM; goto done; } - memset(aCols, 0, nCol * sizeof(ColTokens)); - - for (i = 0; i < nInst; i++) { - int iPhrase, iCol, iOff; - int byteStart, byteSize; - int rs, re; - - rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff); - if (rc != SQLITE_OK) goto done; - - if (!aCols[iCol].bTokenized) { - const char *pText = 0; - int nText = 0; - rc = pApi->xColumnText(pFts, iCol, &pText, &nText); - if (rc != SQLITE_OK) goto done; - if (pText && nText > 0) { - aCols[iCol].pText = pText; - aCols[iCol].nText = nText; - rc = pApi->xTokenize(pFts, pText, nText, &aCols[iCol], - mcsrc_offsets_token_cb); - if (rc != SQLITE_OK && rc != SQLITE_DONE) goto done; - rc = SQLITE_OK; - } - aCols[iCol].bTokenized = 1; - } - - if (iOff < 0 || iOff >= aCols[iCol].nPos) continue; - - byteStart = aCols[iCol].aPos[iOff].iStart; - byteSize = aCols[iCol].aPos[iOff].iEnd - aCols[iCol].aPos[iOff].iStart; - - /* Compute snippet window [rs, re) in token space around match at iOff */ - rs = iOff - ctxPre; if (rs < 0) rs = 0; - re = iOff + ctxPost + 1; if (re > aCols[iCol].nPos) re = aCols[iCol].nPos; - - /* Build snippet into a temp str, then emit header + snippet */ - { - sqlite3_str *pSnip = sqlite3_str_new(0); - if (!pSnip) { rc = SQLITE_NOMEM; goto done; } - if (aCols[iCol].pText && re > rs) { - const char *pTxt = aCols[iCol].pText; - TokenPos *aPos = aCols[iCol].aPos; - /* leading ellipsis */ - if (rs > 0) sqlite3_str_appendall(pSnip, zEll); - /* context before match */ - if (rs < iOff) - sqlite3_str_append(pSnip, pTxt + aPos[rs].iStart, - aPos[iOff].iStart - aPos[rs].iStart); - /* matched token wrapped with pre/post */ - sqlite3_str_appendall(pSnip, zPre); - sqlite3_str_append(pSnip, pTxt + aPos[iOff].iStart, - aPos[iOff].iEnd - aPos[iOff].iStart); - sqlite3_str_appendall(pSnip, zPost); - /* context after match */ - if (iOff + 1 < re) - sqlite3_str_append(pSnip, pTxt + aPos[iOff].iEnd, - aPos[re - 1].iEnd - aPos[iOff].iEnd); - /* trailing ellipsis */ - if (re < aCols[iCol].nPos) sqlite3_str_appendall(pSnip, zEll); - } - { - int nSnip = sqlite3_str_length(pSnip); - char *zSnip = sqlite3_str_finish(pSnip); - if (!zSnip) { rc = SQLITE_NOMEM; goto done; } - sqlite3_str_appendf(pStr, "%d %d %d %d %d\n", - iCol, iPhrase, byteStart, byteSize, nSnip); - sqlite3_str_append(pStr, zSnip, nSnip); - sqlite3_free(zSnip); - } - } - } - -done: - if (aCols) { - for (i = 0; i < nCol; i++) sqlite3_free(aCols[i].aPos); - sqlite3_free(aCols); - } - if (rc != SQLITE_OK) { - sqlite3_free(sqlite3_str_finish(pStr)); - sqlite3_result_error_code(pCtx, rc); - } else { - char *zStr = sqlite3_str_finish(pStr); - if (!zStr) { - sqlite3_result_error_nomem(pCtx); - } else { - sqlite3_result_text(pCtx, zStr, -1, sqlite3_free); - } - } -} - -// -------------------------------------------------- -// ENTRYPOINT - -int mcsrc_entry_point(sqlite3 *db, char **pzErrMsg, const struct sqlite3_api_routines *pThunk) -{ - int res; - - fts5_api *fts5 = 0; - { - sqlite3_stmt *stmt = 0; - res = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &stmt, 0); - if (res != SQLITE_OK) return res; - - sqlite3_bind_pointer(stmt, 1, (void*) &fts5, "fts5_api_ptr", 0); - sqlite3_step(stmt); - sqlite3_finalize(stmt); - } - - if (!fts5) return SQLITE_ERROR; - - res = fts5->xCreateTokenizer_v2(fts5, "mcsrc_tokenizer", 0, &mcsrc_tokenizer, 0); - if (res != SQLITE_OK) return res; - - res = fts5->xCreateFunction(fts5, "mcsrc_offsets", 0, &mcsrc_offsets, 0); - if (res != SQLITE_OK) return res; - - return SQLITE_OK; -} - -int sqlite3_wasm_extra_init(const char *z) -{ - sqlite3_auto_extension((void*) &mcsrc_entry_point); - return 0; -} diff --git a/src/ui/FullTextSearchModal.tsx b/src/ui/FullTextSearchModal.tsx index a34378a..09afe1a 100644 --- a/src/ui/FullTextSearchModal.tsx +++ b/src/ui/FullTextSearchModal.tsx @@ -13,10 +13,39 @@ type SearchState = | { state: "ok"; results: FullTextSearchResult[]; } | { state: "error"; error: string; }; +function parseRegexQuery(input: string): { pattern: string; flags: string } | null { + const m = input.match(/^\/(.+?)(?:\/([gimsuy]*))?$/); + if (!m) return null; + try { + new RegExp(m[1], m[2] ?? ""); + return { pattern: m[1], flags: m[2] ?? "" }; + } catch { + return null; + } +} + const query = new BehaviorSubject(""); const search$ = combineLatest([fullTextSearch, query]).pipe( distinctUntilChanged(), switchMap(([fts, query]) => { + if (query.startsWith("/")) { + if (query.length < 2) return of(SearchState({ + state: "error", + error: "Enter a regex pattern: /pattern/flags" + })); + + const regex = parseRegexQuery(query); + if (!regex) return of(SearchState({ + state: "error", + error: "Invalid regex" + })); + + return from(fts.findByRegex(regex.pattern, regex.flags, { maxTokens: 11 })).pipe( + map(results => SearchState({ state: "ok", results })), + startWith(SearchState({ state: "loading" })), + catchError(error => of(SearchState({ state: "error", error: String(error) })))); + } + if (query.length < 3) return of(SearchState({ state: "error", error: "Query must be at least 3 characters" @@ -122,7 +151,7 @@ const FullTextSearchModal = () => { query.next(q.trim())} />
{ + mergeMap(async ([jar, _]) => { if (currentInstance) { await currentInstance.destroy(); } @@ -19,6 +21,10 @@ export const fullTextSearch = minecraftJar.pipe( shareReplay({ bufferSize: 1, refCount: false }) ); +export function invalidateFullTextSearch() { + invalidator.next(invalidator.value + 1); +} + export class FullTextSearch { readonly #jar: MinecraftJar; constructor(jar: MinecraftJar) { @@ -53,4 +59,9 @@ export class FullTextSearch { const worker = await this.#worker(); return await worker.find(query, options); } + + async findByRegex(pattern: string, flags: string, options?: FullTextSearchOptions): Promise { + const worker = await this.#worker(); + return await worker.findByRegex(pattern, flags, options); + } } diff --git a/src/workers/full-text-search/worker.ts b/src/workers/full-text-search/worker.ts index 5b8b110..e9fbaaa 100644 --- a/src/workers/full-text-search/worker.ts +++ b/src/workers/full-text-search/worker.ts @@ -1,7 +1,5 @@ import * as Comlink from "comlink"; -import sqlite3InitModule, { type Database } from "mcsrc-sqlite"; -/** https://www.sqlite.org/fts5.html#the_snippet_function */ export interface FullTextSearchOptions { pre?: string; post?: string; @@ -21,98 +19,67 @@ export interface FullTextSearchResult { } export class FullTextSearchWorker { - #db?: Database; + #sources = new Map(); #enc = new TextEncoder(); - #dec = new TextDecoder(); - - async init(name: string): Promise { - try { - console.log("Loading SQLite3 Module..."); - const sqlite3 = await sqlite3InitModule(); - console.log("Loading SQLite3 Module... Done."); - - this.#db = new sqlite3.oo1.DB(`/fts.${name}.sqlite3`); - this.#db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS sources USING fts5(key, source, tokenize='porter mcsrc_tokenizer');"); - return undefined; - } catch (err: any) { - console.error(err); - return String(err); - } - } + + init(_name: string): void {} destroy() { - this.#db?.close(); close(); } index(key: string, source: string) { - if (!this.#db) { - console.error("DB not initialized"); - return; - } - - source = source - .replace(/^\s*package\s+[^\r\n;]+;\s*\r?\n?/m, "") - .replace(/^\s*import\s+[^\r\n;]+;\s*\r?\n?/gm, "") - .trim(); - - this.#db.exec({ - sql: "INSERT INTO sources(key, source) VALUES(?, ?)", - bind: [key, source] - }); + this.#sources.set(key, source); } find(query: string, options?: FullTextSearchOptions): FullTextSearchResult[] { - if (!this.#db) { - console.error("DB not initialized"); - return []; - } + return this.findByRegex(query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i', options); + } - console.log("Starting full text search..."); + findByRegex(pattern: string, flags: string, options?: FullTextSearchOptions): FullTextSearchResult[] { + const re = new RegExp(pattern, flags.includes('g') ? flags : flags + 'g'); + const pre = options?.pre ?? "["; + const post = options?.post ?? "]"; + const ellipsis = options?.ellipsis ?? "…"; + const contextChars = 80; + + console.log("Starting search..."); const startTime = performance.now(); - const res = this.#db.selectObjects(` - SELECT - key, - mcsrc_offsets(sources, ?, ?, ?, ?) AS offsets - FROM sources - WHERE source MATCH ? - ORDER BY rank; - `, [options?.pre ?? "[", options?.post ?? "]", options?.ellipsis ?? "…", options?.maxTokens ?? 10, query]); - - const out = res.map((r: any) => ({ - key: r["key"] as string, - regions: this.#parseOffsets(r["offsets"] as string), - })); - const elapsedMs = performance.now() - startTime; - console.log(`Finished in ${elapsedMs.toFixed(3)} ms`); - return out; - } + const results: FullTextSearchResult[] = []; + + for (const [key, source] of this.#sources) { + re.lastIndex = 0; + const regions: FullTextSearchRegion[] = []; + + let m: RegExpExecArray | null; + while ((m = re.exec(source)) !== null) { + const charStart = m.index; + const charEnd = charStart + m[0].length; + + const byteStart = this.#enc.encode(source.slice(0, charStart)).length; + const byteEnd = byteStart + this.#enc.encode(m[0]).length; - #parseOffsets(s: string): FullTextSearchRegion[] { - if (!s) return []; - - const bytes = this.#enc.encode(s); - const regions: FullTextSearchRegion[] = []; - let pos = 0; - - while (pos < bytes.length) { - const newline = bytes.indexOf(10, pos); // '\n' - const header = this.#dec.decode(bytes.slice(pos, newline)); - const [_col, _phrase, byteOffset, byteSize, snippetByteLen] = header.trim().split(/\s+/).map(Number); - pos = newline + 1; - - const snippetBytes = bytes.slice(pos, pos + snippetByteLen); - const snippet = this.#dec.decode(snippetBytes); - regions.push({ - start: byteOffset, - end: byteOffset + byteSize, - snippet - }); - pos += snippetByteLen; + const snipCharStart = Math.max(0, charStart - contextChars); + const snipCharEnd = Math.min(source.length, charEnd + contextChars); + const snippet = + (snipCharStart > 0 ? ellipsis : '') + + source.slice(snipCharStart, charStart) + + pre + m[0] + post + + source.slice(charEnd, snipCharEnd) + + (snipCharEnd < source.length ? ellipsis : ''); + + regions.push({ start: byteStart, end: byteEnd, snippet }); + } + + if (regions.length > 0) { + results.push({ key, regions }); + } } - return regions; + const elapsedMs = performance.now() - startTime; + console.log(`Finished in ${elapsedMs.toFixed(3)} ms`); + return results; } } Comlink.expose(new FullTextSearchWorker()); From 6dab30776aca37c9a0de045ada873926b1bdeacd Mon Sep 17 00:00:00 2001 From: Dimas Firmansyah Date: Mon, 4 May 2026 01:18:35 +0700 Subject: [PATCH 10/10] manual fix package-lock --- package-lock.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1632351..ce08ac8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5712,13 +5712,6 @@ "optional": true } } - }, - "packages/mcsrc-sqlite": { - "version": "0.0.0", - "extraneous": true, - "devDependencies": { - "@sqlite.org/sqlite-wasm": "^3.53.0-build1" - } } } }