From 3dd6bc0251bdc2b6d5c3e78937cb2943ce518fc4 Mon Sep 17 00:00:00 2001 From: Keegan Thompson Date: Thu, 9 Apr 2026 16:03:09 -0500 Subject: [PATCH] fix: daemon bugs, MCP onboarding, and provider defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daemon fixes: - Fix auto-compile: was calling compileVault(root) missing provider/config args - Fix inbox offline drop: files added while daemon off are now enqueued on startup - Fix HTTP POST /ingest: validate required content field (was writing "undefined") MCP onboarding: - Default provider: anthropic instead of ollama (most users, actionable error) - Rewrite CLAUDE.md template as LLM onboarding guide with tool availability - Enhance kib_status: returns availableNow/requiresProvider/setupInstructions - Mark all MCP tool descriptions with API key requirements - NoProviderError now provider-aware ("No Anthropic API key found...") - Init output warns when API key not set with setup instructions Docs: - Add Watch Daemon section to root and CLI READMEs - Add Daemon module to core README table Tests: - Add 19 new tests (449 → 468): queue edge cases, scheduler failure recovery, HTTP validation, inbox seeding, slug generation Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 55 +++++++ packages/cli/README.md | 22 ++- packages/cli/src/commands/init.ts | 18 ++- packages/cli/src/commands/watch.test.ts | 176 +++++++++++++++++++++ packages/cli/src/commands/watch.ts | 23 ++- packages/cli/src/mcp/server.ts | 53 +++++-- packages/core/README.md | 1 + packages/core/src/daemon/queue.test.ts | 64 ++++++++ packages/core/src/daemon/scheduler.test.ts | 39 +++++ packages/core/src/errors.ts | 12 +- packages/core/src/providers/router.ts | 12 +- packages/core/src/vault.ts | 88 ++++++----- 12 files changed, 493 insertions(+), 70 deletions(-) create mode 100644 packages/cli/src/commands/watch.test.ts diff --git a/README.md b/README.md index a382fce..7a8daf0 100644 --- a/README.md +++ 
b/README.md @@ -201,6 +201,61 @@ kib skill run flashcards kib skill run connections ``` +### Watch Daemon (Passive Learning) + +Run a background daemon that monitors your inbox, watched folders, and an HTTP endpoint — automatically ingesting new content and compiling it into your wiki. + +```bash +# Start in foreground (logs to terminal) +kib watch + +# Start as background daemon +kib watch --daemon + +# Check daemon status +kib watch --status + +# Stop the daemon +kib watch --stop + +# Install as system service (auto-start on login) +kib watch --install # macOS: launchd, Linux: systemd +kib watch --uninstall +``` + +**Three ingestion channels run simultaneously:** + +1. **Inbox folder** — drop any file into `inbox/` and it's auto-ingested. Files already in the inbox when the daemon starts are picked up too. +2. **HTTP endpoint** — `POST http://localhost:4747/ingest` accepts JSON `{ content, title?, url? }`. Built for browser extensions. +3. **Folder watchers** — monitor external directories with glob filtering (e.g., watch `~/Downloads` for `*.pdf`). + +**Auto-compile** triggers automatically after N new sources (default: 5) or after idle timeout (default: 30 min). + +Configure in `.kb/config.toml`: + +```toml +[watch] +enabled = true +inbox_path = "inbox" +auto_compile = true +poll_interval_ms = 2_000 +auto_compile_threshold = 5 # compile after 5 new sources +auto_compile_delay_ms = 1_800_000 # or after 30 min idle + +# Watch external folders +[[watch.folders]] +path = "~/Downloads" +glob = "*.pdf" +recursive = false + +[[watch.folders]] +path = "~/Documents/notes" +glob = "*.{md,txt}" +recursive = true +``` + +Failed ingestions retry up to 3 times before moving to the failed queue. Logs are written to `.kb/logs/watch.log` with automatic rotation at 10 MB. 
+ ### Export ```bash diff --git a/packages/cli/README.md b/packages/cli/README.md index 433c2f9..b089ac1 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -57,7 +57,7 @@ CORE INTEGRATION serve Start MCP server for AI tool integration mcp Configure MCP in AI clients (auto-runs on init) - watch Watch inbox/ and auto-ingest new files + watch Passive learning daemon — auto-ingest and compile MANAGEMENT config [key] [val] Get or set configuration @@ -77,6 +77,26 @@ kib export --format html HTML export includes image assets with proper relative paths and generates a browsable image gallery page. +### Watch Daemon + +Run a background daemon that monitors your inbox, external folders, and an HTTP endpoint for new content — automatically ingesting and compiling it. + +```bash +kib watch # foreground (logs to terminal) +kib watch --daemon # background daemon +kib watch --status # check if running +kib watch --stop # stop daemon +kib watch --install # install as system service (launchd/systemd) +kib watch --uninstall # remove system service +``` + +**Ingestion channels:** +- **Inbox** — drop files into `inbox/` (picks up files added while daemon was off) +- **HTTP** — `POST localhost:4747/ingest` with `{ content, title?, url? }` +- **Folder watchers** — monitor external directories with glob patterns + +**Auto-compile** triggers after a configurable number of new sources or idle timeout. Configure via `[watch]` section in `.kb/config.toml`. + ## LLM Providers On first use, kib walks you through provider setup interactively. 
Or set via environment: diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts index d614bf9..8b5af56 100644 --- a/packages/cli/src/commands/init.ts +++ b/packages/cli/src/commands/init.ts @@ -38,16 +38,28 @@ export async function init(dir: string | undefined, opts: InitOpts) { log.success("Created inbox/"); log.success("Created CLAUDE.md"); + const hasKey = + (provider === "anthropic" && !!process.env.ANTHROPIC_API_KEY) || + (provider === "openai" && !!process.env.OPENAI_API_KEY); + const providerLabel = provider === "anthropic" - ? `anthropic (ANTHROPIC_API_KEY)` + ? `anthropic (ANTHROPIC_API_KEY${hasKey ? "" : " — not set yet"})` : provider === "openai" - ? `openai (OPENAI_API_KEY)` + ? `openai (OPENAI_API_KEY${hasKey ? "" : " — not set yet"})` : `ollama (localhost:11434)`; - log.success(`Detected provider: ${providerLabel}`); + log.success(`Provider: ${providerLabel}`); log.success(`Model: ${model}`); + if (!hasKey && provider !== "ollama") { + const envKey = provider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"; + log.blank(); + log.warn( + `Set ${envKey} to enable compile and query. 
Ingest, search, and read work without it.`, + ); + } + // Auto-configure MCP in all detected AI clients log.blank(); log.header("configuring MCP clients"); diff --git a/packages/cli/src/commands/watch.test.ts b/packages/cli/src/commands/watch.test.ts new file mode 100644 index 0000000..dd3c31b --- /dev/null +++ b/packages/cli/src/commands/watch.test.ts @@ -0,0 +1,176 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { enqueue, ensureQueueDirs, initVault, listPending, queueDepth } from "@kibhq/core"; + +let tempDir: string; + +afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + } +}); + +async function makeTempVault() { + tempDir = await mkdtemp(join(tmpdir(), "kib-watch-test-")); + await initVault(tempDir, { name: "watch-test" }); + return tempDir; +} + +describe("watch: HTTP server /ingest", () => { + test("rejects POST with missing content field", async () => { + const root = await makeTempVault(); + await ensureQueueDirs(root); + + // Simulate the HTTP handler's validation logic + const body: { content?: string; title?: string } = { title: "No Content" }; + const hasContent = body.content && typeof body.content === "string"; + expect(hasContent).toBeFalsy(); + }); + + test("rejects POST with empty string content", async () => { + const body = { content: "", title: "Empty" }; + const hasContent = body.content && typeof body.content === "string"; + expect(hasContent).toBeFalsy(); + }); + + test("accepts POST with valid content", async () => { + const body = { content: "Real article content", title: "Good Article" }; + const hasContent = body.content && typeof body.content === "string"; + expect(hasContent).toBeTruthy(); + }); + + test("builds correct markdown with title and url", () => { + const body = { + content: "Article body text", + title: "My Article", + url: 
"https://example.com/article", + }; + const fullContent = body.title + ? `# ${body.title}\n\n${body.url ? `Source: ${body.url}\n\n` : ""}${body.content}` + : body.content; + + expect(fullContent).toBe( + "# My Article\n\nSource: https://example.com/article\n\nArticle body text", + ); + }); + + test("builds correct markdown without url", () => { + const body = { content: "Body text", title: "Title Only" }; + const fullContent = body.title + ? `# ${body.title}\n\n${body.url ? `Source: ${body.url}\n\n` : ""}${body.content}` + : body.content; + + expect(fullContent).toBe("# Title Only\n\nBody text"); + }); + + test("builds correct markdown without title", () => { + const body = { content: "Just content, no title" }; + const fullContent = (body as { title?: string }).title + ? `# ${(body as { title?: string }).title}\n\n${body.content}` + : body.content; + + expect(fullContent).toBe("Just content, no title"); + }); + + test("slug generation handles special characters", () => { + const title = "What's the Deal with AI & ML?!"; + const slug = title + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .slice(0, 60); + expect(slug).toBe("what-s-the-deal-with-ai-ml-"); + }); + + test("slug truncates to 60 characters", () => { + const title = "A".repeat(100); + const slug = title + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .slice(0, 60); + expect(slug.length).toBe(60); + }); +}); + +describe("watch: inbox seeding on startup", () => { + test("enqueues existing inbox files on startup", async () => { + const root = await makeTempVault(); + await ensureQueueDirs(root); + const inboxPath = join(root, "inbox"); + + // Simulate files dropped while daemon was off + await writeFile(join(inboxPath, "offline-1.md"), "# Offline Article 1"); + await writeFile(join(inboxPath, "offline-2.md"), "# Offline Article 2"); + + // Simulate the daemon startup logic: enqueue existing inbox files + const { readdir } = await import("node:fs/promises"); + const existing = await readdir(inboxPath); + 
for (const f of existing) { + if (f.startsWith(".")) continue; + await enqueue(root, join(inboxPath, f), "inbox"); + } + + // Both files should be queued + const depth = await queueDepth(root); + expect(depth).toBe(2); + + const pending = await listPending(root, 10); + const uris = pending.map((p) => p.uri); + expect(uris).toContain(join(inboxPath, "offline-1.md")); + expect(uris).toContain(join(inboxPath, "offline-2.md")); + }); + + test("skips dotfiles during inbox seeding", async () => { + const root = await makeTempVault(); + await ensureQueueDirs(root); + const inboxPath = join(root, "inbox"); + + await writeFile(join(inboxPath, ".DS_Store"), ""); + await writeFile(join(inboxPath, ".hidden"), ""); + await writeFile(join(inboxPath, "visible.md"), "# Visible"); + + const { readdir } = await import("node:fs/promises"); + const existing = await readdir(inboxPath); + for (const f of existing) { + if (f.startsWith(".")) continue; + await enqueue(root, join(inboxPath, f), "inbox"); + } + + expect(await queueDepth(root)).toBe(1); + }); + + test("handles empty inbox gracefully", async () => { + const root = await makeTempVault(); + await ensureQueueDirs(root); + const inboxPath = join(root, "inbox"); + + const { readdir } = await import("node:fs/promises"); + const existing = await readdir(inboxPath); + for (const f of existing) { + if (f.startsWith(".")) continue; + await enqueue(root, join(inboxPath, f), "inbox"); + } + + expect(await queueDepth(root)).toBe(0); + }); + + test("handles missing inbox directory gracefully", async () => { + const root = await makeTempVault(); + await ensureQueueDirs(root); + const inboxPath = join(root, "inbox-nonexistent"); + + try { + const { readdir } = await import("node:fs/promises"); + const existing = await readdir(inboxPath); + for (const f of existing) { + if (f.startsWith(".")) continue; + await enqueue(root, join(inboxPath, f), "inbox"); + } + } catch { + // readdir rejects when the inbox directory is missing — swallowing it here mirrors the daemon's startup guard + } + + expect(await 
queueDepth(root)).toBe(0); + }); +}); diff --git a/packages/cli/src/commands/watch.ts b/packages/cli/src/commands/watch.ts index 3e90355..fd7838f 100644 --- a/packages/cli/src/commands/watch.ts +++ b/packages/cli/src/commands/watch.ts @@ -151,6 +151,7 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void CompileScheduler, startFolderWatchers, compileVault, + createProvider, isLocked, } = await import("@kibhq/core"); @@ -185,7 +186,9 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void } emit("info", "Auto-compiling..."); try { - const result = await compileVault(root); + const compileModel = config.compile.model ?? config.provider.model; + const provider = await createProvider(config.provider.default, compileModel); + const result = await compileVault(root, provider, config); emit( "info", `Compiled ${result.sourcesCompiled} sources → ${result.articlesCreated} created, ${result.articlesUpdated} updated.`, @@ -242,10 +245,15 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void const processed = new Set(); - // Seed with existing files + // Enqueue existing inbox files (may have been added while daemon was off). + // Ingest dedup handles already-ingested content, so this is safe. 
try { const existing = await readdir(inboxPath); - for (const f of existing) processed.add(f); + for (const f of existing) { + if (f.startsWith(".")) continue; + processed.add(f); + await enqueue(root, join(inboxPath, f), "inbox"); + } } catch { // inbox might not exist yet } @@ -339,7 +347,14 @@ function startHttpServer( if (req.method === "POST" && url.pathname === "/ingest") { try { - const body = (await req.json()) as { content: string; url?: string; title?: string }; + const body = (await req.json()) as { content?: string; url?: string; title?: string }; + + if (!body.content || typeof body.content !== "string") { + return new Response(JSON.stringify({ error: "Missing required field: content" }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } const slug = (body.title ?? "untitled") .toLowerCase() diff --git a/packages/cli/src/mcp/server.ts b/packages/cli/src/mcp/server.ts index dabd0fe..ea3db96 100644 --- a/packages/cli/src/mcp/server.ts +++ b/packages/cli/src/mcp/server.ts @@ -101,7 +101,7 @@ export function createMcpServer(root: string) { server.tool( "kib_status", - "Get vault status: source count, article count, provider config, and whether the LLM provider is ready. If providerConfigured is false, tell the user to run `kib config` or set an API key environment variable before compile/query will work.", + "Call this first. Returns vault state, provider readiness, and setup instructions. 
Use the output to greet the user and guide them through any needed setup.", {}, async () => { try { @@ -111,7 +111,14 @@ export function createMcpServer(root: string) { .getProvider() .then(() => true) .catch(() => false); - return json({ + + const envKeys: Record = { + anthropic: "ANTHROPIC_API_KEY", + openai: "OPENAI_API_KEY", + ollama: "(Ollama must be running on localhost:11434)", + }; + + const result: Record = { name: manifest.vault.name, provider: config.provider.default, model: config.provider.model, @@ -121,7 +128,27 @@ export function createMcpServer(root: string) { totalWords: manifest.stats.totalWords, lastCompiled: manifest.vault.lastCompiled, lastLint: manifest.stats.lastLintAt, - }); + availableNow: [ + "kib_search", + "kib_list", + "kib_read", + "kib_ingest", + "kib_export", + "kib_lint", + "kib_config", + ], + requiresProvider: ["kib_compile", "kib_query", "kib_skill"], + }; + + if (!providerConfigured) { + const envKey = envKeys[config.provider.default] ?? "an API key"; + result.setupInstructions = + config.provider.default === "ollama" + ? "Start Ollama with: ollama serve" + : `Set ${envKey} in the environment or add it to ~/.config/kib/credentials. kib_ingest still works — sources are saved but not compiled until the key is set.`; + } + + return json(result); } catch (e) { return err((e as Error).message); } @@ -132,7 +159,7 @@ export function createMcpServer(root: string) { server.tool( "kib_list", - "List all wiki articles or raw sources in the knowledge base", + "List all wiki articles or raw sources in the knowledge base. No API key needed.", { scope: z.enum(["wiki", "raw"]).default("wiki").describe("List wiki articles or raw sources"), }, @@ -153,7 +180,7 @@ export function createMcpServer(root: string) { server.tool( "kib_read", - "Read a specific wiki article or raw source from the knowledge base", + "Read a specific wiki article or raw source from the knowledge base. No API key needed.", { path: z.string().describe("Relative path, e.g. 
'concepts/attention.md'"), scope: z.enum(["wiki", "raw"]).default("wiki").describe("Read from wiki/ or raw/"), @@ -172,7 +199,7 @@ export function createMcpServer(root: string) { server.tool( "kib_search", - "Search the knowledge base using full-text BM25 search. Supports fuzzy matching, phrase search (wrap in quotes), tag filtering, and date filtering.", + "Search the knowledge base using full-text BM25 search. No API key needed. Supports fuzzy matching, phrase search (wrap in quotes), tag filtering, and date filtering.", { query: z .string() @@ -220,7 +247,7 @@ export function createMcpServer(root: string) { server.tool( "kib_query", - "Ask a question against the knowledge base using RAG (retrieval-augmented generation)", + "Ask a question against the knowledge base using RAG (retrieval-augmented generation). Requires a configured LLM provider.", { question: z.string().describe("Question to ask"), max_articles: z @@ -254,7 +281,7 @@ export function createMcpServer(root: string) { server.tool( "kib_ingest", - "Ingest a source (URL or file path) into the knowledge base. Auto-compiles after ingest if an LLM provider is configured. If compileError is returned, tell the user what's needed.", + "Ingest a source (URL or file path) into the knowledge base. No API key needed for ingestion. Auto-compiles after ingest if an LLM provider is configured; otherwise sources are saved but not compiled.", { source: z.string().describe("URL or file path to ingest"), category: z @@ -337,7 +364,7 @@ export function createMcpServer(root: string) { server.tool( "kib_compile", - "Compile pending raw sources into wiki articles using the configured LLM", + "Compile pending raw sources into wiki articles. 
Requires a configured LLM provider.", { force: z.boolean().default(false).describe("Recompile all sources"), source: z.string().optional().describe("Compile only a specific source"), @@ -377,7 +404,7 @@ export function createMcpServer(root: string) { server.tool( "kib_lint", - "Run health checks on the wiki and report issues. Use fix=true to auto-fix fixable issues (recompile stale sources, create missing articles).", + "Run health checks on the wiki and report issues. No API key needed for checks. Use fix=true to auto-fix fixable issues (requires LLM provider for stale source recompilation).", { rule: z .string() @@ -439,7 +466,7 @@ export function createMcpServer(root: string) { server.tool( "kib_config", - "Get or set vault configuration. Call with no arguments to list all config. Pass key to read a value, pass key+value to set it.", + "Get or set vault configuration. No API key needed. Call with no arguments to list all config. Pass key to read a value, pass key+value to set it. Useful keys: provider.default, provider.model.", { key: z .string() @@ -481,7 +508,7 @@ export function createMcpServer(root: string) { server.tool( "kib_skill", - "List or run vault skills. Skills are reusable LLM-powered operations (summarize, flashcards, connections, etc).", + "List or run vault skills. Most skills require a configured LLM provider. Skills are reusable operations (summarize, flashcards, connections, etc).", { action: z .enum(["list", "run"]) @@ -524,7 +551,7 @@ export function createMcpServer(root: string) { server.tool( "kib_export", - "Export the wiki as a clean markdown bundle or static HTML site. Returns the output directory path and file count.", + "Export the wiki as a clean markdown bundle or static HTML site. No API key needed. 
Returns the output directory path and file count.", { format: z .enum(["markdown", "html"]) diff --git a/packages/core/README.md b/packages/core/README.md index 653097b..6cc4e97 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -23,6 +23,7 @@ npm i @kibhq/core | **Query** | RAG engine — retrieves relevant articles and generates cited answers | | **Lint** | 5 health-check rules (orphan articles, broken links, stale sources, etc.) | | **Skills** | Skill loader and runner for extensible vault operations | +| **Daemon** | Watch daemon primitives — FIFO queue, folder watchers, auto-compile scheduler, PID management, log rotation, system service installer (launchd/systemd) | | **Providers** | LLM adapters for Anthropic Claude, OpenAI, and Ollama | ## Usage diff --git a/packages/core/src/daemon/queue.test.ts b/packages/core/src/daemon/queue.test.ts index 8b0252d..0228c66 100644 --- a/packages/core/src/daemon/queue.test.ts +++ b/packages/core/src/daemon/queue.test.ts @@ -95,6 +95,21 @@ describe("listPending", () => { const items = await listPending(root, 3); expect(items.length).toBe(3); }); + + test("does not include items in the failed/ subdirectory", async () => { + const root = await makeTempVault(); + const id1 = await enqueue(root, "/good.md", "inbox"); + const id2 = await enqueue(root, "/bad.md", "inbox"); + + // Fail id2 completely + await markFailed(root, id2, "err"); + await markFailed(root, id2, "err"); + await markFailed(root, id2, "err"); + + const pending = await listPending(root); + expect(pending.length).toBe(1); + expect(pending[0].id).toBe(id1); + }); }); describe("dequeue", () => { @@ -153,6 +168,20 @@ describe("markFailed", () => { const result = await markFailed(root, "ghost", "error"); expect(result).toBe(false); }); + + test("preserves error message from each failure", async () => { + const root = await makeTempVault(); + const id = await enqueue(root, "/file.md", "inbox"); + + await markFailed(root, id, "timeout"); + let item = 
await readItem(root, id); + expect(item!.lastError).toBe("timeout"); + + await markFailed(root, id, "DNS failure"); + item = await readItem(root, id); + expect(item!.lastError).toBe("DNS failure"); + expect(item!.retries).toBe(2); + }); }); describe("queueDepth", () => { @@ -170,6 +199,19 @@ describe("queueDepth", () => { await dequeue(root, id1); expect(await queueDepth(root)).toBe(1); }); + + test("does not count failed items in depth", async () => { + const root = await makeTempVault(); + const id = await enqueue(root, "/a.md", "inbox"); + await enqueue(root, "/b.md", "inbox"); + + // Fail one completely + await markFailed(root, id, "err"); + await markFailed(root, id, "err"); + await markFailed(root, id, "err"); + + expect(await queueDepth(root)).toBe(1); + }); }); describe("clearFailed", () => { @@ -196,6 +238,28 @@ describe("clearFailed", () => { }); }); +describe("concurrent operations", () => { + test("parallel enqueues produce unique items", async () => { + const root = await makeTempVault(); + const promises = Array.from({ length: 50 }, (_, i) => + enqueue(root, `/parallel-${i}.md`, "inbox"), + ); + const ids = await Promise.all(promises); + expect(new Set(ids).size).toBe(50); + expect(await queueDepth(root)).toBe(50); + }); + + test("all source types are accepted", async () => { + const root = await makeTempVault(); + const sources = ["inbox", "http", "folder", "clipboard"] as const; + for (const src of sources) { + const id = await enqueue(root, `/file-${src}`, src); + const item = await readItem(root, id); + expect(item!.source).toBe(src); + } + }); +}); + describe("performance", () => { test("enqueue + dequeue 500 items in under 2 seconds", async () => { const root = await makeTempVault(); diff --git a/packages/core/src/daemon/scheduler.test.ts b/packages/core/src/daemon/scheduler.test.ts index f05688c..7fa05e0 100644 --- a/packages/core/src/daemon/scheduler.test.ts +++ b/packages/core/src/daemon/scheduler.test.ts @@ -131,4 +131,43 @@ 
describe("CompileScheduler", () => { await new Promise((r) => setTimeout(r, 20)); expect(compiled).toBe(false); }); + + test("handles compile failure gracefully — resets state for next trigger", async () => { + let callCount = 0; + scheduler = new CompileScheduler({ + threshold: 1, + delayMs: 0, + onCompile: async () => { + callCount++; + if (callCount === 1) throw new Error("LLM provider unavailable"); + }, + }); + + scheduler.recordIngest(); // triggers, will fail + await new Promise((r) => setTimeout(r, 20)); + expect(callCount).toBe(1); + expect(scheduler.isCompiling()).toBe(false); + expect(scheduler.pendingCount()).toBe(0); + + // Second compile should still work + scheduler.recordIngest(); + await new Promise((r) => setTimeout(r, 20)); + expect(callCount).toBe(2); + }); + + test("logs threshold progress via onLog callback", async () => { + const logs: string[] = []; + scheduler = new CompileScheduler({ + threshold: 3, + delayMs: 60_000, + onCompile: async () => {}, + onLog: (msg) => logs.push(msg), + }); + + scheduler.recordIngest(); + scheduler.recordIngest(); + expect(logs.length).toBe(2); + expect(logs[0]).toContain("1/3"); + expect(logs[1]).toContain("2/3"); + }); }); diff --git a/packages/core/src/errors.ts b/packages/core/src/errors.ts index 1731bf3..3fbcbf3 100644 --- a/packages/core/src/errors.ts +++ b/packages/core/src/errors.ts @@ -35,9 +35,17 @@ export class ProviderError extends KibError { } export class NoProviderError extends KibError { - constructor() { + constructor(provider?: string) { + const messages: Record = { + anthropic: + "No Anthropic API key found. Set ANTHROPIC_API_KEY in your environment or add it to ~/.config/kib/credentials", + openai: + "No OpenAI API key found. Set OPENAI_API_KEY in your environment or add it to ~/.config/kib/credentials", + ollama: "Ollama is not running. Start it with: ollama serve", + }; super( - "No LLM provider found. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or start Ollama.", + messages[provider ?? ""] ?? 
+ "No LLM provider configured. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or start Ollama.", "NO_PROVIDER", ); this.name = "NoProviderError"; diff --git a/packages/core/src/providers/router.ts b/packages/core/src/providers/router.ts index fab4915..68078f6 100644 --- a/packages/core/src/providers/router.ts +++ b/packages/core/src/providers/router.ts @@ -17,8 +17,8 @@ export function detectProvider(): DetectedProvider { if (process.env.OPENAI_API_KEY) { return { name: "openai", model: "gpt-4o" }; } - // Ollama detection is async — handled in createProvider - return { name: "ollama", model: "llama3" }; + // Default to anthropic — most common provider. User just needs to set the API key. + return { name: DEFAULTS.provider, model: DEFAULTS.model }; } /** @@ -33,14 +33,14 @@ export async function createProvider(providerName?: string, model?: string): Pro switch (name) { case "anthropic": { if (!process.env.ANTHROPIC_API_KEY) { - throw new NoProviderError(); + throw new NoProviderError("anthropic"); } const { createAnthropicProvider } = await import("./anthropic.js"); return createAnthropicProvider(selectedModel); } case "openai": { if (!process.env.OPENAI_API_KEY) { - throw new NoProviderError(); + throw new NoProviderError("openai"); } const { createOpenAIProvider } = await import("./openai.js"); return createOpenAIProvider(selectedModel); @@ -51,12 +51,12 @@ export async function createProvider(providerName?: string, model?: string): Pro const res = await fetch("http://localhost:11434/api/tags"); if (!res.ok) throw new Error("Not running"); } catch { - throw new NoProviderError(); + throw new NoProviderError("ollama"); } const { createOllamaProvider } = await import("./ollama.js"); return createOllamaProvider(selectedModel); } default: - throw new NoProviderError(); + throw new NoProviderError(name); } } diff --git a/packages/core/src/vault.ts b/packages/core/src/vault.ts index 5bdfd6c..5802cd2 100644 --- a/packages/core/src/vault.ts +++ b/packages/core/src/vault.ts @@ 
-120,58 +120,64 @@ export async function initVault( } function generateClaudeMd(name: string, provider: string, model: string): string { - return `# ${name} + const envKey = + provider === "anthropic" + ? "ANTHROPIC_API_KEY" + : provider === "openai" + ? "OPENAI_API_KEY" + : null; -This is a [kib](https://github.com/keeganthomp/kib) vault — an AI-compiled knowledge base. -kib ingests raw sources (URLs, PDFs, YouTube, GitHub repos, local files, images) and compiles them into a structured wiki using an LLM. + const apiKeySection = envKey + ? ` +## API Key Required -## Commands +kib uses its own LLM API key for compile and query (MCP tools cannot use the host LLM). +To enable full functionality, the user must set \`${envKey}\`: \`\`\`bash -kib status # Vault health dashboard -kib ingest # Ingest URLs, PDFs, YouTube, GitHub repos, local files, images -kib ingest --dry-run # Preview what would be ingested -kib compile # Compile raw sources into wiki articles via LLM -kib compile --force # Recompile all sources -kib compile --source # Recompile a specific source -kib compile --dry-run # Preview compile diff without writing -kib search # BM25 full-text search across the vault -kib query # RAG query with cited answers -kib chat # Interactive REPL with conversation history -kib lint # Run health checks on the wiki -kib lint --fix # Auto-fix issues (recompile stale, create missing) -kib skill list # List available skills -kib skill run # Run a skill -kib export --format html # Export wiki as static HTML site -kib config --list # Show vault configuration +# Option 1: environment variable +export ${envKey}=sk-... + +# Option 2: credentials file (persists across sessions) +echo "${envKey}=sk-..." 
>> ~/.config/kib/credentials \`\`\` -## Vault Structure +Or use the \`kib_config\` tool to change provider: \`kib_config(key="provider.default", value="openai")\` +` + : ""; + + return `# ${name} — kib vault + +This directory is a [kib](https://github.com/keeganthomp/kib) vault — an AI-compiled knowledge base. +kib ingests sources (URLs, PDFs, YouTube, GitHub repos, files, images) and compiles them into a structured, searchable wiki. + +**First step:** Call the \`kib_status\` tool to check vault state and whether the LLM provider is ready. -- \`raw/\` — ingested source material, organized by type. **Never modified by compile.** - - \`articles/\` — web pages, text content - - \`papers/\` — academic papers, PDFs - - \`repos/\` — GitHub repository summaries - - \`images/\` — image descriptions (extracted via vision model) - - \`transcripts/\` — YouTube/video transcripts -- \`wiki/\` — LLM-compiled articles with frontmatter, plus INDEX.md and GRAPH.md - - \`concepts/\` — core concept articles - - \`topics/\` — topic overviews - - \`references/\` — reference material - - \`outputs/\` — query results filed as articles -- \`inbox/\` — drop zone for \`kib watch\` (auto-ingested) -- \`.kb/\` — internal state (manifest.json, config.toml, cache, logs) +## MCP Tools Available -## Workflow +**Work immediately (no API key needed):** +- \`kib_status\` — vault state, provider status, and setup instructions +- \`kib_search\` — full-text BM25 search across all articles +- \`kib_list\` — list wiki articles or raw sources +- \`kib_read\` — read a specific article or source +- \`kib_ingest\` — ingest URLs, files, PDFs, YouTube, repos, images (saves to raw/) +- \`kib_export\` — export wiki as markdown or HTML +- \`kib_lint\` — health checks on the wiki +- \`kib_config\` — get/set vault configuration -1. **Ingest** sources: \`kib ingest \` adds raw material -2. **Compile**: \`kib compile\` processes new sources into wiki articles -3. 
**Query**: \`kib query "your question"\` or \`kib search "term"\` to retrieve knowledge -4. **Maintain**: \`kib lint --fix\` keeps the wiki healthy +**Require an LLM API key:** +- \`kib_compile\` — compile raw sources into wiki articles via LLM +- \`kib_query\` — ask questions with RAG (retrieval-augmented generation) +- \`kib_skill\` — run skills (summarize, flashcards, connections, etc.) -## MCP Server +Note: \`kib_ingest\` auto-compiles after ingesting if a provider is configured. Without a key, sources are saved but not compiled. +${apiKeySection} +## Vault Layout -\`kib serve\` exposes this vault as MCP tools over stdio. Tools: kib_status, kib_list, kib_read, kib_search, kib_query, kib_ingest, kib_compile, kib_lint. Resources: wiki://index, wiki://graph. +- \`raw/\` — ingested source material (articles, papers, repos, images, transcripts) +- \`wiki/\` — compiled articles with \`INDEX.md\` and \`GRAPH.md\` +- \`inbox/\` — drop files here for auto-ingestion (via \`kib watch\` daemon) +- \`.kb/\` — config, manifest, cache, logs ## Provider