Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,61 @@ kib skill run flashcards
kib skill run connections
```

### Watch Daemon (Passive Learning)

Run a background daemon that watches your inbox and any configured folders and listens on an HTTP endpoint — automatically ingesting new content and compiling it into your wiki.

```bash
# Start in foreground (logs to terminal)
kib watch

# Start as background daemon
kib watch --daemon

# Check daemon status
kib watch --status

# Stop the daemon
kib watch --stop

# Install as system service (auto-start on login)
kib watch --install # macOS: launchd, Linux: systemd
kib watch --uninstall
```

**Three ingestion channels run simultaneously:**

1. **Inbox folder** — drop any file into `inbox/` and it's auto-ingested. Files already in the inbox when the daemon starts are picked up too.
2. **HTTP endpoint** — `POST http://localhost:4747/ingest` accepts JSON `{ content, title?, url? }`. Built for browser extensions.
3. **Folder watchers** — monitor external directories with glob filtering (e.g., watch `~/Downloads` for `*.pdf`).

**Auto-compile** runs after N new sources (default: 5) or after an idle timeout (default: 30 min).

Configure in `.kb/config.toml`:

```toml
[watch]
enabled = true
inbox_path = "inbox"
auto_compile = true
poll_interval_ms = 2_000
auto_compile_threshold = 5 # compile after 5 new sources
auto_compile_delay_ms = 1_800_000 # or after 30 min idle

# Watch external folders
[[watch.folders]]
path = "~/Downloads"
glob = "*.pdf"
recursive = false

[[watch.folders]]
path = "~/Documents/notes"
glob = "*.{md,txt}"
recursive = true
```

Failed ingestions retry up to 3 times before moving to the failed queue. Logs are written to `.kb/logs/watch.log` with automatic rotation at 10 MB.

### Export

```bash
Expand Down
22 changes: 21 additions & 1 deletion packages/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ CORE
INTEGRATION
serve Start MCP server for AI tool integration
mcp Configure MCP in AI clients (auto-runs on init)
watch Watch inbox/ and auto-ingest new files
watch Passive learning daemon — auto-ingest and compile

MANAGEMENT
config [key] [val] Get or set configuration
Expand All @@ -77,6 +77,26 @@ kib export --format html

HTML export includes image assets with proper relative paths and generates a browsable image gallery page.

### Watch Daemon

Run a background daemon that watches your inbox and external folders and listens on an HTTP endpoint for new content — automatically ingesting and compiling it.

```bash
kib watch # foreground (logs to terminal)
kib watch --daemon # background daemon
kib watch --status # check if running
kib watch --stop # stop daemon
kib watch --install # install as system service (launchd/systemd)
kib watch --uninstall # remove system service
```

**Ingestion channels:**
- **Inbox** — drop files into `inbox/` (picks up files added while the daemon was off)
- **HTTP** — `POST localhost:4747/ingest` with `{ content, title?, url? }`
- **Folder watchers** — monitor external directories with glob patterns

**Auto-compile** triggers after a configurable number of new sources or an idle timeout. Configure it via the `[watch]` section in `.kb/config.toml`.

## LLM Providers

On first use, kib walks you through provider setup interactively. Or set via environment:
Expand Down
18 changes: 15 additions & 3 deletions packages/cli/src/commands/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,28 @@ export async function init(dir: string | undefined, opts: InitOpts) {
log.success("Created inbox/");
log.success("Created CLAUDE.md");

const hasKey =
(provider === "anthropic" && !!process.env.ANTHROPIC_API_KEY) ||
(provider === "openai" && !!process.env.OPENAI_API_KEY);

const providerLabel =
provider === "anthropic"
? `anthropic (ANTHROPIC_API_KEY)`
? `anthropic (ANTHROPIC_API_KEY${hasKey ? "" : " — not set yet"})`
: provider === "openai"
? `openai (OPENAI_API_KEY)`
? `openai (OPENAI_API_KEY${hasKey ? "" : " — not set yet"})`
: `ollama (localhost:11434)`;

log.success(`Detected provider: ${providerLabel}`);
log.success(`Provider: ${providerLabel}`);
log.success(`Model: ${model}`);

if (!hasKey && provider !== "ollama") {
const envKey = provider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
log.blank();
log.warn(
`Set ${envKey} to enable compile and query. Ingest, search, and read work without it.`,
);
}

// Auto-configure MCP in all detected AI clients
log.blank();
log.header("configuring MCP clients");
Expand Down
176 changes: 176 additions & 0 deletions packages/cli/src/commands/watch.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import { afterEach, describe, expect, test } from "bun:test";
import { mkdtemp, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { enqueue, ensureQueueDirs, initVault, listPending, queueDepth } from "@kibhq/core";

// Path of the temporary vault created by the most recent makeTempVault() call.
// Stays unset until a test actually creates a vault.
let tempDir: string;

// After every test, delete the temporary vault directory if one was created.
afterEach(async () => {
  if (!tempDir) return;
  // `force: true` keeps this from rejecting when the directory is already gone.
  await rm(tempDir, { recursive: true, force: true });
});

/**
 * Creates a fresh temporary directory, initialises a vault named "watch-test"
 * inside it, and returns the directory path.
 *
 * The path is stored in the module-level `tempDir` before the vault is
 * initialised, so the `afterEach` hook removes the directory even when
 * `initVault` rejects.
 */
async function makeTempVault() {
  const prefix = join(tmpdir(), "kib-watch-test-");
  const vaultRoot = await mkdtemp(prefix);
  tempDir = vaultRoot;
  await initVault(vaultRoot, { name: "watch-test" });
  return vaultRoot;
}

/**
 * Validation, markdown-assembly, and slug rules for the POST /ingest route.
 *
 * These tests restate the rules in local helpers instead of calling the real
 * HTTP handler, so they are pure and need no vault on disk.
 * NOTE(review): the helpers are meant to mirror the handler in watch.ts —
 * keep them in sync when the handler changes.
 */
describe("watch: HTTP server /ingest", () => {
  /** JSON payload of POST /ingest; every field is optional at the type level. */
  type IngestBody = { content?: string; title?: string; url?: string };

  /** True only when `content` is a non-empty string. */
  const hasContent = (body: IngestBody): boolean =>
    Boolean(body.content) && typeof body.content === "string";

  /**
   * Assembles the markdown document for a payload: an `# title` heading and an
   * optional `Source:` line are prepended when a title is present; otherwise
   * the content is returned untouched.
   */
  const buildMarkdown = (body: IngestBody): string | undefined => {
    if (!body.title) return body.content;
    const sourceLine = body.url ? `Source: ${body.url}\n\n` : "";
    return `# ${body.title}\n\n${sourceLine}${body.content}`;
  };

  /**
   * Lower-cases the title, collapses every run of characters outside [a-z0-9]
   * into a single "-", and truncates to 60 characters. Leading/trailing
   * hyphens are NOT trimmed.
   */
  const slugify = (title: string): string =>
    title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, "-")
      .slice(0, 60);

  test("rejects POST with missing content field", () => {
    expect(hasContent({ title: "No Content" })).toBe(false);
  });

  test("rejects POST with empty string content", () => {
    expect(hasContent({ content: "", title: "Empty" })).toBe(false);
  });

  test("accepts POST with valid content", () => {
    expect(hasContent({ content: "Real article content", title: "Good Article" })).toBe(true);
  });

  test("builds correct markdown with title and url", () => {
    const fullContent = buildMarkdown({
      content: "Article body text",
      title: "My Article",
      url: "https://example.com/article",
    });

    expect(fullContent).toBe(
      "# My Article\n\nSource: https://example.com/article\n\nArticle body text",
    );
  });

  test("builds correct markdown without url", () => {
    // Typed as IngestBody inside the helper, so reading the absent `url`
    // field is well-typed rather than a property-does-not-exist error.
    const fullContent = buildMarkdown({ content: "Body text", title: "Title Only" });

    expect(fullContent).toBe("# Title Only\n\nBody text");
  });

  test("builds correct markdown without title", () => {
    const fullContent = buildMarkdown({ content: "Just content, no title" });

    expect(fullContent).toBe("Just content, no title");
  });

  test("slug generation handles special characters", () => {
    // The trailing "?!" collapses into one trailing hyphen, which is kept.
    expect(slugify("What's the Deal with AI & ML?!")).toBe("what-s-the-deal-with-ai-ml-");
  });

  test("slug truncates to 60 characters", () => {
    expect(slugify("A".repeat(100)).length).toBe(60);
  });
});

/**
 * Startup seeding: files already sitting in the inbox are enqueued, with
 * dot-prefixed entries ignored.
 *
 * The seeding step is replayed by a local helper rather than by starting the
 * daemon itself.
 */
describe("watch: inbox seeding on startup", () => {
  /**
   * Reads `inboxDir` and enqueues each entry whose name does not start with
   * ".", one at a time, passing "inbox" as the third argument to `enqueue`.
   * Rejects if the directory cannot be read.
   */
  async function seedFromInbox(vaultRoot: string, inboxDir: string): Promise<void> {
    const { readdir } = await import("node:fs/promises");
    const entries = await readdir(inboxDir);
    const visible = entries.filter((name) => !name.startsWith("."));
    for (const name of visible) {
      await enqueue(vaultRoot, join(inboxDir, name), "inbox");
    }
  }

  test("enqueues existing inbox files on startup", async () => {
    const vaultRoot = await makeTempVault();
    await ensureQueueDirs(vaultRoot);
    const inboxDir = join(vaultRoot, "inbox");

    // Files that landed in the inbox while nothing was watching it.
    await writeFile(join(inboxDir, "offline-1.md"), "# Offline Article 1");
    await writeFile(join(inboxDir, "offline-2.md"), "# Offline Article 2");

    await seedFromInbox(vaultRoot, inboxDir);

    // Both files must now be in the queue.
    const depth = await queueDepth(vaultRoot);
    expect(depth).toBe(2);

    const pending = await listPending(vaultRoot, 10);
    const queuedUris = pending.map((item) => item.uri);
    expect(queuedUris).toContain(join(inboxDir, "offline-1.md"));
    expect(queuedUris).toContain(join(inboxDir, "offline-2.md"));
  });

  test("skips dotfiles during inbox seeding", async () => {
    const vaultRoot = await makeTempVault();
    await ensureQueueDirs(vaultRoot);
    const inboxDir = join(vaultRoot, "inbox");

    await writeFile(join(inboxDir, ".DS_Store"), "");
    await writeFile(join(inboxDir, ".hidden"), "");
    await writeFile(join(inboxDir, "visible.md"), "# Visible");

    await seedFromInbox(vaultRoot, inboxDir);

    // Only the non-dot file counts.
    expect(await queueDepth(vaultRoot)).toBe(1);
  });

  test("handles empty inbox gracefully", async () => {
    const vaultRoot = await makeTempVault();
    await ensureQueueDirs(vaultRoot);

    await seedFromInbox(vaultRoot, join(vaultRoot, "inbox"));

    expect(await queueDepth(vaultRoot)).toBe(0);
  });

  test("handles missing inbox directory gracefully", async () => {
    const vaultRoot = await makeTempVault();
    await ensureQueueDirs(vaultRoot);

    try {
      await seedFromInbox(vaultRoot, join(vaultRoot, "inbox-nonexistent"));
    } catch {
      // Any rejection from seeding is swallowed here; the test only checks
      // that nothing ended up in the queue.
    }

    expect(await queueDepth(vaultRoot)).toBe(0);
  });
});
23 changes: 19 additions & 4 deletions packages/cli/src/commands/watch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void
CompileScheduler,
startFolderWatchers,
compileVault,
createProvider,
isLocked,
} = await import("@kibhq/core");

Expand Down Expand Up @@ -185,7 +186,9 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void
}
emit("info", "Auto-compiling...");
try {
const result = await compileVault(root);
const compileModel = config.compile.model ?? config.provider.model;
const provider = await createProvider(config.provider.default, compileModel);
const result = await compileVault(root, provider, config);
emit(
"info",
`Compiled ${result.sourcesCompiled} sources → ${result.articlesCreated} created, ${result.articlesUpdated} updated.`,
Expand Down Expand Up @@ -242,10 +245,15 @@ async function startWatch(root: string, config: VaultConfig): Promise<() => void

const processed = new Set<string>();

// Seed with existing files
// Enqueue existing inbox files (may have been added while daemon was off).
// Ingest dedup handles already-ingested content, so this is safe.
try {
const existing = await readdir(inboxPath);
for (const f of existing) processed.add(f);
for (const f of existing) {
if (f.startsWith(".")) continue;
processed.add(f);
await enqueue(root, join(inboxPath, f), "inbox");
}
} catch {
// inbox might not exist yet
}
Expand Down Expand Up @@ -339,7 +347,14 @@ function startHttpServer(

if (req.method === "POST" && url.pathname === "/ingest") {
try {
const body = (await req.json()) as { content: string; url?: string; title?: string };
const body = (await req.json()) as { content?: string; url?: string; title?: string };

if (!body.content || typeof body.content !== "string") {
return new Response(JSON.stringify({ error: "Missing required field: content" }), {
status: 400,
headers: { "Content-Type": "application/json" },
});
}

const slug = (body.title ?? "untitled")
.toLowerCase()
Expand Down
Loading
Loading