diff --git a/README.md b/README.md index 557a174..a382fce 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ You can also set provider via environment variables: | Provider | Env Variable | Default Model | |---|---|---| -| **Anthropic** | `ANTHROPIC_API_KEY` | claude-sonnet-4-6-20250514 | +| **Anthropic** | `ANTHROPIC_API_KEY` | claude-sonnet-4-6 | | **OpenAI** | `OPENAI_API_KEY` | gpt-4o | | **Ollama** | (auto-detect on localhost:11434) | llama3 | diff --git a/docs/provider-config.md b/docs/provider-config.md index 9595ac1..410f07a 100644 --- a/docs/provider-config.md +++ b/docs/provider-config.md @@ -33,7 +33,7 @@ kib config provider.model gpt-4o | Feature | Anthropic | OpenAI | Ollama | |---------|-----------|--------|--------| -| Default model | claude-sonnet-4-6-20250514 | gpt-4o | llama3 | +| Default model | claude-sonnet-4-6 | gpt-4o | llama3 | | Fast model | claude-haiku-4-5-20251001 | gpt-4o | llama3 | | Vision (image ingest) | Yes | Yes | No | | Embeddings (vector search) | No | Yes (text-embedding-3-small) | Yes (nomic-embed-text) | @@ -72,7 +72,7 @@ Run `kib init` and follow the prompts to select a provider and enter your API ke Used for heavy operations (compile, query, chat): ```bash -kib config provider.model claude-sonnet-4-6-20250514 +kib config provider.model claude-sonnet-4-6 ``` ### Fast model @@ -92,7 +92,7 @@ Override the model for specific operations: kib config compile.model gpt-4o # Use a different model for queries -kib config query.model claude-sonnet-4-6-20250514 +kib config query.model claude-sonnet-4-6 ``` These override `provider.model` for that specific operation only. 
@@ -104,7 +104,7 @@ All provider settings live in `.kb/config.toml`: ```toml [provider] default = "anthropic" -model = "claude-sonnet-4-6-20250514" +model = "claude-sonnet-4-6" fast_model = "claude-haiku-4-5-20251001" [compile] diff --git a/docs/vault-format.md b/docs/vault-format.md index dfcbe11..faf7dff 100644 --- a/docs/vault-format.md +++ b/docs/vault-format.md @@ -48,7 +48,7 @@ The manifest is the source of truth for vault state. Schema version: `"1"`. "created": "2024-01-15T10:00:00.000Z", "lastCompiled": "2024-01-15T12:00:00.000Z", "provider": "anthropic", - "model": "claude-sonnet-4-6-20250514" + "model": "claude-sonnet-4-6" }, "sources": { "src_a1b2c3d4e5f6": { @@ -116,7 +116,7 @@ Vault configuration lives at `.kb/config.toml`: ```toml [provider] default = "anthropic" -model = "claude-sonnet-4-6-20250514" +model = "claude-sonnet-4-6" fast_model = "claude-haiku-4-5-20251001" [compile] diff --git a/examples/ml-research/.kb/config.toml b/examples/ml-research/.kb/config.toml index 113004c..8498b4d 100644 --- a/examples/ml-research/.kb/config.toml +++ b/examples/ml-research/.kb/config.toml @@ -1,6 +1,6 @@ [provider] default = "anthropic" -model = "claude-sonnet-4-6-20250514" +model = "claude-sonnet-4-6" fast_model = "claude-haiku-4-5-20251001" [compile] diff --git a/examples/ml-research/.kb/manifest.json b/examples/ml-research/.kb/manifest.json index 188e980..eaddc37 100644 --- a/examples/ml-research/.kb/manifest.json +++ b/examples/ml-research/.kb/manifest.json @@ -5,7 +5,7 @@ "created": "2026-03-01T09:00:00.000Z", "lastCompiled": "2026-03-15T14:30:00.000Z", "provider": "anthropic", - "model": "claude-sonnet-4-6-20250514" + "model": "claude-sonnet-4-6" }, "sources": { "src_att001": { diff --git a/examples/reading-list/.kb/config.toml b/examples/reading-list/.kb/config.toml index eab4e09..98b0bda 100644 --- a/examples/reading-list/.kb/config.toml +++ b/examples/reading-list/.kb/config.toml @@ -1,6 +1,6 @@ [provider] default = "anthropic" -model =
"claude-sonnet-4-6-20250514" +model = "claude-sonnet-4-6" fast_model = "claude-haiku-4-5-20251001" [compile] diff --git a/examples/reading-list/.kb/manifest.json b/examples/reading-list/.kb/manifest.json index b1121d2..80f77df 100644 --- a/examples/reading-list/.kb/manifest.json +++ b/examples/reading-list/.kb/manifest.json @@ -5,7 +5,7 @@ "created": "2026-01-05T10:00:00.000Z", "lastCompiled": "2026-04-01T16:00:00.000Z", "provider": "anthropic", - "model": "claude-sonnet-4-6-20250514" + "model": "claude-sonnet-4-6" }, "sources": { "src_tfas001": { diff --git a/packages/cli/README.md b/packages/cli/README.md index ac03a4f..433c2f9 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -83,7 +83,7 @@ On first use, kib walks you through provider setup interactively. Or set via env | Provider | Env Variable | Default Model | |----------|-------------|---------------| -| Anthropic | `ANTHROPIC_API_KEY` | claude-sonnet-4-6-20250514 | +| Anthropic | `ANTHROPIC_API_KEY` | claude-sonnet-4-6 | | OpenAI | `OPENAI_API_KEY` | gpt-4o | | Ollama | (auto-detect localhost:11434) | llama3 | diff --git a/packages/core/README.md b/packages/core/README.md index 20190c4..653097b 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -59,7 +59,7 @@ console.log(answer.answer); | Provider | Env Variable | Default Model | |----------|-------------|---------------| -| Anthropic | `ANTHROPIC_API_KEY` | claude-sonnet-4-6-20250514 | +| Anthropic | `ANTHROPIC_API_KEY` | claude-sonnet-4-6 | | OpenAI | `OPENAI_API_KEY` | gpt-4o | | Ollama | (auto-detect localhost:11434) | llama3 | diff --git a/packages/core/src/compile/compiler.ts b/packages/core/src/compile/compiler.ts index 94fa761..08cfedf 100644 --- a/packages/core/src/compile/compiler.ts +++ b/packages/core/src/compile/compiler.ts @@ -348,11 +348,7 @@ async function compileSingleSource( tags: Array.isArray(frontmatter.tags) ? (frontmatter.tags as string[]) : [], summary: (frontmatter.summary as string) ??
"", wordCount: countWords(body), - category: ((frontmatter.category as string) ?? "topic") as - | "concept" - | "topic" - | "reference" - | "output", + category: normalizeCategory((frontmatter.category as string) ?? "topic"), }; const articleTitle = (frontmatter.title as string) ?? articleSlug; @@ -570,11 +566,13 @@ async function compileVaultInner( const msg = (result.reason as Error).message ?? String(result.reason); if ( msg.includes("401") || + msg.includes("404") || msg.includes("authentication") || msg.includes("No LLM provider") ) { throw result.reason; } + allWarnings.push(`Failed to compile ${sourcePath}: ${msg}`); options.onProgress?.(`Failed to compile ${sourcePath}: ${msg}`); } } @@ -629,11 +627,13 @@ async function compileVaultInner( const msg = (err as Error).message ?? String(err); if ( msg.includes("401") || + msg.includes("404") || msg.includes("authentication") || msg.includes("No LLM provider") ) { throw err; } + allWarnings.push(`Failed to compile ${sourcePath}: ${msg}`); options.onProgress?.(`Failed to compile ${sourcePath}: ${msg}`); } } @@ -714,7 +714,7 @@ async function compileVaultInner( const totalOutputTokens = perSourceUsage.reduce((sum, u) => sum + u.outputTokens, 0); return { - sourcesCompiled: sourcesToCompile.length, + sourcesCompiled: perSourceUsage.length, articlesCreated: totalCreated, articlesUpdated: totalUpdated, articlesDeleted: totalDeleted,
@@ -793,6 +793,30 @@ function categoryForSourceType(sourceType: string): string { } } +type ArticleCategory = "concept" | "topic" | "reference" | "output"; + +/** + * Normalize an article category to its singular schema value. + * Accepts the singular or plural (directory-name) form in any letter case. + * Anything else, including a non-string value, falls back to "topic". + */ +function normalizeCategory(raw: string): ArticleCategory { + const map: Record<string, ArticleCategory> = { + concepts: "concept", + concept: "concept", + topics: "topic", + topic: "topic", + references: "reference", + reference: "reference", + outputs: "output", + output: "output", + }; + // The caller only asserts `string` with a cast, so check the type at runtime before calling toLowerCase(). + const key = typeof raw === "string" ? raw.toLowerCase() : ""; + // Own-property test: inherited names such as "constructor" must not be returned as a category. + return (Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined) ?? "topic"; +} + /** * Load existing wiki articles that a source previously produced. */ diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index a7795c8..4c7300c 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -24,7 +24,7 @@ export const RAW_CATEGORIES = ["articles", "papers", "repos", "images", "transcr /** Default config values */ export const DEFAULTS = { provider: "anthropic", - model: "claude-sonnet-4-6-20250514", + model: "claude-sonnet-4-6", fastModel: "claude-haiku-4-5-20251001", maxSourcesPerPass: 10, searchMaxResults: 20, @@ -34,7 +34,7 @@ export const DEFAULTS = { maxFileSizeMb: 50, compileArticleMinWords: 200, compileArticleMaxWords: 1000, - contextWindow: 200_000, // tokens — conservative default for Claude Sonnet + contextWindow: 200_000, // tokens — conservative default (Sonnet 4.6 supports 1M) maxSourceTokens: 32_000, // auto-summarize sources larger than this maxParallel: 3, // max concurrent source compilations tokensPerChar: 0.25, // rough estimate: ~4 chars per token diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh new file mode 100755 index 0000000..16dc031 --- /dev/null +++ b/scripts/smoke-test.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# Smoke test — runs kib end-to-end with real API calls +# Usage: ./scripts/smoke-test.sh +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT="$SCRIPT_DIR/.."
+VAULT="/tmp/kib-smoke-test-$$" # throwaway vault path, suffixed with this shell's PID
+PASS=0 # checks passed so far
+FAIL=0 # checks failed so far
+# Run the kib CLI from inside the vault with stderr merged into stdout. The subshell keeps the caller's working directory untouched and makes the function return the CLI's own exit status.
+kib() { ( cd "$VAULT" && bun run "$ROOT/packages/cli/bin/kib.ts" "$@" 2>&1 ); }
+# Print a green check mark / red cross followed by the check name.
+green() { printf "\033[32m✓ %s\033[0m\n" "$1"; }
+red() { printf "\033[31m✗ %s\033[0m\n" "$1"; }
+# Remove the throwaway vault whenever the script exits.
+cleanup() { rm -rf "$VAULT"; }
+trap cleanup EXIT
+
+echo ""
+echo "=== kib smoke test ==="
+echo "vault: $VAULT"
+echo ""
+
+# -- init: passes when the vault manifest file exists afterwards --
+bun run "$ROOT/packages/cli/bin/kib.ts" init "$VAULT" > /dev/null 2>&1
+if [ -f "$VAULT/.kb/manifest.json" ]; then green "init"; ((PASS++)); else red "init"; ((FAIL++)); fi
+
+# -- ingest local file: write a small markdown note into the vault and ingest it --
+cat > "$VAULT/test.md" << 'EOF'
+# Neural Networks
+
+A neural network is a computational model inspired by biological neurons. It consists of layers of interconnected nodes that process information. Key types include CNNs for images, RNNs for sequences, and Transformers for attention-based processing.
+EOF
+OUTPUT=$(kib ingest "$VAULT/test.md")
+if echo "$OUTPUT" | grep -q "Ingested 1 source"; then green "ingest (local file)"; ((PASS++)); else red "ingest (local file)"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- ingest web: fetches a live URL, so this check needs network access --
+OUTPUT=$(kib ingest "https://en.wikipedia.org/wiki/Gradient_descent")
+if echo "$OUTPUT" | grep -q "Ingested 1 source"; then green "ingest (web)"; ((PASS++)); else red "ingest (web)"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- status: the JSON report should count the two sources ingested above --
+OUTPUT=$(kib status --json)
+if echo "$OUTPUT" | grep -q '"sources": 2'; then green "status"; ((PASS++)); else red "status"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- compile (calls LLM) --
+echo ""
+echo "Compiling (calling LLM)..."
+OUTPUT=$(kib compile)
+if echo "$OUTPUT" | grep -q "article"; then green "compile"; ((PASS++)); else red "compile"; ((FAIL++)); echo " $OUTPUT"; fi
+echo " $(echo "$OUTPUT" | grep 'tokens used' || echo 'no token info')"
+
+# -- search: passes when the output mentions "neural" (case-insensitive) --
+OUTPUT=$(kib search "neural network")
+if echo "$OUTPUT" | grep -qi "neural"; then green "search"; ((PASS++)); else red "search"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- query (calls LLM): passes when the answer mentions "neural" (case-insensitive) --
+echo ""
+echo "Querying (calling LLM)..."
+OUTPUT=$(kib query "what is a neural network?")
+if echo "$OUTPUT" | grep -qi "neural"; then green "query"; ((PASS++)); else red "query"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- lint: only checks that the linter ran, not that it found nothing --
+OUTPUT=$(kib lint)
+if echo "$OUTPUT" | grep -q "Checking articles"; then green "lint (ran)"; ((PASS++)); else red "lint"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- export markdown: passes when export/INDEX.md exists in the vault; CLI output is shown on failure --
+OUTPUT=$(kib export --format markdown)
+if [ -f "$VAULT/export/INDEX.md" ]; then green "export (markdown)"; ((PASS++)); else red "export (markdown)"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- export html: passes when html-export/INDEX.html exists; CLI output is shown on failure --
+OUTPUT=$(kib export --format html --output "$VAULT/html-export")
+if [ -f "$VAULT/html-export/INDEX.html" ]; then green "export (html)"; ((PASS++)); else red "export (html)"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- skill list: passes when the listing mentions "summarize" --
+OUTPUT=$(kib skill list)
+if echo "$OUTPUT" | grep -q "summarize"; then green "skill list"; ((PASS++)); else red "skill list"; ((FAIL++)); echo " $OUTPUT"; fi
+
+# -- config: the model id must not be followed by "-", so a dated id such as claude-sonnet-4-6-20250514 fails --
+OUTPUT=$(kib config provider.model)
+if echo "$OUTPUT" | grep -Eq 'claude-sonnet-4-6([^-]|$)'; then green "config"; ((PASS++)); else red "config"; ((FAIL++)); echo " $OUTPUT"; fi
+
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+if [ "$FAIL" -eq 0 ]; then echo "All good."; else exit 1; fi