diff --git a/hindsight-tools/mission-sandbox/.gitignore b/hindsight-tools/mission-sandbox/.gitignore
new file mode 100644
index 000000000..3baed7658
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/.gitignore
@@ -0,0 +1,8 @@
+node_modules
+dist
+.next
+.next-*
+standalone
+next-env.d.ts
+*.tsbuildinfo
+projects
diff --git a/hindsight-tools/mission-sandbox/README.md b/hindsight-tools/mission-sandbox/README.md
new file mode 100644
index 000000000..5f1e659a8
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/README.md
@@ -0,0 +1,130 @@
+# @vectorize-io/hindsight-mission-sandbox
+
+Tune Hindsight's **retain (extraction)** and **observation (consolidation)** missions against your
+own task, then verify with an **external validator** (a benchmark like LOCOMO, or your app's eval).
+
+The tool is deliberately small and opinionated:
+
+- **You bring** the documents and a way to score success (the validator). The tool does **not**
+  measure accuracy or label facts — task success is decoupled from the tool.
+- **You refine a mission with feedback.** After looking at validator results, you hand the tool
+  _feedback_ (and optional failing examples); it rewrites the current mission. No good/bad labeling.
+- **Retain iterates across versioned banks** (`<project>-v1`, `-v2`, …) so you can point the
+  validator at any version and compare. **Observations iterate in place** (clear + re-consolidate),
+  since they're re-derived from the same facts.
+
+## The loop
+
+```
+init (bind docs)
+  └─ retain mission  (feedback + examples → refine retain mission)
+     └─ retain apply (ingest docs into a NEW bank <project>-vN)
+        └─ VALIDATE EXTERNALLY against <project>-vN  ─┐
+   ┌──────────────────────────────────────────────────┘  failures become the next feedback
+   ▼
+  retain mission (feedback) → retain apply → validate → …
+
+observe mission (feedback → refine obs mission) → observe apply (clear obs + re-consolidate on current bank) → validate
+```
+
+The validator is never inside the tool. A typical round: run your eval against `<project>-vN`,
+read what failed, then `retain mission <project> --feedback "<what to fix>" --example "<failing case>"`
+→ `retain apply` (new version) → re-validate.
+
+## Commands
+
+```bash
+# bind a project to its documents (no ingest yet)
+mission-sandbox init <project> --documents <path> [--api-url URL]
+
+# RETAIN loop — iterates across versioned banks
+mission-sandbox retain mission <project> --feedback "<what to change>" [--example "<failing case>" ...]
+mission-sandbox retain apply   <project>          # ingest docs → new bank <project>-vN, prints the bank id
+
+# OBSERVE loop — iterates in place on the current bank
+mission-sandbox observe mission <project> --feedback "<what to change>" [--example "<...>" ...]
+mission-sandbox observe apply   <project>         # clear observations on current bank + re-consolidate
+
+mission-sandbox status <project>                  # bound docs, current missions, versions (+ bank ids)
+mission-sandbox ui <projects-dir>                 # minimal UI: project status + versions
+```
+
+- `retain mission` / `observe mission` refine the **current** mission from your feedback (+ examples);
+  the first call (no prior mission) treats the feedback as the initial spec. The LLM sees the current
+  mission + feedback + examples — nothing else, no labels.
+- `retain apply` always creates the **next** version bank and ingests into it. Point your validator
+  at that version — e.g. by its bank id, or for LOCOMO via a `--template` built from its missions.
+- `--model` overrides the Gemini model used for mission refinement (default `gemini-2.5-flash`, or
+  `HINDSIGHT_API_LLM_MODEL`). Mission refinement is the **only** LLM call the tool makes; ingestion +
+  consolidation run on the Hindsight deployment.
+
+## Verifying with LOCOMO (example external validator)
+
+The LOCOMO runner is unchanged and is the **only** thing that measures accuracy. Build a template
+from a version's missions and point the runner at it (default mode — **no `--use-reflect`**):
+
+```bash
+# representative subset: trim the runner's input to N per category (data only — restore after)
+cd hindsight-dev/benchmarks/locomo/datasets && cp locomo10.json locomo10.full.json
+N=5   # widen to 10+ once a mission looks good, to confirm it generalises and surface weak categories
+python3 - "$N" <<'PY'
+import json, sys
+n=int(sys.argv[1]); d=json.load(open("locomo10.json"))
+for s in d:
+    if s["sample_id"]!="<id>": continue
+    s["qa"]=[q for c in (1,2,3,4) for q in [x for x in s["qa"] if x.get("category")==c and x.get("answer")][:n]]
+json.dump(d,open("locomo10.json","w"))
+PY
+cd - >/dev/null   # back to the repo root: the commands below use root-relative paths
+# verify a version's missions
+python3 -c "import json;p=json.load(open('<project>/project.json'));v=p['versions'][-1]; \
+  json.dump({'version':'1','bank':{'retain_mission':v['retainMission'],'observations_mission':v.get('observeMission')}}, \
+  open('<project>/template.json','w'))"
+set -a; source hindsight-api-slim/.env; set +a; export HINDSIGHT_API_LLM_MODEL=gemini-2.5-flash
+uv run --project hindsight-dev python hindsight-dev/benchmarks/locomo/locomo_benchmark.py \
+  --conversation <id> --wait-consolidation --template <project>/template.json
+# results: hindsight-dev/benchmarks/locomo/results/benchmark_results.json (by-category is_correct)
+mv hindsight-dev/benchmarks/locomo/datasets/locomo10.full.json hindsight-dev/benchmarks/locomo/datasets/locomo10.json
+```
+
+Read accuracy **by category**; a weak category is your next `--feedback`. Two notes from real
+runs: (1) a single question flipping between runs is usually **recall variance** (each apply
+re-ingests), so watch category trends rather than individual answers; (2) verify a "failure"
+against the transcript before chasing it — some benchmark gold answers are wrong.
+
+## Project model (`project.json`)
+
+```jsonc
+{
+  "documents": "/path/to/docs", // bound at init
+  "apiUrl": "http://localhost:8888",
+  "retain": { "mission": "…", "feedback": ["…"] },
+  "observe": { "mission": "…", "feedback": ["…"] },
+  "versions": [
+    {
+      "n": 1,
+      "bank": "<project>-v1",
+      "retainMission": "…",
+      "observeMission": "…",
+      "createdAt": "…",
+    },
+  ],
+  "currentVersion": 1,
+}
+```
+
+## Setup
+
+```bash
+npm install
+npm run build --workspace @vectorize-io/hindsight-mission-sandbox
+export GEMINI_API_KEY=...   # or GOOGLE_API_KEY, or a Gemini HINDSIGHT_API_LLM_* in your .env
+```
+
+## Development
+
+```bash
+npm run test       # vitest unit tests for core
+npm run typecheck  # tsc for the lib + the Next app
+npm run build      # build the core lib (dist) + the minimal Next UI
+```
diff --git a/hindsight-tools/mission-sandbox/bin/cli.js b/hindsight-tools/mission-sandbox/bin/cli.js
new file mode 100755
index 000000000..27e7cb818
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/bin/cli.js
@@ -0,0 +1,13 @@
+#!/usr/bin/env node
+// Thin launcher: delegate to the compiled CLI. Run `npm run build:lib` (or `npm run build`)
+// to produce dist/. For development without a build, use `npm run cli -- <args>` (tsx).
+import("../dist/cli/index.js").catch((err) => {
+  if (err && err.code === "ERR_MODULE_NOT_FOUND") {
+    console.error(
+      "mission-sandbox: build output missing. Run `npm run build` in the package first."
+    );
+  } else {
+    console.error(err);
+  }
+  process.exit(1);
+});
diff --git a/hindsight-tools/mission-sandbox/next.config.ts b/hindsight-tools/mission-sandbox/next.config.ts
new file mode 100644
index 000000000..b1e2befbb
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/next.config.ts
@@ -0,0 +1,9 @@
+import type { NextConfig } from "next";
+
+const nextConfig: NextConfig = {
+  // The core library is consumed prebuilt (dist); its heavy runtime deps are not bundled.
+  serverExternalPackages: ["@google/genai", "@vectorize-io/hindsight-client"],
+  output: "standalone", // self-contained server output, repackaged by `build:standalone`
+};
+
+export default nextConfig;
diff --git a/hindsight-tools/mission-sandbox/package.json b/hindsight-tools/mission-sandbox/package.json
new file mode 100644
index 000000000..851a7fa85
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/package.json
@@ -0,0 +1,74 @@
+{
+  "name": "@vectorize-io/hindsight-mission-sandbox",
+  "version": "0.1.0",
+  "description": "Iterate on Hindsight retain and observation missions with a validator-driven feedback loop — CLI + Next.js UI",
+  "type": "module",
+  "main": "./dist/core/index.js",
+  "types": "./dist/core/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/core/index.d.ts",
+      "import": "./dist/core/index.js"
+    },
+    "./core": {
+      "types": "./dist/core/index.d.ts",
+      "import": "./dist/core/index.js"
+    }
+  },
+  "bin": {
+    "mission-sandbox": "bin/cli.js"
+  },
+  "files": [
+    "bin",
+    "dist",
+    "standalone",
+    "public"
+  ],
+  "scripts": {
+    "build": "npm run build:lib && npm run build:ui",
+    "build:lib": "tsc -p tsconfig.lib.json",
+    "build:ui": "NODE_ENV=production next build && npm run build:standalone",
+    "build:standalone": "rm -rf standalone && SERVER_JS=$(find .next/standalone -path '*/node_modules' -prune -o -name 'server.js' -print | head -1) && test -n \"$SERVER_JS\" || (echo 'Error: server.js not found in .next/standalone - standalone build failed' && exit 1) && STANDALONE_ROOT=$(dirname \"$SERVER_JS\") && cp -r \"$STANDALONE_ROOT\" standalone && cp -r .next/standalone/node_modules standalone/node_modules && mkdir -p standalone/.next && cp -r .next/static standalone/.next/static && mkdir -p standalone/public && (cp -r public/* standalone/public/ 2>/dev/null || true)",
+    "dev": "npm run build:lib && next dev -p ${PORT:-7777}",
+    "cli": "tsx src/cli/index.ts",
+    "start": "next start -p ${PORT:-7777}",
+    "lint": "eslint .",
+    "typecheck": "tsc -p tsconfig.lib.json --noEmit && tsc --noEmit",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "prepublishOnly": "npm run build"
+  },
+  "keywords": [
+    "hindsight",
+    "memory",
+    "observations",
+    "mission",
+    "prompt-optimization"
+  ],
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/vectorize-io/hindsight.git",
+    "directory": "hindsight-tools/mission-sandbox"
+  },
+  "dependencies": {
+    "@google/genai": "^2.7.0",
+    "@vectorize-io/hindsight-client": "^0.7.0",
+    "commander": "^14.0.0",
+    "next": "^16.2.6",
+    "react": "^19.2.0",
+    "react-dom": "^19.2.0"
+  },
+  "devDependencies": {
+    "@tailwindcss/postcss": "^4.1.17",
+    "@types/node": "^24.10.0",
+    "@types/react": "^19.2.2",
+    "@types/react-dom": "^19.2.2",
+    "eslint": "^9.39.1",
+    "eslint-config-next": "^16.0.1",
+    "tailwindcss": "^4.1.17",
+    "tsx": "^4.19.2",
+    "typescript": "^5.9.3",
+    "vitest": "^4.1.2"
+  }
+}
diff --git a/hindsight-tools/mission-sandbox/postcss.config.mjs b/hindsight-tools/mission-sandbox/postcss.config.mjs
new file mode 100644
index 000000000..c7bcb4b1e
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/postcss.config.mjs
@@ -0,0 +1,5 @@
+const config = {
+  plugins: ["@tailwindcss/postcss"],
+};
+
+export default config;
diff --git a/hindsight-tools/mission-sandbox/src/app/api/extract/route.ts b/hindsight-tools/mission-sandbox/src/app/api/extract/route.ts
new file mode 100644
index 000000000..b778a4198
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/api/extract/route.ts
@@ -0,0 +1,28 @@
+import { runExtractPreview } from "@vectorize-io/hindsight-mission-sandbox/core";
+
+import { projectDir } from "@/app/lib/project-context";
+
+export const runtime = "nodejs";
+export const dynamic = "force-dynamic";
+
+/** Dry-run extraction preview: what does this mission extract from the given text? (no ingest) */
+export async function POST(req: Request) {
+  // Unparseable JSON and a literal `null` body both become `{}` so the checks below cannot throw.
+  const body = ((await req.json().catch(() => null)) ?? {}) as Record<string, unknown>;
+  const { project, content, retainMission } = body;
+  if (typeof project !== "string" || typeof content !== "string" || !project || !content) {
+    return Response.json({ error: "project and content are required" }, { status: 400 });
+  }
+  if (retainMission != null && typeof retainMission !== "string") {
+    return Response.json({ error: "retainMission must be a string or null" }, { status: 400 });
+  }
+  let status = 400; // until the preview starts, a failure means the project name was rejected
+  try {
+    const dir = projectDir(project);
+    status = 500;
+    const facts = await runExtractPreview({ projectDir: dir, content, retainMission });
+    return Response.json({ facts });
+  } catch (e) {
+    return Response.json({ error: e instanceof Error ? e.message : String(e) }, { status });
+  }
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/components/ExtractPanel.tsx b/hindsight-tools/mission-sandbox/src/app/components/ExtractPanel.tsx
new file mode 100644
index 000000000..bb505d57c
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/components/ExtractPanel.tsx
@@ -0,0 +1,106 @@
+"use client";
+
+import { useState } from "react";
+
+interface PreviewFact {
+  text: string; // the fact itself, shown as the list item's main text
+  factType: string; // printed inside the bracketed suffix after the text
+  occurredStart: string | null; // when set, only its first 10 characters are displayed
+  occurredEnd: string | null; // part of the payload; not rendered by this panel
+  entities: string[]; // joined with ", " in the suffix when non-empty
+}
+
+/**
+ * Dry-run extraction preview: paste text + an optional mission, see what the retain step would
+ * extract — with no ingestion, no persistence. Posts to this app's `/api/extract` route.
+ *
+ * `defaultMission` only seeds the editable mission field; an empty field is sent as `null`.
+ */
+export function ExtractPanel(props: { project: string; defaultMission: string | null }) {
+  const { project, defaultMission } = props;
+  const [content, setContent] = useState("");
+  const [mission, setMission] = useState(defaultMission ?? "");
+  const [facts, setFacts] = useState<PreviewFact[] | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  async function run() {
+    setLoading(true);
+    setError(null);
+    setFacts(null);
+    try {
+      const res = await fetch("/api/extract", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({ project, content, retainMission: mission || null }),
+      });
+      // A crash page or proxy error may not be JSON: report the HTTP status, not a parse error.
+      const data = await res.json().catch(() => null);
+      if (!res.ok || !data) throw new Error(data?.error || `HTTP ${res.status}`);
+      setFacts((data.facts ?? []) as PreviewFact[]);
+    } catch (e) {
+      setError(e instanceof Error ? e.message : String(e));
+    } finally {
+      setLoading(false);
+    }
+  }
+
+  return (
+    <details className="mt-5 rounded-lg border border-[var(--border)] p-4">
+      <summary className="cursor-pointer text-sm font-semibold uppercase tracking-wide text-[var(--muted)]">
+        Dry-run extraction (preview a mission, no ingest)
+      </summary>
+
+      <label className="mt-3 block text-xs uppercase tracking-wider text-[var(--muted)]">
+        text
+      </label>
+      <textarea
+        aria-label="text to extract facts from"
+        className="mt-1 h-28 w-full rounded-md border border-[var(--border)] bg-[var(--surface-2)] p-2 text-sm"
+        placeholder="Paste a document / chunk to extract facts from…"
+        value={content}
+        onChange={(e) => setContent(e.target.value)}
+      />
+
+      <label className="mt-3 block text-xs uppercase tracking-wider text-[var(--muted)]">
+        retain mission (override — defaults to the project&apos;s current mission)
+      </label>
+      <textarea
+        aria-label="retain mission override"
+        className="mt-1 h-20 w-full rounded-md border border-[var(--border)] bg-[var(--surface-2)] p-2 text-xs"
+        value={mission}
+        onChange={(e) => setMission(e.target.value)}
+      />
+
+      <button
+        className="mt-3 rounded-md border border-[var(--accent)] px-3 py-1.5 text-sm text-[var(--accent)] disabled:opacity-50"
+        onClick={run}
+        disabled={loading || !content.trim()}
+      >
+        {loading ? "Extracting…" : "Extract (dry-run)"}
+      </button>
+
+      {error ? <p className="mt-2 text-sm text-[var(--bad)]">{error}</p> : null}
+
+      {facts ? (
+        <div className="mt-3">
+          <div className="text-xs uppercase tracking-wider text-[var(--muted)]">
+            {facts.length} fact{facts.length === 1 ? "" : "s"} extracted
+          </div>
+          <ul className="mt-1 space-y-1">
+            {facts.map((f, i) => (
+              <li key={i} className="border-l-2 border-[var(--border)] pl-3 text-sm">
+                {f.text}
+                <span className="ml-1 text-xs text-[var(--muted)]">
+                  [{f.factType}
+                  {f.occurredStart ? ` · ${f.occurredStart.slice(0, 10)}` : ""}
+                  {f.entities.length ? ` · ${f.entities.join(", ")}` : ""}]
+                </span>
+              </li>
+            ))}
+          </ul>
+        </div>
+      ) : null}
+    </details>
+  );
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/global-error.tsx b/hindsight-tools/mission-sandbox/src/app/global-error.tsx
new file mode 100644
index 000000000..a7bc643f6
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/global-error.tsx
@@ -0,0 +1,37 @@
+"use client";
+
+/** Last-resort error screen: renders its own <html>/<body> and uses inline styles only. */
+export default function GlobalError(props: {
+  error: Error & { digest?: string };
+  reset: () => void;
+}) {
+  const retry = () => props.reset(); // wired to the "Try again" button below
+  return (
+    <html lang="en">
+      <body
+        style={{
+          background: "#0b0f17",
+          color: "#e6edf6",
+          fontFamily: "system-ui, sans-serif",
+          padding: "3rem",
+        }}
+      >
+        <h2>Something went wrong</h2>
+        <p style={{ color: "#8b9bb4" }}>The Mission Sandbox UI hit an unexpected error.</p>
+        <button
+          onClick={retry}
+          style={{
+            marginTop: "1rem",
+            borderRadius: "0.5rem",
+            background: "#4c8dff",
+            color: "white",
+            padding: "0.5rem 1rem",
+            border: 0,
+          }}
+        >
+          Try again
+        </button>
+      </body>
+    </html>
+  );
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/globals.css b/hindsight-tools/mission-sandbox/src/app/globals.css
new file mode 100644
index 000000000..7c08a6516
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/globals.css
@@ -0,0 +1,59 @@
+@import "tailwindcss";
+
+:root {
+  --background: #0c0d10;
+  --surface: #14161b;
+  --surface-2: #1b1e25;
+  --border: #262a33;
+  --text: #eceef2;
+  --muted: #9aa3b2;
+  --good: #3fb950;
+  --bad: #f0726a;
+  --accent: #6aa0ff;
+}
+
+html,
+body {
+  background: var(--background);
+  color: var(--text);
+  font-family:
+    ui-sans-serif,
+    system-ui,
+    -apple-system,
+    "Segoe UI",
+    Roboto,
+    sans-serif;
+  font-size: 15px;
+  line-height: 1.65;
+  -webkit-font-smoothing: antialiased;
+}
+
+* {
+  box-sizing: border-box;
+}
+
+a {
+  color: inherit;
+  text-decoration: none;
+}
+
+code {
+  font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
+  font-size: 0.85em;
+}
+
+/* Disclosure triangles: subtle, no default marker clutter. */
+summary {
+  list-style: none;
+  user-select: none;
+}
+summary::-webkit-details-marker {
+  display: none;
+}
+summary::before {
+  content: "▸ ";
+  color: var(--muted);
+}
+details[open] > summary::before {
+  content: "▾ ";
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/layout.tsx b/hindsight-tools/mission-sandbox/src/app/layout.tsx
new file mode 100644
index 000000000..5d9a728ce
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/layout.tsx
@@ -0,0 +1,16 @@
+import type { Metadata } from "next";
+
+import "./globals.css";
+
+export const metadata: Metadata = {
+  description: "Iterate on Hindsight observation missions with a fast feedback loop.",
+  title: "Mission Sandbox", // document title
+};
+
+export default function RootLayout(props: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <body>{props.children}</body>
+    </html>
+  );
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/lib/project-context.ts b/hindsight-tools/mission-sandbox/src/app/lib/project-context.ts
new file mode 100644
index 000000000..a8f83a1c6
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/lib/project-context.ts
@@ -0,0 +1,71 @@
+import { promises as fs } from "node:fs";
+import path from "node:path";
+
+import {
+  Project,
+  readStatus,
+  type ProjectStatus,
+} from "@vectorize-io/hindsight-mission-sandbox/core";
+
+export type { ProjectStatus };
+
+export interface ProjectListItem {
+  name: string; // project name as stored in the loaded project
+  apiUrl: string; // API URL recorded for the project
+  versions: number; // how many versions the project has (a count, not the list)
+  currentBank: string | null; // result of the project's currentBank(); may be null
+  createdAt: string; // creation timestamp string; the list is sorted on it, newest first
+}
+
+/** Root directory of all named projects: MISSION_SANDBOX_PROJECTS_DIR if set, else the cwd. */
+export function projectsRoot(): string {
+  const { MISSION_SANDBOX_PROJECTS_DIR: configured } = process.env;
+  return !configured ? process.cwd() : path.resolve(configured);
+}
+
+/** Resolve a project name to its directory under the root, rejecting path traversal. */
+export function projectDir(name: string): string {
+  const slug = name
+    .trim()
+    .replace(/[^a-zA-Z0-9 _-]/g, "")
+    .replace(/\s+/g, "-");
+  if (!slug) throw new Error(`Invalid project name: ${JSON.stringify(name)}`);
+  const root = projectsRoot();
+  const dir = path.resolve(root, slug); // checked via path.relative so a root of "/" works too
+  if (path.relative(root, dir).startsWith("..")) throw new Error("Invalid project path");
+  return dir;
+}
+
+/** List every initialized project under the root, newest first; unloadable ones are skipped. */
+export async function listProjects(): Promise<ProjectListItem[]> {
+  const root = projectsRoot();
+  const entries = await fs.readdir(root, { withFileTypes: true }).catch(() => null);
+  if (!entries) return []; // missing or unreadable root: nothing to list
+  const items: ProjectListItem[] = [];
+  for (const entry of entries) {
+    if (!entry.isDirectory()) continue;
+    const dir = path.join(root, entry.name);
+    try {
+      if (!(await Project.exists(dir))) continue;
+      const proj = await Project.load(dir);
+      items.push({
+        name: proj.name,
+        apiUrl: proj.apiUrl,
+        versions: proj.versions.length,
+        currentBank: proj.currentBank(),
+        createdAt: proj.createdAt,
+      });
+    } catch (e) {
+      console.warn(`mission-sandbox: skipping unreadable project at ${dir}:`, e);
+    }
+  }
+  items.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
+  return items;
+}
+
+/** Look up a single project's status; resolves to null when no such project has been set up. */
+export async function getStatus(name: string): Promise<ProjectStatus | null> {
+  const dir = projectDir(name);
+  const initialized = await Project.exists(dir);
+  return initialized ? readStatus(dir) : null;
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/not-found.tsx b/hindsight-tools/mission-sandbox/src/app/not-found.tsx
new file mode 100644
index 000000000..271094011
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/not-found.tsx
@@ -0,0 +1,8 @@
+export default function NotFound() {
+  return (
+    <main style={{ color: "#e6edf6", padding: "3rem" }}>
+      <h2>404 — Not found</h2>
+      <p style={{ color: "#8b9bb4" }}>This page does not exist.</p>
+    </main>
+  );
+}
diff --git a/hindsight-tools/mission-sandbox/src/app/page.tsx b/hindsight-tools/mission-sandbox/src/app/page.tsx
new file mode 100644
index 000000000..cb27961b6
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/app/page.tsx
@@ -0,0 +1,341 @@
+import Link from "next/link";
+
+import { ExtractPanel } from "@/app/components/ExtractPanel";
+import { getStatus, listProjects, projectsRoot } from "@/app/lib/project-context";
+
+export const dynamic = "force-dynamic";
+
+interface StatusVersion {
+  n: number; // version number, displayed as `v{n}`
+  bank: string; // bank name shown next to the timestamp
+  retainMission: string | null;
+  observeMission: string | null;
+  feedback: string[]; // listed under "feedback" when non-empty
+  notes: string; // an empty string renders as "—"
+  createdAt: string; // timestamp; the row shows its first 16 chars with "T" → " "
+}
+
+/** Entry page: `?project=<name>` renders that project, anything else the project list. */
+export default async function Page(props: {
+  searchParams: Promise<{ project?: string | string[] }>;
+}) {
+  const { project } = await props.searchParams;
+  const name = Array.isArray(project) ? project[0] : project; // `?project=a&project=b` → "a"
+  return (
+    <main className="mx-auto max-w-2xl px-6 py-12">
+      {name ? await ProjectView({ name }) : await ProjectList()}
+    </main>
+  );
+}
+
+async function ProjectList() {
+  const projects = await listProjects();
+  return (
+    <>
+      <h1 className="text-xl font-semibold">Mission Sandbox</h1>
+      <p className="mt-1 text-sm text-[var(--muted)]">
+        Projects in <code>{projectsRoot()}</code> — driven from the CLI; this view is read-only.
+      </p>
+      {projects.length > 0 ? (
+        <ul className="mt-8">
+          {projects.map(({ name, versions, currentBank }) => (
+            <li key={name} className="border-t border-[var(--border)]">
+              <Link
+                href={`/?project=${encodeURIComponent(name)}`}
+                className="flex items-baseline justify-between py-3 hover:text-[var(--accent)]"
+              >
+                <span className="font-medium">{name}</span>
+                <span className="text-xs text-[var(--muted)]">
+                  {versions} version{versions === 1 ? "" : "s"}
+                  {currentBank ? ` · ${currentBank}` : ""}
+                </span>
+              </Link>
+            </li>
+          ))}
+        </ul>
+      ) : (
+        <p className="mt-8 text-sm text-[var(--muted)]">
+          No projects yet. Create one with <code>mission-sandbox init</code>.
+        </p>
+      )}
+    </>
+  );
+}
+
+async function ProjectView({ name }: { name: string }) {
+  const s = await getStatus(name);
+  if (!s) return <p className="text-sm text-[var(--bad)]">No project named “{name}”.</p>;
+  const newestFirst = s.versions.slice().reverse();
+  return (
+    <>
+      <Link href="/" className="text-xs text-[var(--muted)] hover:text-[var(--accent)]">
+        ← all projects
+      </Link>
+      <h1 className="mt-2 text-xl font-semibold">{s.name}</h1>
+      <p className="mt-1 text-xs text-[var(--muted)]">
+        <code>{s.documents}</code> · {s.apiUrl} · current{" "}
+        <code className="text-[var(--text)]">{s.currentBank ?? "none"}</code>
+      </p>
+
+      {/* Activity timeline and golden-snapshot summary; each renders nothing when empty. */}
+      <Timeline steps={s.steps} />
+      <GoldenPanel
+        goldenCount={s.goldenCount}
+        goldenAt={s.goldenAt}
+        curations={s.curations}
+        lastCheck={s.lastCheck}
+      />
+      {/* The dry-run panel is only offered once the project has a current bank. */}
+      {s.currentBank ? <ExtractPanel project={s.name} defaultMission={s.retainMission} /> : null}
+
+      {newestFirst.length > 0 ? (
+        <div className="mt-10">
+          {newestFirst.map((v) => (
+            <VersionRow key={v.n} version={v} current={v.n === s.currentVersion} />
+          ))}
+        </div>
+      ) : (
+        <p className="mt-10 text-sm text-[var(--muted)]">
+          No versions yet. Set a mission (<code>retain mission</code>) and run{" "}
+          <code>retain apply</code>.
+        </p>
+      )}
+      {/* Working copies of both missions, shown with their feedback lists. */}
+      <WorkingMissions
+        retain={s.retainMission}
+        observe={s.observeMission}
+        retainFeedback={s.retainFeedback}
+        observeFeedback={s.observeFeedback}
+      />
+    </>
+  );
+}
+
+function VersionRow({ version, current }: { version: StatusVersion; current: boolean }) {
+  return (
+    <section className="border-t border-[var(--border)] py-5">
+      <div className="flex items-baseline justify-between">
+        <h2 className="text-lg font-semibold">
+          v{version.n}
+          {current && (
+            <span className="ml-2 align-middle text-xs text-[var(--accent)]">current</span>
+          )}
+        </h2>
+        <span className="text-xs text-[var(--muted)]">
+          <code>{version.bank}</code> · {version.createdAt.slice(0, 16).replace("T", " ")}
+        </span>
+      </div>
+      {/* Feedback recorded for this version; the list is omitted when there is none. */}
+      {version.feedback.length > 0 && (
+        <div className="mt-3">
+          <Label>feedback</Label>
+          <ul className="mt-1 space-y-1">
+            {version.feedback.map((item, idx) => (
+              <li key={idx} className="border-l-2 border-[var(--border)] pl-3 text-sm">
+                {item}
+              </li>
+            ))}
+          </ul>
+        </div>
+      )}
+      {/* Notes always get a row; an empty note is shown as a muted dash. */}
+      <div className="mt-3">
+        <Label>notes</Label>
+        <p className="mt-1 whitespace-pre-wrap border-l-2 border-[var(--accent)] pl-3 text-sm">
+          {version.notes ? version.notes : <span className="text-[var(--muted)]">—</span>}
+        </p>
+      </div>
+      {/* Mission texts sit inside collapsible <details> blocks. */}
+      {version.retainMission ? (
+        <details className="mt-3 text-sm text-[var(--muted)]">
+          <summary className="cursor-pointer">retain mission</summary>
+          <p className="mt-1 whitespace-pre-wrap pl-4 text-[var(--text)]">
+            {version.retainMission}
+          </p>
+        </details>
+      ) : null}
+      {version.observeMission ? (
+        <details className="mt-1 text-sm text-[var(--muted)]">
+          <summary className="cursor-pointer">observation mission</summary>
+          <p className="mt-1 whitespace-pre-wrap pl-4 text-[var(--text)]">
+            {version.observeMission}
+          </p>
+        </details>
+      ) : null}
+    </section>
+  );
+}
+
+/**
+ * Collapsible panel showing the retain and observation missions as they currently stand, each
+ * with its feedback list. The caption notes they are what the next apply will use.
+ */
+function WorkingMissions(props: {
+  retain: string | null;
+  observe: string | null;
+  retainFeedback: string[];
+  observeFeedback: string[];
+}) {
+  const { retain, observe, retainFeedback, observeFeedback } = props;
+  return (
+    <details className="mt-8 border-t border-[var(--border)] pt-5 text-sm text-[var(--muted)]">
+      <summary className="cursor-pointer">working missions (used on next apply)</summary>
+      <div className="mt-3 space-y-4">
+        <MissionBlock label="retain" mission={retain} feedback={retainFeedback} />
+        <MissionBlock label="observation" mission={observe} feedback={observeFeedback} />
+      </div>
+    </details>
+  );
+}
+
+/**
+ * One working mission: its text ("—" when unset) followed by a numbered feedback list.
+ */
+function MissionBlock(props: {
+  label: string;
+  mission: string | null;
+  feedback: string[];
+}) {
+  const { label, mission, feedback } = props;
+  return (
+    <div>
+      <Label>{label}</Label>
+      <p className="mt-1 whitespace-pre-wrap text-[var(--text)]">{mission ?? "—"}</p>
+      {feedback.length > 0 && (
+        <ol className="mt-1 list-decimal space-y-0.5 pl-5 text-xs">
+          {feedback.map((item, idx) => (
+            <li key={idx}>{item}</li>
+          ))}
+        </ol>
+      )}
+    </div>
+  );
+}
+
+const STEP_ICON: Record<string, string> = {
+  init: "📥", // keyed by step kind; Timeline falls back to "•" for kinds not listed here
+  "retain mission": "✎",
+  "retain apply": "⚙",
+  "retain check": "✓",
+  "observe mission": "✎",
+  "observe apply": "⚙",
+  trace: "🔍",
+  curate: "✂",
+  snapshot: "📌",
+  eval: "🎯",
+  note: "🗒",
+};
+
+/** Numbered activity log of project steps; renders nothing while there are no steps. */
+function Timeline(props: {
+  steps: { id: string; at: string; kind: string; summary: string; detail: string | null }[];
+}) {
+  const { steps } = props;
+  if (!steps.length) return null;
+  return (
+    <section className="mt-5">
+      <h2 className="text-sm font-semibold uppercase tracking-wide text-[var(--muted)]">
+        Activity ({steps.length})
+      </h2>
+      <ol className="mt-2">
+        {steps.map((s, i) => (
+          <li key={s.id} className="flex gap-3 border-l border-[var(--border)] pl-4 pb-4 relative">
+            <span className="absolute -left-2 top-0 text-xs">{STEP_ICON[s.kind] ?? "•"}</span>
+            <div className="min-w-0 flex-1">
+              <div className="flex items-baseline justify-between gap-2">
+                <span className="text-sm">
+                  <span className="font-medium">
+                    {i + 1}. {s.kind}
+                  </span>{" "}
+                  <span className="text-[var(--muted)]">— {s.summary}</span>
+                </span>
+                <span className="shrink-0 text-xs text-[var(--muted)]">{s.at.slice(11, 16)}</span>
+              </div>
+              {s.detail ? (
+                <details className="mt-1 text-xs text-[var(--muted)]">
+                  <summary className="cursor-pointer">detail</summary>
+                  <pre className="mt-1 whitespace-pre-wrap font-sans text-[var(--text)]">
+                    {s.detail}
+                  </pre>
+                </details>
+              ) : null}
+            </div>
+          </li>
+        ))}
+      </ol>
+    </section>
+  );
+}
+
+/**
+ * Golden-snapshot summary: memory count and freeze time, the last `retain check` coverage, and
+ * the curation log. Renders nothing when there are no golden memories and no curations.
+ */
+function GoldenPanel(props: {
+  goldenCount: number;
+  goldenAt: string | null;
+  curations: {
+    id: string;
+    memoryId: string;
+    kind: string;
+    before: string;
+    after: string | null;
+    reason: string | null;
+  }[];
+  lastCheck: { coverage: number; covered: number; total: number; docs: number; at: string } | null;
+}) {
+  const { goldenCount, goldenAt, curations, lastCheck } = props;
+  if (goldenCount === 0 && curations.length === 0) return null;
+  // Append "…" only when the text was actually shortened, so short texts are shown verbatim.
+  const clip = (s: string, max: number) => (s.length > max ? `${s.slice(0, max)}…` : s);
+  return (
+    <div className="mt-5 rounded-lg border border-[var(--accent)] p-4">
+      <div className="flex items-baseline justify-between">
+        <span className="text-sm font-semibold">Golden snapshot (Phase 1 → 2)</span>
+        <span className="text-xs text-[var(--muted)]">
+          {goldenCount} {goldenCount === 1 ? "memory" : "memories"}
+          {goldenAt ? ` · frozen ${goldenAt.slice(0, 16).replace("T", " ")}` : ""}
+        </span>
+      </div>
+
+      {lastCheck ? (
+        <p className="mt-2 text-sm">
+          <Label>last mission check</Label> <b>{(lastCheck.coverage * 100).toFixed(0)}%</b> coverage
+          ({lastCheck.covered}/{lastCheck.total} golden across {lastCheck.docs} doc
+          {lastCheck.docs === 1 ? "" : "s"})
+        </p>
+      ) : (
+        <p className="mt-2 text-xs text-[var(--muted)]">No `retain check` run yet.</p>
+      )}
+
+      {curations.length > 0 ? (
+        <div className="mt-3">
+          <Label>curations ({curations.length})</Label>
+          <ul className="mt-1 space-y-1 text-sm">
+            {curations.map((c) => (
+              <li key={c.id} className="border-l-2 border-[var(--border)] pl-3">
+                <span
+                  className={c.kind === "invalidate" ? "text-[var(--bad)]" : "text-[var(--accent)]"}
+                >
+                  {c.kind}
+                </span>{" "}
+                <span className="text-[var(--muted)]">
+                  {c.kind === "edit"
+                    ? `“${clip(c.before, 50)}” → “${clip(c.after ?? "", 60)}”`
+                    : `“${clip(c.before, 70)}”`}
+                </span>
+                {c.reason ? <div className="text-xs text-[var(--muted)]">— {c.reason}</div> : null}
+              </li>
+            ))}
+          </ul>
+        </div>
+      ) : null}
+    </div>
+  );
+}
+
+function Label(props: { children: React.ReactNode }) {
+  // Small uppercase, muted caption; used both above sections and inline within sentences.
+  const caption = "text-[0.7rem] uppercase tracking-wider text-[var(--muted)]";
+  return <span className={caption}>{props.children}</span>;
+}
diff --git a/hindsight-tools/mission-sandbox/src/cli/index.ts b/hindsight-tools/mission-sandbox/src/cli/index.ts
new file mode 100644
index 000000000..f0807b479
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/cli/index.ts
@@ -0,0 +1,295 @@
+#!/usr/bin/env node
+/** mission-sandbox CLI — headless driver for the validator-driven mission loop. */
+
+import { spawn } from "node:child_process";
+import { existsSync } from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { Command } from "commander";
+
+import {
+  loadProjectEnv,
+  readStatus,
+  runCurate,
+  runInit,
+  runInspect,
+  runLog,
+  runMission,
+  runTrace,
+  runNote,
+  runObserveApply,
+  runRetainApply,
+  runRetainCheck,
+  runSnapshot,
+  type CurationKind,
+  type MissionKind,
+} from "../core/index.js";
+
+// Load the Hindsight deployment's .env (LLM provider/model/key, API key) before any command runs.
+loadProjectEnv();
+
+const log = (msg: string) => process.stdout.write(`${msg}\n`); // progress sink: one stdout line
+
+function packageRoot(): string {
+  const cliDir = path.dirname(fileURLToPath(import.meta.url)); // <root>/dist/cli or <root>/src/cli
+  return path.resolve(cliDir, "..", ".."); // the package root is two levels above this module
+}
+
+/** Launch the UI: the standalone build under this CLI's Node binary, or `next dev` as fallback. */
+function launchUi(projectsDir: string, port: string): void {
+  const root = packageRoot();
+  const standalone = path.join(root, "standalone", "server.js");
+  const env = {
+    ...process.env,
+    PORT: port,
+    MISSION_SANDBOX_PROJECTS_DIR: path.resolve(projectsDir),
+  };
+  const fail = (err: Error): void => { // spawn() reports launch failures via an 'error' event
+    process.stderr.write(`Error: could not start the UI: ${err.message}\n`);
+    process.exit(1);
+  };
+  if (existsSync(standalone)) {
+    log(`Starting Mission Sandbox UI (standalone) on http://localhost:${port}`);
+    spawn(process.execPath, [standalone], { stdio: "inherit", env }).on("error", fail);
+    return;
+  }
+  const binName = process.platform === "win32" ? "next.cmd" : "next";
+  const nextBin = path.join(root, "node_modules", ".bin", binName);
+  if (!existsSync(nextBin)) {
+    throw new Error(
+      "UI not built and Next.js not found. Run `npm run build` first, or `npm run dev` for development."
+    );
+  }
+  log(`Starting Mission Sandbox UI (dev) on http://localhost:${port}`);
+  // NOTE(review): recent Node rejects spawning `.cmd` shims without `shell: true` — check Windows.
+  spawn(nextBin, ["dev", "-p", port], { stdio: "inherit", env, cwd: root }).on("error", fail);
+}
+
+// Root command; each subcommand and command group below is attached to it.
+const program = new Command()
+  .name("mission-sandbox")
+  .description("Tune Hindsight retain/observation missions; verify with an external validator.");
+
+// `init` — create a project bound to a documents path and API settings. Nothing is ingested
+// here; the bound path is read again by every later `retain apply`.
+
+/** Flags commander parses for `init` (`--api-url` becomes `apiUrl`, and so on). */
+interface InitFlags {
+  documents: string;
+  apiUrl: string;
+  apiKey?: string;
+  name?: string;
+}
+
+program
+  .command("init")
+  .description("Bind a project to its documents path + API config (no ingest)")
+  .argument("<project>", "Project directory to create")
+  .requiredOption("--documents <path>", "Path to documents dir or file (bound for re-ingest)")
+  .option("--api-url <url>", "Hindsight API URL", "http://localhost:8888")
+  .option("--api-key <key>", "Hindsight API key (optional; or set HINDSIGHT_API_KEY)")
+  .option("--name <name>", "Bank-id prefix (defaults to the project directory name)")
+  .action(async (projectDir: string, flags: InitFlags) => {
+    const { documents, apiUrl, apiKey, name } = flags;
+    // Flags are passed through unchanged; `runInit` resolves the path and applies the name default.
+    const params = { projectDir, documents, apiUrl, apiKey, name };
+    await runInit(params, log);
+  });
+
+const MODEL_HELP = // help text shared by every `--model` flag
+  "Gemini model for mission refinement (defaults to HINDSIGHT_API_LLM_MODEL or gemini-2.5-flash)";
+
+/**
+ * Build the `mission` subcommand shared by the `retain` and `observe` groups.
+ *
+ * @param kind Which mission `runMission` refines; also named in the command description.
+ * @returns A new, unattached command; the caller adds it to its group.
+ */
+function missionCommand(kind: MissionKind): Command {
+  const cmd = new Command("mission");
+  cmd.description(`Refine the ${kind} mission from feedback (+ optional examples)`);
+  cmd.argument("<project>", "Project directory");
+  cmd.requiredOption("--feedback <text>", "What to change, based on your validator's results");
+  // Variadic: commander gathers every `--example` value into one array (empty by default).
+  cmd.option("--example <text...>", "Failing example(s) to ground the refinement", []);
+  cmd.option("--model <model>", MODEL_HELP);
+  cmd.action(
+    async (projectDir: string, flags: { feedback: string; example: string[]; model?: string }) => {
+      const { feedback, example: examples, model } = flags;
+      await runMission({ projectDir, kind, feedback, examples, model }, log);
+    }
+  );
+  return cmd;
+}
+
+// `retain` group: `mission` (refine), `apply` (ingest into a new bank), `check` (golden coverage).
+const retain = new Command("retain").description("Retain (extraction) loop — versioned banks");
+retain.addCommand(missionCommand("retain"));
+retain
+  .command("apply")
+  .description("Ingest documents into a NEW bank <project>-vN with the current missions")
+  .argument("<project>", "Project directory")
+  .action(async (projectDir: string) => {
+    await runRetainApply({ projectDir }, log);
+  });
+retain
+  .command("check")
+  .description("Phase 2: re-extract per-doc into a scratch bank, score coverage of the golden set")
+  .argument("<project>", "Project directory")
+  .option("--doc <id...>", "Limit to specific doc ids (default: all golden docs)")
+  .option("--model <model>", MODEL_HELP)
+  .action(async (projectDir: string, flags: { doc?: string[]; model?: string }) => {
+    const summary = await runRetainCheck({ projectDir, docs: flags.doc, model: flags.model }, log);
+    // For each document with misses, print at most its first three missing entries.
+    for (const { docId, missing } of summary.perDoc) {
+      if (missing.length === 0) continue;
+      log(`  ${docId} missing: ${missing.slice(0, 3).join(" | ")}`);
+    }
+  });
+program.addCommand(retain);
+
+const observe = new Command("observe").description("Observation (consolidation) loop — in place");
+observe
+  .addCommand(missionCommand("observe")) // `observe mission`: refine the observation mission
+  .command("apply") // `observe apply`: clear + re-consolidate on the current bank
+  .description("Clear observations on the current bank and re-consolidate")
+  .argument("<project>", "Project directory")
+  .action(async (projectDir: string) => {
+    await runObserveApply({ projectDir }, log);
+  });
+program.addCommand(observe);
+
+// -- Phase 1: curate the current bank to a golden snapshot ----------------------
+
+program
+  .command("inspect")
+  .description("List facts in the current bank (filter by --doc / --grep) to trace a failure")
+  .argument("<project>", "Project directory")
+  .option("--doc <id>", "Filter by document id")
+  .option("--grep <text>", "Full-text search")
+  .action(async (projectDir: string, { doc, grep }: { doc?: string; grep?: string }) => {
+    const facts = await runInspect({ projectDir, doc, grep });
+    log(`${facts.length} fact(s):`); // then one line per fact: id, [doc id or "?"], text
+    facts.forEach((fact) => log(`  ${fact.id}  [${fact.docId ?? "?"}]  ${fact.text}`));
+  });
+
+// `trace` — debugging aid: what recall returns for a question, next to the evidence docs' facts.
+program
+  .command("trace")
+  .description("Recall what the bank retrieves for a question + show the evidence doc's memories")
+  .argument("<project>", "Project directory")
+  .requiredOption("--query <text>", "The (failing) eval question")
+  .option("--doc <id...>", "Evidence document id(s) the answer should come from")
+  .action(async (projectDir: string, { query, doc: docs }: { query: string; doc?: string[] }) => {
+    const trace = await runTrace({ projectDir, query, docs });
+    log(`Retrieved for "${query}":`);
+    // Only the first eight retrieved rows are printed; evidence facts are printed in full.
+    for (const hit of trace.retrieved.slice(0, 8)) {
+      log(`  ${hit.id}  [${hit.docId ?? "?"}]  ${hit.text}`);
+    }
+    for (const { docId, facts } of trace.evidence) {
+      log(`\nEvidence doc ${docId} (${facts.length} facts):`);
+      for (const fact of facts) log(`  ${fact.id}  ${fact.text}`);
+    }
+  });
+
+// `curate` — apply exactly one curation to a memory. With no action flag the memory is
+// invalidated, as before; conflicting flags and an empty `--edit ""` are now rejected instead of
+// being silently resolved (an empty edit used to fall through to an invalidation).
+type CurateFlags = { edit?: string; invalidate?: boolean; revert?: boolean; reason?: string };
+program
+  .command("curate")
+  .description("Edit / invalidate / revert a memory in place (no re-ingest)")
+  .argument("<project>", "Project directory")
+  .argument("<memoryId>", "Memory id (from `inspect`)")
+  .option("--edit <text>", "Replace the memory text")
+  .option("--invalidate", "Soft-retire the memory")
+  .option("--revert", "Restore an invalidated memory")
+  .option("--reason <text>", "Reason (recorded)")
+  .action(async (projectDir: string, memoryId: string, opts: CurateFlags) => {
+    const { edit: text, reason } = opts;
+    const picked = [text !== undefined, !!opts.invalidate, !!opts.revert].filter(Boolean).length;
+    if (picked > 1) throw new Error("Use only one of --edit, --invalidate, --revert.");
+    // "" is falsy, so test for presence explicitly rather than by truthiness.
+    if (text === "") throw new Error("--edit requires non-empty text.");
+    const kind: CurationKind = text !== undefined ? "edit" : opts.revert ? "revert" : "invalidate";
+    await runCurate({ projectDir, memoryId, kind, text, reason }, log);
+  });
+
+program
+  .command("log")
+  .description("Record a free-form step in the activity log (e.g. an external eval result)")
+  .argument("<project>", "Project directory")
+  .argument("<summary>", "One-line summary, e.g. 'eval summer-plans → FAIL'")
+  .option("--kind <kind>", "Step label", "eval")
+  .option("--detail <text>", "Optional detail")
+  .action(async (projectDir: string, summary: string, flags: { kind: string; detail?: string }) => {
+    await runLog({ projectDir, kind: flags.kind, summary, detail: flags.detail });
+    log(`logged: ${summary}`); // confirmation is printed only after the step was recorded
+  });
+
+program
+  .command("snapshot")
+  .description("Freeze the current bank's memories as the golden target (Phase 1 output)")
+  .argument("<project>", "Project directory")
+  .action(async (projectDir: string) => {
+    await runSnapshot({ projectDir }, log); // progress is reported through `log`
+  });
+
+// `note` — attach free-text notes to a version (the current one unless `--version` is given).
+program
+  .command("note")
+  .description("Set free-text notes on a version (e.g. validator results)")
+  .argument("<project>", "Project directory")
+  .argument("<text>", "Note text")
+  .option("--version <n>", "Version number (defaults to the current version)")
+  .action(async (project: string, text: string, opts: { version?: string }) => {
+    // `--version` arrives as a string. Omitted (or empty) means "current version"; anything else
+    // must be a positive integer — previously `--version abc` reached `runNote` as NaN.
+    const version = opts.version ? Number(opts.version) : undefined;
+    if (version !== undefined && !(Number.isInteger(version) && version > 0)) {
+      throw new Error(`Invalid --version '${opts.version}': expected a positive integer.`);
+    }
+    await runNote({ projectDir: project, notes: text, version }, log);
+  });
+
+program
+  .command("status")
+  .description("Show bound docs, current missions, and versions")
+  .argument("<project>", "Project directory")
+  .action(async (projectDir: string) => {
+    const status = await readStatus(projectDir);
+    log(`Project: ${status.name}  (docs: ${status.documents})`);
+    log(`API: ${status.apiUrl}`);
+    log(`Current: ${status.currentBank ?? "(none — run `retain apply`)"}`);
+    log(`\nRetain mission:\n${status.retainMission ?? "(none)"}`);
+    log(`\nObservation mission:\n${status.observeMission ?? "(none)"}`);
+    log(`\nVersions (${status.versions.length}):`);
+    // The current version is starred; continuation lines of multi-line notes are indented.
+    for (const { n, bank, createdAt, notes } of status.versions) {
+      log(`  ${n === status.currentVersion ? "*" : " "} v${n}  ${bank}  ${createdAt}`);
+      if (notes) log(`      notes: ${notes.replace(/\n/g, "\n             ")}`);
+    }
+    const frozen = status.goldenAt ? ` (frozen ${status.goldenAt})` : "";
+    log(`\nGolden: ${status.goldenCount} memories${frozen}`);
+    log(`Curations: ${status.curations.length}`);
+    if (status.lastCheck) {
+      const { coverage, covered, total, docs } = status.lastCheck;
+      log(`Last check: ${(coverage * 100).toFixed(0)}% coverage (${covered}/${total} golden, ${docs} docs)`);
+    }
+  });
+
+// `ui` — serve the read-only status UI for a directory of projects.
+program
+  .command("ui")
+  .description("Open the read-only UI to view project status + versions")
+  .argument("[projects-dir]", "Directory holding projects", ".")
+  .option("-p, --port <port>", "Port", "7777")
+  .action((dir: string, { port }: { port: string }) => launchUi(dir, port));
+
+// Run the CLI; a rejected parse/action is printed as `Error: <message>` and exits with status 1.
+program.parseAsync(process.argv).catch((cause: unknown) => {
+  process.stderr.write(`Error: ${cause instanceof Error ? cause.message : String(cause)}\n`);
+  process.exit(1);
+});
diff --git a/hindsight-tools/mission-sandbox/src/core/apply.ts b/hindsight-tools/mission-sandbox/src/core/apply.ts
new file mode 100644
index 000000000..a131943a3
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/apply.ts
@@ -0,0 +1,102 @@
+/**
+ * `retain apply` — ingest the bound documents into a NEW versioned bank with the current missions.
+ * `observe apply` — clear observations on the current bank and re-consolidate with the current
+ *                   observation mission (in place; no new version).
+ */
+
+import { collectDocuments } from "./docs.js";
+import { resolveApiKey, SandboxApi } from "./hindsight.js";
+import { provisionAndIngest } from "./pipeline.js";
+import { Project } from "./store.js";
+import type { ProgressFn } from "./types.js";
+
+export interface ApplyParams {
+  projectDir: string; // project directory passed to `Project.load`
+  apiKey?: string; // when set, used instead of the project's stored API key
+}
+
+export interface RetainApplyResult {
+  version: number; // number (`n`) of the version created by this run
+  bank: string; // bank id the documents were ingested into
+  observationCount: number; // count returned by `provisionAndIngest`
+}
+
+/**
+ * Ingest the bound documents into a fresh `<name>-vN` bank with the current retain and
+ * observation missions. The new version is saved before ingestion starts, and the
+ * `retain apply` step is recorded once ingestion has finished.
+ * @throws When no documents are found at the bound path.
+ */
+export async function runRetainApply(
+  params: ApplyParams,
+  onProgress: ProgressFn
+): Promise<RetainApplyResult> {
+  const proj = await Project.load(params.projectDir);
+  const documents = await collectDocuments(proj.documents);
+  if (documents.length === 0) throw new Error(`No .txt/.md documents found at ${proj.documents}`);
+
+  // Earlier versions already hold the first `consumed` feedback entries; attach only the rest.
+  let consumed = 0;
+  for (const earlier of proj.versions) consumed += earlier.feedback.length;
+  const version = proj.addVersion({
+    retainMission: proj.retain.mission,
+    observeMission: proj.observe.mission,
+    feedback: proj.retain.feedback.slice(consumed),
+  });
+  // Persist the new version before ingestion starts.
+  await proj.save();
+  const { n, bank } = version;
+
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  onProgress(`Applying retain → version ${n} (bank ${bank})`);
+  const missions = { retainMission: proj.retain.mission, observationsMission: proj.observe.mission };
+  const ingested = await provisionAndIngest(api, bank, missions, documents, onProgress);
+  const observationCount = ingested.observationCount;
+
+  // Record the step now that the observation count is known, then persist again.
+  const summary = `v${n} → ${bank} (${observationCount} observations)`;
+  proj.addStep("retain apply", summary, version.retainMission);
+  await proj.save();
+  onProgress(`\nVersion ${n} ready — bank: ${bank} (${observationCount} observations).`);
+  onProgress("Point your validator at this bank, then feed failures back via `retain mission`.");
+  return { version: n, bank, observationCount };
+}
+
+export interface ObserveApplyResult {
+  bank: string; // the current bank that was re-consolidated
+  observationCount: number; // `totalObservations` from bank stats after consolidation
+}
+
+/** Apply the current observation mission to the current bank: clear observations, re-consolidate. */
+export async function runObserveApply(
+  params: ApplyParams,
+  onProgress: ProgressFn
+): Promise<ObserveApplyResult> {
+  const proj = await Project.load(params.projectDir);
+  const bank = proj.currentBank();
+  if (!bank) throw new Error("No current version — run `retain apply` first.");
+
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  onProgress(`Applying observation mission to ${bank}…`);
+  await api.updateObservationsMission(bank, proj.observe.mission);
+
+  const cleared = await api.clearObservations(bank);
+  onProgress(`Cleared ${cleared} observation(s); re-consolidating…`);
+  await api.triggerConsolidation(bank);
+  await api.waitForConsolidation(bank, { onProgress }); // polls bank stats until nothing is pending
+
+  // Mirror the applied observation mission onto the current version's record, then persist.
+  const current = proj.versions.find((v) => v.n === proj.currentVersion);
+  if (current) current.observeMission = proj.observe.mission;
+  await proj.save();
+
+  const observationCount = (await api.getStats(bank)).totalObservations;
+  proj.addStep(
+    "observe apply",
+    `re-consolidated ${bank} (${observationCount} observations)`,
+    proj.observe.mission
+  );
+  await proj.save(); // second save persists the step added above
+  onProgress(`\nDone — bank ${bank} now has ${observationCount} observations.`);
+  return { bank, observationCount };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/docs.ts b/hindsight-tools/mission-sandbox/src/core/docs.ts
new file mode 100644
index 000000000..b40623da1
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/docs.ts
@@ -0,0 +1,36 @@
+/** Filesystem helper: load .txt/.md documents from a file or directory into memory. */
+
+import { promises as fs } from "node:fs";
+import path from "node:path";
+
+/** A document loaded from disk at apply-time (documents are not stored in the project). */
+export interface LoadedDocument {
+  name: string; // basename of the source file (directory part dropped)
+  content: string; // file contents, read as UTF-8
+}
+
+/**
+ * Load documents from `target`: a file is taken as-is, a directory is walked recursively for
+ * `.txt`/`.md` files (case-insensitive). Results are ordered by resolved path, named by basename.
+ */
+export async function collectDocuments(target: string): Promise<LoadedDocument[]> {
+  const root = path.resolve(target);
+  const info = await fs.stat(root).catch(() => {
+    throw new Error(`Path not found: ${target}`);
+  });
+  const found: string[] = [];
+  if (info.isFile()) found.push(root);
+  // Explicit stack instead of recursion; visit order does not matter because of the sort below.
+  const stack: string[] = info.isDirectory() ? [root] : [];
+  for (let dir = stack.pop(); dir !== undefined; dir = stack.pop()) {
+    for (const entry of await fs.readdir(dir, { withFileTypes: true })) {
+      const full = path.join(dir, entry.name);
+      if (entry.isDirectory()) stack.push(full);
+      else if (/\.(txt|md)$/i.test(entry.name)) found.push(full);
+    }
+  }
+  found.sort();
+  return Promise.all(
+    found.map(async (p) => ({ name: path.basename(p), content: await fs.readFile(p, "utf8") }))
+  );
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/env.ts b/hindsight-tools/mission-sandbox/src/core/env.ts
new file mode 100644
index 000000000..b304500ff
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/env.ts
@@ -0,0 +1,37 @@
+/**
+ * Load a .env file into process.env so the tool can reuse the Hindsight deployment's config
+ * (LLM provider/model/key, API key). Resolution order:
+ *   1. MISSION_SANDBOX_ENV_FILE if set (explicit path)
+ *   2. the nearest .env walking up from the current working directory (at most 8 directories)
+ *
+ * Uses Node's built-in loader, which does NOT overwrite already-set process.env values, so an
+ * explicitly exported variable still wins over the file.
+ */
+
+import { existsSync } from "node:fs";
+import path from "node:path";
+
+/**
+ * Load the first usable .env candidate and return its path (null if none): the explicit
+ * MISSION_SANDBOX_ENV_FILE first, then `.env` in the cwd and in up to seven ancestor directories.
+ */
+export function loadProjectEnv(): string | null {
+  const explicit = process.env.MISSION_SANDBOX_ENV_FILE;
+  const candidates: string[] = explicit ? [path.resolve(explicit)] : [];
+  for (let dir = process.cwd(), depth = 0; depth < 8; depth += 1) {
+    candidates.push(path.join(dir, ".env"));
+    const parent = path.dirname(dir);
+    if (parent === dir) break;
+    dir = parent;
+  }
+  const loaded = candidates.find((file) => {
+    if (!existsSync(file)) return false;
+    try {
+      process.loadEnvFile(file);
+      return true;
+    } catch {
+      return false; // Unreadable/malformed — try the next candidate.
+    }
+  });
+  return loaded ?? null;
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/hindsight.ts b/hindsight-tools/mission-sandbox/src/core/hindsight.ts
new file mode 100644
index 000000000..26f75dfab
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/hindsight.ts
@@ -0,0 +1,223 @@
+/**
+ * Thin wrapper over the Hindsight API.
+ *
+ * The generated TS SDK (@vectorize-io/hindsight-client) covers createBank / retain /
+ * updateBankConfig. Consolidation control, observation clearing and bank stats are not in the
+ * SDK, so we call those endpoints directly with fetch.
+ */
+
+import { HindsightClient } from "@vectorize-io/hindsight-client";
+
+import type { ProgressFn } from "./types.js";
+
+export interface BankStats {
+  pendingConsolidation: number; // stats field `pending_consolidation`; 0 when absent
+  totalObservations: number; // stats field `total_observations`; 0 when absent
+}
+
+/** A fact (world/experience memory) as returned by the list endpoint. */
+export interface FactRow {
+  id: string; // "" when the source item has no id (always "" for dry-run extraction)
+  text: string;
+  factType: string; // taken from the item's `type` or `fact_type`; "" when neither is present
+  docId: string | null; // source `document_id`; null when absent
+}
+
+/**
+ * Choose the deployment API key: a non-empty `explicit` value (project setting or flag) wins,
+ * then the HINDSIGHT_API_KEY env var. Undefined means none is set (unauthenticated deployment).
+ */
+export function resolveApiKey(explicit?: string | null): string | undefined {
+  if (explicit) return explicit;
+  return process.env.HINDSIGHT_API_KEY || undefined;
+}
+
+/** Sandbox-facing Hindsight client: generated SDK for some calls, direct `fetch` for the rest. */
+export class SandboxApi {
+  private readonly sdk: HindsightClient;
+  private readonly baseUrl: string;
+  private readonly apiKey?: string;
+
+  constructor(apiUrl: string, apiKey?: string) {
+    this.baseUrl = apiUrl.replace(/\/+$/, ""); // no trailing slash, so path suffixes append cleanly
+    this.apiKey = apiKey;
+    this.sdk = new HindsightClient({ baseUrl: this.baseUrl, apiKey });
+  }
+
+  private headers(): Record<string, string> {
+    const h: Record<string, string> = { "Content-Type": "application/json" };
+    if (this.apiKey) h.Authorization = `Bearer ${this.apiKey}`; // omitted when no key is configured
+    return h;
+  }
+
+  /** Path of a bank, or of one memory in it; ids are percent-encoded to stay single segments. */
+  private bankPath(bankId: string, memoryId?: string): string {
+    const bank = `/v1/default/banks/${encodeURIComponent(bankId)}`;
+    return memoryId === undefined ? bank : `${bank}/memories/${encodeURIComponent(memoryId)}`;
+  }
+
+  /** Issue a JSON request; throws with the status and body text on a non-OK reply, null on 204. */
+  private async raw(method: string, pathSuffix: string, body?: unknown): Promise<unknown> {
+    const res = await fetch(`${this.baseUrl}${pathSuffix}`, {
+      method,
+      headers: this.headers(),
+      body: body === undefined ? undefined : JSON.stringify(body),
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => "");
+      throw new Error(`${method} ${pathSuffix} failed: ${res.status} ${text}`);
+    }
+    return res.status === 204 ? null : res.json();
+  }
+
+  /** Create the bank with the retain + observation missions for this version. */
+  async createBank(
+    bankId: string,
+    opts: { retainMission?: string | null; observationsMission?: string | null } = {}
+  ): Promise<void> {
+    await this.sdk.createBank(bankId, {
+      enableObservations: true,
+      retainMission: opts.retainMission || undefined, // null and "" are sent as "not set"
+      observationsMission: opts.observationsMission || undefined,
+    });
+  }
+
+  /** Retain one document through the SDK under `documentId`, using update mode `replace`. */
+  async retain(bankId: string, content: string, documentId: string): Promise<void> {
+    await this.sdk.retain(bankId, content, { documentId, updateMode: "replace" });
+  }
+
+  /** List facts (world/experience memories), optionally filtered by document or search query. */
+  async listFacts(bankId: string, opts: { docId?: string; q?: string } = {}): Promise<FactRow[]> {
+    const out: FactRow[] = [];
+    const qs = new URLSearchParams({ limit: "200" });
+    if (opts.docId) qs.set("document_id", opts.docId);
+    if (opts.q) qs.set("q", opts.q);
+    let offset = 0;
+    for (;;) {
+      qs.set("offset", String(offset));
+      const page = (await this.raw("GET", `${this.bankPath(bankId)}/memories/list?${qs}`)) as {
+        items?: Array<Record<string, unknown>>;
+      };
+      const items = page.items ?? [];
+      for (const m of items) {
+        const factType = String(m.type ?? m.fact_type ?? "");
+        if (factType === "observation") continue; // observations are not returned as facts
+        out.push({
+          id: String(m.id ?? ""),
+          text: String(m.text ?? ""),
+          factType,
+          docId: m.document_id != null ? String(m.document_id) : null,
+        });
+      }
+      if (items.length < 200) break; // a page shorter than the requested limit is the last one
+      offset += items.length;
+    }
+    return out;
+  }
+
+  /** Recall facts for a query (what the bank would retrieve to answer it). */
+  async recall(bankId: string, query: string, limit = 10): Promise<FactRow[]> {
+    const res = (await this.raw("POST", `${this.bankPath(bankId)}/memories/recall`, {
+      query,
+      budget: "mid",
+    })) as { results?: Array<Record<string, unknown>> };
+    return (res.results ?? []).slice(0, limit).map((m) => ({ // `limit` is applied client-side
+      id: String(m.id ?? ""),
+      text: String(m.text ?? ""),
+      factType: String(m.type ?? m.fact_type ?? ""),
+      docId: m.document_id != null ? String(m.document_id) : null,
+    }));
+  }
+
+  /**
+   * Dry-run fact extraction: extract facts from `content` with the given retain mission, WITHOUT
+   * persisting (no resolution/links/embeddings). The API chunks internally, so this faithfully
+   * reproduces what ingestion would extract — used by Phase 2 to score mission→golden coverage
+   * without re-ingesting. The bank only supplies LLM/extraction config; it is not modified.
+   */
+  async dryRunExtract(
+    bankId: string,
+    content: string,
+    opts: { retainMission?: string | null; extractionMode?: string; chunkSize?: number } = {}
+  ): Promise<FactRow[]> {
+    const facts = await this.dryRunExtractItems(bankId, content, opts);
+    return facts.map((m) => ({
+      id: "", // nothing is persisted, so there is no memory id
+      text: String(m.text ?? ""),
+      factType: String(m.fact_type ?? ""),
+      docId: null,
+    }));
+  }
+
+  /** Dry-run extraction returning the extracted-fact items (text, fact_type, dates, entities). */
+  async dryRunExtractItems(
+    bankId: string,
+    content: string,
+    opts: { retainMission?: string | null; extractionMode?: string; chunkSize?: number } = {}
+  ): Promise<Array<Record<string, unknown>>> {
+    const body: Record<string, unknown> = { content };
+    if (opts.retainMission != null) body.retain_mission = opts.retainMission;
+    if (opts.extractionMode) body.retain_extraction_mode = opts.extractionMode;
+    if (opts.chunkSize) body.retain_chunk_size = opts.chunkSize;
+    const path = `${this.bankPath(bankId)}/memories/dry-run-extract`;
+    const res = (await this.raw("POST", path, body)) as { facts?: Array<Record<string, unknown>> };
+    return res.facts ?? [];
+  }
+
+  /** Fetch a single memory's text (for recording the before-state of a curation). */
+  async getMemoryText(bankId: string, memoryId: string): Promise<string> {
+    const m = (await this.raw("GET", this.bankPath(bankId, memoryId))) as { text?: string };
+    return m?.text ?? "";
+  }
+
+  /** Curate a single memory: edit text / invalidate / revert (PATCH, in place — no re-ingest). */
+  async updateMemory(
+    bankId: string,
+    memoryId: string,
+    body: { text?: string; state?: "valid" | "invalidated"; reason?: string }
+  ): Promise<void> {
+    await this.raw("PATCH", this.bankPath(bankId, memoryId), body);
+  }
+
+  async updateObservationsMission(bankId: string, mission: string | null): Promise<void> {
+    await this.sdk.updateBankConfig(bankId, { observationsMission: mission || undefined });
+  }
+
+  async triggerConsolidation(bankId: string): Promise<void> {
+    await this.raw("POST", `${this.bankPath(bankId)}/consolidate`);
+  }
+
+  async clearObservations(bankId: string): Promise<number> {
+    const res = await this.raw("DELETE", `${this.bankPath(bankId)}/observations`);
+    return (res as { deleted_count?: number } | null)?.deleted_count ?? 0; // 0 on an empty reply
+  }
+
+  async getStats(bankId: string): Promise<BankStats> {
+    const res = (await this.raw("GET", `${this.bankPath(bankId)}/stats`)) as {
+      pending_consolidation?: number;
+      total_observations?: number;
+    };
+    return {
+      pendingConsolidation: res.pending_consolidation ?? 0,
+      totalObservations: res.total_observations ?? 0,
+    };
+  }
+
+  /** Poll bank stats until no consolidation remains pending. */
+  async waitForConsolidation(
+    bankId: string,
+    opts: { timeoutMs?: number; pollMs?: number; onProgress?: ProgressFn } = {}
+  ): Promise<void> {
+    const timeoutMs = opts.timeoutMs ?? 600_000; // default: 10 minutes
+    const pollMs = opts.pollMs ?? 3_000; // default: poll every 3 seconds
+    const start = Date.now();
+    while (Date.now() - start < timeoutMs) {
+      const stats = await this.getStats(bankId);
+      if (stats.pendingConsolidation === 0) return;
+      opts.onProgress?.(`Waiting for consolidation… (${stats.pendingConsolidation} pending)`);
+      await new Promise((r) => setTimeout(r, pollMs));
+    }
+    throw new Error(`Consolidation did not complete within ${Math.round(timeoutMs / 1000)}s`);
+  }
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/index.ts b/hindsight-tools/mission-sandbox/src/core/index.ts
new file mode 100644
index 000000000..7c2e52a17
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/index.ts
@@ -0,0 +1,31 @@
+export * from "./types.js";
+export { loadProjectEnv } from "./env.js";
+export { Project } from "./store.js";
+export { SandboxApi, resolveApiKey } from "./hindsight.js";
+export type { BankStats } from "./hindsight.js";
+export { MissionLlm, DEFAULT_MODEL } from "./llm.js";
+export { collectDocuments } from "./docs.js";
+export type { LoadedDocument } from "./docs.js";
+export { runInit } from "./init.js";
+export type { InitParams, InitResult } from "./init.js";
+export { runMission } from "./mission.js";
+export type { MissionParams, MissionResult } from "./mission.js";
+export { runRetainApply, runObserveApply } from "./apply.js";
+export type { ApplyParams, RetainApplyResult, ObserveApplyResult } from "./apply.js";
+export { runNote } from "./note.js";
+export type { NoteParams, NoteResult } from "./note.js";
+export { runInspect, runTrace, runCurate, runSnapshot, runLog } from "./phase1.js";
+export type {
+  InspectParams,
+  TraceParams,
+  TraceResult,
+  CurateParams,
+  SnapshotResult,
+} from "./phase1.js";
+export { runRetainCheck } from "./phase2.js";
+export type { CheckParams, CheckSummary, DocCoverage } from "./phase2.js";
+export { runExtractPreview } from "./preview.js";
+export type { PreviewParams, PreviewFact } from "./preview.js";
+export type { FactRow } from "./hindsight.js";
+export { readStatus } from "./status.js";
+export type { ProjectStatus } from "./status.js";
diff --git a/hindsight-tools/mission-sandbox/src/core/init.ts b/hindsight-tools/mission-sandbox/src/core/init.ts
new file mode 100644
index 000000000..56a1e609d
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/init.ts
@@ -0,0 +1,42 @@
+/** `init` — bind a project to its documents path + API config. No ingestion. */
+
+import { promises as fs } from "node:fs";
+import path from "node:path";
+
+import { Project } from "./store.js";
+import type { ProgressFn } from "./types.js";
+
+export interface InitParams {
+  projectDir: string; // directory handed to `Project.create`
+  /** Path to the documents directory (or file) to ingest on each `retain apply`. */
+  documents: string;
+  apiUrl: string; // passed to `Project.create` as the project's API URL
+  apiKey?: string; // passed through to `Project.create`; may be omitted
+  /** Bank-id prefix; defaults to the project directory name. */
+  name?: string;
+}
+
+export interface InitResult {
+  name: string; // the name that was used: `params.name`, else the project directory's basename
+  documents: string; // documents path after `path.resolve`
+}
+
+/**
+ * Create the project in `params.projectDir`, bound to the resolved documents path and the given
+ * API settings, and record an `init` step. Throws when the documents path cannot be stat'ed.
+ */
+export async function runInit(params: InitParams, onProgress: ProgressFn): Promise<InitResult> {
+  const { projectDir, apiUrl, apiKey } = params;
+  const documents = path.resolve(params.documents);
+  await fs.stat(documents).catch(() => {
+    throw new Error(`Documents path not found: ${params.documents}`);
+  });
+  const name = params.name ?? path.basename(path.resolve(projectDir));
+
+  const proj = await Project.create(projectDir, { name, documents, apiUrl, apiKey });
+  proj.addStep("init", `bound to ${documents}`, `api: ${proj.apiUrl}`);
+  await proj.save();
+  onProgress(`Initialized project '${name}' bound to ${documents} (api: ${proj.apiUrl}).`);
+  onProgress("Next: set a mission with `retain mission --feedback`, then `retain apply`.");
+  return { name, documents };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/llm.ts b/hindsight-tools/mission-sandbox/src/core/llm.ts
new file mode 100644
index 000000000..868850fcc
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/llm.ts
@@ -0,0 +1,186 @@
+/**
+ * LLM layer for mission refinement, backed by Google Gemini (@google/genai).
+ *
+ * The API key is read from GEMINI_API_KEY (or GOOGLE_API_KEY) unless passed explicitly. Mission
+ * refinement is the only LLM call the tool makes — there is no labeling or scoring.
+ */
+
+import { GoogleGenAI, Type } from "@google/genai";
+import type { GenerateContentParameters, GenerateContentResponse } from "@google/genai";
+
+import type { MissionKind } from "./types.js";
+
+export interface CoverageResult {
+  /** Indices (into the golden list) that are semantically reproduced by the candidate set. */
+  coveredIndices: number[];
+  missing: string[]; // texts of the golden memories that are not covered
+}
+
+/** A golden memory to score, optionally carrying the Phase-1 edit that produced it. */
+export interface GoldenForCoverage {
+  text: string;
+  curatedFrom?: string | null; // pre-edit text; when truthy the prompt marks the memory CURATED
+  curateReason?: string | null; // optional reason appended to that CURATED note
+}
+
+export const DEFAULT_MODEL = "gemini-2.5-flash";
+
+const KIND_BLURB: Record<MissionKind, string> = { // per-kind description used by systemPrompt()
+  retain:
+    `A "retain mission" steers what facts and entities get extracted from documents during ` +
+    `ingestion. It is injected alongside the system's built-in extraction rules.`,
+  observe:
+    `An "observation mission" controls how raw facts get consolidated into observations — ` +
+    `synthesized summaries derived from multiple facts.`,
+};
+
+function systemPrompt(kind: MissionKind): string { // system instruction for mission refinement
+  return `You are an expert at writing ${kind} missions for a memory system.
+
+${KIND_BLURB[kind]}
+
+You are given the current mission and the user's feedback (and optionally concrete failing
+examples) gathered from an external evaluation of the memory. Rewrite the mission so it addresses
+the feedback while preserving what already works.
+
+Rules:
+- Output a single improved mission: concise and actionable (a few sentences to a short paragraph).
+- Fold the feedback in directly; do not just append it.
+- If there is no current mission, write one from scratch that satisfies the feedback.
+- Respond with ONLY the mission text — no preamble, explanation, or wrapper.`;
+}
+
+/** Hindsight's configured LLM key/model — only when the provider is unset or Gemini-family. */
+function hindsightGeminiConfig(): { key?: string; model?: string } {
+  const provider = (process.env.HINDSIGHT_API_LLM_PROVIDER ?? "").toLowerCase();
+  const usable = provider === "" || ["gemini", "google", "vertexai"].includes(provider);
+  const { HINDSIGHT_API_LLM_API_KEY: key, HINDSIGHT_API_LLM_MODEL: model } = process.env;
+  return usable ? { key, model } : {};
+}
+
+/**
+ * Resolve the Gemini API key. Sources are tried in order: the explicit argument, GEMINI_API_KEY,
+ * GOOGLE_API_KEY, then the Hindsight LLM config. Throws when none yields a non-empty value.
+ */
+function resolveApiKey(explicit?: string): string {
+  const { GEMINI_API_KEY, GOOGLE_API_KEY } = process.env;
+  // `||` rather than `??`: an empty string falls through to the next source.
+  const key = explicit || GEMINI_API_KEY || GOOGLE_API_KEY || hindsightGeminiConfig().key;
+  if (key) return key;
+  throw new Error(
+    "No Gemini API key found. Set GEMINI_API_KEY/GOOGLE_API_KEY, or configure a Gemini " +
+      "HINDSIGHT_API_LLM_* in your .env."
+  );
+}
+
+function resolveModel(explicit?: string): string { // explicit → Hindsight config → DEFAULT_MODEL
+  return explicit?.trim() || hindsightGeminiConfig().model || DEFAULT_MODEL; // `||` skips ""
+}
+
+/** Gemini-backed client: refines missions and scores golden coverage for `retain check`. */
+export class MissionLlm {
+  private readonly ai: GoogleGenAI;
+  readonly model: string;
+
+  constructor(opts: { apiKey?: string; model?: string } = {}) {
+    this.ai = new GoogleGenAI({ apiKey: resolveApiKey(opts.apiKey) });
+    this.model = resolveModel(opts.model);
+  }
+
+  /**
+   * generateContent with bounded exponential backoff on TRANSIENT Gemini errors (429/500/503,
+   * UNAVAILABLE, INTERNAL, RESOURCE_EXHAUSTED, overloaded), so one transient failure does not
+   * abort a multi-document run. Other errors, and the fifth failed attempt, are rethrown as-is.
+   */
+  private async generate(req: GenerateContentParameters): Promise<GenerateContentResponse> {
+    const MAX_ATTEMPTS = 5;
+    for (let attempt = 1; ; attempt++) {
+      try {
+        return await this.ai.models.generateContent(req);
+      } catch (e) {
+        const msg = e instanceof Error ? e.message : String(e);
+        const transient =
+          /"code":\s*(429|500|503)|UNAVAILABLE|INTERNAL|RESOURCE_EXHAUSTED|overloaded/i.test(msg);
+        if (!transient || attempt >= MAX_ATTEMPTS) throw e;
+        // Exponential backoff: 1s, 2s, 4s, 8s (capped) — attempt-based, no RNG needed.
+        await new Promise((r) => setTimeout(r, Math.min(1000 * 2 ** (attempt - 1), 8000)));
+      }
+    }
+  }
+
+  /**
+   * Refine (or create) a mission from the current mission + the user's feedback and optional
+   * failing examples. Throws if Gemini returns no text, or only whitespace.
+   */
+  async refineMission(
+    kind: MissionKind,
+    currentMission: string | null,
+    feedback: string,
+    examples: string[] = []
+  ): Promise<string> {
+    const examplesSection = examples.length
+      ? `\n\n## Failing examples\n${examples.map((e) => `- ${e}`).join("\n")}`
+      : "";
+    const response = await this.generate({
+      model: this.model,
+      contents:
+        `## Current mission\n${currentMission ?? "(no mission set — using system defaults)"}\n\n` +
+        `## Feedback\n${feedback}${examplesSection}\n\nWrite the improved mission.`,
+      config: { systemInstruction: systemPrompt(kind), temperature: 0.3 },
+    });
+    const text = response.text?.trim();
+    if (!text) throw new Error("Empty response from Gemini while refining mission");
+    return text;
+  }
+
+  /**
+   * Phase 2 objective: which golden memories does the candidate set semantically reproduce? One
+   * structured call; `missing` is derived locally so covered + missing always equals the total.
+   */
+  async coverage(golden: GoldenForCoverage[], candidate: string[]): Promise<CoverageResult> {
+    if (golden.length === 0) return { coveredIndices: [], missing: [] };
+    const goldenList = golden
+      .map((g, i) => {
+        if (!g.curatedFrom) return `[${i}] ${g.text}`;
+        const reason = g.curateReason ? `; reason: ${g.curateReason}` : "";
+        const edit = `\n    (CURATED — edited from "${g.curatedFrom}"${reason}`;
+        return `[${i}] ${g.text}${edit}. Covered ONLY if the candidate reproduces THIS specific change, not just the original fact.)`;
+      })
+      .join("\n");
+    const candList = candidate.length ? candidate.map((c) => `- ${c}`).join("\n") : "(none)";
+    const response = await this.generate({
+      model: this.model,
+      contents:
+        `GOLDEN memories (the target):\n${goldenList}\n\n` +
+        `CANDIDATE memories (what a mission just extracted):\n${candList}\n\n` +
+        `Return the indices of GOLDEN memories whose information is present in the CANDIDATE set ` +
+        `(allow paraphrase / different wording), and the texts of those that are missing.`,
+      config: {
+        systemInstruction:
+          "You compare two sets of extracted memories. A golden memory is 'covered' if a candidate " +
+          "memory conveys the same fact, even if worded differently or split/merged. For a golden " +
+          "memory marked CURATED, it is covered only if the candidate reproduces the curated change " +
+          "(the specific improvement noted), not merely the pre-edit fact.",
+        temperature: 0,
+        responseMimeType: "application/json",
+        responseSchema: {
+          type: Type.OBJECT,
+          properties: {
+            coveredIndices: { type: Type.ARRAY, items: { type: Type.INTEGER } },
+            missing: { type: Type.ARRAY, items: { type: Type.STRING } },
+          },
+          required: ["coveredIndices", "missing"],
+          propertyOrdering: ["coveredIndices", "missing"],
+        },
+      },
+    });
+    const txt = response.text;
+    if (!txt) throw new Error("Empty response from Gemini while computing coverage");
+    const parsed = JSON.parse(txt) as { coveredIndices?: number[] };
+    // The model may repeat or invent indices; keep only unique, in-range integers.
+    const inRange = (i: number): boolean => Number.isInteger(i) && i >= 0 && i < golden.length;
+    const covered = new Set((parsed.coveredIndices ?? []).filter(inRange));
+    const missing = golden.filter((_, i) => !covered.has(i)).map((g) => g.text);
+    return { coveredIndices: [...covered], missing };
+  }
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/mission.ts b/hindsight-tools/mission-sandbox/src/core/mission.ts
new file mode 100644
index 000000000..cc8d8d2c4
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/mission.ts
@@ -0,0 +1,53 @@
+/** `retain mission` / `observe mission` — refine a mission from feedback (+ optional examples). */
+
+import { MissionLlm } from "./llm.js";
+import { Project } from "./store.js";
+import type { MissionKind, ProgressFn } from "./types.js";
+
+export interface MissionParams {
+  projectDir: string;
+  kind: MissionKind; // selects which mission (retain or observe) is refined
+  feedback: string; // appended to that mission's feedback history after refinement
+  examples?: string[]; // optional failing examples shown to the LLM
+  model?: string; // forwarded to MissionLlm
+  apiKey?: string; // forwarded to MissionLlm (the Gemini key)
+}
+
+export interface MissionResult {
+  kind: MissionKind; // echoed from the request
+  mission: string; // the refined mission text that was saved
+}
+
+/**
+ * Rewrite the project's retain or observe mission from one round of feedback via the LLM, then
+ * persist the new mission, the feedback entry, and an activity-log step. Nothing is saved if the
+ * LLM call throws.
+ */
+export async function runMission(
+  params: MissionParams,
+  onProgress: ProgressFn
+): Promise<MissionResult> {
+  const { kind, feedback } = params;
+  const examples = params.examples ?? [];
+  const proj = await Project.load(params.projectDir);
+  const isRetain = kind === "retain";
+  const state = isRetain ? proj.retain : proj.observe;
+
+  const llm = new MissionLlm({ apiKey: params.apiKey, model: params.model });
+  onProgress(`Refining ${kind} mission from feedback (${llm.model})…`);
+  const mission = await llm.refineMission(kind, state.mission, feedback, examples);
+
+  // Record the outcome: new mission, feedback history, activity log.
+  state.mission = mission;
+  state.feedback.push(feedback);
+  const detail = `feedback: ${feedback}\n\nnew mission: ${mission}`;
+  proj.addStep(`${kind} mission`, `refined from feedback`, detail);
+  await proj.save();
+
+  onProgress(`\nNew ${kind} mission:\n${mission}`);
+  const nextStep = isRetain
+    ? "\nNext: `retain apply` to ingest into a new version bank."
+    : "\nNext: `observe apply` to re-consolidate the current bank.";
+  onProgress(nextStep);
+  return { kind, mission };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/note.ts b/hindsight-tools/mission-sandbox/src/core/note.ts
new file mode 100644
index 000000000..a1f09781d
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/note.ts
@@ -0,0 +1,25 @@
+/** `note` — set free-text notes on a version (e.g. validator results). */
+
+import { Project } from "./store.js";
+import type { ProgressFn } from "./types.js";
+
+export interface NoteParams {
+  projectDir: string;
+  notes: string; // replaces the version's existing notes
+  /** Version to annotate; defaults to the current version. */
+  version?: number;
+}
+
+export interface NoteResult {
+  version: number; // number of the version that was annotated
+  bank: string; // that version's bank id
+}
+
+export async function runNote(params: NoteParams, onProgress: ProgressFn): Promise<NoteResult> {
+  const proj = await Project.load(params.projectDir);
+  const { n, bank } = proj.setVersionNotes(params.notes, params.version); // throws if no such version
+  proj.addStep("note", `v${n}`, params.notes);
+  await proj.save();
+  onProgress(`Noted v${n} (${bank}).`);
+  return { version: n, bank };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/phase1.ts b/hindsight-tools/mission-sandbox/src/core/phase1.ts
new file mode 100644
index 000000000..9699eb20e
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/phase1.ts
@@ -0,0 +1,166 @@
+/**
+ * Phase 1 — curate the current bank to a GOLDEN snapshot (in place, no re-ingest).
+ *   inspect : find memories (by doc / text) to trace an eval failure.
+ *   curate  : edit / invalidate / revert a memory (PATCH).
+ *   snapshot: freeze the current memory set as the golden target.
+ */
+
+import { resolveApiKey, SandboxApi, type FactRow } from "./hindsight.js";
+import { Project } from "./store.js";
+import type { CurationKind, ProgressFn } from "./types.js";
+
+function bankOrThrow(proj: Project): string {
+  const current = proj.currentBank(); // null until the first `retain apply`
+  if (current) return current;
+  throw new Error("No current bank — run `retain apply` first.");
+}
+
+export interface InspectParams {
+  projectDir: string;
+  doc?: string; // passed to listFacts as `docId`
+  grep?: string; // passed to listFacts as `q`
+  apiKey?: string; // takes precedence over the project's stored key
+}
+
+/** List the current bank's facts, optionally narrowed by document id and/or search text. */
+export async function runInspect(params: InspectParams): Promise<FactRow[]> {
+  const proj = await Project.load(params.projectDir);
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  return api.listFacts(bankOrThrow(proj), { docId: params.doc, q: params.grep });
+}
+
+export interface TraceParams {
+  projectDir: string;
+  query: string; // the question passed to recall
+  /** Evidence document ids the answer should come from (e.g. mapped from a benchmark's evidence). */
+  docs?: string[];
+  apiKey?: string; // takes precedence over the project's stored key
+}
+
+export interface TraceResult {
+  retrieved: FactRow[]; // recall results for the query
+  evidence: { docId: string; facts: FactRow[] }[]; // facts listed per requested evidence doc
+}
+
+/**
+ * Trace a failing question: recall what the bank retrieves for it and list every memory in each
+ * evidence document, so "fact present but not retrieved" (→ curate for retrieval) can be told
+ * apart from "fact never extracted" (→ fix the mission). The findings are logged as a `trace` step.
+ */
+export async function runTrace(params: TraceParams): Promise<TraceResult> {
+  const proj = await Project.load(params.projectDir);
+  const bank = bankOrThrow(proj);
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  const retrieved = await api.recall(bank, params.query);
+  // Evidence docs are fetched one at a time, in the order given.
+  const evidence: TraceResult["evidence"] = [];
+  for (const docId of params.docs ?? []) {
+    const facts = await api.listFacts(bank, { docId });
+    evidence.push({ docId, facts });
+  }
+
+  // Log only the first six retrieved memories, but every fact of each evidence doc.
+  const retrievedLines = retrieved.slice(0, 6).map((r) => `- [${r.docId ?? "?"}] ${r.text}`);
+  const evidenceBlocks = evidence.map(({ docId, facts }) => {
+    const factLines = facts.map((f) => `- ${f.id} ${f.text}`).join("\n");
+    return `\n\nevidence ${docId}:\n${factLines}`;
+  });
+  const detail = `retrieved:\n${retrievedLines.join("\n")}` + evidenceBlocks.join("");
+  proj.addStep("trace", `"${params.query}"`, detail);
+  await proj.save();
+  return { retrieved, evidence };
+}
+
+/** Append a caller-defined step (e.g. an external eval outcome) to the activity log and save. */
+export async function runLog(params: {
+  projectDir: string;
+  kind: string;
+  summary: string;
+  detail?: string;
+}): Promise<void> {
+  const project = await Project.load(params.projectDir);
+  project.addStep(params.kind, params.summary, params.detail);
+  await project.save();
+}
+
+export interface CurateParams {
+  projectDir: string;
+  memoryId: string;
+  kind: CurationKind;
+  text?: string; // replacement text; required when kind is "edit"
+  reason?: string; // sent with the update and recorded on the curation
+  apiKey?: string; // takes precedence over the project's stored key
+}
+
+/** Apply one Phase-1 curation to a memory in the current bank and record it in the project. */
+export async function runCurate(params: CurateParams, onProgress: ProgressFn): Promise<void> {
+  const { memoryId, kind, text, reason } = params;
+  const proj = await Project.load(params.projectDir);
+  const bank = bankOrThrow(proj);
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  const isEdit = kind === "edit";
+  if (isEdit && !text) throw new Error("--edit requires --text");
+  // Best effort: a failed lookup of the pre-change text is recorded as "".
+  const before = await api.getMemoryText(bank, memoryId).catch(() => "");
+
+  if (isEdit) {
+    await api.updateMemory(bank, memoryId, { text, reason });
+  } else {
+    const state = kind === "invalidate" ? ("invalidated" as const) : ("valid" as const);
+    await api.updateMemory(bank, memoryId, { state, reason });
+  }
+  const reasonLine = reason ? `\nreason: ${reason}` : "";
+  const stepDetail = isEdit
+    ? `"${before.slice(0, 80)}" → "${text}"${reasonLine}`
+    : `"${before.slice(0, 100)}"${reasonLine}`;
+  proj.addStep("curate", `${kind} ${memoryId.slice(0, 8)}`, stepDetail);
+  proj.curations.push({
+    id: `c${proj.curations.length + 1}`,
+    memoryId,
+    kind,
+    before,
+    after: isEdit ? (text ?? null) : null,
+    reason: reason ?? null,
+    at: new Date().toISOString(),
+  });
+  await proj.save();
+  onProgress(`${kind} ${memoryId} on ${bank}.`);
+}
+
+export interface SnapshotResult {
+  total: number; // number of golden memories frozen
+  docs: number; // number of distinct docIds among them
+}
+
+/** Freeze the current bank's memories as the golden set, tagging each with its latest edit. */
+export async function runSnapshot(
+  params: { projectDir: string; apiKey?: string },
+  onProgress: ProgressFn
+): Promise<SnapshotResult> {
+  const proj = await Project.load(params.projectDir);
+  const bank = bankOrThrow(proj);
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+
+  onProgress(`Freezing golden memory set from ${bank}…`);
+  const rows = await api.listFacts(bank);
+  // Index edits by memory once; later entries overwrite earlier ones, so the latest edit wins.
+  const edits = proj.curations.filter((c) => c.kind === "edit");
+  const latestEdit = new Map(edits.map((c) => [c.memoryId, c] as const));
+  proj.golden = rows.map((r) => {
+    const edit = latestEdit.get(r.id);
+    return {
+      docId: r.docId ?? "",
+      text: r.text,
+      factType: r.factType,
+      curatedFrom: edit?.before ?? null,
+      curateReason: edit?.reason ?? null,
+    };
+  });
+  proj.goldenAt = new Date().toISOString();
+  const total = proj.golden.length;
+  const docs = new Set(proj.golden.map((g) => g.docId)).size;
+  proj.addStep("snapshot", `froze ${total} golden memories across ${docs} docs`);
+  await proj.save();
+  onProgress(`Golden snapshot: ${total} memories across ${docs} doc(s).`);
+  return { total, docs };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/phase2.ts b/hindsight-tools/mission-sandbox/src/core/phase2.ts
new file mode 100644
index 000000000..f5b80e99f
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/phase2.ts
@@ -0,0 +1,109 @@
+/**
+ * Phase 2 — does the current retain mission reproduce the GOLDEN snapshot?
+ * For each doc, run a **dry-run extraction** (the API extracts at chunk level and returns candidate
+ * facts WITHOUT persisting), then score coverage of that doc's golden memories with one LLM call.
+ * No re-ingest, no scratch bank, no recall — just extraction fidelity vs the golden target.
+ */
+
+import { collectDocuments } from "./docs.js";
+import { resolveApiKey, SandboxApi } from "./hindsight.js";
+import { MissionLlm, type GoldenForCoverage } from "./llm.js";
+import { documentId } from "./pipeline.js";
+import { Project } from "./store.js";
+import type { CheckResult, ProgressFn } from "./types.js";
+
+export interface CheckParams {
+  projectDir: string;
+  /** Limit the check to specific docIds (defaults to every doc that has golden memories). */
+  docs?: string[];
+  model?: string; // forwarded to MissionLlm
+  apiKey?: string; // used for the Hindsight client and also forwarded to MissionLlm
+}
+
+export interface DocCoverage {
+  docId: string;
+  covered: number; // count of covered indices returned by the coverage judge
+  total: number; // number of golden memories for this doc
+  missing: string[]; // missing golden texts as returned by the coverage judge
+}
+
+export interface CheckSummary {
+  result: CheckResult; // aggregate, also stored as the project's lastCheck
+  perDoc: DocCoverage[]; // one entry per doc actually scored (skipped docs are absent)
+}
+
+/** Dry-run extract each target doc and score how much of its golden set the mission reproduces. */
+export async function runRetainCheck(
+  params: CheckParams,
+  onProgress: ProgressFn
+): Promise<CheckSummary> {
+  const proj = await Project.load(params.projectDir);
+  if (proj.golden.length === 0) throw new Error("No golden snapshot — run `snapshot` first.");
+  const mission = proj.retain.mission;
+  if (!mission) throw new Error("No retain mission set — run `retain mission` first.");
+  const bank = proj.currentBank();
+  if (!bank) throw new Error("No current bank — run `retain apply` first.");
+
+  // Bucket the golden memories per document, keeping curation provenance for the judge.
+  const goldenByDoc = new Map<string, GoldenForCoverage[]>();
+  for (const { docId, text, curatedFrom, curateReason } of proj.golden) {
+    const bucket = goldenByDoc.get(docId);
+    if (bucket) bucket.push({ text, curatedFrom, curateReason });
+    else goldenByDoc.set(docId, [{ text, curatedFrom, curateReason }]);
+  }
+  // Document contents from disk, keyed by the docId derived from each file name.
+  const onDisk = await collectDocuments(proj.documents);
+  const contentByDoc = new Map(onDisk.map((d) => [documentId(d.name), d.content]));
+
+  const requested = params.docs ?? [...goldenByDoc.keys()];
+  const targetDocs = requested.filter((d) => goldenByDoc.has(d));
+  if (targetDocs.length === 0) throw new Error("No matching docs to check.");
+
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  // NOTE(review): `params.apiKey` feeds both the Hindsight client and the Gemini client — confirm.
+  const llm = new MissionLlm({ apiKey: params.apiKey, model: params.model });
+
+  const perDoc: DocCoverage[] = [];
+  for (const docId of targetDocs) {
+    const content = contentByDoc.get(docId);
+    if (!content) {
+      onProgress(`  ${docId}: no document content on disk — skipped`);
+      continue;
+    }
+    onProgress(`  ${docId}: dry-run extracting + scoring coverage…`);
+    // Extraction runs with the working mission as a dry run — the bank is not mutated.
+    const extracted = await api.dryRunExtract(bank, content, { retainMission: mission });
+    const candidate = extracted.map((r) => r.text);
+    const golden = goldenByDoc.get(docId) ?? [];
+    const { coveredIndices, missing } = await llm.coverage(golden, candidate);
+    perDoc.push({ docId, covered: coveredIndices.length, total: golden.length, missing });
+    onProgress(`  ${docId}: ${coveredIndices.length}/${golden.length} golden covered`);
+  }
+
+  // Totals cover only the docs that were actually scored; skipped docs contribute nothing.
+  let covered = 0;
+  let total = 0;
+  for (const d of perDoc) {
+    covered += d.covered;
+    total += d.total;
+  }
+  const result: CheckResult = {
+    coverage: total ? covered / total : 0,
+    covered,
+    total,
+    docs: perDoc.length,
+    at: new Date().toISOString(),
+  };
+  proj.lastCheck = result;
+  const pct = (result.coverage * 100).toFixed(0);
+  const docLabel = `${perDoc.length} doc${perDoc.length === 1 ? "" : "s"}`;
+  const allMissing = perDoc.flatMap((d) => d.missing);
+  const detail = allMissing.length
+    ? `missing:\n${allMissing.map((m) => `- ${m}`).join("\n")}`
+    : "all golden reproduced";
+  proj.addStep("retain check", `${pct}% coverage (${covered}/${total} golden, ${docLabel})`, detail);
+  await proj.save();
+
+  onProgress(`\nCoverage: ${covered}/${total} golden memories (${pct}%) across ${perDoc.length} doc(s).`);
+  return { result, perDoc };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/pipeline.ts b/hindsight-tools/mission-sandbox/src/core/pipeline.ts
new file mode 100644
index 000000000..b6a4d6178
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/pipeline.ts
@@ -0,0 +1,39 @@
+/** Shared ingest pipeline: provision a versioned bank, retain documents, consolidate. */
+
+import type { LoadedDocument } from "./docs.js";
+import { SandboxApi } from "./hindsight.js";
+import type { ProgressFn } from "./types.js";
+
+/** Derive a document_id from a filename: strip any directory part and the final extension. */
+export function documentId(filename: string): string {
+  const base = filename.replace(/^.*[/\\]/, "");
+  return base.replace(/(.)\.[^.]+$/, "$1"); // needs a char before the dot, so ".env" stays whole
+}
+
+/**
+ * Provision `bankId` with the given missions, retain each document in order under its derived
+ * document id, then trigger consolidation and wait for it. Resolves with the observation count.
+ */
+export async function provisionAndIngest(
+  api: SandboxApi,
+  bankId: string,
+  missions: { retainMission?: string | null; observationsMission?: string | null },
+  documents: LoadedDocument[],
+  onProgress: ProgressFn
+): Promise<{ observationCount: number }> {
+  onProgress(`Creating bank ${bankId}…`);
+  await api.createBank(bankId, missions);
+
+  onProgress(`Ingesting ${documents.length} document(s)…`);
+  for (const { name, content } of documents) {
+    onProgress(`  Retaining: ${name} (${content.length} chars)`);
+    await api.retain(bankId, content, documentId(name));
+  }
+
+  onProgress("Triggering consolidation…");
+  await api.triggerConsolidation(bankId);
+  await api.waitForConsolidation(bankId, { onProgress });
+  onProgress("Consolidation complete.");
+  const { totalObservations } = await api.getStats(bankId);
+  return { observationCount: totalObservations };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/preview.ts b/hindsight-tools/mission-sandbox/src/core/preview.ts
new file mode 100644
index 000000000..e4166003b
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/preview.ts
@@ -0,0 +1,38 @@
+/** `preview` — dry-run extraction: show what a retain mission extracts from text, without ingesting. */
+
+import { resolveApiKey, SandboxApi } from "./hindsight.js";
+import { Project } from "./store.js";
+
+export interface PreviewFact {
+  text: string; // "" when the extracted item has no text
+  factType: string; // from the item's `fact_type`; "" when absent
+  occurredStart: string | null; // stringified `occurred_start`, or null when absent
+  occurredEnd: string | null; // stringified `occurred_end`, or null when absent
+  entities: string[]; // the item's `entities` array, or [] when it is not an array
+}
+
+export interface PreviewParams {
+  projectDir: string;
+  content: string; // text to run the dry-run extraction on
+  /** Mission to test; defaults to the project's current working retain mission. */
+  retainMission?: string | null;
+  apiKey?: string; // takes precedence over the project's stored key
+}
+
+/** Dry-run extract `content`; a mission override of `undefined` means "use the project's". */
+export async function runExtractPreview(params: PreviewParams): Promise<PreviewFact[]> {
+  const proj = await Project.load(params.projectDir);
+  const bank = proj.currentBank();
+  if (!bank)
+    throw new Error("No current bank — run `retain apply` first (extraction config lives on it).");
+  const api = new SandboxApi(proj.apiUrl, resolveApiKey(params.apiKey ?? proj.apiKey));
+  const { retainMission = proj.retain.mission } = params;
+  const items = await api.dryRunExtractItems(bank, params.content, { retainMission });
+  return items.map((m) => ({
+    text: String(m.text ?? ""),
+    factType: String(m.fact_type ?? ""),
+    occurredStart: m.occurred_start == null ? null : String(m.occurred_start),
+    occurredEnd: m.occurred_end == null ? null : String(m.occurred_end),
+    entities: Array.isArray(m.entities) ? (m.entities as string[]) : [],
+  }));
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/status.ts b/hindsight-tools/mission-sandbox/src/core/status.ts
new file mode 100644
index 000000000..440270240
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/status.ts
@@ -0,0 +1,45 @@
+/** `status` — read-only project state: bound docs, missions, versions, golden + curations + check. */
+
+import { Project } from "./store.js";
+import type { CheckResult, CurationEvent, ProjectVersion, StepEntry } from "./types.js";
+
+export interface ProjectStatus {
+  name: string;
+  dir: string; // the project directory that was loaded
+  documents: string;
+  apiUrl: string;
+  retainMission: string | null;
+  observeMission: string | null;
+  retainFeedback: string[];
+  observeFeedback: string[];
+  versions: ProjectVersion[];
+  currentVersion: number | null;
+  currentBank: string | null; // bank id of the current version, or null when there is none
+  goldenCount: number; // number of golden memories (the memories themselves are not included)
+  goldenAt: string | null;
+  curations: CurationEvent[];
+  lastCheck: CheckResult | null;
+  steps: StepEntry[];
+}
+
+export async function readStatus(projectDir: string): Promise<ProjectStatus> {
+  const proj = await Project.load(projectDir); // reads the project file; nothing is written back
+  return {
+    name: proj.name,
+    dir: proj.dir,
+    documents: proj.documents,
+    apiUrl: proj.apiUrl,
+    retainMission: proj.retain.mission,
+    observeMission: proj.observe.mission,
+    retainFeedback: proj.retain.feedback, // arrays are returned by reference, not copied
+    observeFeedback: proj.observe.feedback,
+    versions: proj.versions,
+    currentVersion: proj.currentVersion,
+    currentBank: proj.currentBank(), // null when no version is current
+    goldenCount: proj.golden.length, // only the count is exposed, not the golden memories
+    goldenAt: proj.goldenAt,
+    curations: proj.curations,
+    lastCheck: proj.lastCheck,
+    steps: proj.steps,
+  };
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/store.ts b/hindsight-tools/mission-sandbox/src/core/store.ts
new file mode 100644
index 000000000..a996a5d1f
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/store.ts
@@ -0,0 +1,189 @@
+/**
+ * Project persistence. Everything lives in `<dir>/project.json` (camelCase, 1:1 with ProjectMeta).
+ * No history dir, no facts/labels — `versions` is the durable record of what was applied.
+ */
+
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { existsSync } from "node:fs";
+import path from "node:path";
+
+import type {
+  CheckResult,
+  CurationEvent,
+  StepEntry,
+  GoldenMemory,
+  MissionState,
+  ProjectMeta,
+  ProjectVersion,
+} from "./types.js";
+
+const PROJECT_FILE = "project.json";
+
+function emptyMission(): MissionState { // a fresh object per call, so states never share arrays
+  return { mission: null, feedback: [] };
+}
+
+export class Project { // in-memory view of `<dir>/project.json`
+  name: string;
+  documents: string;
+  apiUrl: string;
+  apiKey: string | null;
+  retain: MissionState;
+  observe: MissionState;
+  versions: ProjectVersion[];
+  currentVersion: number | null;
+  golden: GoldenMemory[];
+  goldenAt: string | null;
+  curations: CurationEvent[];
+  lastCheck: CheckResult | null;
+  steps: StepEntry[];
+  createdAt: string;
+  readonly dir: string;
+
+  private constructor(dir: string, meta: ProjectMeta) { // use create() or load() to get an instance
+    this.dir = dir;
+    this.name = meta.name;
+    this.documents = meta.documents;
+    this.apiUrl = meta.apiUrl;
+    this.apiKey = meta.apiKey;
+    this.retain = meta.retain;
+    this.observe = meta.observe;
+    this.versions = meta.versions;
+    this.currentVersion = meta.currentVersion;
+    this.golden = meta.golden;
+    this.goldenAt = meta.goldenAt;
+    this.curations = meta.curations;
+    this.lastCheck = meta.lastCheck;
+    this.steps = meta.steps;
+    this.createdAt = meta.createdAt;
+  }
+
+  static projectFile(dir: string): string {
+    return path.join(dir, PROJECT_FILE);
+  }
+
+  static async exists(dir: string): Promise<boolean> {
+    return existsSync(Project.projectFile(dir)); // synchronous check behind an async signature
+  }
+
+  static async create(
+    dir: string,
+    init: { name: string; documents: string; apiUrl: string; apiKey?: string | null }
+  ): Promise<Project> {
+    await mkdir(dir, { recursive: true });
+    const proj = new Project(dir, {
+      name: init.name,
+      documents: init.documents,
+      apiUrl: init.apiUrl,
+      apiKey: init.apiKey ?? null,
+      retain: emptyMission(),
+      observe: emptyMission(),
+      versions: [],
+      currentVersion: null,
+      golden: [],
+      goldenAt: null,
+      curations: [],
+      lastCheck: null,
+      steps: [],
+      createdAt: new Date().toISOString(),
+    });
+    await proj.save(); // writes project.json immediately, replacing any existing file
+    return proj;
+  }
+
+  static async load(dir: string): Promise<Project> {
+    const raw = JSON.parse(await readFile(Project.projectFile(dir), "utf8")) as ProjectMeta; // cast, not validated
+    return new Project(dir, {
+      name: raw.name,
+      documents: raw.documents,
+      apiUrl: raw.apiUrl,
+      apiKey: raw.apiKey ?? null, // this and the `??` fields below default when absent from the file
+      retain: raw.retain ?? emptyMission(),
+      observe: raw.observe ?? emptyMission(),
+      versions: (raw.versions ?? []).map((v) => ({
+        ...v,
+        notes: v.notes ?? "",
+        feedback: v.feedback ?? [],
+      })),
+      currentVersion: raw.currentVersion ?? null,
+      golden: (raw.golden ?? []).map((g) => ({
+        ...g,
+        curatedFrom: g.curatedFrom ?? null,
+        curateReason: g.curateReason ?? null,
+      })),
+      goldenAt: raw.goldenAt ?? null,
+      curations: raw.curations ?? [],
+      lastCheck: raw.lastCheck ?? null,
+      steps: raw.steps ?? [],
+      createdAt: raw.createdAt,
+    });
+  }
+
+  async save(): Promise<void> {
+    const meta: ProjectMeta = {
+      name: this.name,
+      documents: this.documents,
+      apiUrl: this.apiUrl,
+      apiKey: this.apiKey, // written to project.json in plain text with the rest of the state
+      retain: this.retain,
+      observe: this.observe,
+      versions: this.versions,
+      currentVersion: this.currentVersion,
+      golden: this.golden,
+      goldenAt: this.goldenAt,
+      curations: this.curations,
+      lastCheck: this.lastCheck,
+      steps: this.steps,
+      createdAt: this.createdAt,
+    };
+    await writeFile(Project.projectFile(this.dir), JSON.stringify(meta, null, 2));
+  }
+
+  /** Bank id `<name>-v<currentVersion>`, or null while `currentVersion` is null. */
+  currentBank(): string | null {
+    if (this.currentVersion === null) return null;
+    return `${this.name}-v${this.currentVersion}`;
+  }
+
+  /** Record a new retain version: allocates the next number + bank id and makes it current. */
+  addVersion(missions: {
+    retainMission: string | null;
+    observeMission: string | null;
+    feedback?: string[];
+  }): ProjectVersion {
+    const n = this.versions.reduce((max, v) => Math.max(max, v.n), 0) + 1; // highest existing n, plus one
+    const version: ProjectVersion = {
+      n,
+      bank: `${this.name}-v${n}`,
+      retainMission: missions.retainMission,
+      observeMission: missions.observeMission,
+      feedback: missions.feedback ?? [],
+      notes: "",
+      createdAt: new Date().toISOString(),
+    };
+    this.versions.push(version);
+    this.currentVersion = n;
+    return version;
+  }
+
+  /** Append an action to the activity log (caller persists via save()). */
+  addStep(kind: string, summary: string, detail?: string | null): void {
+    this.steps.push({
+      id: `s${this.steps.length + 1}`, // 1-based position in the log
+      at: new Date().toISOString(),
+      kind,
+      summary,
+      detail: detail ?? null,
+    });
+  }
+
+  /** Replace the notes on version `n` (default: current); throws if that version is unknown. */
+  setVersionNotes(notes: string, n?: number): ProjectVersion {
+    const target = n ?? this.currentVersion;
+    if (target === null) throw new Error("No version to annotate — run `retain apply` first.");
+    const version = this.versions.find((v) => v.n === target);
+    if (!version) throw new Error(`No version v${target}.`);
+    version.notes = notes;
+    return version;
+  }
+}
diff --git a/hindsight-tools/mission-sandbox/src/core/types.ts b/hindsight-tools/mission-sandbox/src/core/types.ts
new file mode 100644
index 000000000..b47da1467
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/core/types.ts
@@ -0,0 +1,97 @@
+/**
+ * Domain types for mission-sandbox.
+ *
+ * A project binds a documents path + API config, and tracks two missions (retain + observe) that
+ * you refine from feedback. Each `retain apply` ingests into a fresh versioned bank `<name>-vN`.
+ * Task success is measured by an EXTERNAL validator — the tool stores no facts, labels, or scores.
+ */
+
+/** Callback fed one progress message per call, so the CLI (console) and other callers share output. */
+export type ProgressFn = (message: string) => void;
+
+/** Which mission a feedback round targets: "retain" (extraction) or "observe" (consolidation). */
+export type MissionKind = "retain" | "observe";
+
+/** A mission and the feedback that has shaped it, newest last. */
+export interface MissionState {
+  mission: string | null; // presumably null until a mission has been written — confirm
+  feedback: string[]; // ordered oldest → newest
+}
+
+/** One ingested bank version: the missions it was built with, plus free-text notes. */
+export interface ProjectVersion {
+  n: number; // 1-based version number
+  bank: string; // bank id, `${name}-v${n}`
+  retainMission: string | null;
+  observeMission: string | null;
+  /** The retain feedback entries that shaped this version (the delta since the previous one). */
+  feedback: string[];
+  /** Free-text notes for this version — e.g. validator results ("LOCOMO 4/5, missed X"). */
+  notes: string;
+  createdAt: string; // ISO-8601 timestamp (Date#toISOString)
+}
+
+/** A frozen "golden" memory — the target Phase 2 optimizes the mission to reproduce. */
+export interface GoldenMemory {
+  docId: string; // presumably the id of the source document — confirm
+  text: string;
+  factType: string;
+  /** If this memory was edited in Phase 1: the text before the edit + the reason (provenance). */
+  curatedFrom: string | null; // pre-edit text; presumably null when never edited — confirm
+  curateReason: string | null; // reason given for the edit; presumably null when never edited — confirm
+}
+
+export type CurationKind = "edit" | "invalidate" | "revert"; // actions a CurationEvent can record
+
+/** A curation action applied to a memory in Phase 1 (in-place, no re-ingest). */
+export interface CurationEvent {
+  id: string;
+  memoryId: string; // id of the memory the action was applied to
+  kind: CurationKind;
+  before: string; // presumably the memory text prior to the action — confirm
+  after: string | null; // presumably null when the action leaves no replacement text — confirm
+  reason: string | null;
+  at: string; // presumably an ISO-8601 timestamp — confirm
+}
+
+/** One recorded action in the loop — shown as a timeline in the UI. */
+export interface StepEntry {
+  id: string; // `s<N>`, N being the entry's 1-based position in the log when it was added
+  at: string; // ISO-8601 timestamp (Date#toISOString)
+  /** Short command label, e.g. "trace", "curate", "retain check", "eval". */
+  kind: string;
+  summary: string;
+  detail: string | null; // null when no detail was supplied
+}
+
+export interface ProjectMeta {
+  /** Bank-id prefix; version banks are `${name}-v${n}`. */
+  name: string;
+  /** Absolute path to the documents directory bound at init. */
+  documents: string;
+  apiUrl: string;
+  apiKey: string | null; // presumably null when no key is configured — confirm
+  retain: MissionState;
+  observe: MissionState;
+  versions: ProjectVersion[];
+  /** Version number of the current (latest applied) bank, or null before the first apply. */
+  currentVersion: number | null;
+  /** Phase 1 output: the frozen target memory set + the curations that produced it. */
+  golden: GoldenMemory[]; // NOTE(review): file header says no facts/scores are stored — confirm still used
+  goldenAt: string | null; // presumably when the golden set was frozen; null if never — confirm
+  curations: CurationEvent[];
+  /** Phase 2: result of the most recent `retain check` (coverage of golden by the current mission). */
+  lastCheck: CheckResult | null;
+  /** Chronological log of every command run — the loop's story for the UI. */
+  steps: StepEntry[];
+  createdAt: string; // presumably an ISO-8601 timestamp — confirm
+}
+
+/** Coverage of the golden set by the current retain mission (per-doc re-extraction). */
+export interface CheckResult {
+  coverage: number; // presumably covered / total — confirm scale (0–1 vs percent)
+  covered: number; // presumably the count of golden memories reproduced — confirm
+  total: number; // presumably the size of the golden set — confirm
+  docs: number; // presumably the number of documents re-extracted — confirm
+  at: string; // presumably an ISO-8601 timestamp — confirm
+}
diff --git a/hindsight-tools/mission-sandbox/src/instrumentation.ts b/hindsight-tools/mission-sandbox/src/instrumentation.ts
new file mode 100644
index 000000000..09cf049cb
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/src/instrumentation.ts
@@ -0,0 +1,7 @@
+/** Startup hook: on the Node.js runtime only, loads the deployment's .env before any route runs. */
+export async function register(): Promise<void> {
+  if (process.env.NEXT_RUNTIME !== "nodejs") return;
+  const { loadProjectEnv: loadEnv } = await import("@vectorize-io/hindsight-mission-sandbox/core");
+  const envSource = loadEnv();
+  if (envSource) console.log(`[mission-sandbox] loaded env from ${envSource}`);
+}
diff --git a/hindsight-tools/mission-sandbox/tests/hindsight.test.ts b/hindsight-tools/mission-sandbox/tests/hindsight.test.ts
new file mode 100644
index 000000000..e076e1c49
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/tests/hindsight.test.ts
@@ -0,0 +1,101 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { SandboxApi } from "../src/core/hindsight.js";
+
+vi.mock("@vectorize-io/hindsight-client", () => ({
+  HindsightClient: class {
+    createBank = vi.fn(); // every method on this stand-in client is a bare mock
+    retain = vi.fn();
+    updateBankConfig = vi.fn();
+  },
+}));
+
+// Minimal fetch `Response` stand-in: only `ok`, `status`, `json()` and `text()` are provided.
+function jsonResponse(body: unknown, ok = true, status = 200): Response {
+  const json = async (): Promise<unknown> => body;
+  const text = async (): Promise<string> => JSON.stringify(body);
+  // Double cast because the stub deliberately implements just a slice of Response.
+  const stub = { ok, status, json, text };
+  return stub as unknown as Response;
+}
+
+afterEach(() => {
+  vi.restoreAllMocks().unstubAllGlobals(); // stubGlobal("fetch") is not undone by restoreAllMocks
+});
+
+describe("SandboxApi raw endpoints", () => {
+  it("reads pending consolidation + total from /stats", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValue(jsonResponse({ pending_consolidation: 4, total_observations: 12 }));
+    vi.stubGlobal("fetch", fetchMock);
+
+    const api = new SandboxApi("http://localhost:8888/"); // trailing slash: URL below must not double it
+    const stats = await api.getStats("bank-1");
+
+    expect(stats).toEqual({ pendingConsolidation: 4, totalObservations: 12 }); // snake_case → camelCase
+    expect(fetchMock).toHaveBeenCalledWith(
+      "http://localhost:8888/v1/default/banks/bank-1/stats",
+      expect.objectContaining({ method: "GET" })
+    );
+  });
+
+  it("returns deleted_count from clearObservations", async () => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(jsonResponse({ deleted_count: 7 })));
+    const api = new SandboxApi("http://localhost:8888");
+    expect(await api.clearObservations("bank-1")).toBe(7);
+  });
+
+  it("throws with status + body on a failed request", async () => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(jsonResponse({ detail: "boom" }, false, 500)));
+    const api = new SandboxApi("http://localhost:8888");
+    await expect(api.triggerConsolidation("bank-1")).rejects.toThrow(/500/); // only the status is pinned
+  });
+
+  it("polls until consolidation drains", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(jsonResponse({ pending_consolidation: 2 }))
+      .mockResolvedValueOnce(jsonResponse({ pending_consolidation: 0 }));
+    vi.stubGlobal("fetch", fetchMock);
+
+    const api = new SandboxApi("http://localhost:8888");
+    await api.waitForConsolidation("bank-1", { pollMs: 1 }); // 1 ms poll interval keeps the test fast
+    expect(fetchMock).toHaveBeenCalledTimes(2); // one poll seeing 2 pending, one seeing 0
+  });
+});
+
+describe("SandboxApi.dryRunExtract", () => {
+  it("POSTs to /memories/dry-run-extract with the mission override and parses facts", async () => {
+    const fetchMock = vi.fn().mockResolvedValue(
+      jsonResponse({
+        facts: [
+          { text: "Alice lives in Berlin.", fact_type: "world", entities: ["Alice", "Berlin"] },
+          { text: "Alice is a nurse.", fact_type: "world", entities: ["Alice"] },
+        ],
+        usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 },
+      })
+    );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const api = new SandboxApi("http://localhost:8888");
+    const facts = await api.dryRunExtract("bank-1", "Alice lives in Berlin and is a nurse.", {
+      retainMission: "Capture where people live and their jobs.",
+    });
+
+    const [url, init] = fetchMock.mock.calls[0]; // arguments of the first fetch call
+    expect(url).toBe("http://localhost:8888/v1/default/banks/bank-1/memories/dry-run-extract");
+    expect(init.method).toBe("POST");
+    const body = JSON.parse(init.body);
+    expect(body.content).toContain("Alice lives in Berlin");
+    expect(body.retain_mission).toBe("Capture where people live and their jobs."); // sent as snake_case
+
+    // Dry-run facts are a subset (no id/document_id); only the null docId is asserted below, not the id.
+    expect(facts).toHaveLength(2);
+    expect(facts[0]).toMatchObject({
+      text: "Alice lives in Berlin.",
+      factType: "world",
+      docId: null,
+    });
+  });
+});
diff --git a/hindsight-tools/mission-sandbox/tests/store.test.ts b/hindsight-tools/mission-sandbox/tests/store.test.ts
new file mode 100644
index 000000000..a877d42fb
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/tests/store.test.ts
@@ -0,0 +1,88 @@
+import { promises as fs } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+import { Project } from "../src/core/store.js";
+import type { ProjectMeta } from "../src/core/types.js";
+
+let dir: string; // per-test scratch directory, created in beforeEach and deleted in afterEach
+
+beforeEach(async () => {
+  dir = await fs.mkdtemp(path.join(os.tmpdir(), "mission-sandbox-"));
+});
+
+afterEach(async () => {
+  await fs.rm(dir, { recursive: true, force: true }); // force: no error if the directory is already gone
+});
+
+describe("Project store", () => {
+  it("creates, persists and reloads a project", async () => {
+    const projDir = path.join(dir, "proj");
+    const proj = await Project.create(projDir, {
+      name: "conv26",
+      documents: "/docs",
+      apiUrl: "http://localhost:8888",
+      apiKey: "secret-token",
+    });
+    proj.retain.mission = "Extract durable life facts.";
+    proj.retain.feedback.push("capture dates");
+    await proj.save();
+
+    const reloaded = await Project.load(projDir);
+    expect(reloaded.name).toBe("conv26");
+    expect(reloaded.apiUrl).toBe("http://localhost:8888");
+    expect(reloaded.apiKey).toBe("secret-token");
+    expect(reloaded.retain.mission).toBe("Extract durable life facts.");
+    expect(reloaded.retain.feedback).toEqual(["capture dates"]);
+    expect(reloaded.currentVersion).toBeNull(); // no version exists until addVersion() is called
+    expect(reloaded.currentBank()).toBeNull();
+  });
+
+  it("allocates sequential versioned banks and tracks the current one", async () => {
+    const projDir = path.join(dir, "proj");
+    const proj = await Project.create(projDir, {
+      name: "conv26",
+      documents: "/docs",
+      apiUrl: "http://localhost:8888",
+    });
+
+    const v1 = proj.addVersion({ retainMission: "m1", observeMission: null });
+    expect(v1.n).toBe(1);
+    expect(v1.bank).toBe("conv26-v1");
+    expect(proj.currentBank()).toBe("conv26-v1");
+
+    const v2 = proj.addVersion({ retainMission: "m2", observeMission: "o2" });
+    expect(v2.n).toBe(2);
+    expect(v2.bank).toBe("conv26-v2");
+    await proj.save(); // addVersion only mutates in memory; save() is what the reload below reads
+
+    const reloaded = await Project.load(projDir);
+    expect(reloaded.versions).toHaveLength(2);
+    expect(reloaded.currentVersion).toBe(2);
+    expect(reloaded.currentBank()).toBe("conv26-v2");
+    expect(reloaded.versions[0]).toMatchObject({ n: 1, bank: "conv26-v1", retainMission: "m1" });
+  });
+
+  it("persists project.json as camelCase ProjectMeta", async () => {
+    const projDir = path.join(dir, "proj");
+    const proj = await Project.create(projDir, {
+      name: "conv26",
+      documents: "/docs",
+      apiUrl: "http://localhost:8888",
+    });
+    proj.observe.mission = "Aggregate per-person profiles.";
+    proj.addVersion({ retainMission: "m1", observeMission: "Aggregate per-person profiles." });
+    await proj.save();
+
+    const raw = JSON.parse(
+      await fs.readFile(path.join(projDir, "project.json"), "utf8") // read the file directly, not via Project.load
+    ) as ProjectMeta;
+    expect(raw.name).toBe("conv26");
+    expect(raw.documents).toBe("/docs");
+    expect(raw.observe.mission).toBe("Aggregate per-person profiles.");
+    expect(raw.versions[0]).toMatchObject({ n: 1, bank: "conv26-v1" });
+    expect(raw.currentVersion).toBe(1);
+  });
+});
diff --git a/hindsight-tools/mission-sandbox/tsconfig.json b/hindsight-tools/mission-sandbox/tsconfig.json
new file mode 100644
index 000000000..64aa48300
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/tsconfig.json
@@ -0,0 +1,33 @@
+{
+  "compilerOptions": {
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "react-jsx",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./src/*"]
+    },
+    "target": "ES2022"
+  },
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx",
+    ".next/types/**/*.ts",
+    ".next/dev/types/**/*.ts"
+  ],
+  "exclude": ["node_modules", "dist", "standalone"]
+}
diff --git a/hindsight-tools/mission-sandbox/tsconfig.lib.json b/hindsight-tools/mission-sandbox/tsconfig.lib.json
new file mode 100644
index 000000000..57c0cb2cf
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/tsconfig.lib.json
@@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "lib": ["esnext"],
+    "target": "ES2022",
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "sourceMap": true,
+    "outDir": "dist",
+    "rootDir": "src",
+    "noEmit": false
+  },
+  "include": ["src/core/**/*.ts", "src/cli/**/*.ts"],
+  "exclude": ["node_modules", "dist", "src/app/**", "**/*.test.ts"]
+}
diff --git a/hindsight-tools/mission-sandbox/vitest.config.ts b/hindsight-tools/mission-sandbox/vitest.config.ts
new file mode 100644
index 000000000..ba29bd1c7
--- /dev/null
+++ b/hindsight-tools/mission-sandbox/vitest.config.ts
@@ -0,0 +1,8 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    environment: "node", // plain Node runtime for every test file
+    include: ["tests/**/*.test.ts"], // only files under tests/ are collected
+  },
+});