From 475c47d2e94cf20dc2afe0e4a0d5b16fbd9ace9a Mon Sep 17 00:00:00 2001 From: Brandon Young <32550081+brandonyoungdev@users.noreply.github.com> Date: Tue, 2 Jun 2026 17:55:37 -0500 Subject: [PATCH 1/2] feat: dosu read/write commands, workflow hook, and PAT FTUE fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lands the workflow-hook FTUE for `dosu setup --agent --tool claude`. Experimental data shows this pattern drives ~10x the baseline knowledge- capture rate over passive MCP-only setups (see testing-c/dosu-interventions-findings.md). What's new - `dosu read [query]` and `dosu write ""` — CLI commands that call the new /v1/knowledge/search and /v1/topics backend endpoints. - Claude Code workflow-hook installer (installWorkflowHook in mcp/config-helpers.ts) — `dosu setup` now writes a UserPromptSubmit hook reminding the model to `dosu write` non-obvious facts at end-of-task. Idempotent install + remove. - `silent` option on installSkill() so the FTUE doesn't surface raw npx security warnings. What's fixed - PAT storage timeout — was hitting the 10s HTTP timeout on slow KMS calls. Now uses a 60s budget for /github-pat specifically (Client.doRequest({timeoutMs})), retries on 403/404 for read-after- write race, and surfaces backend `detail` from the new 503 handler. - Setup resumability — if a prior run created the data source but failed at PAT storage, rerunning detects it via findExistingGithubDataSource and skips straight to the PAT prompt. - doc-analyze messaging — distinguishes indexed-and-empty / data source missing / indexing didn't complete. The misleading "No docs found" no longer fires on real sync failures. - Recoverability audit — every error path in setup/, commands/ {read,write,ask}, and agent/ now ends with the exact recovery command. Top-level error handler appends `dosu logs --tail 30`. - Drive-by: knowledge.ts and suggest.ts were reading `ds.id` from dataSource.list — actual field is `ds.data_source_id`. Test plan - vitest 1106/1106 pass; 5 new test files - tsc, biome clean - Manually verified PAT storage end-to-end against local dev backend --- src/cli/cli.ts | 6 +- src/client/client.test.ts | 22 + src/client/client.ts | 36 +- src/commands/ask.ts | 9 +- src/commands/knowledge.test.ts | 14 +- src/commands/knowledge.ts | 4 +- src/commands/read.ts | 83 ++++ src/commands/skill.ts | 9 +- src/commands/suggest.test.ts | 6 +- src/commands/suggest.ts | 4 +- src/commands/write.ts | 70 ++++ src/index.ts | 3 + src/mcp/config-helpers.ts | 79 ++++ src/mcp/providers/claude.ts | 28 +- src/setup/doc-analyze-step.ts | 276 +++++++++++++ src/setup/flow.test.ts | 124 +++--- src/setup/flow.ts | 218 ++++++---- src/setup/github-pat-step.test.ts | 303 ++++++++++++++ src/setup/github-pat-step.ts | 639 ++++++++++++++++++++++++++++++ 19 files changed, 1781 insertions(+), 152 deletions(-) create mode 100644 src/commands/read.ts create mode 100644 src/commands/write.ts create mode 100644 src/setup/doc-analyze-step.ts create mode 100644 src/setup/github-pat-step.test.ts create mode 100644 src/setup/github-pat-step.ts diff --git a/src/cli/cli.ts b/src/cli/cli.ts index 049c390..5780857 100644 --- a/src/cli/cli.ts +++ b/src/cli/cli.ts @@ -13,12 +13,14 @@ import { integrationsCommand } from "../commands/integrations"; import { knowledgeCommand } from "../commands/knowledge"; import { membersCommand } from "../commands/members"; import { orgCommand } from "../commands/org"; +import { readCommand } from "../commands/read"; import { reviewCommand } from "../commands/review"; import { skillCommand } from "../commands/skill"; import { sourcesCommand } from "../commands/sources"; import { suggestCommand } from "../commands/suggest"; import { tagsCommand } from "../commands/tags"; import { threadsCommand } from "../commands/threads"; +import { writeCommand } from "../commands/write"; import { getConfigPath, isAuthenticated, @@ -247,12 +249,14 @@ export function createProgram(): Command { program.addCommand(knowledgeCommand()); program.addCommand(membersCommand()); program.addCommand(orgCommand()); + program.addCommand(readCommand()); program.addCommand(reviewCommand()); + program.addCommand(skillCommand()); program.addCommand(sourcesCommand()); program.addCommand(suggestCommand()); program.addCommand(tagsCommand()); program.addCommand(threadsCommand()); - program.addCommand(skillCommand()); + program.addCommand(writeCommand()); // setup program diff --git a/src/client/client.test.ts b/src/client/client.test.ts index 8f71cbf..643919f 100644 --- a/src/client/client.test.ts +++ b/src/client/client.test.ts @@ -172,6 +172,28 @@ describe("Client", () => { }); }); + describe("timeoutMs option", () => { + it("uses the default 10s timeout when no option is passed", async () => { + const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout"); + mockFetch.mockResolvedValueOnce(jsonResponse({ ok: true })); + const client = new Client(makeConfig()); + await client.get("/test"); + const timeoutCall = setTimeoutSpy.mock.calls.find((c) => c[1] === 10_000); + expect(timeoutCall).toBeDefined(); + setTimeoutSpy.mockRestore(); + }); + + it("uses the override when opts.timeoutMs is passed to doRequest", async () => { + const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout"); + mockFetch.mockResolvedValueOnce(jsonResponse({ ok: true })); + const client = new Client(makeConfig()); + await client.doRequest("POST", "/slow", { x: 1 }, { timeoutMs: 60_000 }); + const timeoutCall = setTimeoutSpy.mock.calls.find((c) => c[1] === 60_000); + expect(timeoutCall).toBeDefined(); + setTimeoutSpy.mockRestore(); + }); + }); + describe("doRequestRaw", () => { it("returns 401 without retrying or refreshing", async () => { mockFetch.mockResolvedValueOnce(jsonResponse({}, 401)); diff --git a/src/client/client.ts b/src/client/client.ts index 123dcb2..c87e549 100644 --- a/src/client/client.ts +++ b/src/client/client.ts @@ -36,6 +36,13 @@ export interface APIKeyResponse { key_prefix: string; } +const DEFAULT_REQUEST_TIMEOUT_MS = 10_000; + +export interface RequestOptions { + /** Override the per-request HTTP timeout (ms). Defaults to 10_000. */ + timeoutMs?: number; +} + export class Client { private baseURL: string; private config: Config; @@ -47,8 +54,17 @@ export class Client { /** * Performs an authenticated HTTP request with auto-refresh on 401/403. + * + * `opts.timeoutMs` overrides the default 10s timeout for endpoints known to + * exceed it (e.g. the GitHub PAT storage endpoint, which performs KMS + * encrypt + DB upsert + sync enqueue). */ - async doRequest(method: string, path: string, body?: unknown): Promise { + async doRequest( + method: string, + path: string, + body?: unknown, + opts?: RequestOptions, + ): Promise { if (!isAuthenticated(this.config)) { throw new Error("not authenticated - please run setup first"); } @@ -58,7 +74,7 @@ export class Client { await this.refreshToken(); } - let resp = await this.doRequestOnce(method, path, body); + let resp = await this.doRequestOnce(method, path, body, opts); // If backend says unauthorized, try refresh + retry once if (resp.status === 401 || resp.status === 403) { @@ -67,7 +83,7 @@ export class Client { } catch { throw new SessionExpiredError(); } - resp = await this.doRequestOnce(method, path, body); + resp = await this.doRequestOnce(method, path, body, opts); } return resp; @@ -76,11 +92,16 @@ export class Client { /** * Performs a single authenticated request without any retry/refresh logic. */ - async doRequestRaw(method: string, path: string): Promise { - return this.doRequestOnce(method, path); + async doRequestRaw(method: string, path: string, opts?: RequestOptions): Promise { + return this.doRequestOnce(method, path, undefined, opts); } - private async doRequestOnce(method: string, path: string, body?: unknown): Promise { + private async doRequestOnce( + method: string, + path: string, + body?: unknown, + opts?: RequestOptions, + ): Promise { const url = this.baseURL + path; const headers: Record = { "Content-Type": "application/json", @@ -93,7 +114,8 @@ export class Client { } const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 10_000); + const timeoutMs = opts?.timeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS; + const timeout = setTimeout(() => controller.abort(), timeoutMs); options.signal = controller.signal; try { diff --git a/src/commands/ask.ts b/src/commands/ask.ts index bb9280d..29088c2 100644 --- a/src/commands/ask.ts +++ b/src/commands/ask.ts @@ -36,7 +36,9 @@ export function askCommand(): Command { const backendURL = getBackendURL(); if (!backendURL) { - console.error(pc.red("Backend URL not configured.")); + console.error( + pc.red("Backend URL not configured. Reinstall the CLI or set DOSU_BACKEND_URL_OVERRIDE."), + ); process.exit(1); } @@ -70,6 +72,9 @@ export function askCommand(): Command { detail = typeof raw === "string" ? raw : JSON.stringify(raw, null, 2); } catch {} console.error(pc.red(`Error: ${detail}`)); + console.error( + pc.dim("Run `dosu logs --tail 30` for details, or `dosu status` to check auth."), + ); process.exit(1); } @@ -101,7 +106,7 @@ export function askCommand(): Command { } } catch (err: unknown) { if (err instanceof Error && err.name === "AbortError") { - console.error(pc.red("Request timed out.")); + console.error(pc.red("Request timed out after 120s. Re-run, or simplify the question.")); process.exit(1); } throw err; diff --git a/src/commands/knowledge.test.ts b/src/commands/knowledge.test.ts index 3f639f1..e97e399 100644 --- a/src/commands/knowledge.test.ts +++ b/src/commands/knowledge.test.ts @@ -69,8 +69,8 @@ describe("knowledge search", () => { mockLoadConfig.mockReturnValue(validConfig); mockQuery .mockResolvedValueOnce([ - { id: "ds1", name: "GH" }, - { id: "ds2", name: "Slack" }, + { data_source_id: "ds1", name: "GH" }, + { data_source_id: "ds2", name: "Slack" }, ]) .mockResolvedValueOnce({ documents: [{ title: "Doc A", similarity: 0.95 }] }); @@ -90,7 +90,7 @@ describe("knowledge search", () => { it("outputs valid JSON with --json flag", async () => { mockLoadConfig.mockReturnValue(validConfig); mockQuery - .mockResolvedValueOnce([{ id: "ds1" }]) + .mockResolvedValueOnce([{ data_source_id: "ds1" }]) .mockResolvedValueOnce({ documents: [{ title: "Result", similarity: 0.8 }] }); await run("search", "--json", "query"); @@ -111,7 +111,9 @@ describe("knowledge search", () => { it("prints message when search returns empty", async () => { mockLoadConfig.mockReturnValue(validConfig); - mockQuery.mockResolvedValueOnce([{ id: "ds1" }]).mockResolvedValueOnce({ documents: [] }); + mockQuery + .mockResolvedValueOnce([{ data_source_id: "ds1" }]) + .mockResolvedValueOnce({ documents: [] }); await run("search", "query"); @@ -124,7 +126,9 @@ describe("knowledge search", () => { title: `Doc ${i}`, similarity: 0.9 - i * 0.1, })); - mockQuery.mockResolvedValueOnce([{ id: "ds1" }]).mockResolvedValueOnce({ documents: results }); + mockQuery + .mockResolvedValueOnce([{ data_source_id: "ds1" }]) + .mockResolvedValueOnce({ documents: results }); await run("search", "--limit", "3", "query"); diff --git a/src/commands/knowledge.ts b/src/commands/knowledge.ts index 1657503..9ce99d4 100644 --- a/src/commands/knowledge.ts +++ b/src/commands/knowledge.ts @@ -37,7 +37,9 @@ export function knowledgeCommand(): Command { excluded_provider_slugs: [], }); - const dataSourceIds = dataSources.map((ds: { id: string }) => ds.id); + const dataSourceIds = (dataSources as { data_source_id?: string }[]) + .map((ds) => ds.data_source_id) + .filter((id): id is string => typeof id === "string"); if (dataSourceIds.length === 0) { console.log(pc.dim("No data sources connected. Add data sources in the Dosu dashboard.")); return; diff --git a/src/commands/read.ts b/src/commands/read.ts new file mode 100644 index 0000000..e4f0002 --- /dev/null +++ b/src/commands/read.ts @@ -0,0 +1,83 @@ +/** + * `dosu read` — CLI shortcut for the MCP init_knowledge tool. + * + * Semantic search over the reviewed knowledge base. Returns results ranked + * by relevance without the LLM source-selection pass (faster for CLI use). + */ + +import { Command } from "commander"; +import pc from "picocolors"; +import { Client } from "../client/client"; +import { loadConfig } from "../config/config"; +import { logger } from "../debug/logger"; +import { printResult, truncate } from "./output"; + +function requireConfig() { + const cfg = loadConfig(); + if (!cfg.api_key) { + console.error(pc.red("Not configured. Run 'dosu setup' first.")); + process.exit(1); + } + if (!cfg.deployment_id) { + console.error(pc.red("Missing deployment config. Run 'dosu setup' to reconfigure.")); + process.exit(1); + } + return cfg; +} + +export function readCommand(): Command { + return new Command("read") + .description("Retrieve relevant context from the knowledge base") + .argument("[query]", "Optional search query") + .option("--limit ", "Maximum results (default: 10)", "10") + .option("--json", "Output as JSON") + .action(async (query: string | undefined, opts: { limit?: string; json?: boolean }) => { + const cfg = requireConfig(); + const apiClient = new Client(cfg); + const question = query ?? "What should I know before making changes?"; + + logger.debug("read", `Searching: ${question}`); + + const resp = await apiClient.doRequest("POST", "/v1/knowledge/search", { + // biome-ignore lint/style/noNonNullAssertion: checked in requireConfig + deployment_id: cfg.deployment_id!, + query: question, + top_k: Number.parseInt(opts.limit ?? "10", 10), + }); + + if (!resp.ok) { + let detail = `Request failed with status ${resp.status}`; + try { + const body = await resp.json(); + const raw = body.detail ?? detail; + detail = typeof raw === "string" ? raw : JSON.stringify(raw); + } catch {} + console.error(pc.red(`Error: ${detail}`)); + console.error( + pc.dim("Run `dosu logs --tail 30` for details, or `dosu status` to check auth."), + ); + process.exit(1); + } + + const body = await resp.json(); + + if (opts.json) { + printResult(body, opts); + return; + } + + const results: { title: string; content: string; url?: string }[] = body.results ?? []; + if (results.length === 0) { + console.log(pc.dim("No results found.")); + return; + } + + for (const r of results) { + console.log(pc.bold(truncate(r.title, 80))); + if (r.content) { + console.log(pc.dim(truncate(r.content, 300))); + } + console.log(); + } + }); +} diff --git a/src/commands/skill.ts b/src/commands/skill.ts index 3f1123b..b7360c5 100644 --- a/src/commands/skill.ts +++ b/src/commands/skill.ts @@ -21,11 +21,16 @@ const SKILL_NAME = "dosu"; * what was installed. Network failure is non-fatal — the skill is still * installed, the SHA is just not cached (the update checker will fill it * in on the next stale check). + * + * Pass `silent: true` to suppress raw npx output (used during setup to avoid + * showing security-risk warnings in the FTUE). */ -export async function installSkill(): Promise<{ success: boolean; sha?: string }> { +export async function installSkill( + opts: { silent?: boolean } = {}, +): Promise<{ success: boolean; sha?: string }> { try { execSync(`npx skills add ${SKILL_REPO} -g -s ${SKILL_NAME} -y`, { - stdio: "inherit", + stdio: opts.silent ? "pipe" : "inherit", }); } catch (err) { logger.error("skill", `Failed to install skill: ${err}`); diff --git a/src/commands/suggest.test.ts b/src/commands/suggest.test.ts index 656da50..4e9471d 100644 --- a/src/commands/suggest.test.ts +++ b/src/commands/suggest.test.ts @@ -98,7 +98,7 @@ describe("suggest list", () => { describe("suggest generate", () => { it("fetches data sources then calls suggestedDoc.generate", async () => { mockLoadConfig.mockReturnValue(validConfig); - mockQuery.mockResolvedValueOnce([{ id: "ds1" }, { id: "ds2" }]); // dataSource.list + mockQuery.mockResolvedValueOnce([{ data_source_id: "ds1" }, { data_source_id: "ds2" }]); // dataSource.list mockMutate.mockResolvedValueOnce({}); await run("generate"); @@ -111,7 +111,7 @@ describe("suggest generate", () => { it("outputs JSON with --json", async () => { mockLoadConfig.mockReturnValue(validConfig); - mockQuery.mockResolvedValueOnce([{ id: "ds1" }]); + mockQuery.mockResolvedValueOnce([{ data_source_id: "ds1" }]); mockMutate.mockResolvedValueOnce({ status: "generating" }); await run("generate", "--json"); @@ -121,7 +121,7 @@ describe("suggest generate", () => { it("prints human-readable confirmation", async () => { mockLoadConfig.mockReturnValue(validConfig); - mockQuery.mockResolvedValueOnce([{ id: "ds1" }]); + mockQuery.mockResolvedValueOnce([{ data_source_id: "ds1" }]); mockMutate.mockResolvedValueOnce({}); await run("generate"); diff --git a/src/commands/suggest.ts b/src/commands/suggest.ts index 086f8e6..11003fe 100644 --- a/src/commands/suggest.ts +++ b/src/commands/suggest.ts @@ -85,7 +85,9 @@ export function suggestCommand(): Command { org_id: cfg.org_id, excluded_provider_slugs: [], }); - const dataSourceIds = dataSources.map((ds: { id: string }) => ds.id); + const dataSourceIds = (dataSources as { data_source_id?: string }[]) + .map((ds) => ds.data_source_id) + .filter((id): id is string => typeof id === "string"); const result = await client.suggestedDoc.generate.mutate({ knowledgeStoreId: ksId, diff --git a/src/commands/write.ts b/src/commands/write.ts new file mode 100644 index 0000000..5f5157e --- /dev/null +++ b/src/commands/write.ts @@ -0,0 +1,70 @@ +/** + * `dosu write` — CLI shortcut for the MCP save_topic tool. + * + * Saves a candidate topic to the knowledge base. The candidate enters + * Dosu's server-side review pipeline before being published and indexed + * for semantic search. + */ + +import { Command } from "commander"; +import pc from "picocolors"; +import { Client } from "../client/client"; +import { loadConfig } from "../config/config"; +import { logger } from "../debug/logger"; +import { printResult } from "./output"; + +function requireConfig() { + const cfg = loadConfig(); + if (!cfg.api_key) { + console.error(pc.red("Not configured. Run 'dosu setup' first.")); + process.exit(1); + } + if (!cfg.deployment_id) { + console.error(pc.red("Missing deployment config. Run 'dosu setup' to reconfigure.")); + process.exit(1); + } + return cfg; +} + +export function writeCommand(): Command { + return new Command("write") + .description("Save a fact or insight to the knowledge base") + .argument("", "The fact or insight to save") + .option("--json", "Output as JSON") + .action(async (fact: string, opts: { json?: boolean }) => { + const cfg = requireConfig(); + const apiClient = new Client(cfg); + + logger.debug("write", `Saving topic: ${fact.slice(0, 60)}`); + + const resp = await apiClient.doRequest("POST", "/v1/topics", { + // biome-ignore lint/style/noNonNullAssertion: checked in requireConfig + deployment_id: cfg.deployment_id!, + name: fact.trim().split(/\s+/).slice(0, 8).join(" "), + context: fact, + }); + + if (!resp.ok) { + let detail = `Request failed with status ${resp.status}`; + try { + const body = await resp.json(); + const raw = body.detail ?? detail; + detail = typeof raw === "string" ? raw : JSON.stringify(raw); + } catch {} + console.error(pc.red(`Error: ${detail}`)); + console.error( + pc.dim("Run `dosu logs --tail 30` for details, or `dosu status` to check auth."), + ); + process.exit(1); + } + + if (opts.json) { + const body = await resp.json(); + printResult(body, opts); + return; + } + + const deployment = cfg.deployment_name ?? "Dosu"; + console.log(pc.green(`Saved to ${deployment}`)); + }); +} diff --git a/src/index.ts b/src/index.ts index 1f0a368..369ddcf 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,5 +11,8 @@ process.on("SIGINT", () => process.exit(0)); execute().catch((err) => { console.error(err.message ?? err); + console.error( + "\nRun `dosu logs --tail 30` for details, or re-run with `--debug` to stream debug logs.", + ); process.exit(1); }); diff --git a/src/mcp/config-helpers.ts b/src/mcp/config-helpers.ts index d04edc2..494c58d 100644 --- a/src/mcp/config-helpers.ts +++ b/src/mcp/config-helpers.ts @@ -176,3 +176,82 @@ export function removeJSONServer(configPath: string, topKey: string): void { } saveJSONConfig(configPath, jsonCfg); } + +// Marker used to identify the Dosu workflow hook entry so we can be idempotent. +const DOSU_HOOK_MARKER = "dosu write"; + +// biome-ignore lint/suspicious/noExplicitAny: hook entries are arbitrary JSON +type HookEntry = Record; + +/** + * Installs a UserPromptSubmit hook into a Claude Code config file that reminds + * the agent to save non-obvious facts with `dosu write` after each task. + * Idempotent — no-ops if the hook is already present. + */ +export function installWorkflowHook(configPath: string): void { + const cfg = loadJSONConfig(configPath); + const hooks: Record = cfg.hooks ?? {}; + const submitHooks: HookEntry[] = Array.isArray(hooks.UserPromptSubmit) + ? hooks.UserPromptSubmit + : []; + + const alreadyInstalled = submitHooks.some((entry: HookEntry) => + (entry.hooks ?? []).some( + (h: HookEntry) => typeof h.command === "string" && h.command.includes(DOSU_HOOK_MARKER), + ), + ); + if (alreadyInstalled) return; + + submitHooks.push({ + hooks: [ + { + type: "command", + command: + "echo 'When you finish this task: if you discovered any non-obvious facts (constraints, gotchas, decisions), save them with: dosu write \"\"'", + }, + ], + }); + + cfg.hooks = { ...hooks, UserPromptSubmit: submitHooks }; + saveJSONConfig(configPath, cfg); +} + +/** + * Removes the Dosu workflow hook from a Claude Code config file. + * Idempotent — no-ops if the hook is not present. + */ +export function removeWorkflowHook(configPath: string): void { + let cfg: JsonConfig; + try { + cfg = loadJSONConfig(configPath); + } catch { + return; + } + + const hooks: Record = cfg.hooks ?? {}; + const submitHooks: HookEntry[] = Array.isArray(hooks.UserPromptSubmit) + ? hooks.UserPromptSubmit + : []; + + const filtered = submitHooks.filter( + (entry: HookEntry) => + !(entry.hooks ?? []).some( + (h: HookEntry) => typeof h.command === "string" && h.command.includes(DOSU_HOOK_MARKER), + ), + ); + + if (filtered.length === submitHooks.length) return; // nothing changed + + if (filtered.length === 0) { + delete hooks.UserPromptSubmit; + } else { + hooks.UserPromptSubmit = filtered; + } + + if (Object.keys(hooks).length === 0) { + delete cfg.hooks; + } else { + cfg.hooks = hooks; + } + saveJSONConfig(configPath, cfg); +} diff --git a/src/mcp/providers/claude.ts b/src/mcp/providers/claude.ts index b1aa010..ec602bf 100644 --- a/src/mcp/providers/claude.ts +++ b/src/mcp/providers/claude.ts @@ -1,14 +1,36 @@ import { join } from "node:path"; +import type { Config } from "../../config/config"; +import { installWorkflowHook, removeWorkflowHook } from "../config-helpers"; +import { expandHome } from "../detect"; import { createJSONProvider } from "./base"; -export const ClaudeProvider = () => - createJSONProvider({ +const CLAUDE_GLOBAL_PATH = "~/.claude.json"; + +export const ClaudeProvider = () => { + const base = createJSONProvider({ providerName: "Claude Code", providerID: "claude", local: true, priorityValue: 1, paths: ["~/.claude"], - globalPath: "~/.claude.json", + globalPath: CLAUDE_GLOBAL_PATH, topKey: "mcpServers", localConfigPath: (cwd) => join(cwd, ".mcp.json"), }); + + return { + ...base, + install(cfg: Config, global: boolean): void { + base.install(cfg, global); + if (global) { + installWorkflowHook(expandHome(CLAUDE_GLOBAL_PATH)); + } + }, + remove(global: boolean): void { + base.remove(global); + if (global) { + removeWorkflowHook(expandHome(CLAUDE_GLOBAL_PATH)); + } + }, + }; +}; diff --git a/src/setup/doc-analyze-step.ts b/src/setup/doc-analyze-step.ts new file mode 100644 index 0000000..3d4eb93 --- /dev/null +++ b/src/setup/doc-analyze-step.ts @@ -0,0 +1,276 @@ +/** + * Setup step: auto-import docs from a connected repo and queue AI analysis. + * + * Called after `stepConnectGitHubPat` successfully creates a data source. + * Unlike the interactive `stepImportGitHubDocs`, this step runs silently + * during the FTUE — no file picker, imports all markdown automatically. + * + * Never throws — returns partial results on failure. + */ + +import * as p from "@clack/prompts"; +import { createTypedClient, type TypedClient } from "../client/trpc"; +import type { Config } from "../config/config"; +import { logger } from "../debug/logger"; +import type { GithubPatStepResult } from "./github-pat-step"; + +const DOC_SCAN_POLL_INTERVAL_MS = 2_000; +const DOC_SCAN_POLL_TIMEOUT_MS = 60_000; +const IMPORT_STATUS_POLL_INTERVAL_MS = 2_000; +const IMPORT_STATUS_MAX_ERRORS = 3; + +const IMPORTABLE_EXTENSIONS = new Set([".md", ".mdx", ".txt", ".rst", ".markdown"]); + +interface ImportableFile { + id: string; + file_path: string; + repository_slug: string | null; + is_synced?: boolean; + file_ext?: string; +} + +type AsyncTaskState = "PROGRESS" | "SUCCESS" | "FAILURE"; + +interface ImportTaskStatusResponse { + task_id: string; + state: AsyncTaskState; + detail: { + total: number; + completed: number; + failed: number; + } | null; +} + +export interface DocAnalyzeStepResult { + imported_count: number; + failed_count: number; + suggestions_queued: boolean; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function isDocFile(f: ImportableFile): boolean { + if (f.file_ext) return IMPORTABLE_EXTENSIONS.has(f.file_ext.toLowerCase()); + const ext = f.file_path.slice(f.file_path.lastIndexOf(".")).toLowerCase(); + return IMPORTABLE_EXTENSIONS.has(ext); +} + +type WaitForFilesOutcome = + /** Data source finished indexing and we got the file list back. */ + | { kind: "indexed"; files: ImportableFile[] } + /** Backend deleted the data source — repo unreachable. */ + | { kind: "data_source_missing" } + /** Polling timed out before the data source reached is_indexed=true. */ + | { kind: "indexing_timeout"; files: ImportableFile[] }; + +async function waitForFiles( + trpc: TypedClient, + spaceID: string, + dataSourceID: string, + orgID: string, +): Promise { + const deadline = Date.now() + DOC_SCAN_POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + try { + // Check if data source is indexed yet + const sources = (await trpc.dataSource.list.query({ + org_id: orgID, + excluded_provider_slugs: [], + })) as { data_source_id?: string; is_indexed?: boolean }[]; + const ds = sources.find((s) => s.data_source_id === dataSourceID); + if (ds?.is_indexed) { + const files = (await trpc.docImports.listImportableGithubFiles.query( + spaceID, + )) as ImportableFile[]; + return { kind: "indexed", files }; + } + if (!ds) { + return { kind: "data_source_missing" }; + } + } catch (err: unknown) { + logger.warn( + "setup", + `waitForFiles poll error: ${err instanceof Error ? err.message : String(err)}`, + ); + } + await sleep(DOC_SCAN_POLL_INTERVAL_MS); + } + // Timed out before is_indexed=true — best-effort fetch in case files were + // produced anyway, but mark the outcome so the caller can warn the user + // about the underlying sync problem rather than mis-reporting "no docs". + let files: ImportableFile[] = []; + try { + files = (await trpc.docImports.listImportableGithubFiles.query(spaceID)) as ImportableFile[]; + } catch { + /* swallow — return empty */ + } + return { kind: "indexing_timeout", files }; +} + +async function pollImportCompletion( + trpc: TypedClient, + taskID: string, +): Promise<{ imported: number; failed: number }> { + let errors = 0; + while (true) { + try { + const status = (await trpc.docImports.getImportStatus.query( + taskID, + )) as ImportTaskStatusResponse | null; + + if (!status) { + errors += 1; + if (errors >= IMPORT_STATUS_MAX_ERRORS) return { imported: 0, failed: 0 }; + } else { + errors = 0; + if (status.state !== "PROGRESS") { + const detail = status.detail; + return { + imported: detail?.completed ?? 0, + failed: detail?.failed ?? 0, + }; + } + } + } catch (err: unknown) { + errors += 1; + logger.warn( + "setup", + `getImportStatus error: ${err instanceof Error ? err.message : String(err)}`, + ); + if (errors >= IMPORT_STATUS_MAX_ERRORS) return { imported: 0, failed: 0 }; + } + await sleep(IMPORT_STATUS_POLL_INTERVAL_MS); + } +} + +export async function stepAnalyzeDocs( + cfg: Config, + patResult: GithubPatStepResult, +): Promise { + const { data_source_id, space_id } = patResult; + if (!data_source_id || !space_id || !cfg.org_id) { + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + logger.info("setup", `Step: analyze docs for data_source=${data_source_id}`); + + const trpc = createTypedClient(cfg); + const s = p.spinner(); + s.start("Scanning repo for docs..."); + + // Wait for the data source to be indexed (or give up) + // biome-ignore lint/style/noNonNullAssertion: checked above + const outcome = await waitForFiles(trpc, space_id, data_source_id, cfg.org_id!); + const docFiles = outcome.kind === "data_source_missing" ? [] : outcome.files.filter(isDocFile); + + // Distinguish "indexing didn't complete" from "repo really has no docs" — + // the previous behavior conflated them, which was misleading when the PAT + // hadn't been stored and sync never ran. + if (outcome.kind === "data_source_missing") { + s.stop("Repo not reachable"); + p.log.warn( + "Dosu could not reach the repo — the data source was removed. " + + "Re-run `dosu setup` to retry the connection.", + ); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + if (outcome.kind === "indexing_timeout" && docFiles.length === 0) { + s.stop("Indexing didn't complete in time"); + p.log.warn( + "The repo hasn't finished indexing yet — Dosu's GitHub sync may not have access " + + "(common when the PAT wasn't stored). Run `dosu sources list` to check status, " + + "then re-run `dosu setup` once sync is healthy.", + ); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + if (docFiles.length === 0) { + s.stop("No docs found"); + p.log.info( + "No .md/.mdx/.txt/.rst files found in the repo to import. " + + "Add some documentation and run `dosu setup` again, or import files manually with `dosu docs import github`.", + ); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + s.message(`Found ${docFiles.length} doc${docFiles.length === 1 ? "" : "s"} — importing...`); + + // Get knowledge store ID + let ksID: string; + try { + const store = await trpc.knowledgeStore.getBySpaceId.query({ space_id }); + if (!store) { + s.stop("No knowledge store found"); + logger.warn("setup", "No knowledge store found for space"); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + ksID = store.id; + } catch (err: unknown) { + s.stop("Could not reach knowledge store"); + logger.error( + "setup", + `getBySpaceId failed: ${err instanceof Error ? err.message : String(err)}`, + ); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + // Queue the import + let taskID: string | undefined; + try { + const importResult = (await trpc.docImports.importGithubFiles.mutate({ + knowledge_store_id: ksID, + space_id, + file_ids: docFiles.map((f) => f.id), + })) as { task_id?: string } | null; + taskID = importResult?.task_id; + } catch (err: unknown) { + s.stop("Import failed to start"); + logger.error( + "setup", + `importGithubFiles failed: ${err instanceof Error ? err.message : String(err)}`, + ); + return { imported_count: 0, failed_count: 0, suggestions_queued: false }; + } + + // Poll for completion if we got a task ID + let imported = 0; + let failed = 0; + if (taskID) { + const result = await pollImportCompletion(trpc, taskID); + imported = result.imported; + failed = result.failed; + } else { + // Queued in background with no task ID — treat as success + imported = docFiles.length; + } + + // Fire-and-forget: queue AI suggestion generation + let suggestionsQueued = false; + try { + await trpc.suggestedDoc.generate.mutate({ + knowledgeStoreId: ksID, + dataSourceIds: [data_source_id], + }); + suggestionsQueued = true; + logger.info("setup", "Suggestion generation queued"); + } catch (err: unknown) { + logger.warn( + "setup", + `suggestedDoc.generate failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } + + const summary = suggestionsQueued + ? `Imported ${imported} doc${imported === 1 ? "" : "s"} · Queued AI analysis` + : `Imported ${imported} doc${imported === 1 ? "" : "s"}`; + s.stop(summary); + + if (suggestionsQueued) { + p.log.info("Run `dosu suggest list` to see improvement opportunities once analysis completes."); + } + + return { imported_count: imported, failed_count: failed, suggestions_queued: suggestionsQueued }; +} diff --git a/src/setup/flow.test.ts b/src/setup/flow.test.ts index aa02e29..02d0d10 100644 --- a/src/setup/flow.test.ts +++ b/src/setup/flow.test.ts @@ -111,9 +111,16 @@ vi.mock("../commands/skill", () => ({ skillCommand: vi.fn(), })); -const { mockStepConnectGitHubRepo, mockStepImportGitHubDocs } = vi.hoisted(() => ({ +const { + mockStepConnectGitHubRepo, + mockStepImportGitHubDocs, + mockStepConnectGitHubPat, + mockStepAnalyzeDocs, +} = vi.hoisted(() => ({ mockStepConnectGitHubRepo: vi.fn(), mockStepImportGitHubDocs: vi.fn(), + mockStepConnectGitHubPat: vi.fn(), + mockStepAnalyzeDocs: vi.fn(), })); vi.mock("./github-step", () => ({ stepConnectGitHubRepo: (...args: unknown[]) => mockStepConnectGitHubRepo(...args), @@ -121,6 +128,12 @@ vi.mock("./github-step", () => ({ vi.mock("./github-doc-import-step", () => ({ stepImportGitHubDocs: (...args: unknown[]) => mockStepImportGitHubDocs(...args), })); +vi.mock("./github-pat-step", () => ({ + stepConnectGitHubPat: (...args: unknown[]) => mockStepConnectGitHubPat(...args), +})); +vi.mock("./doc-analyze-step", () => ({ + stepAnalyzeDocs: (...args: unknown[]) => mockStepAnalyzeDocs(...args), +})); import * as p from "@clack/prompts"; import { OAuthCallbackError } from "../auth/errors"; @@ -177,6 +190,12 @@ function mockToolSelection(selection: string[]) { function installSetupStepDefaults() { mockStepConnectGitHubRepo.mockResolvedValue({ advance: false, has_connected_repo: false }); mockStepImportGitHubDocs.mockResolvedValue({ advance: false }); + mockStepConnectGitHubPat.mockResolvedValue({ advance: true }); + mockStepAnalyzeDocs.mockResolvedValue({ + imported_count: 0, + failed_count: 0, + suggestions_queued: false, + }); } function installRemoteSetupDefaults() { @@ -224,7 +243,7 @@ function makeCfg(overrides: Partial = {}): Config { access_token: "tok", refresh_token: "ref", expires_at: Math.floor(Date.now() / 1000) + 3600, - deployment_id: "dep-123", + deployment_id: "d1", deployment_name: "TestDeploy", api_key: "key-abc", ...overrides, @@ -357,7 +376,7 @@ describe("stepConfigureTools", () => { const written = loadJSONConfig(configPath); expect(written.mcpServers).toBeDefined(); expect(written.mcpServers.dosu).toBeDefined(); - expect(written.mcpServers.dosu.url).toContain("dep-123"); + expect(written.mcpServers.dosu.url).toContain("d1"); expect(written.mcpServers.dosu.headers["X-Dosu-API-Key"]).toBe("key-abc"); }); @@ -628,17 +647,18 @@ describe("showTryItOutPrompt", () => { expect(p.log.message).not.toHaveBeenCalledWith(expect.stringContaining("host my AGENTS.md")); }); - it("suggests hosting AGENTS.md when the file exists and no docs were imported", () => { + it("surfaces dosu write and dosu read when no docs imported and AGENTS.md exists", () => { showTryItOutPrompt({ docsImported: false, hasAgentsMd: true }); - expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("host my AGENTS.md")); + expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("dosu write")); + expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("dosu read")); }); - it("suggests drafting AGENTS.md when neither docs are imported nor the file exists", () => { + it("surfaces dosu write and dosu read when neither docs imported nor AGENTS.md exists", () => { showTryItOutPrompt({ docsImported: false, hasAgentsMd: false }); - expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("draft an AGENTS.md")); - expect(p.log.message).not.toHaveBeenCalledWith(expect.stringContaining("host my AGENTS.md")); + expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("dosu write")); + expect(p.log.message).toHaveBeenCalledWith(expect.stringContaining("dosu read")); }); it("uses the OSS prompt regardless of other flags", () => { @@ -992,7 +1012,9 @@ describe("runSetup integration", () => { await runSetup(); - expect(p.log.error).toHaveBeenCalledWith("No organizations found for your account"); + expect(p.log.error).toHaveBeenCalledWith( + expect.stringContaining("No organizations found for your account"), + ); }); it("handles SessionExpiredError during org fetch", async () => { @@ -1037,7 +1059,7 @@ describe("runSetup integration", () => { await runSetup(); - expect(p.select).toHaveBeenCalledWith(expect.objectContaining({ message: "Select an MCP" })); + expect(p.select).toHaveBeenCalledWith(expect.objectContaining({ message: "Select a project" })); const saved = loadConfig(); expect(saved.deployment_id).toBe("d2"); }); @@ -1209,7 +1231,7 @@ describe("runSetup integration", () => { await runSetup({ deploymentID: "nonexistent" }); - expect(p.log.error).toHaveBeenCalledWith("MCP nonexistent not found"); + expect(p.log.error).toHaveBeenCalledWith(expect.stringContaining("MCP nonexistent not found")); }); it("handles resolve deployment fetch error", async () => { @@ -1310,7 +1332,9 @@ describe("runSetup integration", () => { await runSetup(); - expect(p.log.error).toHaveBeenCalledWith("No MCP available for API key creation"); + expect(p.log.error).toHaveBeenCalledWith( + expect.stringContaining("No MCP available for API key creation"), + ); const saved = loadConfig(); expect(saved.deployment_id).toBeUndefined(); expect(p.outro).not.toHaveBeenCalled(); @@ -1327,7 +1351,9 @@ describe("runSetup integration", () => { await runSetup(); - expect(p.log.error).toHaveBeenCalledWith("No MCP available for API key creation"); + expect(p.log.error).toHaveBeenCalledWith( + expect.stringContaining("No MCP available for API key creation"), + ); const saved = loadConfig(); expect(saved.deployment_id).toBeUndefined(); expect(p.outro).not.toHaveBeenCalled(); @@ -1417,7 +1443,8 @@ describe("runSetup integration", () => { await runSetup(); expect(mockInstallSkill).toHaveBeenCalled(); - expect(p.log.success).toHaveBeenCalledWith(expect.stringContaining("Skill installed")); + const spinnerInstance = vi.mocked(p.spinner).mock.results.at(-1)?.value; + expect(spinnerInstance?.stop).toHaveBeenCalledWith(expect.stringContaining("skill installed")); }); it("does not install skill when the one-shot confirm unticks it", async () => { @@ -1458,7 +1485,7 @@ describe("runSetup integration", () => { expect(mockInstallSkill).toHaveBeenCalled(); }); - it("shows the GitHub docs import label in the one-shot confirm", async () => { + it("auto-accepts all defaults on first-run onboarding without a confirm prompt", async () => { const cfg = makeCfg(); saveConfig(cfg); @@ -1472,18 +1499,16 @@ describe("runSetup integration", () => { await runSetup(); + // First-run onboarding skips the one-shot confirm — no "Dosu will set" prompt shown. const oneShotCall = vi .mocked(p.multiselect) .mock.calls.find(([args]) => String((args as { message?: string }).message ?? "").includes("Dosu will set"), ); - const options = (oneShotCall?.[0] as { options: Array<{ label: string }> }).options; - expect(options.map((option) => String(option.label))).toEqual( - expect.arrayContaining([expect.stringContaining("Import docs from GitHub")]), - ); - expect(options.map((option) => String(option.label))).toEqual( - expect.arrayContaining([expect.stringContaining("Keep them up to date")]), - ); + expect(oneShotCall).toBeUndefined(); + + // Skill should still be installed automatically. + expect(mockInstallSkill).toHaveBeenCalled(); }); }); @@ -1509,7 +1534,8 @@ describe("runInstallSkill", () => { expect(result).toBe(true); expect(mockInstallSkill).toHaveBeenCalled(); - expect(p.log.success).toHaveBeenCalledWith(expect.stringContaining("Skill installed")); + const spinnerInstance = vi.mocked(p.spinner).mock.results.at(-1)?.value; + expect(spinnerInstance?.stop).toHaveBeenCalledWith(expect.stringContaining("skill installed")); }); it("returns false and logs error when installSkill reports failure", async () => { @@ -1711,17 +1737,17 @@ describe("runSetup checkpoint behavior", () => { cli_onboarding_enabled: true, }); vi.spyOn(providersModule, "allSetupProviders").mockReturnValue([]); - mockStepConnectGitHubRepo.mockResolvedValue({ + mockStepConnectGitHubPat.mockResolvedValue({ advance: true, - has_connected_repo: true, - created_data_source_ids: ["ds-1"], + data_source_id: "ds-1", + space_id: "space-1", + deployment_id: "dep-1", + repo_slug: "owner/repo", }); - mockStepImportGitHubDocs.mockResolvedValue({ - advance: true, - imported: true, + mockStepAnalyzeDocs.mockResolvedValue({ imported_count: 3, failed_count: 0, - task_id: "task-1", + suggestions_queued: true, }); await runSetup(); @@ -1731,7 +1757,7 @@ describe("runSetup checkpoint behavior", () => { expect.objectContaining({ event: "cli_onboarding_github_connected" }), expect.objectContaining({ event: "cli_onboarding_docs_imported", - properties: expect.objectContaining({ imported_count: 3, task_id: "task-1" }), + properties: expect.objectContaining({ imported_count: 3 }), }), expect.objectContaining({ event: "cli_onboarding_activated", @@ -1888,7 +1914,7 @@ describe("runSetup checkpoint behavior", () => { expect(saved.deployment_id).toBe("dep-owner"); }); - it("always starts the GitHub step from repo selection during first-run onboarding", async () => { + it("calls stepConnectGitHubPat during first-run onboarding", async () => { saveConfig(makeCfg()); setupAuthed(); mockTrpc.user.getCliOnboardingContext.query.mockResolvedValue({ @@ -1897,24 +1923,26 @@ describe("runSetup checkpoint behavior", () => { cli_onboarding_enabled: true, }); vi.spyOn(providersModule, "allSetupProviders").mockReturnValue([]); - mockStepConnectGitHubRepo.mockResolvedValue({ + mockStepConnectGitHubPat.mockResolvedValue({ advance: true, - has_connected_repo: true, - deployment_id: "dep-github", + data_source_id: "ds-1", space_id: "space-1", + deployment_id: "dep-github", + repo_slug: "owner/repo", + }); + mockStepAnalyzeDocs.mockResolvedValue({ + imported_count: 5, + failed_count: 0, + suggestions_queued: true, }); - mockStepImportGitHubDocs.mockResolvedValue({ advance: true }); await runSetup(); - expect(mockStepConnectGitHubRepo).toHaveBeenCalledTimes(1); - expect(mockStepImportGitHubDocs).toHaveBeenCalledWith( - expect.any(Object), - expect.objectContaining({ waitForFreshDocs: true }), - ); + expect(mockStepConnectGitHubPat).toHaveBeenCalledTimes(1); + expect(mockStepAnalyzeDocs).toHaveBeenCalledTimes(1); }); - it("does not wait for fresh docs when no repo was connected in this run", async () => { + it("skips doc analysis when PAT step returns no data_source_id", async () => { saveConfig(makeCfg()); setupAuthed(); mockTrpc.user.getCliOnboardingContext.query.mockResolvedValue({ @@ -1923,17 +1951,11 @@ describe("runSetup checkpoint behavior", () => { cli_onboarding_enabled: true, }); vi.spyOn(providersModule, "allSetupProviders").mockReturnValue([]); - mockStepConnectGitHubRepo.mockResolvedValue({ - advance: true, - has_connected_repo: false, - }); - mockStepImportGitHubDocs.mockResolvedValue({ advance: true }); + mockStepConnectGitHubPat.mockResolvedValue({ advance: true }); // no data_source_id await runSetup(); - expect(mockStepImportGitHubDocs).toHaveBeenCalledWith( - expect.any(Object), - expect.objectContaining({ waitForFreshDocs: false }), - ); + expect(mockStepConnectGitHubPat).toHaveBeenCalledTimes(1); + expect(mockStepAnalyzeDocs).not.toHaveBeenCalled(); }); }); diff --git a/src/setup/flow.ts b/src/setup/flow.ts index 6c7b0b2..4af5435 100644 --- a/src/setup/flow.ts +++ b/src/setup/flow.ts @@ -147,6 +147,28 @@ export async function runSetup(opts: SetupOptions = {}): Promise { }); return; } + } else { + // Stored deployment_id exists — verify it's still reachable before + // spending the rest of setup on a stale reference (e.g. after switching + // between local dev and prod, or if a deployment was deleted). + const deployments = await fetchDeployments(apiClient); + const exists = deployments.some((d) => d.deployment_id === cfg.deployment_id); + if (!exists) { + p.log.warn( + `Project "${cfg.deployment_name ?? cfg.deployment_id}" no longer exists — select a new one.`, + ); + cfg.deployment_id = undefined; + cfg.deployment_name = undefined; + cfg.api_key = undefined; + saveConfig(cfg); + const ok = await resolveDeployment(apiClient, cfg, opts); + if (!ok) { + await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_failed", { + reason: "deployment_resolution_failed", + }); + return; + } + } } // API key: `stepMintAPIKey` is idempotent — it validates an existing key @@ -161,11 +183,17 @@ export async function runSetup(opts: SetupOptions = {}): Promise { cfg.api_key = apiKey; saveConfig(cfg); - // One-shot confirm: MCP + skill are always listed (user picks what to - // (re)run); GitHub docs import only shows during first-run onboarding. - const choices = await stepOneShotConfirm({ - includeGitHub: cloudSetupContext?.kind === "onboarding", - }); + // One-shot confirm: on first-run onboarding, auto-accept all defaults so + // new users reach value faster. On re-runs, show the multiselect so users + // can pick what to re-run. + let choices: OneShotChoices | null; + if (cloudSetupContext?.kind === "onboarding") { + choices = { configureMcp: true, installSkill: true, connectGitHub: true }; + } else { + choices = await stepOneShotConfirm({ + includeGitHub: cfg.mode !== MODE_OSS, + }); + } if (!choices) { await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_cancelled", { reason: "options_cancelled", @@ -215,53 +243,48 @@ export async function runSetup(opts: SetupOptions = {}): Promise { } } + // GitHub repo connection: PAT-first flow (user picks PAT or GitHub App). + // Runs for both first-run onboarding and Cloud re-runs when the user ticked + // connectGitHub. OSS mode skips this entirely. let githubOnboardingDone = !choices.connectGitHub; - if (choices.connectGitHub && cloudSetupContext?.kind === "onboarding") { - const { stepConnectGitHubRepo } = await import("./github-step"); - const connectResult = await stepConnectGitHubRepo(cfg); - if (!connectResult.advance) { - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_cancelled", { - reason: "github_connect_not_advanced", - has_connected_repo: connectResult.has_connected_repo, - }); - return; - } - if ( - connectResult.has_connected_repo || - (connectResult.created_data_source_ids?.length ?? 0) > 0 - ) { - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_github_connected", { - created_data_source_count: connectResult.created_data_source_ids?.length ?? 0, - created_repository_slugs: connectResult.created_repository_slugs, - }); - } - if (connectResult.space_id && !cfg.space_id) { - cfg.space_id = connectResult.space_id; + if (choices.connectGitHub && cfg.mode !== MODE_OSS) { + const { stepConnectGitHubPat } = await import("./github-pat-step"); + const patResult = await stepConnectGitHubPat(apiClient, cfg); + + if (patResult.space_id && !cfg.space_id) { + cfg.space_id = patResult.space_id; saveConfig(cfg); } - const { stepImportGitHubDocs } = await import("./github-doc-import-step"); - const importResult = await stepImportGitHubDocs(cfg, { - waitForFreshDocs: Boolean(connectResult.deployment_id), - expectedDataSourceIds: connectResult.created_data_source_ids, - }); - if (!importResult.advance) { - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_failed", { - reason: "github_docs_import_failed", - }); - return; - } - docsImported = importResult.imported === true && (importResult.imported_count ?? 0) > 0; - if (docsImported) { - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_docs_imported", { - imported_count: importResult.imported_count ?? 0, - failed_count: importResult.failed_count ?? 0, - task_id: importResult.task_id, - }); - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_activated", { - imported_count: importResult.imported_count ?? 0, - failed_count: importResult.failed_count ?? 0, + if (patResult.data_source_id) { + await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_github_connected", { + created_data_source_count: 1, + created_repository_slugs: patResult.repo_slug ? [patResult.repo_slug] : [], }); + + if (patResult.pat_stored === false) { + // Sync can't run without a stored PAT — skip the 60s indexing poll + // and tell the user explicitly what to do next instead of timing out + // into a misleading "No docs found". + p.log.info( + "Skipping doc analysis — Dosu can't sync the repo until the PAT is stored. " + + "Re-run `dosu setup` once the previous PAT-storage error is resolved.", + ); + } else { + const { stepAnalyzeDocs } = await import("./doc-analyze-step"); + const analyzeResult = await stepAnalyzeDocs(cfg, patResult); + docsImported = analyzeResult.imported_count > 0; + if (docsImported) { + await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_docs_imported", { + imported_count: analyzeResult.imported_count, + failed_count: analyzeResult.failed_count, + }); + await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_activated", { + imported_count: analyzeResult.imported_count, + failed_count: analyzeResult.failed_count, + }); + } + } } githubOnboardingDone = true; } @@ -360,7 +383,7 @@ async function stepOneShotConfirm(opts: { if (opts.includeGitHub) { items.push({ value: "connectGitHub", - label: `Import docs from GitHub ${dim("(Keep them up to date)")}`, + label: "Connect a GitHub repo", }); } @@ -407,22 +430,29 @@ async function stepConfigureMcpTools(cfg: Config): Promise { logger.info("setup", "Step: install skill"); + const s = p.spinner(); + s.start("Installing Dosu skill..."); try { - const result = await installSkill(); + const result = await installSkill({ silent: true }); if (result.success) { logger.info("setup", `Skill installed${result.sha ? ` sha=${result.sha}` : ""}`); - p.log.success("Skill installed"); + s.stop("Dosu skill installed"); return true; } + s.stop("Skill install failed"); p.log.error("Failed to install skill. Run `dosu skill install` to retry."); return false; } catch (err: unknown) { /* v8 ignore next -- err is always Error in practice */ const msg = err instanceof Error ? err.message : String(err); logger.error("setup", `Skill install failed: ${msg}`); + s.stop("Skill install failed"); p.log.error(`Skill install failed: ${msg}`); return false; } @@ -530,7 +560,9 @@ async function resolveCloudSetupContext(cfg: Config): Promise { if (!targetOrg) { - p.log.error("Could not determine your onboarding organization."); + p.log.error( + "Could not determine your onboarding organization. Visit https://app.dosu.dev to create or join one, then re-run `dosu setup`.", + ); return false; } const deployment = await resolveOnboardingDeployment(apiClient, targetOrg); if (!deployment) { - p.log.error(`No MCP found for ${targetOrg.name}.`); + p.log.error( + `No MCP found for ${targetOrg.name}. Create one at https://app.dosu.dev/mcp, then re-run \`dosu setup\`.`, + ); return false; } @@ -655,7 +695,9 @@ async function stepSelectOrg(apiClient: Client): Promise { try { const orgs = await apiClient.getOrgs(); if (orgs.length === 0) { - p.log.error("No organizations found for your account"); + p.log.error( + "No organizations found for your account. Visit https://app.dosu.dev to create or join one, then re-run `dosu setup`.", + ); return null; } if (orgs.length === 1) { @@ -678,7 +720,7 @@ async function stepSelectOrg(apiClient: Client): Promise { } /* v8 ignore next -- err is always Error in practice */ p.log.error( - `Organization selection failed: ${err instanceof Error ? err.message : String(err)}`, + `Organization selection failed: ${err instanceof Error ? err.message : String(err)}. Run \`dosu logs --tail 30\` for details, then re-run \`dosu setup\`.`, ); return null; } @@ -690,7 +732,9 @@ async function stepResolveDeployment(apiClient: Client, id: string): Promise d.deployment_id === id); if (!d) { logger.warn("setup", `Deployment ${id} not found`); - p.log.error(`MCP ${id} not found`); + p.log.error( + `MCP ${id} not found. Run \`dosu deployments list\` to see available MCPs, then re-run \`dosu setup --deployment \`.`, + ); return null; } logger.info("setup", `Resolved deployment: ${d.name}`); @@ -698,7 +742,9 @@ async function stepResolveDeployment(apiClient: Client, id: string): Promise d.org_id === org.org_id); if (deployments.length === 0) { - p.log.error(`No MCPs found for ${org.name}`); + p.log.error( + `No MCPs found for ${org.name}. Create one at https://app.dosu.dev/mcp, then re-run \`dosu setup\`.`, + ); return null; } if (deployments.length === 1) { @@ -718,7 +766,7 @@ async function stepSelectDeployment(apiClient: Client, org: Org): Promise ({ label: d.name, value: d.deployment_id })), }); if (p.isCancel(selected)) return null; @@ -727,14 +775,16 @@ async function stepSelectDeployment(apiClient: Client, org: Org): Promise { if (!cfg.deployment_id) { - p.log.error("No MCP available for API key creation"); + p.log.error("No MCP available for API key creation. Re-run `dosu setup` to pick a deployment."); return null; } @@ -757,7 +807,18 @@ async function stepMintAPIKey(apiClient: Client, cfg: Config): Promise { + const agentPrompt = (() => { if (opts.mode === MODE_OSS) { return `What can Dosu help me with? Pick an open source library related to my project and explain how it works.`; } if (opts.docsImported) { return `Use Dosu to summarize the most important docs in my repo.`; } - if (opts.hasAgentsMd) { - return `Please use Dosu to host my AGENTS.md`; - } - return `Ask Dosu to draft an AGENTS.md for this project.`; + return `Ask Dosu to explain the most important patterns in this codebase.`; })(); - p.log.message(`Try it out! Paste this into your agent:\n\n${info(prompt)}`); + + p.log.message( + [ + "Try it out:", + "", + ` Ask your agent: ${info(agentPrompt)}`, + "", + ` Save a fact: ${info(`dosu write "something non-obvious about this project"`)}`, + ` Read context: ${info(`dosu read "what should I know before making changes?"`)}`, + ].join("\n"), + ); } diff --git a/src/setup/github-pat-step.test.ts b/src/setup/github-pat-step.test.ts new file mode 100644 index 0000000..43426cd --- /dev/null +++ b/src/setup/github-pat-step.test.ts @@ -0,0 +1,303 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("../debug/logger", () => ({ + logger: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + init: vi.fn(), + getLogPath: vi.fn(() => "/tmp/test-debug.log"), + }, +})); + +import type { Client } from "../client/client"; +import { + findExistingGithubDataSource, + patStorageFailureMessage, + type StorePatResult, + storePat, +} from "./github-pat-step"; + +function jsonResponse(data: unknown, status = 200): Response { + return new Response(JSON.stringify(data), { + status, + headers: { "Content-Type": "application/json" }, + }); +} + +function abortError(): Error { + const err = new Error("The operation was aborted."); + err.name = "AbortError"; + return err; +} + +describe("storePat", () => { + let doRequest: ReturnType; + let client: Pick; + + beforeEach(() => { + doRequest = vi.fn(); + client = { doRequest } as unknown as Pick; + }); + + it("returns ok on 200 first try", async () => { + doRequest.mockResolvedValueOnce(new Response(null, { status: 200 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toEqual({ ok: true, status: 200, reason: "ok" }); + expect(doRequest).toHaveBeenCalledTimes(1); + }); + + it("passes the long timeout to doRequest", async () => { + doRequest.mockResolvedValueOnce(new Response(null, { status: 200 })); + await storePat(client as Client, "ds-1", "ghp_x", { baseBackoffMs: 0 }); + const [, , , opts] = doRequest.mock.calls[0]; + expect(opts).toEqual({ timeoutMs: 60_000 }); + }); + + it("retries on 403 then succeeds", async () => { + doRequest + .mockResolvedValueOnce(new Response("forbidden", { status: 403 })) + .mockResolvedValueOnce(new Response(null, { status: 200 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result.ok).toBe(true); + expect(doRequest).toHaveBeenCalledTimes(2); + }); + + it("retries on 404 then succeeds", async () => { + doRequest + .mockResolvedValueOnce(new Response("not found", { status: 404 })) + .mockResolvedValueOnce(new Response(null, { status: 200 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result.ok).toBe(true); + expect(doRequest).toHaveBeenCalledTimes(2); + }); + + it("gives up after maxAttempts on persistent 403", async () => { + doRequest.mockResolvedValue(new Response("forbidden", { status: 403 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ ok: false, status: 403, reason: "permission" }); + expect(doRequest).toHaveBeenCalledTimes(3); + }); + + it("does not retry on 422 — validation errors are user-visible immediately", async () => { + doRequest.mockResolvedValueOnce(new Response("bad pat", { status: 422 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ ok: false, status: 422, reason: "validation" }); + expect(doRequest).toHaveBeenCalledTimes(1); + }); + + it("does not retry on 500 — server failure surfaces as 'server'", async () => { + doRequest.mockResolvedValueOnce(new Response("oops", { status: 500 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ ok: false, status: 500, reason: "server" }); + expect(doRequest).toHaveBeenCalledTimes(1); + }); + + it("classifies 503 as 'unavailable' and extracts FastAPI detail", async () => { + doRequest.mockResolvedValueOnce( + new Response(JSON.stringify({ detail: "KMS reauth needed" }), { + status: 503, + headers: { "Content-Type": "application/json" }, + }), + ); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ + ok: false, + status: 503, + reason: "unavailable", + detail: "KMS reauth needed", + }); + expect(doRequest).toHaveBeenCalledTimes(1); + }); + + it("leaves detail undefined when the error body is not JSON", async () => { + doRequest.mockResolvedValueOnce(new Response("Internal Server Error", { status: 500 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result.detail).toBeUndefined(); + }); + + it("classifies AbortError as timeout after exhausting attempts", async () => { + doRequest.mockRejectedValue(abortError()); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ ok: false, reason: "timeout" }); + expect(doRequest).toHaveBeenCalledTimes(3); + }); + + it("classifies non-abort throws as network errors", async () => { + doRequest.mockRejectedValue(new Error("connect ECONNREFUSED")); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result).toMatchObject({ ok: false, reason: "network" }); + expect(doRequest).toHaveBeenCalledTimes(3); + }); + + it("succeeds on retry after transient AbortError", async () => { + doRequest + .mockRejectedValueOnce(abortError()) + .mockResolvedValueOnce(new Response(null, { status: 200 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + }); + expect(result.ok).toBe(true); + expect(doRequest).toHaveBeenCalledTimes(2); + }); + + it("honors maxAttempts override of 1 (no retry)", async () => { + doRequest.mockResolvedValueOnce(new Response("forbidden", { status: 403 })); + const result = await storePat(client as Client, "ds-1", "ghp_x", { + baseBackoffMs: 0, + maxAttempts: 1, + }); + expect(result.ok).toBe(false); + expect(doRequest).toHaveBeenCalledTimes(1); + }); +}); + +describe("patStorageFailureMessage", () => { + it("timeout mentions the 60s budget and the rerun command", () => { + const msg = patStorageFailureMessage({ ok: false, status: null, reason: "timeout" }); + expect(msg).toContain("timed out"); + expect(msg).toContain("dosu setup"); + expect(msg).toContain("dosu logs --tail 30"); + }); + + it("permission mentions 403 and the rerun command", () => { + const msg = patStorageFailureMessage({ ok: false, status: 403, reason: "permission" }); + expect(msg).toContain("403"); + expect(msg).toContain("dosu setup"); + }); + + it("validation tells the user to regenerate the PAT, not just rerun", () => { + const msg = patStorageFailureMessage({ ok: false, status: 422, reason: "validation" }); + expect(msg).toContain("Generate a new PAT"); + expect(msg).toContain("repo"); + }); + + it("not_found mentions visibility and the rerun command", () => { + const msg = patStorageFailureMessage({ ok: false, status: 404, reason: "not_found" }); + expect(msg).toContain("404"); + expect(msg).toContain("dosu setup"); + }); + + it("network mentions the network error and recovery", () => { + const msg = patStorageFailureMessage({ ok: false, status: null, reason: "network" }); + expect(msg).toContain("network"); + expect(msg).toContain("dosu setup"); + }); + + it("server falls back to the generic recovery line with status", () => { + const msg = patStorageFailureMessage({ ok: false, status: 500, reason: "server" }); + expect(msg).toContain("500"); + expect(msg).toContain("dosu setup"); + }); + + it("unavailable surfaces the backend detail when present", () => { + const msg = patStorageFailureMessage({ + ok: false, + status: 503, + reason: "unavailable", + detail: "KMS reauth needed — run gcloud auth application-default login", + }); + expect(msg).toContain("KMS reauth needed"); + }); + + it("unavailable without detail still gives a recoverable message", () => { + const msg = patStorageFailureMessage({ ok: false, status: 503, reason: "unavailable" }); + expect(msg).toContain("503"); + expect(msg).toContain("dosu setup"); + }); + + it("server surfaces detail when present even on generic 500-class errors", () => { + const msg = patStorageFailureMessage({ + ok: false, + status: 500, + reason: "server", + detail: "database connection refused", + }); + expect(msg).toContain("database connection refused"); + }); +}); + +describe("findExistingGithubDataSource", () => { + function makeTrpc( + sources: Array<{ data_source_id?: string; provider_slug?: string; repository_id?: number }>, + spaceDeployments: Array<{ deployment_id: string }> = [{ deployment_id: "dep-1" }], + ) { + return { + dataSource: { list: { query: vi.fn().mockResolvedValue(sources) } }, + workspaces: { listForSpace: { query: vi.fn().mockResolvedValue(spaceDeployments) } }, + }; + } + + it("returns the existing GitHub data source matching repository_id", async () => { + const trpc = makeTrpc([ + { data_source_id: "ds-slack", provider_slug: "slack", repository_id: undefined }, + { data_source_id: "ds-gh", provider_slug: "github", repository_id: 12345 }, + ]); + const result = await findExistingGithubDataSource(trpc, "org-1", "space-1", 12345); + expect(result).toEqual({ data_source_id: "ds-gh", deployment_id: "dep-1" }); + }); + + it("returns null when no data source matches the repo id", async () => { + const trpc = makeTrpc([ + { data_source_id: "ds-gh", provider_slug: "github", repository_id: 99999 }, + ]); + const result = await findExistingGithubDataSource(trpc, "org-1", "space-1", 12345); + expect(result).toBeNull(); + }); + + it("returns null when the matching data source has no deployment", async () => { + const trpc = makeTrpc( + [{ data_source_id: "ds-gh", provider_slug: "github", repository_id: 12345 }], + [], + ); + const result = await findExistingGithubDataSource(trpc, "org-1", "space-1", 12345); + expect(result).toBeNull(); + }); + + it("returns null on tRPC errors instead of throwing", async () => { + const trpc = { + dataSource: { list: { query: vi.fn().mockRejectedValue(new Error("network")) } }, + workspaces: { listForSpace: { query: vi.fn() } }, + }; + const result = await findExistingGithubDataSource(trpc, "org-1", "space-1", 12345); + expect(result).toBeNull(); + }); + + it("coerces stringified repository_id to number when filtering", async () => { + // Backend's vw_data_source view may return repository_id as a stringified bigint. + const trpc = makeTrpc([ + { + data_source_id: "ds-gh", + provider_slug: "github", + repository_id: "12345" as unknown as number, + }, + ]); + const result = await findExistingGithubDataSource(trpc, "org-1", "space-1", 12345); + expect(result?.data_source_id).toBe("ds-gh"); + }); +}); + +// Suppress unused import warning — jsonResponse is reserved for future test cases +// that need richer JSON bodies. Vitest's TS config will flag it otherwise. +void jsonResponse; diff --git a/src/setup/github-pat-step.ts b/src/setup/github-pat-step.ts new file mode 100644 index 0000000..1a3efec --- /dev/null +++ b/src/setup/github-pat-step.ts @@ -0,0 +1,639 @@ +/** + * Setup step: connect a GitHub repo via Personal Access Token (PAT). + * + * Alternative to the GitHub App OAuth flow — no browser redirect, no App + * installation. The user pastes a read-only PAT; we call the GitHub API to + * fetch repo metadata, create a Dosu data source, encrypt and store the PAT + * on the backend, then trigger a sync. + * + * Never throws — returns `{ advance: false }` on any unrecoverable failure + * so the caller can continue without crashing setup. + */ + +import { execSync } from "node:child_process"; +import * as p from "@clack/prompts"; +import type { Client } from "../client/client"; +import { createTypedClient } from "../client/trpc"; +import type { Config } from "../config/config"; +import { logger } from "../debug/logger"; +import { DEFAULT_DEPLOYMENT_CONFIG_GITHUB } from "./default-deployment-config"; +import { dim, info } from "./styles"; + +const DATA_SOURCE_VERIFY_POLL_INTERVAL_MS = 1_000; +const DATA_SOURCE_VERIFY_POLL_TIMEOUT_MS = 10_000; + +// PAT storage triggers KMS encrypt + DB upsert + sync enqueue on the backend, +// which routinely exceeds the default 10s client timeout. Give it 60s. +const STORE_PAT_TIMEOUT_MS = 60_000; +// Retries cover the read-after-write race where data_source.create has +// returned but `user_can_modify_data_source` hasn't seen the row yet. +const STORE_PAT_MAX_ATTEMPTS = 3; +const STORE_PAT_BASE_BACKOFF_MS = 500; + +export interface DetectedRepo { + owner: string; + name: string; + slug: string; +} + +export interface GithubPatStepResult { + advance: boolean; + data_source_id?: string; + space_id?: string; + deployment_id?: string; + repo_slug?: string; + /** + * True when a PAT was successfully stored (or wasn't needed — e.g. the + * GitHub App path). False if the user reached the PAT step but storage + * failed, signalling downstream steps (doc analyze, sync wait) to skip + * polling since the repo isn't reachable. + */ + pat_stored?: boolean; +} + +interface GitHubRepoMeta { + id: number; + name: string; + full_name: string; + description: string | null; + default_branch: string; + private: boolean; + stargazers_count?: number; +} + +// biome-ignore lint/suspicious/noExplicitAny: tRPC types are untyped at this level +type AnyClient = any; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export function detectGitRepo(cwd: string = process.cwd()): DetectedRepo | null { + let url: string; + try { + url = execSync("git config --get remote.origin.url", { + cwd, + stdio: ["ignore", "pipe", "ignore"], + }) + .toString() + .trim(); + } catch { + return null; + } + if (!url) return null; + + let m = url.match(/^git@github\.com:([^/]+)\/([^/]+?)(?:\.git)?$/i); + if (!m) { + m = url.match(/^https?:\/\/github\.com\/([^/]+)\/([^/]+?)(?:\.git)?\/?$/i); + } + if (!m) return null; + + const [, owner, name] = m; + return { owner, name, slug: `${owner}/${name}` }; +} + +async function fetchGitHubRepoMeta( + owner: string, + name: string, + pat: string, +): Promise { + try { + const resp = await fetch(`https://api.github.com/repos/${owner}/${name}`, { + headers: { + Authorization: `Bearer ${pat}`, + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + }); + if (!resp.ok) { + logger.warn("setup", `GitHub API returned ${resp.status} for ${owner}/${name}`); + return null; + } + return (await resp.json()) as GitHubRepoMeta; + } catch (err: unknown) { + logger.error( + "setup", + `GitHub API fetch failed: ${err instanceof Error ? err.message : String(err)}`, + ); + return null; + } +} + +async function ensureGithubRepoRow( + apiClient: Client, + repoMeta: GitHubRepoMeta, + slug: string, +): Promise { + try { + const resp = await apiClient.doRequest("POST", "/v1/github/repositories", { + repository_id: repoMeta.id, + slug, + name: repoMeta.name, + description: repoMeta.description ?? "", + is_private: repoMeta.private, + stargazer_count: repoMeta.stargazers_count ?? null, + }); + if (!resp.ok) { + let body = ""; + try { + body = await resp.text(); + } catch { + /* ignore */ + } + logger.error("setup", `POST /v1/github/repositories → ${resp.status}: ${body}`); + return false; + } + return true; + } catch (err: unknown) { + logger.error( + "setup", + `Failed to register github repo row: ${err instanceof Error ? err.message : String(err)}`, + ); + return false; + } +} + +async function createDataSourceForRepo( + trpc: AnyClient, + apiClient: Client, + orgID: string, + spaceID: string, + repoMeta: GitHubRepoMeta, + slug: string, +): Promise<{ deployment_id: string; data_source_id: string } | null> { + try { + // Ensure github.repository row exists before FK-constrained workspace/data_source inserts. + const registered = await ensureGithubRepoRow(apiClient, repoMeta, slug); + if (!registered) { + logger.warn("setup", `Failed to register github.repository row for ${slug}`); + return null; + } + + const deployment = await trpc.workspaces.create.mutate({ + org_id: orgID, + space_id: spaceID, + enabled: true, + name: slug, + description: repoMeta.description ?? "", + provider_slug: "github", + repository_id: repoMeta.id, + metadata: { + app: { deployment_mode: "normal", setup_mode: "auto" }, + provider_slug: "github", + }, + config: DEFAULT_DEPLOYMENT_CONFIG_GITHUB, + }); + if (!deployment?.deployment_id) { + logger.warn("setup", `workspaces.create returned no deployment for ${slug}`); + return null; + } + + const dataSource = await trpc.dataSource.create.mutate({ + org_id: orgID, + provider_slug: "github", + name: slug, + description: repoMeta.description ?? "", + repository_id: repoMeta.id, + }); + if (!dataSource?.data_source_id) { + logger.warn("setup", `dataSource.create returned no data_source for ${slug}`); + // Roll back orphaned deployment + try { + await trpc.workspaces.delete.mutate(deployment.deployment_id); + } catch { + /* best-effort */ + } + return null; + } + const dataSourceID = dataSource.data_source_id; + + await trpc.dataSource.syncDataSource.mutate(dataSourceID); + + const spaceDeployments = await trpc.workspaces.listForSpace.query(spaceID); + await Promise.all( + spaceDeployments.map((d: { deployment_id: string }) => + trpc.deploymentDataSource.create.mutate({ + deployment_id: d.deployment_id, + data_source_id: dataSourceID, + }), + ), + ); + + return { deployment_id: deployment.deployment_id, data_source_id: dataSourceID }; + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + logger.error("setup", `Failed to create data source for ${slug}: ${msg}`); + return null; + } +} + +interface ExistingGithubDataSource { + data_source_id: string; + deployment_id: string; +} + +/** + * Find a GitHub data source already linked to this repo, if any. Used to + * resume a partial `dosu setup` run that created the data source but failed + * before storing the PAT — we re-prompt the PAT against the existing row + * instead of duplicating workspaces. + * + * Returns null if none exists or on any error (caller falls through to + * create-new path). + */ +export async function findExistingGithubDataSource( + trpc: AnyClient, + orgID: string, + spaceID: string, + repositoryID: number, +): Promise { + try { + const sources = (await trpc.dataSource.list.query({ + org_id: orgID, + excluded_provider_slugs: [], + })) as Array<{ + data_source_id?: string; + provider_slug?: string; + repository_id?: number | string; + }>; + const matching = sources.find( + (s) => + s.provider_slug === "github" && + s.data_source_id !== undefined && + Number(s.repository_id) === repositoryID, + ); + if (!matching?.data_source_id) return null; + + const spaceDeployments = (await trpc.workspaces.listForSpace.query(spaceID)) as Array<{ + deployment_id: string; + }>; + const deployment = spaceDeployments[0]; + if (!deployment?.deployment_id) return null; + + return { + data_source_id: matching.data_source_id, + deployment_id: deployment.deployment_id, + }; + } catch (err: unknown) { + logger.warn( + "setup", + `findExistingGithubDataSource failed: ${err instanceof Error ? err.message : String(err)}`, + ); + return null; + } +} + +async function waitForDataSourceAlive( + trpc: AnyClient, + orgID: string, + dataSourceID: string, +): Promise { + const deadline = Date.now() + DATA_SOURCE_VERIFY_POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + try { + const sources = await trpc.dataSource.list.query({ + org_id: orgID, + excluded_provider_slugs: [], + }); + const found = (sources as { data_source_id?: string }[]).some( + (s) => s.data_source_id === dataSourceID, + ); + if (found) return true; + // Backend deleted it (repo unreachable) + return false; + } catch { + /* swallow — retry */ + } + await sleep(DATA_SOURCE_VERIFY_POLL_INTERVAL_MS); + } + return true; // assume alive if we can't confirm either way +} + +export interface StorePatResult { + ok: boolean; + /** HTTP status from the last attempt, or null if the request never returned. */ + status: number | null; + /** Reason categorization for the caller's error message. */ + reason: + | "ok" + | "timeout" + | "permission" + | "not_found" + | "validation" + | "unavailable" + | "server" + | "network"; + /** Truncated response body for diagnostics. */ + body?: string; + /** Extracted `detail` field from a JSON error body, when present. */ + detail?: string; +} + +/** + * Pull the `detail` field out of a FastAPI-style JSON error body. FastAPI + * always shapes errors as `{"detail": "..."}` (or `{"detail": [...]}` for + * validation), so this is the right field to surface to users. + */ +function extractDetail(body: string | undefined): string | undefined { + if (!body) return undefined; + try { + const parsed = JSON.parse(body) as { detail?: unknown }; + if (typeof parsed.detail === "string") return parsed.detail; + } catch { + /* not JSON — fall through */ + } + return undefined; +} + +export interface StorePatOptions { + /** Override base backoff between retries. Tests pass 0. */ + baseBackoffMs?: number; + /** Override total attempt budget. Tests pass 1 to disable retry. */ + maxAttempts?: number; + /** Override per-request timeout. Tests pass small values. */ + timeoutMs?: number; +} + +/** + * POSTs the PAT to the backend, with bounded retries on 403/404 to absorb the + * read-after-write race when `data_source.create` has just returned but the + * row isn't yet visible to `user_can_modify_data_source`. + * + * Distinguishes timeout vs permission vs validation vs server failures so the + * caller can surface a specific recovery message instead of "could not store". + */ +export async function storePat( + apiClient: Client, + dataSourceID: string, + pat: string, + opts: StorePatOptions = {}, +): Promise { + const maxAttempts = opts.maxAttempts ?? STORE_PAT_MAX_ATTEMPTS; + const baseBackoffMs = opts.baseBackoffMs ?? STORE_PAT_BASE_BACKOFF_MS; + const timeoutMs = opts.timeoutMs ?? STORE_PAT_TIMEOUT_MS; + + let lastStatus: number | null = null; + let lastBody: string | undefined; + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const resp = await apiClient.doRequest( + "POST", + `/data-sources/${dataSourceID}/github-pat`, + { pat }, + { timeoutMs }, + ); + lastStatus = resp.status; + if (resp.ok) { + return { ok: true, status: resp.status, reason: "ok" }; + } + + lastBody = await readBodyForLog(resp); + logger.error( + "setup", + `storePat attempt ${attempt} → ${resp.status}: ${lastBody ?? ""}`, + ); + + if ((resp.status === 403 || resp.status === 404) && attempt < maxAttempts) { + await sleep(baseBackoffMs * 2 ** (attempt - 1)); + continue; + } + + const reason: StorePatResult["reason"] = + resp.status === 403 + ? "permission" + : resp.status === 404 + ? "not_found" + : resp.status === 422 + ? "validation" + : resp.status === 503 + ? "unavailable" + : "server"; + return { + ok: false, + status: resp.status, + reason, + body: lastBody, + detail: extractDetail(lastBody), + }; + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + const aborted = err instanceof Error && err.name === "AbortError"; + logger.error("setup", `storePat attempt ${attempt} threw: ${msg}`); + if (attempt === maxAttempts) { + return { + ok: false, + status: lastStatus, + reason: aborted ? "timeout" : "network", + body: msg, + }; + } + await sleep(baseBackoffMs * 2 ** (attempt - 1)); + } + } + + return { ok: false, status: lastStatus, reason: "server", body: lastBody }; +} + +async function readBodyForLog(resp: Response): Promise { + try { + const text = await resp.text(); + return text.slice(0, 512); + } catch { + return undefined; + } +} + +/** + * Map a structured PAT-storage failure to a user-facing recovery message. + * Every branch ends with the exact next command the user should run. When + * the backend supplies a `detail` (FastAPI's standard error field), prefer + * it — it usually contains the specific cause (e.g. expired KMS credentials). + */ +export function patStorageFailureMessage(result: StorePatResult): string { + const rerun = "Re-run `dosu setup` to retry. Use `dosu logs --tail 30` to inspect the failure."; + switch (result.reason) { + case "timeout": + return `Repo connected, but storing the PAT timed out (>60s). ${rerun}`; + case "permission": + return `Repo connected, but the backend rejected the PAT (status 403 after retries). ${rerun}`; + case "not_found": + return `Repo connected, but the data source was not visible to the backend (status 404 after retries). ${rerun}`; + case "validation": + return `Repo connected, but the PAT was rejected as invalid (status 422). Generate a new PAT with the \`repo\` scope and re-run \`dosu setup\`.`; + case "unavailable": + // 503 — backend reported the dependency (typically KMS) is down. The + // detail is actionable so we surface it verbatim. + return result.detail + ? `Repo connected, but the backend can't store the PAT right now: ${result.detail}` + : `Repo connected, but the backend is temporarily unavailable (status 503). ${rerun}`; + case "network": + return `Repo connected, but the PAT could not be stored due to a network error. ${rerun}`; + default: + if (result.detail) { + return `Repo connected, but storing the PAT failed: ${result.detail}`; + } + return `Repo connected, but storing the PAT failed${ + result.status ? ` (status ${result.status})` : "" + }. ${rerun}`; + } +} + +export async function stepConnectGitHubPat( + apiClient: Client, + cfg: Config, +): Promise { + logger.info("setup", "Step: connect GitHub via PAT"); + + if (!cfg.org_id || !cfg.space_id) { + logger.warn("setup", "Missing org_id or space_id — skipping PAT step"); + return { advance: true }; // non-fatal skip + } + + const detected = detectGitRepo(); + if (!detected) { + p.log.info( + dim( + "Not in a GitHub repo — skipping codebase connection. Run `dosu setup` from a git repo to connect your codebase.", + ), + ); + return { advance: true }; + } + + // Explicit choice: PAT or GitHub App + const method = await p.select({ + message: `Connect ${info(detected.slug)} to Dosu`, + options: [ + { + value: "pat", + label: "GitHub PAT", + hint: "read-only token, no app install required", + }, + { + value: "app", + label: "GitHub App", + hint: "installs Dosu app on your org", + }, + { + value: "skip", + label: "Skip for now", + }, + ], + }); + + if (p.isCancel(method) || method === "skip") { + logger.info( + "setup", + `Repo connection skipped (${p.isCancel(method) ? "cancelled" : "user choice"})`, + ); + return { advance: true }; + } + + if (method === "app") { + // Hand off to the existing GitHub App flow. The App flow doesn't require + // PAT storage, so downstream doc-analyze can run unconditionally. + const { stepConnectGitHubRepo } = await import("./github-step"); + const result = await stepConnectGitHubRepo(cfg); + return { + advance: result.advance, + data_source_id: result.created_data_source_ids?.[0], + space_id: result.space_id, + deployment_id: result.deployment_id, + repo_slug: result.created_repository_slugs?.[0], + pat_stored: result.advance, + }; + } + + // PAT path + const tokenURL = `https://github.com/settings/tokens/new?scopes=repo&description=Dosu+CLI`; + p.log.info(dim(`Generate a read-only token: ${tokenURL}`)); + const pat = await p.password({ + message: `GitHub PAT for ${info(detected.slug)}`, + validate: (v) => (v.trim().length === 0 ? "PAT cannot be empty" : undefined), + }); + + if (p.isCancel(pat)) { + return { advance: true }; + } + + const s = p.spinner(); + s.start(`Connecting ${detected.slug}...`); + + // Validate PAT + fetch repo metadata from GitHub API + const repoMeta = await fetchGitHubRepoMeta(detected.owner, detected.name, pat); + if (!repoMeta) { + s.stop("Failed to reach GitHub"); + p.log.warn( + `Could not access ${detected.slug} with that PAT. ` + + "Check the token has at least read-only repo scope and try again.", + ); + return { advance: true }; + } + + // Resume an earlier partial run before creating a duplicate. If a GitHub + // data source for this repo already exists in the org, we just store the + // PAT against it — skipping the workspace/data-source create dance. + // + // Non-null assertions on org_id / space_id are safe — both are guarded at + // the top of this function (search "Missing org_id or space_id"). + const trpc = createTypedClient(cfg); + // biome-ignore lint/style/noNonNullAssertion: guarded above + const orgID = cfg.org_id!; + // biome-ignore lint/style/noNonNullAssertion: guarded above + const spaceID = cfg.space_id!; + const existing = await findExistingGithubDataSource(trpc, orgID, spaceID, repoMeta.id); + + let created: { deployment_id: string; data_source_id: string }; + if (existing) { + logger.info("setup", `Resuming PAT step for existing data source ${existing.data_source_id}`); + created = existing; + } else { + const fresh = await createDataSourceForRepo( + trpc, + apiClient, + orgID, + spaceID, + repoMeta, + detected.slug, + ); + if (!fresh) { + s.stop("Failed to connect repo"); + p.log.error( + `Could not create a Dosu data source for ${detected.slug}. Inspect the failure with \`dosu logs --tail 30\` and re-run \`dosu setup\`.`, + ); + return { advance: true }; + } + created = fresh; + + // Verify the data source wasn't deleted by the backend (repo unreachable via GitHub App) + const alive = await waitForDataSourceAlive(trpc, orgID, created.data_source_id); + if (!alive) { + s.stop("Repo not reachable"); + p.log.warn( + `${detected.slug} was connected but the sync failed — the repo may not be accessible to Dosu. ` + + "Store the PAT to enable direct access.", + ); + // Still store the PAT — it will be used for the next sync attempt + } + } + + // Encrypt and store the PAT on the backend + const stored = await storePat(apiClient, created.data_source_id, pat); + if (!stored.ok) { + s.stop(`Connected ${detected.slug}`); + p.log.warn(patStorageFailureMessage(stored)); + } else { + s.stop(`Connected ${detected.slug}`); + p.log.success(`Repository\n${dim(detected.slug)}`); + } + + logger.info("setup", `GitHub PAT step complete: data_source=${created.data_source_id}`); + return { + advance: true, + data_source_id: created.data_source_id, + space_id: cfg.space_id, + deployment_id: created.deployment_id, + repo_slug: detected.slug, + pat_stored: stored.ok, + }; +} From 44063585ffb0a8891b6237f2883f7448e391974d Mon Sep 17 00:00:00 2001 From: Brandon Young <32550081+brandonyoungdev@users.noreply.github.com> Date: Thu, 4 Jun 2026 21:19:27 -0500 Subject: [PATCH 2/2] fix: address Gemini code review findings on #73 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes from Gemini Code Assist review (skipping the GitHub URL credentials regex tweak — marginal value for interactive CLI use). Fix 1 — pollImportCompletion infinite loop (HIGH) - Add a 5-minute deadline so a stuck backend doc-import task can't hang setup forever. - Return { imported, failed, timed_out: true } on timeout so the caller can surface a meaningful warning instead of a silent { imported: 0 }. - Caller in stepAnalyzeDocs now logs a recovery hint pointing at `dosu docs import-status` / `dosu sources list`. Fix 2 — config wipe on transient network failure (HIGH, data loss) - The deployment-verification path was calling fetchDeployments(), which swallows errors and returns []. Combined with the "deployment not found → wipe local config" branch, this meant any transient API outage silently corrupted the user's deployment_id, deployment_name, and api_key in ~/.config/dosu-cli/config.json. - Now calls apiClient.getDeployments() directly, catches errors locally, logs a warning, and preserves the cached config. The wipe-and-re-pick branch only runs when the API explicitly returns a deployment list that excludes our id. Fix 4 — dosu write rejects empty / whitespace-only input - One-line validation at the top of the action. Exit with a helpful example instead of round-tripping garbage to /v1/topics for a 422. Tests - New src/commands/write.test.ts (5 tests) — empty / whitespace rejection, trimming, backend error surfacing, missing-config exit. - New src/setup/doc-analyze-step.test.ts (6 tests) — SUCCESS, FAILURE, timeout, max-errors (null + throw), recovery. - New src/setup/flow.test.ts tests (2) — preserves config on network failure, still wipes on explicit "not found". Full sweep: 1119/1119 pass (+13 from 1106). typecheck clean. biome clean. --- src/commands/write.test.ts | 114 ++++++++++++++++++++++++++++ src/commands/write.ts | 17 ++++- src/setup/doc-analyze-step.test.ts | 117 +++++++++++++++++++++++++++++ src/setup/doc-analyze-step.ts | 41 +++++++++- src/setup/flow.test.ts | 65 ++++++++++++++++ src/setup/flow.ts | 48 ++++++++---- 6 files changed, 380 insertions(+), 22 deletions(-) create mode 100644 src/commands/write.test.ts create mode 100644 src/setup/doc-analyze-step.test.ts diff --git a/src/commands/write.test.ts b/src/commands/write.test.ts new file mode 100644 index 0000000..2535993 --- /dev/null +++ b/src/commands/write.test.ts @@ -0,0 +1,114 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const mockDoRequest = vi.fn(); +vi.mock("../client/client", () => ({ + Client: vi.fn().mockImplementation(() => ({ doRequest: mockDoRequest })), +})); + +const mockLoadConfig = vi.fn(); +vi.mock("../config/config", () => ({ + loadConfig: (...args: unknown[]) => mockLoadConfig(...args), +})); + +vi.mock("../debug/logger", () => ({ + logger: { debug: vi.fn(), info: vi.fn(), warn: vi.fn(), error: vi.fn() }, +})); + +import { writeCommand } from "./write"; + +const validConfig = { + access_token: "t", + refresh_token: "r", + expires_at: 0, + api_key: "sk_user_test", + deployment_id: "dep-1", + deployment_name: "Default MCP", + org_id: "org1", + space_id: "sp1", +}; + +let logSpy: ReturnType; +let errorSpy: ReturnType; +// biome-ignore lint/suspicious/noExplicitAny: process.exit mock type mismatch +let exitSpy: any; + +beforeEach(() => { + mockDoRequest.mockReset(); + mockLoadConfig.mockReset(); + logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + exitSpy = vi.spyOn(process, "exit").mockImplementation((() => { + throw new Error("exit"); + }) as never); +}); + +afterEach(() => { + logSpy.mockRestore(); + errorSpy.mockRestore(); + exitSpy.mockRestore(); +}); + +async function run(...args: string[]) { + const cmd = writeCommand(); + cmd.exitOverride(); + await cmd.parseAsync(["node", "test", ...args]); +} + +function errorOutput(): string { + return errorSpy.mock.calls.map((c) => c.join(" ")).join("\n"); +} + +describe("dosu write", () => { + it("rejects empty string before any network call", async () => { + mockLoadConfig.mockReturnValue(validConfig); + + await expect(run("")).rejects.toThrow("exit"); + expect(errorOutput()).toContain("fact cannot be empty or whitespace-only"); + expect(mockDoRequest).not.toHaveBeenCalled(); + }); + + it("rejects whitespace-only string before any network call", async () => { + mockLoadConfig.mockReturnValue(validConfig); + + await expect(run(" \t\n ")).rejects.toThrow("exit"); + expect(errorOutput()).toContain("fact cannot be empty or whitespace-only"); + expect(mockDoRequest).not.toHaveBeenCalled(); + }); + + it("sends a trimmed body when the fact has surrounding whitespace", async () => { + mockLoadConfig.mockReturnValue(validConfig); + mockDoRequest.mockResolvedValue(new Response("{}", { status: 200 })); + + await run(" TOKEN_TTL=3600 is intentional, do not change. "); + + expect(mockDoRequest).toHaveBeenCalledTimes(1); + const [method, path, body] = mockDoRequest.mock.calls[0]; + expect(method).toBe("POST"); + expect(path).toBe("/v1/topics"); + expect(body.context).toBe("TOKEN_TTL=3600 is intentional, do not change."); + // name = first 8 whitespace-separated words of the trimmed content + expect(body.name.startsWith("TOKEN_TTL=3600")).toBe(true); + }); + + it("exits with helpful guidance on backend error", async () => { + mockLoadConfig.mockReturnValue(validConfig); + mockDoRequest.mockResolvedValue( + new Response(JSON.stringify({ detail: "knowledge store not found" }), { + status: 404, + headers: { "Content-Type": "application/json" }, + }), + ); + + await expect(run("a real fact")).rejects.toThrow("exit"); + expect(errorOutput()).toContain("knowledge store not found"); + expect(errorOutput()).toContain("dosu logs --tail 30"); + }); + + it("exits when not configured (no api_key)", async () => { + mockLoadConfig.mockReturnValue({ ...validConfig, api_key: undefined }); + + await expect(run("a fact")).rejects.toThrow("exit"); + expect(errorOutput()).toContain("Not configured"); + expect(mockDoRequest).not.toHaveBeenCalled(); + }); +}); diff --git a/src/commands/write.ts b/src/commands/write.ts index 5f5157e..a856623 100644 --- a/src/commands/write.ts +++ b/src/commands/write.ts @@ -32,16 +32,27 @@ export function writeCommand(): Command { .argument("", "The fact or insight to save") .option("--json", "Output as JSON") .action(async (fact: string, opts: { json?: boolean }) => { + const trimmed = fact.trim(); + if (trimmed.length === 0) { + console.error(pc.red("Error: fact cannot be empty or whitespace-only.")); + console.error( + pc.dim( + 'Pass a concise fact, e.g.: dosu write "TOKEN_TTL=3600 is intentional — do not change without coordinating with mobile team."', + ), + ); + process.exit(1); + } + const cfg = requireConfig(); const apiClient = new Client(cfg); - logger.debug("write", `Saving topic: ${fact.slice(0, 60)}`); + logger.debug("write", `Saving topic: ${trimmed.slice(0, 60)}`); const resp = await apiClient.doRequest("POST", "/v1/topics", { // biome-ignore lint/style/noNonNullAssertion: checked in requireConfig deployment_id: cfg.deployment_id!, - name: fact.trim().split(/\s+/).slice(0, 8).join(" "), - context: fact, + name: trimmed.split(/\s+/).slice(0, 8).join(" "), + context: trimmed, }); if (!resp.ok) { diff --git a/src/setup/doc-analyze-step.test.ts b/src/setup/doc-analyze-step.test.ts new file mode 100644 index 0000000..dedc289 --- /dev/null +++ b/src/setup/doc-analyze-step.test.ts @@ -0,0 +1,117 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("../debug/logger", () => ({ + logger: { debug: vi.fn(), info: vi.fn(), warn: vi.fn(), error: vi.fn() }, +})); + +import { pollImportCompletion } from "./doc-analyze-step"; + +/** + * Minimal stub of the TypedClient surface that pollImportCompletion touches. + * Other fields are unused by this function so we type-pun with `as never`. + */ +function makeTrpc(getImportStatus: ReturnType) { + return { + docImports: { + getImportStatus: { query: getImportStatus }, + }, + } as never; +} + +describe("pollImportCompletion", () => { + let getImportStatus: ReturnType; + + beforeEach(() => { + getImportStatus = vi.fn(); + }); + + it("returns counts when state transitions to SUCCESS", async () => { + getImportStatus.mockResolvedValueOnce({ + task_id: "t1", + state: "SUCCESS", + detail: { total: 5, completed: 4, failed: 1 }, + }); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 1_000, + }); + + expect(result).toEqual({ imported: 4, failed: 1 }); + expect(result.timed_out).toBeUndefined(); + }); + + it("returns counts when state transitions to FAILURE", async () => { + getImportStatus.mockResolvedValueOnce({ + task_id: "t1", + state: "FAILURE", + detail: { total: 3, completed: 0, failed: 3 }, + }); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 1_000, + }); + + expect(result).toEqual({ imported: 0, failed: 3 }); + }); + + it("times out and surfaces timed_out=true when state stays PROGRESS", async () => { + // Always reply PROGRESS so the loop never finds a terminal state. + getImportStatus.mockResolvedValue({ + task_id: "t1", + state: "PROGRESS", + detail: { total: 1, completed: 0, failed: 0 }, + }); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 10, // 10ms — exit after ~1-2 polls + }); + + expect(result).toMatchObject({ imported: 0, failed: 0, timed_out: true }); + // Should have polled at least once before bailing + expect(getImportStatus.mock.calls.length).toBeGreaterThanOrEqual(1); + }); + + it("returns early after IMPORT_STATUS_MAX_ERRORS consecutive null responses", async () => { + getImportStatus.mockResolvedValue(null); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 60_000, // big enough that the error budget is what trips, not the deadline + }); + + expect(result).toEqual({ imported: 0, failed: 0 }); + // 3 consecutive nulls trips the error budget + expect(getImportStatus.mock.calls.length).toBe(3); + }); + + it("returns early after IMPORT_STATUS_MAX_ERRORS consecutive throws", async () => { + getImportStatus.mockRejectedValue(new Error("network blip")); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 60_000, + }); + + expect(result).toEqual({ imported: 0, failed: 0 }); + expect(getImportStatus.mock.calls.length).toBe(3); + }); + + it("recovers from a transient error then completes", async () => { + getImportStatus.mockRejectedValueOnce(new Error("blip")).mockResolvedValueOnce({ + task_id: "t1", + state: "SUCCESS", + detail: { total: 2, completed: 2, failed: 0 }, + }); + + const result = await pollImportCompletion(makeTrpc(getImportStatus), "t1", { + intervalMs: 0, + timeoutMs: 5_000, + }); + + expect(result).toEqual({ imported: 2, failed: 0 }); + expect(getImportStatus).toHaveBeenCalledTimes(2); + }); +}); diff --git a/src/setup/doc-analyze-step.ts b/src/setup/doc-analyze-step.ts index 3d4eb93..77b3fd6 100644 --- a/src/setup/doc-analyze-step.ts +++ b/src/setup/doc-analyze-step.ts @@ -18,6 +18,10 @@ const DOC_SCAN_POLL_INTERVAL_MS = 2_000; const DOC_SCAN_POLL_TIMEOUT_MS = 60_000; const IMPORT_STATUS_POLL_INTERVAL_MS = 2_000; const IMPORT_STATUS_MAX_ERRORS = 3; +// Upper bound for waiting on the doc-import workflow. Larger repos legitimately +// take a few minutes; anything past this is almost certainly a stuck backend +// task, and we should release the CLI rather than block setup forever. +const IMPORT_STATUS_POLL_TIMEOUT_MS = 5 * 60_000; const IMPORTABLE_EXTENSIONS = new Set([".md", ".mdx", ".txt", ".rst", ".markdown"]); @@ -109,12 +113,30 @@ async function waitForFiles( return { kind: "indexing_timeout", files }; } -async function pollImportCompletion( +export interface PollImportCompletionResult { + imported: number; + failed: number; + /** True when the deadline expired before the workflow reached a terminal state. */ + timed_out?: boolean; +} + +export interface PollImportCompletionOptions { + /** Override the wall-clock deadline. Tests pass small values. */ + timeoutMs?: number; + /** Override the per-attempt sleep. Tests pass 0. */ + intervalMs?: number; +} + +export async function pollImportCompletion( trpc: TypedClient, taskID: string, -): Promise<{ imported: number; failed: number }> { + opts: PollImportCompletionOptions = {}, +): Promise { + const timeoutMs = opts.timeoutMs ?? IMPORT_STATUS_POLL_TIMEOUT_MS; + const intervalMs = opts.intervalMs ?? IMPORT_STATUS_POLL_INTERVAL_MS; + const deadline = Date.now() + timeoutMs; let errors = 0; - while (true) { + while (Date.now() < deadline) { try { const status = (await trpc.docImports.getImportStatus.query( taskID, @@ -141,8 +163,13 @@ async function pollImportCompletion( ); if (errors >= IMPORT_STATUS_MAX_ERRORS) return { imported: 0, failed: 0 }; } - await sleep(IMPORT_STATUS_POLL_INTERVAL_MS); + await sleep(intervalMs); } + logger.warn( + "setup", + `pollImportCompletion timed out after ${timeoutMs}ms waiting for task ${taskID}`, + ); + return { imported: 0, failed: 0, timed_out: true }; } export async function stepAnalyzeDocs( @@ -242,6 +269,12 @@ export async function stepAnalyzeDocs( const result = await pollImportCompletion(trpc, taskID); imported = result.imported; failed = result.failed; + if (result.timed_out) { + p.log.warn( + "Doc import is still running in the background — the CLI stopped waiting after 5 minutes. " + + "Check progress later with `dosu docs import-status` or `dosu sources list`.", + ); + } } else { // Queued in background with no task ID — treat as success imported = docFiles.length; diff --git a/src/setup/flow.test.ts b/src/setup/flow.test.ts index 02d0d10..33edb31 100644 --- a/src/setup/flow.test.ts +++ b/src/setup/flow.test.ts @@ -1958,4 +1958,69 @@ describe("runSetup checkpoint behavior", () => { expect(mockStepConnectGitHubPat).toHaveBeenCalledTimes(1); expect(mockStepAnalyzeDocs).not.toHaveBeenCalled(); }); + + // Regression test for the data-loss bug: a transient network/API failure + // during the cached-deployment-verification step used to wipe deployment_id, + // deployment_name, and api_key from local config. Now we preserve them. + it("preserves cached deployment when getDeployments fails (network/API error)", async () => { + saveConfig(makeCfg()); // has deployment_id, deployment_name, api_key + setupAuthed({ + getDeployments: vi.fn().mockRejectedValue(new Error("ECONNRESET")), + }); + mockTrpc.user.getCliOnboardingContext.query.mockResolvedValue({ + user_id: "test-user-id", + finished_onboarding: true, + cli_onboarding_enabled: false, + }); + vi.spyOn(providersModule, "allSetupProviders").mockReturnValue([]); + + await runSetup(); + + const saved = loadConfig(); + // Cached deployment must NOT be wiped by a network failure + expect(saved.deployment_id).toBeDefined(); + expect(saved.deployment_name).toBeDefined(); + expect(saved.api_key).toBeDefined(); + // And we should NOT have emitted the "no longer exists" warning, since + // we never confirmed it's gone. + const warnCalls = vi.mocked(p.log.warn).mock.calls.map((c) => String(c[0])); + expect(warnCalls.some((m) => m.includes("no longer exists"))).toBe(false); + }); + + // Confirms the other half of the contract: when the API DOES return a list + // and our id isn't in it, we still wipe + re-pick. This guards against the + // fix above accidentally suppressing legitimate cleanups. + it("wipes cached deployment when getDeployments confirms it's gone", async () => { + saveConfig(makeCfg()); + setupAuthed({ + // Returns a list that doesn't contain our deployment_id + getDeployments: vi.fn().mockResolvedValue([ + { + deployment_id: "different-id", + name: "Other", + description: "", + provider_slug: "dosu_mcp", + enabled: true, + org_id: "o1", + org_name: "Org1", + space_id: "sp-other", + }, + ]), + }); + mockTrpc.user.getCliOnboardingContext.query.mockResolvedValue({ + user_id: "test-user-id", + finished_onboarding: true, + cli_onboarding_enabled: false, + }); + // User cancels the org-pick prompt so we exit without re-binding — + // this keeps the test focused on the wipe behaviour. + vi.mocked(p.select).mockResolvedValue(Symbol("cancel") as never); + vi.mocked(p.isCancel).mockImplementation((v) => typeof v === "symbol"); + vi.spyOn(providersModule, "allSetupProviders").mockReturnValue([]); + + await runSetup(); + + const warnCalls = vi.mocked(p.log.warn).mock.calls.map((c) => String(c[0])); + expect(warnCalls.some((m) => m.includes("no longer exists"))).toBe(true); + }); }); diff --git a/src/setup/flow.ts b/src/setup/flow.ts index 4af5435..d8b8f6e 100644 --- a/src/setup/flow.ts +++ b/src/setup/flow.ts @@ -151,22 +151,40 @@ export async function runSetup(opts: SetupOptions = {}): Promise { // Stored deployment_id exists — verify it's still reachable before // spending the rest of setup on a stale reference (e.g. after switching // between local dev and prod, or if a deployment was deleted). - const deployments = await fetchDeployments(apiClient); - const exists = deployments.some((d) => d.deployment_id === cfg.deployment_id); - if (!exists) { - p.log.warn( - `Project "${cfg.deployment_name ?? cfg.deployment_id}" no longer exists — select a new one.`, + // + // IMPORTANT: only wipe the cached config when the API explicitly returns + // a deployment list that excludes our id. A network/API failure must NOT + // count as "deployment doesn't exist" — that would silently corrupt the + // user's config every time prod has a transient outage. Call + // `getDeployments()` directly so we see the error instead of letting + // `fetchDeployments` swallow it into an empty array. + let deployments: Deployment[] | null = null; + try { + deployments = await apiClient.getDeployments(); + } catch (err: unknown) { + logger.warn( + "setup", + `Could not verify cached deployment (preserving config): ${err instanceof Error ? err.message : String(err)}`, ); - cfg.deployment_id = undefined; - cfg.deployment_name = undefined; - cfg.api_key = undefined; - saveConfig(cfg); - const ok = await resolveDeployment(apiClient, cfg, opts); - if (!ok) { - await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_failed", { - reason: "deployment_resolution_failed", - }); - return; + } + + if (deployments !== null) { + const exists = deployments.some((d) => d.deployment_id === cfg.deployment_id); + if (!exists) { + p.log.warn( + `Project "${cfg.deployment_name ?? cfg.deployment_id}" no longer exists — select a new one.`, + ); + cfg.deployment_id = undefined; + cfg.deployment_name = undefined; + cfg.api_key = undefined; + saveConfig(cfg); + const ok = await resolveDeployment(apiClient, cfg, opts); + if (!ok) { + await trackCliOnboardingEvent(cfg, onboardingRunID, "cli_onboarding_failed", { + reason: "deployment_resolution_failed", + }); + return; + } } } }