diff --git a/daemon/main.ts b/daemon/main.ts index 3de202b2b..68ebd93f0 100644 --- a/daemon/main.ts +++ b/daemon/main.ts @@ -34,6 +34,11 @@ import { import { downloadCache } from "./cache.ts"; import { createAIHandlers } from "./ai/handlers.ts"; import { createSandboxHandlers, type DeployParams } from "./sandbox.ts"; +import { + cacheConfigFromEnvs, + restoreCache, + snapshotCache, +} from "./sandboxCache.ts"; import { register, type TunnelConnection } from "./tunnel.ts"; import { createWorker, @@ -570,6 +575,17 @@ if (SANDBOX_MODE) { // Also update the module-level variable so getSiteName() returns the value setSiteName(site); + // Best-effort cache restore — populates /deno-dir from S3 so the + // upcoming deno cache resolution is mostly a no-op. Opt-in via + // DECO_CACHE_S3_BUCKET in the deploy body envs (admin forwards it + // there, NOT via SandboxClaim.spec.env: the operator rejects + // per-claim env when warm pool is in use). + // Any failure logs and continues with the cold-start path. Awaited + // because we want it done before the worker subprocess starts + // requesting modules. + const cacheCfg = cacheConfigFromEnvs(envs); + await restoreCache(cacheCfg).catch(() => {/* never throws */}); + // Use run command from deploy request. // If no runCommand provided, default to Deno runner only if dev.ts exists. // This prevents a timeout loop when the sandbox is used for non-Deco repos @@ -604,6 +620,14 @@ if (SANDBOX_MODE) { branch, }); + // Best-effort cache snapshot — fires after the worker is up so + // /deno-dir reflects every module the site actually needed. + // Detached: never blocks the deploy response. Failures are + // logged and swallowed inside snapshotCache(). + currentSite.gitReady + .then(() => snapshotCache(cacheCfg)) + .catch(() => {/* never throws */}); + // Always create AI handlers — OAuth can be used when no API key is set aiHandlers = createAIHandlers({ cwd: Deno.cwd(), diff --git a/daemon/sandboxCache.ts b/daemon/sandboxCache.ts new file mode 100644 index 000000000..aa44fd298 --- /dev/null +++ b/daemon/sandboxCache.ts @@ -0,0 +1,198 @@ +/** + * Sandbox runner cache — opt-in S3-backed restore/snapshot for /deno-dir. + * + * Skipping the Deno module download on cold start saves ~5s per wake-up + * on Deco/Storefront sites. The cache is *advisory*: every code path here + * swallows errors and returns a no-op result. A failed restore must let + * the runner fall back to the cold-start git clone + deno cache flow; a + * failed snapshot must not affect the user-visible deploy. + * + * Config arrives via the /sandbox/deploy POST body (envs field) — NOT via + * container env vars. The agent-sandbox operator rejects per-claim env when + * a warm pool is in use, so the admin forwards cache hints through the + * deploy body instead. The runner reads them from the request and passes + * them into restoreCache/snapshotCache as explicit config. + * + * Auth: standard AWS SDK credential chain — Pod Identity in EKS supplies + * credentials transparently via container metadata env vars. + * + * Layout in S3: + * s3:////cache.tgz (tar.gz of /deno-dir) + */ + +import { + GetObjectCommand, + PutObjectCommand, + S3Client, +} from "npm:@aws-sdk/client-s3@3.569.0"; + +const DENO_DIR = Deno.env.get("DENO_DIR") ?? "/deno-dir"; + +export interface CacheConfig { + bucket?: string; + region?: string; + /** S3 key (path within bucket), e.g. "/cache.tgz" */ + key?: string; +} + +const isEnabled = ( + cfg: CacheConfig, +): cfg is Required> & CacheConfig => + Boolean(cfg.bucket && cfg.key); + +let cachedClient: S3Client | null = null; +let cachedRegion: string | null = null; +const client = (region: string): S3Client => { + if (!cachedClient || cachedRegion !== region) { + cachedClient = new S3Client({ region }); + cachedRegion = region; + } + return cachedClient; +}; + +export interface CacheResult { + ok: boolean; + ms: number; + reason?: string; +} + +const log = (msg: string) => console.log(`[sandbox-cache] ${msg}`); +const warn = (msg: string) => console.warn(`[sandbox-cache] ${msg}`); + +/** + * Build a CacheConfig from the envs map received via /sandbox/deploy. + * Helper so callers don't have to know the env-var names. + */ +export function cacheConfigFromEnvs( + envs?: Record, +): CacheConfig { + return { + bucket: envs?.DECO_CACHE_S3_BUCKET, + region: envs?.DECO_CACHE_S3_REGION ?? "us-west-2", + key: envs?.DECO_CACHE_S3_KEY, + }; +} + +/** + * Restore the Deno-cache tarball from S3 into /deno-dir. + * Streams S3 body directly into `tar -xzf -` without buffering in Deno. + */ +export async function restoreCache(cfg: CacheConfig): Promise { + if (!isEnabled(cfg)) return { ok: false, ms: 0, reason: "disabled" }; + const { bucket, key, region = "us-west-2" } = cfg; + const start = performance.now(); + try { + const resp = await client(region).send( + new GetObjectCommand({ Bucket: bucket, Key: key }), + ); + const body = resp.Body; + if (!body) { + return { ok: false, ms: performance.now() - start, reason: "empty body" }; + } + + // Stream body → tar. tar reads the whole archive from stdin and unpacks + // into /deno-dir directly. No intermediate file or buffer. + const tar = new Deno.Command("tar", { + args: ["-xzf", "-", "-C", "/"], + stdin: "piped", + stdout: "null", + stderr: "piped", + }).spawn(); + + const stream = + (body as { transformToWebStream: () => ReadableStream }) + .transformToWebStream(); + await stream.pipeTo(tar.stdin); + const status = await tar.status; + + if (!status.success) { + const stderr = await new Response(tar.stderr).text().catch(() => ""); + return { + ok: false, + ms: performance.now() - start, + reason: `tar exit ${status.code}: ${stderr.slice(0, 200)}`, + }; + } + + const ms = performance.now() - start; + log(`restored cache from s3://${bucket}/${key} in ${ms.toFixed(0)}ms`); + return { ok: true, ms }; + } catch (err) { + const ms = performance.now() - start; + // NoSuchKey on first wake-up of a site is normal — log at info level. + const isMissing = (err as { name?: string }).name === "NoSuchKey" || + String(err).includes("NoSuchKey"); + const fmt = `${(err as Error).message ?? err}`; + if (isMissing) { + log( + `no cache yet for s3://${bucket}/${key} (cold start, ${ + ms.toFixed(0) + }ms)`, + ); + } else { + warn(`restore failed (${ms.toFixed(0)}ms): ${fmt}`); + } + return { ok: false, ms, reason: isMissing ? "missing" : fmt }; + } +} + +/** + * Tar /deno-dir and upload to S3. Best-effort — failures are logged and + * swallowed so a failed snapshot can't break a successful deploy. + * + * Buffers the tarball in memory before upload (one-shot PUT). For a typical + * site with ~200MB compressed cache this is fine; if cache sizes grow we + * should switch to @aws-sdk/lib-storage's streaming Upload. + */ +export async function snapshotCache(cfg: CacheConfig): Promise { + if (!isEnabled(cfg)) return { ok: false, ms: 0, reason: "disabled" }; + const { bucket, key, region = "us-west-2" } = cfg; + const start = performance.now(); + try { + // Skip if /deno-dir doesn't exist or is empty + const stat = await Deno.stat(DENO_DIR).catch(() => null); + if (!stat?.isDirectory) { + return { ok: false, ms: 0, reason: "no deno-dir" }; + } + + const tar = new Deno.Command("tar", { + args: ["-czf", "-", "-C", "/", DENO_DIR.replace(/^\//, "")], + stdout: "piped", + stderr: "piped", + }).spawn(); + + const body = new Uint8Array(await new Response(tar.stdout).arrayBuffer()); + const status = await tar.status; + if (!status.success) { + const stderr = await new Response(tar.stderr).text().catch(() => ""); + return { + ok: false, + ms: performance.now() - start, + reason: `tar exit ${status.code}: ${stderr.slice(0, 200)}`, + }; + } + + await client(region).send( + new PutObjectCommand({ + Bucket: bucket, + Key: key, + Body: body, + ContentType: "application/gzip", + }), + ); + + const ms = performance.now() - start; + log( + `snapshotted ${ + (body.length / 1024 / 1024).toFixed(1) + }MB → s3://${bucket}/${key} in ${ms.toFixed(0)}ms`, + ); + return { ok: true, ms }; + } catch (err) { + const ms = performance.now() - start; + warn( + `snapshot failed (${ms.toFixed(0)}ms): ${(err as Error).message ?? err}`, + ); + return { ok: false, ms, reason: `${(err as Error).message ?? err}` }; + } +}