superagent-ai · alanzabihi · Jul 2, 2026 · Jul 2, 2026
diff --git a/examples/node22-bookworm-computer-use-image.ts b/examples/node22-bookworm-computer-use-image.ts
@@ -57,6 +57,16 @@ const CUA_DRIVER_INSTALL_COMMAND = [
   '"$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh)"',
 ].join(' ');
 
+/**
+ * Sandbox memory default (GiB). Daytona's platform default (no `resources` override) is a 1GiB
+ * cgroup limit -- confirmed by inspecting `/sys/fs/cgroup/memory.max` inside a real sandbox -- which
+ * reliably gets a full `npm install` of AutoBrin-flue's dependency tree OOM-killed a few seconds in
+ * (see superagent-ai/benchpress#40). Confirmed empirically: 2/2 sandboxes at the 1GiB default were
+ * OOM-killed (`bash`'s own "Killed" job-control message, the exact signature from #40) within ~35s;
+ * 3/3 sandboxes at 4GiB completed the same `npm install` successfully in ~35-37s.
+ */
+const DEFAULT_SANDBOX_MEMORY_GIB = 4;
+
 export function buildNode22BookwormComputerUseImage(): Image {
   return Image.base('node:22-bookworm')
     .runCommands(
@@ -95,6 +105,7 @@ async function main(): Promise<void> {
     kind: 'image',
     image: buildNode22BookwormComputerUseImage(),
     autoStopInterval: AUTO_STOP_SAFETY_NET_MINUTES,
+    resources: { memory: DEFAULT_SANDBOX_MEMORY_GIB },
   });
 
   try {

diff --git a/src/contenders/autobrin.ts b/src/contenders/autobrin.ts
@@ -3,7 +3,7 @@ import { tmpdir } from 'node:os';
 import path from 'node:path';
 import { pathToFileURL } from 'node:url';
 import { spawn } from 'node:child_process';
-import type { Image, Sandbox } from '@daytona/sdk';
+import type { Image, Resources, Sandbox } from '@daytona/sdk';
 import type { AgentRunner, BenchmarkTask, ContenderClaim, ConfirmedFinding, NormalizedResult, ProposedPatch, RunContext, RunControls, TargetHandle } from './types.js';
 import { webappTargetMetadata } from './types.js';
 import { ensureAutobrinCheckout } from '../lib/checkout.js';
@@ -38,6 +38,15 @@ export type AutobrinContenderConfig = {
   visionModel?: string;
   /** Keep the sandbox running after the engagement instead of deleting it (debugging only). `transport: 'daytona'` only. */
   keepSandbox?: boolean;
+  /**
+   * Sandbox CPU/memory/disk allocation. `transport: 'daytona'` only, and only valid together with
+   * `image`: combining it with `snapshot` throws when the runner is created, because Daytona
+   * snapshots fix their resources at snapshot-build time (`CreateSnapshotParams.resources`) and
+   * leave nothing to override at sandbox-creation time. Omitting this uses the Daytona platform
+   * default, which is too small for a full `npm install` of AutoBrin-flue's dependency tree under
+   * load and can get the process OOM-killed -- see superagent-ai/benchpress#40.
+   */
+  resources?: Pick<Resources, 'cpu' | 'memory' | 'disk'>;
 };
 
 export type AutobrinRunOptions = {
@@ -370,6 +379,11 @@ export function createAutobrinRunner(options: AutobrinRunOptions): AgentRunner {
     if (!options.config.image && !options.config.snapshot) {
       throw new Error(`autobrin contender "${contenderId}": transport "daytona" requires "image" or "snapshot"`);
     }
+    if (options.config.resources && options.config.snapshot) {
+      throw new Error(
+        `autobrin contender "${contenderId}": "resources" is only supported when creating a sandbox from "image" (Daytona snapshots fix resources at build time)`,
+      );
+    }
   }
 
   return {
@@ -499,6 +513,7 @@ async function runViaDaytona(input: RunInput): Promise<NormalizedResult> {
     ref: config.ref,
     image: config.image,
     snapshot: config.snapshot,
+    resources: config.resources,
     visionModel: config.visionModel,
     payload,
     keepSandbox: config.keepSandbox,

diff --git a/src/daytona/launcher.ts b/src/daytona/launcher.ts
@@ -1,4 +1,4 @@
-import type { Image, Sandbox } from '@daytona/sdk';
+import type { Image, Resources, Sandbox } from '@daytona/sdk';
 import { ensureComputerUseAssets, ensureComputerUseStarted } from './assets.js';
 import { bootstrapAutobrinFlue, prepareRepoTarget, prepareWebappTarget } from './bootstrap.js';
 import {
@@ -18,6 +18,13 @@ export type DaytonaRunOptions = {
   /** String image ref (registry tag) or a declarative `Image` built with `Image.base(...)`. */
   image?: string | Image;
   snapshot?: string;
+  /**
+   * CPU/memory/disk allocation for the sandbox. Applied only when creating from `image` and ignored
+   * for `snapshot`: `createSandbox()`'s snapshot params (`CreateSandboxFromSnapshotParams`) have no
+   * `resources` field, since Daytona snapshots fix resources at snapshot-build time. See
+   * `AutobrinContenderConfig.resources` in `src/contenders/autobrin.ts` (superagent-ai/benchpress#40).
+   */
+  resources?: Resources;
   visionModel?: string;
   payload: EngagementPayload | unknown;
   keepSandbox?: boolean;
@@ -73,6 +80,7 @@ export async function runDaytonaEngagement(options: DaytonaRunOptions): Promise<
             image: options.image!,
             envVars: sandboxEnv,
             autoStopInterval: AUTO_STOP_SAFETY_NET_MINUTES,
+            resources: options.resources,
           },
     );
 

diff --git a/tests/autobrin-contender.test.ts b/tests/autobrin-contender.test.ts
@@ -350,6 +350,26 @@ describe('createAutobrinRunner transport validation', () => {
     const runner = createAutobrinRunner({ config: { id: 'x', type: 'autobrin', transport: 'daytona', snapshot: 'daytona-large' } });
     expect(runner.id).toBe('x');
   });
+
+  // Regression (superagent-ai/benchpress#40): Daytona sandboxes had no way to request more than
+  // the platform default resources, so a full `npm install` of AutoBrin-flue's dependency tree
+  // could get OOM-killed. `resources` only has an effect when creating from `image` -- Daytona
+  // snapshots fix their resources at snapshot-build time, so `createSandbox()`'s snapshot params
+  // have no `resources` field to set (see `CreateSandboxFromSnapshotParams` in `@daytona/sdk`).
+  it('accepts transport "daytona" with "resources" combined with "image"', () => {
+    const runner = createAutobrinRunner({
+      config: { id: 'x', type: 'autobrin', transport: 'daytona', image: 'some-image', resources: { memory: 4 } },
+    });
+    expect(runner.id).toBe('x');
+  });
+
+  it('rejects "resources" combined with "snapshot" (snapshots fix resources at build time)', () => {
+    expect(() =>
+      createAutobrinRunner({
+        config: { id: 'x', type: 'autobrin', transport: 'daytona', snapshot: 'daytona-large', resources: { memory: 4 } },
+      }),
+    ).toThrow(/"resources" is only supported when creating a sandbox from "image"/);
+  });
 });
 
 describe('materializeTarget', () => {

diff --git a/tests/autobrin-daytona-sequencing.test.ts b/tests/autobrin-daytona-sequencing.test.ts
@@ -162,3 +162,65 @@ describe('autobrin daytona transport: webapp modality (superagent-ai/benchpress#
     expect(launcherMocks.runDaytonaEngagement).not.toHaveBeenCalled();
   });
 });
+
+// Regression (superagent-ai/benchpress#40): AutobrinContenderConfig had no way to request more
+// than the Daytona platform default sandbox resources, so a full `npm install` of AutoBrin-flue's
+// dependency tree could get OOM-killed. runDaytonaEngagement's own resources handling (only applied
+// for the "image" creation branch) is covered in tests/daytona-launcher.test.ts; these tests only
+// cover that runViaDaytona actually forwards config.resources into the options it builds.
+describe('autobrin daytona transport: resources config threading (superagent-ai/benchpress#40)', () => {
+  const baseTask = { id: 't1', benchmarkId: 'repo-cve-smoke' };
+  const baseTarget = {
+    benchmarkId: 'repo-cve-smoke',
+    taskId: 't1',
+    modality: 'repo' as const,
+    repo: 'owner/repo',
+    sha: 'abc123',
+  };
+  const baseControls = { model: 'kimi-azure/kimi-k2.6' };
+
+  const tmpDirs: string[] = [];
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    for (const dir of tmpDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
+  });
+
+  function makeContext() {
+    const root = mkdtempSync(path.join(tmpdir(), 'benchpress-daytona-resources-'));
+    tmpDirs.push(root);
+    return { runId: 'run1', resultsDir: path.join(root, 'results'), engagementsDir: path.join(root, 'engagements') };
+  }
+
+  function mockSuccessfulEngagement() {
+    checkoutMocks.ensureAutobrinCheckout.mockReset().mockResolvedValue({ root: '/cache/x', ref: 'staging', commitSha: 'deadbeef' });
+    launcherMocks.runDaytonaEngagement.mockReset().mockResolvedValue({
+      sandboxId: 'sandbox-1',
+      engagement: { exitCode: 0, streamLogPath: 'x', resultPath: 'y', resultJson: {} },
+      computerUse: {},
+      keptSandbox: false,
+    });
+  }
+
+  it('forwards config.resources into the options passed to runDaytonaEngagement', async () => {
+    mockSuccessfulEngagement();
+    const runner = createAutobrinRunner({
+      config: { id: 'x', type: 'autobrin', transport: 'daytona', image: 'test-image', resources: { cpu: 2, memory: 4, disk: 20 } },
+    });
+
+    await runner.run({ task: baseTask, target: baseTarget, controls: baseControls, context: makeContext() });
+
+    const options = launcherMocks.runDaytonaEngagement.mock.calls[0]?.[0] as { resources?: unknown };
+    expect(options.resources).toEqual({ cpu: 2, memory: 4, disk: 20 });
+  });
+
+  it('omits resources entirely when not configured (unchanged default behavior)', async () => {
+    mockSuccessfulEngagement();
+    const runner = createAutobrinRunner({ config: { id: 'x', type: 'autobrin', transport: 'daytona', image: 'test-image' } });
+
+    await runner.run({ task: baseTask, target: baseTarget, controls: baseControls, context: makeContext() });
+
+    const options = launcherMocks.runDaytonaEngagement.mock.calls[0]?.[0] as { resources?: unknown };
+    expect(options.resources).toBeUndefined();
+  });
+});
diff --git a/tests/daytona-launcher.test.ts b/tests/daytona-launcher.test.ts
@@ -221,3 +221,54 @@ describe('runDaytonaEngagement computer-use start gating (superagent-ai/benchpre
     expect(clientMocks.deleteDaytonaSandbox).toHaveBeenCalledWith('sandbox-1', baseOptions.env);
   });
 });
+
+// Regression (superagent-ai/benchpress#40): sandboxes had no way to request more than the Daytona
+// platform default resources, so a full `npm install` of AutoBrin-flue's dependency tree could get
+// OOM-killed (confirmed live: the platform default is a 1GiB cgroup memory limit that reliably
+// OOM-kills that install; see the PR description for the empirical repro).
+describe('runDaytonaEngagement resources threading (superagent-ai/benchpress#40)', () => {
+  beforeEach(() => {
+    vi.spyOn(console, 'error').mockImplementation(() => undefined);
+    clientMocks.createSandbox.mockReset().mockResolvedValue({ id: 'sandbox-1' });
+    clientMocks.deleteDaytonaSandbox.mockReset().mockResolvedValue(undefined);
+    assetsMocks.ensureComputerUseStarted.mockReset().mockResolvedValue(true);
+    engagementMocks.runEngagementViaHttp.mockReset().mockResolvedValue({
+      exitCode: 0,
+      streamLogPath: '/logs/stream.jsonl',
+      resultPath: '/result.json',
+      resultJson: { status: 'ok' },
+    });
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  const payload = { modality: 'repo' as const, repo: 'owner/repo' };
+  const env = { DAYTONA_API_KEY: 'test' };
+
+  it('passes resources through to createSandbox when creating from "image"', async () => {
+    await runDaytonaEngagement({ image: 'test-image', resources: { cpu: 2, memory: 4, disk: 20 }, payload, env });
+
+    expect(clientMocks.createSandbox).toHaveBeenCalledTimes(1);
+    const params = clientMocks.createSandbox.mock.calls[0]?.[1] as Record<string, unknown>;
+    expect(params.kind).toBe('image');
+    expect(params.resources).toEqual({ cpu: 2, memory: 4, disk: 20 });
+  });
+
+  it('omits resources when not provided (unchanged default behavior)', async () => {
+    await runDaytonaEngagement({ image: 'test-image', payload, env });
+
+    const params = clientMocks.createSandbox.mock.calls[0]?.[1] as Record<string, unknown>;
+    expect(params.resources).toBeUndefined();
+  });
+
+  it('never applies resources on the "snapshot" branch (Daytona snapshots fix resources at build time)', async () => {
+    await runDaytonaEngagement({ snapshot: 'daytona-large', resources: { memory: 4 }, payload, env });
+
+    expect(clientMocks.createSandbox).toHaveBeenCalledTimes(1);
+    const params = clientMocks.createSandbox.mock.calls[0]?.[1] as Record<string, unknown>;
+    expect(params.kind).toBe('snapshot');
+    expect(params).not.toHaveProperty('resources');
+  });
+});
diff --git a/tests/daytona.test.ts b/tests/daytona.test.ts
@@ -3,8 +3,9 @@ import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
 import { connect, createServer } from 'node:net';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
-import { afterEach, describe, expect, it } from 'vitest';
-import { getDaytonaClientConfig } from '../src/daytona/client.js';
+import type { Daytona } from '@daytona/sdk';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+import { createSandbox, getDaytonaClientConfig } from '../src/daytona/client.js';
 import { buildEngagementRunScript } from '../src/daytona/engagement.js';
 import { assertAllowedFlueRef, buildSandboxEnv } from '../src/daytona/env.js';
 import {
@@ -32,6 +33,29 @@ describe('daytona client config', () => {
   });
 });
 
+// Regression (superagent-ai/benchpress#40): confirms createSandbox() generically forwards a
+// "resources" field (cpu/memory/disk) through to daytona.create() -- it never had a special case
+// blocking it, but nothing upstream ever set it either. See src/daytona/launcher.ts for where
+// "resources" actually gets set on the SandboxCreateInput passed here.
+describe('createSandbox (superagent-ai/benchpress#40: resources threading)', () => {
+  it('forwards "resources" (and other fields) to daytona.create(), stripping only "kind"', async () => {
+    const create = vi.fn(async (_params: Record<string, unknown>, _options: Record<string, unknown>) => ({ id: 'fake-sandbox' }));
+    const fakeDaytona = { create } as unknown as Daytona;
+
+    await createSandbox(fakeDaytona, {
+      kind: 'image',
+      image: 'test-image',
+      resources: { cpu: 2, memory: 4, disk: 20 },
+      envVars: { FOO: 'bar' },
+    });
+
+    expect(create).toHaveBeenCalledTimes(1);
+    const [params, options] = create.mock.calls[0]!;
+    expect(params).toEqual({ image: 'test-image', resources: { cpu: 2, memory: 4, disk: 20 }, envVars: { FOO: 'bar' } });
+    expect(options).toEqual({ timeout: 120 });
+  });
+});
+
 describe('daytona env', () => {
   it('enforces branch pins for AUTOBRIN_FLUE_REF', () => {
     expect(() => assertAllowedFlueRef('feature/foo')).toThrow('branch pin');