From 24b1c51b84ce192bda0365dcb37c8e0b49f3c051 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:06:37 +0800 Subject: [PATCH 01/12] feat(cli,core): /compress accepts custom focus instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends /compress to take a trailing instruction string (max 2000 chars) that is passed through tryCompressChat → tryCompress → CompressOptions and appended to the compression side-query system prompt as an "Additional Instructions:" block. Mirrors claude-code /compact . Empty / whitespace-only args fall back to the prior behaviour. --- .../src/ui/commands/compressCommand.test.ts | 81 +++++++++++++++++++ .../cli/src/ui/commands/compressCommand.ts | 19 ++++- packages/core/src/core/client.ts | 2 + packages/core/src/core/geminiChat.ts | 8 ++ .../src/services/chatCompressionService.ts | 53 +++++++++++- 5 files changed, 159 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/ui/commands/compressCommand.test.ts b/packages/cli/src/ui/commands/compressCommand.test.ts index 03f6c756a5..e83a9fe74a 100644 --- a/packages/cli/src/ui/commands/compressCommand.test.ts +++ b/packages/cli/src/ui/commands/compressCommand.test.ts @@ -77,6 +77,8 @@ describe('compressCommand', () => { expect(mockTryCompressChat).toHaveBeenCalledWith( expect.stringMatching(/^compress-\d+$/), true, + undefined, + undefined, ); expect(context.ui.addItem).toHaveBeenCalledWith( @@ -131,4 +133,83 @@ describe('compressCommand', () => { await compressCommand.action!(context, ''); expect(context.ui.setPendingItem).toHaveBeenCalledWith(null); }); + + describe('custom instructions argument', () => { + beforeEach(() => { + mockTryCompressChat.mockResolvedValue({ + originalTokenCount: 200, + compressionStatus: CompressionStatus.COMPRESSED, + newTokenCount: 100, + } satisfies ChatCompressionInfo); + }); + + it('forwards trimmed instructions as the 4th argument', async () => { + const ctx = createMockCommandContext({ + services: { + config: { + getGeminiClient: () => + ({ + tryCompressChat: mockTryCompressChat, + }) as unknown as GeminiClient, + }, + }, + invocation: { + raw: '/compress focus on auth bug ', + name: 'compress', + args: ' focus on auth bug ', + }, + }); + await compressCommand.action!(ctx, ''); + expect(mockTryCompressChat).toHaveBeenCalledWith( + expect.stringMatching(/^compress-\d+$/), + true, + undefined, + 'focus on auth bug', + ); + }); + + it('passes undefined when args is empty or whitespace only', async () => { + const ctx = createMockCommandContext({ + services: { + config: { + getGeminiClient: () => + ({ + tryCompressChat: mockTryCompressChat, + }) as unknown as GeminiClient, + }, + }, + invocation: { raw: '/compress ', name: 'compress', args: ' ' }, + }); + await compressCommand.action!(ctx, ''); + expect(mockTryCompressChat).toHaveBeenCalledWith( + expect.stringMatching(/^compress-\d+$/), + true, + undefined, + undefined, + ); + }); + + it('caps overlong instructions at 2000 chars', async () => { + const long = 'x'.repeat(3000); + const ctx = createMockCommandContext({ + services: { + config: { + getGeminiClient: () => + ({ + tryCompressChat: mockTryCompressChat, + }) as unknown as GeminiClient, + }, + }, + invocation: { + raw: `/compress ${long}`, + name: 'compress', + args: long, + }, + }); + await compressCommand.action!(ctx, ''); + const call = mockTryCompressChat.mock.calls[0]; + expect(call[3]).toBeDefined(); + expect((call[3] as string).length).toBe(2000); + }); + }); }); diff --git a/packages/cli/src/ui/commands/compressCommand.ts b/packages/cli/src/ui/commands/compressCommand.ts index 4178bf1bed..79980af762 100644 --- a/packages/cli/src/ui/commands/compressCommand.ts +++ b/packages/cli/src/ui/commands/compressCommand.ts @@ -10,6 +10,13 @@ import type { SlashCommand } from './types.js'; import { CommandKind } from './types.js'; import { t } from '../../i18n/index.js'; +// Cap user-supplied compression instructions. The compression side-query has +// no input-truncation retry today, so an unbounded instruction string would +// inflate the side-query prompt and risk a PTL the compaction path can't +// recover from. 2000 chars is generous for human-typed focus directives +// without exposing that failure mode. +const MAX_COMPRESS_INSTRUCTIONS_CHARS = 2000; + export const compressCommand: SlashCommand = { name: 'compress', altNames: ['summarize'], @@ -54,9 +61,19 @@ export const compressCommand: SlashCommand = { }; } + const rawArgs = context.invocation?.args?.trim() ?? ''; + const customInstructions = rawArgs + ? rawArgs.slice(0, MAX_COMPRESS_INSTRUCTIONS_CHARS) + : undefined; + const doCompress = async () => { const promptId = `compress-${Date.now()}`; - return await geminiClient.tryCompressChat(promptId, true); + return await geminiClient.tryCompressChat( + promptId, + true, + abortSignal, + customInstructions, + ); }; if (executionMode === 'acp') { diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 15146b45ac..065aec1631 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -2141,6 +2141,7 @@ export class GeminiClient { prompt_id: string, force: boolean = false, signal?: AbortSignal, + customInstructions?: string, ): Promise { const previousSessionStartContext = this.lastSessionStartContext; const previousSessionStartSource = this.lastSessionStartSource; @@ -2149,6 +2150,7 @@ export class GeminiClient { this.config.getModel(), force, signal, + customInstructions ? { customInstructions } : undefined, ); if (info.compressionStatus === CompressionStatus.COMPRESSED) { const chat = this.getChat(); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index b424c193ec..4fe41dd82f 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -227,6 +227,13 @@ interface TryCompressOptions { * post-compression guards that may roll the in-memory chat state back. */ deferChatCompressionRecord?: boolean; + /** + * Forwarded to the compression side-query system prompt. Sourced from + * `/compress ` invocation arg; appended after the base prompt as + * an `Additional Instructions:` block so the summary model can focus + * on the user's stated concern. + */ + customInstructions?: string; } const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = { @@ -1420,6 +1427,7 @@ export class GeminiChat { pendingUserMessage: options?.pendingUserMessage, precomputedEffectiveTokens: options?.precomputedEffectiveTokens, trigger: options?.trigger, + customInstructions: options?.customInstructions, signal, }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 92c81384ea..a6ddfbb0a1 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -180,6 +180,37 @@ export interface CompressOptions { * (review #4168 R1.3 / R1.4) */ precomputedEffectiveTokens?: number; + /** + * User-supplied focus directives passed to the compression side-query. + * Appended to the system prompt as an `Additional Instructions:` block. + * Sourced from `/compress `. PreCompact hooks may further append + * `additionalContext` via `hookSpecificOutput`; user text always comes + * first, hook text last (matches claude-code mergeHookInstructions). + */ + customInstructions?: string; +} + +/** + * Compose the compression side-query system prompt: the base template, + * optionally followed by an `Additional Instructions:` block containing + * the user's `/compress ` directives and any `additionalContext` + * returned by PreCompact hooks. Order is user-first / hook-appended so an + * explicit user intent outranks a global hook policy when both speak. + */ +function buildCompressionSystemPrompt( + userInstructions: string | undefined, + hookInstructions: string, +): string { + const base = getCompressionPrompt(); + const parts: string[] = []; + if (userInstructions && userInstructions.trim().length > 0) { + parts.push(userInstructions.trim()); + } + if (hookInstructions.length > 0) { + parts.push(hookInstructions); + } + if (parts.length === 0) return base; + return `${base}\n\nAdditional Instructions:\n${parts.join('\n\n')}`; } export class ChatCompressionService { @@ -288,7 +319,11 @@ export class ChatCompressionService { }; } - // Fire PreCompact hook before compression begins + // Fire PreCompact hook before compression begins. Pass any user-supplied + // `/compress` instructions so hook scripts can read / log / amend them + // via `hookSpecificOutput.additionalContext`. The aggregator concatenates + // additionalContext across all hooks with '\n' separators. + let hookExtraInstructions = ''; const hookSystem = config.getHookSystem(); if (hookSystem) { const preCompactTrigger = @@ -296,7 +331,16 @@ export class ChatCompressionService { ? PreCompactTrigger.Manual : PreCompactTrigger.Auto; try { - await hookSystem.firePreCompactEvent(preCompactTrigger, '', signal); + const result = await hookSystem.firePreCompactEvent( + preCompactTrigger, + opts.customInstructions ?? '', + signal, + ); + const merged = + result.finalOutput?.hookSpecificOutput?.['additionalContext']; + if (typeof merged === 'string' && merged.trim().length > 0) { + hookExtraInstructions = merged.trim(); + } } catch (err) { config.getDebugLogger().warn(`PreCompact hook failed: ${err}`); } @@ -339,7 +383,10 @@ export class ChatCompressionService { // Best-effort: failures fall back to NOOP and the next turn re-triggers // compression anyway, so don't burn 7 retries blocking the user mid-turn. maxAttempts: 1, - systemInstruction: getCompressionPrompt(), + systemInstruction: buildCompressionSystemPrompt( + opts.customInstructions, + hookExtraInstructions, + ), contents: [ ...slim.slimmedHistory, { From af7ee1f66a8f77bde3a1ca29023727660b4237f4 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:08:33 +0800 Subject: [PATCH 02/12] test(core): cover /compress customInstructions + PreCompact hook merge --- .../services/chatCompressionService.test.ts | 208 ++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 2aa0e405a5..f98d9b2f51 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -2261,3 +2261,211 @@ describe('ChatCompressionService.compress — single-turn computer-use regressio expect(flatText).toContain('"app":"Safari"'); }); }); + +describe('ChatCompressionService.compress — customInstructions plumbing', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + // Tiny helper to keep each case readable. Builds a 4-message history + // (passes the curatedHistory.length >= 2 guard) and a config with all + // accessors required by compress(). hookSystem is overridable so each + // test can shape the PreCompact return value. + function setup(opts: { hookSystem?: unknown }) { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u1' }] }, + { role: 'model', parts: [{ text: 'm1' }] }, + { role: 'user', parts: [{ text: 'u2' }] }, + { role: 'model', parts: [{ text: 'm2' }] }, + ]; + const getHistoryMock = vi.fn().mockReturnValue(history); + const mockChat = { + getHistory: getHistoryMock, + getHistoryShallow: getHistoryMock, + } as unknown as GeminiChat; + const hookSystem = opts.hookSystem ?? { + firePreCompactEvent: vi.fn().mockResolvedValue(undefined), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }; + const mockConfig = { + getChatCompression: vi.fn(), + getBaseLlmClient: vi.fn(), + getContentGeneratorConfig: vi + .fn() + .mockReturnValue({ contextWindowSize: 200_000 }), + getHookSystem: vi.fn().mockReturnValue(hookSystem), + getModel: () => 'test-model', + getApprovalMode: () => 'default', + getDebugLogger: () => ({ warn: vi.fn(), debug: vi.fn() }), + getTargetDir: () => '/tmp/test-workspace', + } as unknown as Config; + return { mockChat, mockConfig, hookSystem }; + } + + it('appends customInstructions to the side-query systemInstruction', async () => { + const spy = vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + const { mockChat, mockConfig } = setup({}); + + const service = new ChatCompressionService(); + await service.compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + customInstructions: 'focus on the auth bug', + }); + + const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; + expect(passed.systemInstruction).toContain('Additional Instructions:'); + expect(passed.systemInstruction).toContain('focus on the auth bug'); + }); + + it('forwards customInstructions verbatim to firePreCompactEvent', async () => { + vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + const firePreCompactEvent = vi.fn().mockResolvedValue(undefined); + const { mockChat, mockConfig } = setup({ + hookSystem: { + firePreCompactEvent, + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }, + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + customInstructions: 'focus auth', + }); + + expect(firePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Manual, + 'focus auth', + undefined, + ); + }); + + it('appends PreCompact hook additionalContext when no user instructions', async () => { + const spy = vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + const { mockChat, mockConfig } = setup({ + hookSystem: { + firePreCompactEvent: vi.fn().mockResolvedValue({ + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + hookSpecificOutput: { + additionalContext: 'prefer Chinese summaries', + }, + }, + }), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }, + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; + expect(passed.systemInstruction).toContain('Additional Instructions:'); + expect(passed.systemInstruction).toContain('prefer Chinese summaries'); + }); + + it('orders user instructions before hook additionalContext', async () => { + const spy = vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + const { mockChat, mockConfig } = setup({ + hookSystem: { + firePreCompactEvent: vi.fn().mockResolvedValue({ + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + hookSpecificOutput: { additionalContext: 'HOOK_TEXT' }, + }, + }), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }, + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + customInstructions: 'USER_TEXT', + }); + + const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; + const userIdx = passed.systemInstruction.indexOf('USER_TEXT'); + const hookIdx = passed.systemInstruction.indexOf('HOOK_TEXT'); + expect(userIdx).toBeGreaterThan(-1); + expect(hookIdx).toBeGreaterThan(-1); + expect(userIdx).toBeLessThan(hookIdx); + }); + + it('omits the Additional Instructions block when neither source supplies any', async () => { + const spy = vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + const { mockChat, mockConfig } = setup({}); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; + expect(passed.systemInstruction).not.toContain('Additional Instructions:'); + }); +}); From 83b7a4be7d6039d4938a970759ecd8ee2b0ac263 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:11:51 +0800 Subject: [PATCH 03/12] feat(core): restore plan-mode + subagent snapshot after compaction Adds two optional ComposePostCompactOptions: - planModeActive: when true, emits a reminder so the post-compact agent does not forget destructive tools remain gated. - runningSubagents: when non-empty, emits a block listing each running/paused task by id, status, and description. Both blocks are spliced into the merged user attachment Content before file/image restorations. XML-significant characters in descriptions are escaped to prevent an adversarial subagent description from closing the wrapper tag. Wiring at the call site arrives in the next commit. --- .../services/postCompactAttachments.test.ts | 176 ++++++++++++++++++ .../src/services/postCompactAttachments.ts | 103 ++++++++++ 2 files changed, 279 insertions(+) diff --git a/packages/core/src/services/postCompactAttachments.test.ts b/packages/core/src/services/postCompactAttachments.test.ts index 6ee8e9225a..2c557f921b 100644 --- a/packages/core/src/services/postCompactAttachments.test.ts +++ b/packages/core/src/services/postCompactAttachments.test.ts @@ -1218,3 +1218,179 @@ describe('postProcessSummary', () => { expect(out).toMatch(/resume.*prior task/i); }); }); + +describe('composePostCompactHistory — plan-mode reminder', () => { + it('injects a plan-mode reminder when planModeActive is true', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + planModeActive: true, + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).toContain(''); + expect(flat).toMatch(/may not execute modification/i); + }); + + it('omits the plan-mode reminder when planModeActive is false or unset', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + for (const opts of [{}, { planModeActive: false }]) { + const result = await composePostCompactHistory(history, 'SUMMARY', opts); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).not.toContain(''); + } + }); +}); + +describe('composePostCompactHistory — subagent snapshot', () => { + it('renders a block listing running and paused tasks', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'agent-1', + description: 'Run the bookmark-app E2E', + status: 'running', + startTime: 1000, + }, + { + id: 'agent-2', + description: 'Refactor session manager', + status: 'paused', + startTime: 2000, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).toContain(''); + expect(flat).toContain('agent-1'); + expect(flat).toContain('Run the bookmark-app E2E'); + expect(flat).toContain('agent-2'); + expect(flat).toContain('Refactor session manager'); + expect(flat).toMatch(/running/); + expect(flat).toMatch(/paused/); + }); + + it('omits the snapshot block when runningSubagents is empty or undefined', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const empty = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [], + }); + const undefSnap = await composePostCompactHistory(history, 'SUMMARY', {}); + for (const r of [empty, undefSnap]) { + const flat = r + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).not.toContain(''); + } + }); + + it('truncates very long descriptions to keep the snapshot bounded', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'agent-x', + description: 'x'.repeat(1000), + status: 'running', + startTime: 1, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).toMatch(/x{200}…/); + expect(flat).not.toMatch(/x{300}/); + }); + + it('escapes XML-sensitive characters in descriptions to prevent injection', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'agent-x', + description: 'injected', + status: 'running', + startTime: 1, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + // The literal `` payload from the description must + // not appear unescaped — that would let an adversarial subagent + // description close our wrapper tag and inject arbitrary XML. + const closes = flat.match(/<\/background-tasks>/g) ?? []; + expect(closes.length).toBe(1); + expect(flat).toContain('</background-tasks>'); + expect(flat).toContain('<evil>'); + }); + + it('sorts subagents by startTime ascending', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'late', + description: 'late', + status: 'running', + startTime: 3000, + }, + { + id: 'early', + description: 'early', + status: 'paused', + startTime: 1000, + }, + { + id: 'mid', + description: 'mid', + status: 'running', + startTime: 2000, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + const earlyIdx = flat.indexOf('early'); + const midIdx = flat.indexOf('mid'); + const lateIdx = flat.indexOf('late'); + expect(earlyIdx).toBeLessThan(midIdx); + expect(midIdx).toBeLessThan(lateIdx); + }); +}); diff --git a/packages/core/src/services/postCompactAttachments.ts b/packages/core/src/services/postCompactAttachments.ts index 5011e17c06..8d17683a53 100644 --- a/packages/core/src/services/postCompactAttachments.ts +++ b/packages/core/src/services/postCompactAttachments.ts @@ -542,6 +542,20 @@ export function postProcessSummary(rawSummary: string): string { return `${body}\n\n${RESUME_TRAILER}`; } +/** + * Minimal projection of a background subagent task carried into post-compact + * attachments. Decoupled from the registry's `AgentTask` so the attachment + * layer does not import the registry types and so tests can build cases + * inline. + */ +export interface SubagentSnapshot { + id: string; + description: string; + status: 'running' | 'paused'; + /** ms epoch when the task was registered. Used for stable ordering. */ + startTime: number; +} + export interface ComposePostCompactOptions { /** * Workspace root. When set, file paths from history that resolve @@ -571,6 +585,24 @@ export interface ComposePostCompactOptions { * `QWEN_COMPACT_MAX_RECENT_IMAGES`). */ maxImages?: number; + /** + * When `true`, prepend a `` reminder block before the + * file/image attachments so the post-compact agent does not forget that + * destructive tools remain gated. Sourced from `config.getApprovalMode() + * === ApprovalMode.PLAN` at the call site. The summary itself may + * mention plan mode but cannot be trusted to — the reminder is a + * structural guarantee. + */ + planModeActive?: boolean; + /** + * Snapshot of background subagent tasks (running or paused) at + * compaction time. Rendered as a `` reminder block. + * Empty array or `undefined` renders no block. Terminal-state tasks + * (completed/failed/cancelled) should already be filtered out by the + * caller — they have already emitted their notification XML and need + * no reminder. + */ + runningSubagents?: SubagentSnapshot[]; } /** @@ -628,6 +660,59 @@ function safeRealpath(p: string): string { } } +const PLAN_MODE_REMINDER_TEXT = + '\n' + + 'You are currently in PLAN mode. You may research, read files, and ' + + 'propose plans, but you may not execute modification tools (write_file, ' + + 'edit, shell mutations, etc.) until the user exits plan mode. The summary ' + + 'above may not reflect this constraint — honor plan mode regardless.\n' + + ''; + +/** Cap per-task description text in the snapshot block. Prevents a + * pathologically long subagent description from inflating the post-compact + * history. 200 chars keeps the snapshot useful as a pointer without making + * it a full progress log. */ +const MAX_SUBAGENT_DESC_CHARS = 200; + +function buildPlanModeReminderPart(): Part { + return { text: PLAN_MODE_REMINDER_TEXT }; +} + +/** + * Escape the three XML-significant characters in subagent descriptions + * before splicing them inside our `` wrapper. Without + * this, a description containing `` could close the + * wrapper tag and inject arbitrary structure into the model's view. + */ +function escapeForXmlText(text: string): string { + return text + .replace(/&/g, '&') + .replace(//g, '>'); +} + +function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { + if (snaps.length === 0) return null; + const sorted = [...snaps].sort((a, b) => a.startTime - b.startTime); + const lines = sorted.map((s) => { + const truncated = + s.description.length > MAX_SUBAGENT_DESC_CHARS + ? s.description.slice(0, MAX_SUBAGENT_DESC_CHARS) + '…' + : s.description; + return `- [${s.status}] ${s.id}: ${escapeForXmlText(truncated)}`; + }); + return { + text: + '\n' + + 'The following background subagent tasks were active at compaction. ' + + 'The summary above does not include their per-task state. Use ' + + '`task_stop` / `send_message` to interact; do not assume they ' + + 'completed.\n' + + lines.join('\n') + + '\n', + }; +} + export async function composePostCompactHistory( history: Content[], summary: string, @@ -638,6 +723,8 @@ export async function composePostCompactHistory( signal, maxFiles = POST_COMPACT_MAX_FILES_TO_RESTORE, maxImages = POST_COMPACT_MAX_IMAGES_TO_RESTORE, + planModeActive, + runningSubagents, } = options; // Workspace-boundary filter on the extracted file paths (Finding 4). @@ -654,7 +741,23 @@ export async function composePostCompactHistory( // Contents produces consecutive same-role entries, which // geminiChat.test.ts:6289 enforces against and which Gemini // providers reject as 400 "consecutive same-role content". + // + // Order within the merged user Content: + // 1. plan-mode reminder (if active) — most behaviourally critical + // 2. background subagent snapshot — informs next-turn dispatch + // 3. file restoration blocks — model-readable file contents + // 4. image restoration block — recent screenshots + // Reminder text comes first so a token-conservative model that skims the + // attachment still sees the plan-mode constraint and task pointers before + // it gets to file bodies. const postAckParts: Part[] = []; + if (planModeActive) { + postAckParts.push(buildPlanModeReminderPart()); + } + if (runningSubagents && runningSubagents.length > 0) { + const snap = buildSubagentSnapshotPart(runningSubagents); + if (snap) postAckParts.push(snap); + } for (const block of fileBlocks) { for (const part of block.parts ?? []) postAckParts.push(part); } From 8e916838d22a1a6470a2f49f52b63495f2353a3c Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:16:03 +0800 Subject: [PATCH 04/12] feat(core): wire plan-mode + subagent snapshot into post-compact attachments ChatCompressionService.compress now passes: - planModeActive: derived from config.getApprovalMode() === ApprovalMode.PLAN - runningSubagents: filtered from BackgroundTaskRegistry to agent-kind tasks in 'running' or 'paused' state into composePostCompactHistory. Adds collectActiveSubagents() helper that returns [] when the registry is absent so older SDK consumers without it keep working. --- .../services/chatCompressionService.test.ts | 237 ++++++++++++++++++ .../src/services/chatCompressionService.ts | 37 +++ 2 files changed, 274 insertions(+) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index f98d9b2f51..9957dbd9f5 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -2469,3 +2469,240 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = expect(passed.systemInstruction).not.toContain('Additional Instructions:'); }); }); + +describe('ChatCompressionService.compress — plan-mode + subagent attachment wiring', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + function setupWithAppState(opts: { + approvalMode?: string; + backgroundTasks?: Array<{ + id: string; + kind: string; + description: string; + status: string; + startTime: number; + }>; + }) { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u1' }] }, + { role: 'model', parts: [{ text: 'm1' }] }, + { role: 'user', parts: [{ text: 'u2' }] }, + { role: 'model', parts: [{ text: 'm2' }] }, + ]; + const getHistoryMock = vi.fn().mockReturnValue(history); + const mockChat = { + getHistory: getHistoryMock, + getHistoryShallow: getHistoryMock, + } as unknown as GeminiChat; + const mockConfig = { + getChatCompression: vi.fn(), + getBaseLlmClient: vi.fn(), + getContentGeneratorConfig: vi + .fn() + .mockReturnValue({ contextWindowSize: 200_000 }), + getHookSystem: vi.fn().mockReturnValue({ + firePreCompactEvent: vi.fn().mockResolvedValue(undefined), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }), + getModel: () => 'test-model', + getApprovalMode: () => opts.approvalMode ?? 'default', + getBackgroundTaskRegistry: () => ({ + getAll: () => opts.backgroundTasks ?? [], + }), + getDebugLogger: () => ({ warn: vi.fn(), debug: vi.fn() }), + getTargetDir: () => '/tmp/test-workspace', + } as unknown as Config; + return { mockChat, mockConfig }; + } + + function stubSideQuery() { + vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + } + + it('passes planModeActive=true when getApprovalMode() returns "plan"', async () => { + stubSideQuery(); + const composeSpy = vi + .spyOn(postCompactModule, 'composePostCompactHistory') + .mockResolvedValue([ + { role: 'user', parts: [{ text: 's' }] }, + { role: 'model', parts: [{ text: 'ack' }] }, + ]); + const { mockChat, mockConfig } = setupWithAppState({ + approvalMode: 'plan', + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + expect(composeSpy).toHaveBeenCalledOnce(); + const opts = composeSpy.mock.calls[0]![2] as { + planModeActive?: boolean; + }; + expect(opts.planModeActive).toBe(true); + }); + + it('passes planModeActive=false for non-plan approval modes', async () => { + stubSideQuery(); + const composeSpy = vi + .spyOn(postCompactModule, 'composePostCompactHistory') + .mockResolvedValue([ + { role: 'user', parts: [{ text: 's' }] }, + { role: 'model', parts: [{ text: 'ack' }] }, + ]); + const { mockChat, mockConfig } = setupWithAppState({ + approvalMode: 'auto-edit', + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const opts = composeSpy.mock.calls[0]![2] as { + planModeActive?: boolean; + }; + expect(opts.planModeActive).toBe(false); + }); + + it('filters background tasks to running/paused agent tasks only', async () => { + stubSideQuery(); + const composeSpy = vi + .spyOn(postCompactModule, 'composePostCompactHistory') + .mockResolvedValue([ + { role: 'user', parts: [{ text: 's' }] }, + { role: 'model', parts: [{ text: 'ack' }] }, + ]); + const { mockChat, mockConfig } = setupWithAppState({ + backgroundTasks: [ + { + id: 'r', + kind: 'agent', + description: 'd', + status: 'running', + startTime: 1, + }, + { + id: 'p', + kind: 'agent', + description: 'd', + status: 'paused', + startTime: 2, + }, + { + id: 'c', + kind: 'agent', + description: 'd', + status: 'completed', + startTime: 3, + }, + { + id: 'f', + kind: 'agent', + description: 'd', + status: 'failed', + startTime: 4, + }, + { + id: 'x', + kind: 'agent', + description: 'd', + status: 'cancelled', + startTime: 5, + }, + // Non-agent kinds (shell, monitor) must also be excluded — they + // do not have a "task" the post-compact agent should send_message + // to, only the agent kind is interactive. + { + id: 's1', + kind: 'shell', + description: 'd', + status: 'running', + startTime: 6, + }, + ], + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const opts = composeSpy.mock.calls[0]![2] as { + runningSubagents?: Array<{ id: string; status: string }>; + }; + expect(opts.runningSubagents?.map((t) => t.id)).toEqual(['r', 'p']); + }); + + it('passes an empty runningSubagents array when the registry is missing', async () => { + stubSideQuery(); + const composeSpy = vi + .spyOn(postCompactModule, 'composePostCompactHistory') + .mockResolvedValue([ + { role: 'user', parts: [{ text: 's' }] }, + { role: 'model', parts: [{ text: 'ack' }] }, + ]); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u1' }] }, + { role: 'model', parts: [{ text: 'm1' }] }, + ]; + const getHistoryMock = vi.fn().mockReturnValue(history); + const mockChat = { + getHistory: getHistoryMock, + getHistoryShallow: getHistoryMock, + } as unknown as GeminiChat; + const mockConfig = { + getChatCompression: vi.fn(), + getBaseLlmClient: vi.fn(), + getContentGeneratorConfig: vi + .fn() + .mockReturnValue({ contextWindowSize: 200_000 }), + getHookSystem: vi.fn().mockReturnValue({ + firePreCompactEvent: vi.fn().mockResolvedValue(undefined), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }), + getModel: () => 'test-model', + getApprovalMode: () => 'default', + // getBackgroundTaskRegistry intentionally omitted to simulate older + // SDK consumers / test harnesses that haven't wired it. + getDebugLogger: () => ({ warn: vi.fn(), debug: vi.fn() }), + getTargetDir: () => '/tmp/test-workspace', + } as unknown as Config; + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const opts = composeSpy.mock.calls[0]![2] as { + runningSubagents?: unknown[]; + }; + expect(opts.runningSubagents).toEqual([]); + }); +}); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index a6ddfbb0a1..454c478af6 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -6,6 +6,7 @@ import type { Content } from '@google/genai'; import type { Config } from '../config/config.js'; +import { ApprovalMode } from '../config/config.js'; import type { GeminiChat } from '../core/geminiChat.js'; import { type ChatCompressionInfo, @@ -30,6 +31,7 @@ import { countToolResponseImages, postProcessSummary, stripAnalysisBlock, + type SubagentSnapshot, } from './postCompactAttachments.js'; /** @@ -190,6 +192,35 @@ export interface CompressOptions { customInstructions?: string; } +/** + * Project active background subagent tasks into the minimal shape + * `composePostCompactHistory` needs. `running` and `paused` are the only + * statuses the post-compact agent might need to act on; terminal states + * (completed / failed / cancelled) already emitted their notification XML + * and need no reminder. Only `agent` kinds are interactive (shell and + * monitor kinds are excluded — they don't have a send_message channel). + * + * Returns `[]` (not `undefined`) when the registry is absent so the + * downstream attachment builder takes the empty-array branch and emits + * no block, rather than treating `undefined` as a configuration error. + */ +function collectActiveSubagents(config: Config): SubagentSnapshot[] { + const registry = config.getBackgroundTaskRegistry?.(); + if (!registry) return []; + return registry + .getAll() + .filter( + (t) => + t.kind === 'agent' && (t.status === 'running' || t.status === 'paused'), + ) + .map((t) => ({ + id: t.id, + description: t.description, + status: t.status as 'running' | 'paused', + startTime: t.startTime, + })); +} + /** * Compose the compression side-query system prompt: the base template, * optionally followed by an `Additional Instructions:` block containing @@ -508,6 +539,12 @@ export class ChatCompressionService { signal, maxFiles: tuning.maxRecentFiles, maxImages: tuning.maxRecentImages, + // Restore plan-mode reminder + running-subagent snapshot so the + // post-compact agent does not lose either piece of mid-session + // state. Both reduce to no-ops when the corresponding source is + // empty. + planModeActive: config.getApprovalMode?.() === ApprovalMode.PLAN, + runningSubagents: collectActiveSubagents(config), }, ); } catch (err) { From 356a32c1a6b3128e766b44c91496d9e3917544d1 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:23:41 +0800 Subject: [PATCH 05/12] fix(core,test): use HookSystem.getAdditionalContext accessor; smoke-test /compress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - chatCompressionService: read PreCompact hook output via result.getAdditionalContext() — the wrapper returns DefaultHookOutput (not the raw AggregatedHookResult), and the accessor sanitises < / > consistently with every other call-site in the repo. - Test mocks now return a DefaultHookOutput-shaped stub via a tiny makeHookOutput() helper rather than the aggregator shape. - New integration smoke test for `/compress focus on the scientist mentioned` exercising the args plumbing end-to-end. --- .../context-compress-interactive.test.ts | 52 +++++++++++++++++++ .../services/chatCompressionService.test.ts | 42 ++++++++------- .../src/services/chatCompressionService.ts | 9 ++-- 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/integration-tests/interactive/context-compress-interactive.test.ts b/integration-tests/interactive/context-compress-interactive.test.ts index 1018c846b3..8e9655f6ed 100644 --- a/integration-tests/interactive/context-compress-interactive.test.ts +++ b/integration-tests/interactive/context-compress-interactive.test.ts @@ -105,4 +105,56 @@ describe('Interactive Mode', () => { expect(compressionFailed).toBe(true); }); + + it.skipIf(process.platform === 'win32')( + 'should forward /compress instructions through to the side-query', + async () => { + await rig.setup('interactive-compress-instructions-test', { + settings: { + security: { + auth: { + selectedType: 'openai', + }, + }, + }, + }); + + const { ptyProcess } = rig.runInteractive(); + + let fullOutput = ''; + ptyProcess.onData((data) => (fullOutput += data)); + + const isReady = await rig.waitForText('Type your message', 15000); + expect( + isReady, + 'CLI did not start up in interactive mode correctly', + ).toBe(true); + + // Seed history so /compress has material to summarize. + const seedPrompt = + 'Dont do anything except returning a 1000 token long paragragh with the at the end to indicate end of response. This is a moderately long sentence.'; + + await type(ptyProcess, seedPrompt); + await type(ptyProcess, '\r'); + + await rig.waitForText('einstein', 25000); + + // Fire /compress with a trailing instruction. We are not asserting on + // summary CONTENT (model behaviour) — only that the wiring runs + // end-to-end and the compression telemetry event lands. Earlier unit + // tests cover the prompt-composition path; this is the smoke test that + // the args plumbing reaches the side-query. + await type(ptyProcess, '/compress focus on the scientist mentioned'); + await new Promise((resolve) => setTimeout(resolve, 100)); + await type(ptyProcess, '\r'); + + const foundEvent = await rig.waitForTelemetryEvent( + 'chat_compression', + 90000, + ); + expect(foundEvent, 'chat_compression telemetry event was not found').toBe( + true, + ); + }, + ); }); diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 9957dbd9f5..e838aadcc2 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -2267,6 +2267,18 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = vi.restoreAllMocks(); }); + // The HookSystem wrapper returns DefaultHookOutput | undefined to consumers + // (see hookSystem.ts:274-287). Source code calls `result?.getAdditionalContext()`, + // so mocks must expose that method — not the raw AggregatedHookResult shape + // that hookEventHandler returns. This tiny helper builds a stand-in. + function makeHookOutput(opts: { additionalContext?: string }): { + getAdditionalContext: () => string | undefined; + } { + return { + getAdditionalContext: () => opts.additionalContext, + }; + } + // Tiny helper to keep each case readable. Builds a 4-message history // (passes the curatedHistory.length >= 2 guard) and a config with all // accessors required by compress(). hookSystem is overridable so each @@ -2374,17 +2386,11 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = } as never); const { mockChat, mockConfig } = setup({ hookSystem: { - firePreCompactEvent: vi.fn().mockResolvedValue({ - success: true, - allOutputs: [], - errors: [], - totalDuration: 0, - finalOutput: { - hookSpecificOutput: { - additionalContext: 'prefer Chinese summaries', - }, - }, - }), + firePreCompactEvent: vi + .fn() + .mockResolvedValue( + makeHookOutput({ additionalContext: 'prefer Chinese summaries' }), + ), firePostCompactEvent: vi.fn().mockResolvedValue(undefined), }, }); @@ -2414,15 +2420,11 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = } as never); const { mockChat, mockConfig } = setup({ hookSystem: { - firePreCompactEvent: vi.fn().mockResolvedValue({ - success: true, - allOutputs: [], - errors: [], - totalDuration: 0, - finalOutput: { - hookSpecificOutput: { additionalContext: 'HOOK_TEXT' }, - }, - }), + firePreCompactEvent: vi + .fn() + .mockResolvedValue( + makeHookOutput({ additionalContext: 'HOOK_TEXT' }), + ), firePostCompactEvent: vi.fn().mockResolvedValue(undefined), }, }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 454c478af6..34c9825133 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -367,9 +367,12 @@ export class ChatCompressionService { opts.customInstructions ?? '', signal, ); - const merged = - result.finalOutput?.hookSpecificOutput?.['additionalContext']; - if (typeof merged === 'string' && merged.trim().length > 0) { + // `getAdditionalContext()` sanitises (`<`/`>` → `<`/`>`) so a + // hook can't inject XML structure into the summary prompt. Mirrors + // every other call-site in this repo (toolHookTriggers, agent.ts, + // client.ts) — keep it consistent. + const merged = result?.getAdditionalContext(); + if (merged && merged.trim().length > 0) { hookExtraInstructions = merged.trim(); } } catch (err) { From 5d0487228d597be649b46cb8fb4c8363aecf36b5 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:38:16 +0800 Subject: [PATCH 06/12] test(core): update client.test.ts to match new tryCompressChat signature --- packages/core/src/core/client.test.ts | 33 ++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index bd1b43e311..f33309fbfa 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -1930,7 +1930,38 @@ describe('Gemini Client (client.ts)', () => { await client.tryCompressChat('p1', true, signal); - expect(tryCompress).toHaveBeenCalledWith('p1', 'the-model', true, signal); + // 5th arg is the `options` bag for `customInstructions` plumbing; + // omitted here means undefined which is the correct contract. + expect(tryCompress).toHaveBeenCalledWith( + 'p1', + 'the-model', + true, + signal, + undefined, + ); + }); + + it('forwards customInstructions through the options bag when supplied', async () => { + const tryCompress = vi.fn().mockResolvedValue({ + originalTokenCount: 0, + newTokenCount: 0, + compressionStatus: CompressionStatus.NOOP, + }); + client['chat'] = { + tryCompress, + getHistory: vi.fn().mockReturnValue([]), + } as unknown as GeminiChat; + vi.mocked(mockConfig.getModel).mockReturnValue('the-model'); + + await client.tryCompressChat('p1', true, undefined, 'focus on auth bug'); + + expect(tryCompress).toHaveBeenCalledWith( + 'p1', + 'the-model', + true, + undefined, + { customInstructions: 'focus on auth bug' }, + ); }); it('flips forceFullIdeContext on a successful compression', async () => { From 745915cc1ba2ac3a4dafc3f0294e1e07aab19eeb Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 11:40:49 +0800 Subject: [PATCH 07/12] feat(core): cap subagent snapshot at 30 entries with overflow notice Code-review follow-up. Pathological sessions with hundreds of backgrounded agents could otherwise produce a multi-KB block. Newest 30 rows are kept (highest startTime); older ones are summarised on a trailing line so the model knows the snapshot is partial. --- .../services/postCompactAttachments.test.ts | 48 +++++++++++++++++++ .../src/services/postCompactAttachments.ts | 20 +++++++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/packages/core/src/services/postCompactAttachments.test.ts b/packages/core/src/services/postCompactAttachments.test.ts index 2c557f921b..985684c98e 100644 --- a/packages/core/src/services/postCompactAttachments.test.ts +++ b/packages/core/src/services/postCompactAttachments.test.ts @@ -1356,6 +1356,54 @@ describe('composePostCompactHistory — subagent snapshot', () => { expect(flat).toContain('<evil>'); }); + it('caps the snapshot at MAX_SUBAGENT_SNAPSHOT_COUNT and notes the overflow', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + // Build 35 tasks; cap is 30, so 5 should overflow into the trailing line. + const subs = Array.from({ length: 35 }, (_, i) => ({ + id: `agent-${i}`, + description: `task ${i}`, + status: 'running' as const, + startTime: i, + })); + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: subs, + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + // Newest 30 retained (agent-5 .. agent-34); oldest 5 dropped (agent-0..4). + expect(flat).toContain('agent-34'); + expect(flat).toContain('agent-5'); + expect(flat).not.toMatch(/\bagent-0\b/); + expect(flat).not.toMatch(/\bagent-4\b/); + expect(flat).toMatch(/and 5 older tasks not shown/); + }); + + it('uses singular "task" in the overflow line when exactly one is hidden', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const subs = Array.from({ length: 31 }, (_, i) => ({ + id: `agent-${i}`, + description: `t`, + status: 'running' as const, + startTime: i, + })); + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: subs, + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).toMatch(/and 1 older task not shown/); + }); + it('sorts subagents by startTime ascending', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'u' }] }, diff --git a/packages/core/src/services/postCompactAttachments.ts b/packages/core/src/services/postCompactAttachments.ts index 8d17683a53..231e1081c9 100644 --- a/packages/core/src/services/postCompactAttachments.ts +++ b/packages/core/src/services/postCompactAttachments.ts @@ -674,6 +674,14 @@ const PLAN_MODE_REMINDER_TEXT = * it a full progress log. */ const MAX_SUBAGENT_DESC_CHARS = 200; +/** Cap on the number of subagent rows the snapshot lists. Defends against + * pathological sessions with hundreds of backgrounded agents that never + * completed — without this, a malformed registry could produce a multi-KB + * attachment block that consumes the post-compact prompt budget. Newest + * agents (highest startTime) are kept; older ones are dropped with a + * trailing "and N more" line so the model knows the snapshot is partial. */ +const MAX_SUBAGENT_SNAPSHOT_COUNT = 30; + function buildPlanModeReminderPart(): Part { return { text: PLAN_MODE_REMINDER_TEXT }; } @@ -694,13 +702,23 @@ function escapeForXmlText(text: string): string { function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { if (snaps.length === 0) return null; const sorted = [...snaps].sort((a, b) => a.startTime - b.startTime); - const lines = sorted.map((s) => { + // Keep the NEWEST rows (highest startTime). When over the cap, the model + // is most likely interacting with recent tasks; older long-runners are + // surfaced via the trailing summary line so nothing silently disappears. + const overflow = Math.max(0, sorted.length - MAX_SUBAGENT_SNAPSHOT_COUNT); + const shown = overflow > 0 ? sorted.slice(overflow) : sorted; + const lines = shown.map((s) => { const truncated = s.description.length > MAX_SUBAGENT_DESC_CHARS ? s.description.slice(0, MAX_SUBAGENT_DESC_CHARS) + '…' : s.description; return `- [${s.status}] ${s.id}: ${escapeForXmlText(truncated)}`; }); + if (overflow > 0) { + lines.push( + `- (… and ${overflow} older task${overflow === 1 ? '' : 's'} not shown)`, + ); + } return { text: '\n' + From 699389ee4b5b7accecc860d784d484b19615329a Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 13:52:59 +0800 Subject: [PATCH 08/12] fix(core,test): flatten subagent description newlines; type-safe ApprovalMode in tests Second code-review pass found two real issues: 1. Subagent descriptions containing `\n`/`\r`/`\t` would split across multiple lines inside the `` bullet list, letting the second line read as a sibling row (or worse, an orphan paragraph between two `- [..]` entries). Flatten whitespace before the slice so each task stays on one line. 2. The plan-mode wiring tests passed `'plan'` / `'auto-edit'` as plain strings instead of `ApprovalMode.PLAN` / `ApprovalMode.AUTO_EDIT`. Source code compares against the enum; a future enum value change would have silently passed the tests. Import and use the enum. --- .../services/chatCompressionService.test.ts | 14 ++++--- .../services/postCompactAttachments.test.ts | 37 +++++++++++++++++++ .../src/services/postCompactAttachments.ts | 22 +++++++++-- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index e838aadcc2..06abeb7aa0 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -16,6 +16,7 @@ import { uiTelemetryService } from '../telemetry/uiTelemetry.js'; import { tokenLimit } from '../core/tokenLimits.js'; import type { GeminiChat } from '../core/geminiChat.js'; import type { Config } from '../config/config.js'; +import { ApprovalMode } from '../config/config.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; import { PreCompactTrigger, PostCompactTrigger } from '../hooks/types.js'; import * as sideQueryModule from '../utils/sideQuery.js'; @@ -2478,7 +2479,10 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi }); function setupWithAppState(opts: { - approvalMode?: string; + // Typed as ApprovalMode (not string) so a future enum rename / value + // change breaks the test at compile time instead of silently passing + // because the literal happens to match the old value. + approvalMode?: ApprovalMode; backgroundTasks?: Array<{ id: string; kind: string; @@ -2509,7 +2513,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi firePostCompactEvent: vi.fn().mockResolvedValue(undefined), }), getModel: () => 'test-model', - getApprovalMode: () => opts.approvalMode ?? 'default', + getApprovalMode: () => opts.approvalMode ?? ApprovalMode.DEFAULT, getBackgroundTaskRegistry: () => ({ getAll: () => opts.backgroundTasks ?? [], }), @@ -2530,7 +2534,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi } as never); } - it('passes planModeActive=true when getApprovalMode() returns "plan"', async () => { + it('passes planModeActive=true when getApprovalMode() returns PLAN', async () => { stubSideQuery(); const composeSpy = vi .spyOn(postCompactModule, 'composePostCompactHistory') @@ -2539,7 +2543,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi { role: 'model', parts: [{ text: 'ack' }] }, ]); const { mockChat, mockConfig } = setupWithAppState({ - approvalMode: 'plan', + approvalMode: ApprovalMode.PLAN, }); await new ChatCompressionService().compress(mockChat, { @@ -2567,7 +2571,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi { role: 'model', parts: [{ text: 'ack' }] }, ]); const { mockChat, mockConfig } = setupWithAppState({ - approvalMode: 'auto-edit', + approvalMode: ApprovalMode.AUTO_EDIT, }); await new ChatCompressionService().compress(mockChat, { diff --git a/packages/core/src/services/postCompactAttachments.test.ts b/packages/core/src/services/postCompactAttachments.test.ts index 985684c98e..7d383474f7 100644 --- a/packages/core/src/services/postCompactAttachments.test.ts +++ b/packages/core/src/services/postCompactAttachments.test.ts @@ -1328,6 +1328,43 @@ describe('composePostCompactHistory — subagent snapshot', () => { expect(flat).not.toMatch(/x{300}/); }); + it('flattens newlines/tabs in descriptions so each task stays on one bullet line', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'agent-a', + description: 'first line\nsecond line\r\nthird\twith\ttabs', + status: 'running', + startTime: 1, + }, + { + id: 'agent-b', + description: 'next task', + status: 'paused', + startTime: 2, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + + // The agent-a bullet must stay on a single line — splitting it across + // newlines would let "second line" read as a sibling list item or + // worse, get parsed as a stray paragraph between two `- [..]` rows. + expect(flat).toMatch( + /- \[running] agent-a: first line second line third with tabs/, + ); + // agent-b should still appear directly after — not orphaned by an + // unintended newline in agent-a's payload. + expect(flat).toMatch(/agent-a:[^\n]*\n- \[paused] agent-b: next task/); + }); + it('escapes XML-sensitive characters in descriptions to prevent injection', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'u' }] }, diff --git a/packages/core/src/services/postCompactAttachments.ts b/packages/core/src/services/postCompactAttachments.ts index 231e1081c9..17d20b2999 100644 --- a/packages/core/src/services/postCompactAttachments.ts +++ b/packages/core/src/services/postCompactAttachments.ts @@ -699,6 +699,19 @@ function escapeForXmlText(text: string): string { .replace(/>/g, '>'); } +/** + * Collapse interior whitespace (newlines, tabs, carriage returns) to single + * spaces before the description goes into a single bullet line. A raw `\n` + * inside `s.description` would otherwise split the bullet across multiple + * lines, producing "- [running] id: line1\nline2\n- [running] next: …" + * which the model reads as a malformed list (and could even read `line2` + * as a sibling row). Mirrors `sanitizePathForDisplay` further up in this + * file. + */ +function flattenWhitespaceForBullet(text: string): string { + return text.replace(/[\r\n\t]+/g, ' '); +} + function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { if (snaps.length === 0) return null; const sorted = [...snaps].sort((a, b) => a.startTime - b.startTime); @@ -708,10 +721,13 @@ function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { const overflow = Math.max(0, sorted.length - MAX_SUBAGENT_SNAPSHOT_COUNT); const shown = overflow > 0 ? sorted.slice(overflow) : sorted; const lines = shown.map((s) => { + // Order: flatten whitespace FIRST, then truncate. Otherwise a 200-char + // slice that lands inside a `\n` keeps the newline in the bullet. + const flattened = flattenWhitespaceForBullet(s.description); const truncated = - s.description.length > MAX_SUBAGENT_DESC_CHARS - ? s.description.slice(0, MAX_SUBAGENT_DESC_CHARS) + '…' - : s.description; + flattened.length > MAX_SUBAGENT_DESC_CHARS + ? flattened.slice(0, MAX_SUBAGENT_DESC_CHARS) + '…' + : flattened; return `- [${s.status}] ${s.id}: ${escapeForXmlText(truncated)}`; }); if (overflow > 0) { From 5e8b753ce321f58ff9329832939d99c1cda0afd3 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 14:03:59 +0800 Subject: [PATCH 09/12] fix(core): move PreCompact hook fire after length-guard; align plan-mode tool names Round 3 code review surfaced two issues: 1. PreCompact hook fired BEFORE the curatedHistory.length < 2 guard, so a single-message session would trigger any hook side effects (transcript dump, external notification, etc.) and then NOOP. Move the hook fire below the guard so hooks only run when compression is actually possible. New regression test asserts the contract. 2. PLAN_MODE_REMINDER_TEXT said "shell mutations" but the real qwen-code tool is `run_shell_command` (tool-names.ts:26). Use the verbatim tool names so a future rename is grep-discoverable. --- .../services/chatCompressionService.test.ts | 45 +++++++++++++++++++ .../src/services/chatCompressionService.ts | 39 ++++++++-------- .../src/services/postCompactAttachments.ts | 9 +++- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 06abeb7aa0..f584b9ff43 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -2342,6 +2342,51 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = expect(passed.systemInstruction).toContain('focus on the auth bug'); }); + it('does NOT fire PreCompact hook when curatedHistory.length < 2 (NOOP path)', async () => { + // Contract: hooks with side effects (transcript dumps, external + // notifications) should only fire when there is actually something to + // compress. A history of [user-only] or [model-only] short-circuits to + // NOOP — the hook must not be triggered for those. + const firePreCompactEvent = vi.fn().mockResolvedValue(undefined); + const firePostCompactEvent = vi.fn().mockResolvedValue(undefined); + const oneMessageHistory: Content[] = [ + { role: 'user', parts: [{ text: 'just one' }] }, + ]; + const getHistoryMock = vi.fn().mockReturnValue(oneMessageHistory); + const mockChat = { + getHistory: getHistoryMock, + getHistoryShallow: getHistoryMock, + } as unknown as GeminiChat; + const mockConfig = { + getChatCompression: vi.fn(), + getBaseLlmClient: vi.fn(), + getContentGeneratorConfig: vi + .fn() + .mockReturnValue({ contextWindowSize: 200_000 }), + getHookSystem: vi + .fn() + .mockReturnValue({ firePreCompactEvent, firePostCompactEvent }), + getModel: () => 'test-model', + getApprovalMode: () => ApprovalMode.DEFAULT, + getDebugLogger: () => ({ warn: vi.fn(), debug: vi.fn() }), + getTargetDir: () => '/tmp/test-workspace', + } as unknown as Config; + + const result = await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 1_000, + customInstructions: 'should not reach the hook', + }); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(firePreCompactEvent).not.toHaveBeenCalled(); + expect(firePostCompactEvent).not.toHaveBeenCalled(); + }); + it('forwards customInstructions verbatim to firePreCompactEvent', async () => { vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ text: 's', diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 34c9825133..d017e29dc8 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -350,6 +350,27 @@ export class ChatCompressionService { }; } + // CLAUDE-CODE-STYLE FULL-HISTORY COMPRESSION: the entire curated + // history is sent to the summary side-query (no split, no tail + // preservation), and the post-compact history is assembled by + // composePostCompactHistory below (summary + model ack + recent + // file restores + recent image restore). + + // Guard: need at least a user+model pair for a meaningful summary. + // This runs BEFORE the PreCompact hook fires — a hook with side effects + // (transcript dump, external notification) shouldn't be triggered for + // a session we're about to NOOP on anyway. + if (curatedHistory.length < 2) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + // Fire PreCompact hook before compression begins. Pass any user-supplied // `/compress` instructions so hook scripts can read / log / amend them // via `hookSpecificOutput.additionalContext`. The aggregator concatenates @@ -380,24 +401,6 @@ export class ChatCompressionService { } } - // CLAUDE-CODE-STYLE FULL-HISTORY COMPRESSION: the entire curated - // history is sent to the summary side-query (no split, no tail - // preservation), and the post-compact history is assembled by - // composePostCompactHistory below (summary + model ack + recent - // file restores + recent image restore). - - // Guard: need at least a user+model pair for a meaningful summary. - if (curatedHistory.length < 2) { - return { - newHistory: null, - info: { - originalTokenCount, - newTokenCount: originalTokenCount, - compressionStatus: CompressionStatus.NOOP, - }, - }; - } - // Slim the side-query input: replace inlineData with placeholders. // The original history (with images) is preserved separately for // the post-compact image restoration block. diff --git a/packages/core/src/services/postCompactAttachments.ts b/packages/core/src/services/postCompactAttachments.ts index 17d20b2999..d9f462e933 100644 --- a/packages/core/src/services/postCompactAttachments.ts +++ b/packages/core/src/services/postCompactAttachments.ts @@ -660,12 +660,17 @@ function safeRealpath(p: string): string { } } +// Tool names listed verbatim from tool-names.ts so a future rename / removal +// surfaces during a global grep instead of leaving stale guidance in this +// reminder. Keep the list small (the most common modification tools) — an +// exhaustive enumeration would drift faster than it helps. const PLAN_MODE_REMINDER_TEXT = '\n' + 'You are currently in PLAN mode. You may research, read files, and ' + 'propose plans, but you may not execute modification tools (write_file, ' + - 'edit, shell mutations, etc.) until the user exits plan mode. The summary ' + - 'above may not reflect this constraint — honor plan mode regardless.\n' + + 'edit, run_shell_command, etc.) until the user exits plan mode. The ' + + 'summary above may not reflect this constraint — honor plan mode ' + + 'regardless.\n' + ''; /** Cap per-task description text in the snapshot block. Prevents a From 295cf409580d19d3024b9df2f5464548462d0c7f Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 18:21:47 +0800 Subject: [PATCH 10/12] refactor(core): share escapeXml, drive plan-mode names from ToolNames, extract reminder builder Code-review follow-ups on post-compact attachments: - Replace the local 3-char escapeForXmlText with the shared 5-char escapeXml from utils/xml.ts, and apply it to the subagent id and status as well as the description. Subagent ids derive from a user-configurable subagentConfig.name, so an unescaped `<`/`&` there could close the wrapper or forge sibling markup. - PLAN_MODE_REMINDER_TEXT now interpolates ToolNames.WRITE_FILE / .EDIT / .SHELL instead of retyping the names, so a future rename stays in sync. - Extract buildStateReminderParts() as the single source of truth for the plan-mode + subagent reminder blocks, used by both composePostCompactHistory and (next commit) its catch-fallback so the two paths can't drift. --- .../services/postCompactAttachments.test.ts | 35 +++++++++ .../src/services/postCompactAttachments.ts | 72 +++++++++++-------- 2 files changed, 77 insertions(+), 30 deletions(-) diff --git a/packages/core/src/services/postCompactAttachments.test.ts b/packages/core/src/services/postCompactAttachments.test.ts index 7d383474f7..f8758c7749 100644 --- a/packages/core/src/services/postCompactAttachments.test.ts +++ b/packages/core/src/services/postCompactAttachments.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import type { Content } from '@google/genai'; import { extractRecentFilePaths } from './postCompactAttachments.js'; +import { ToolNames } from '../tools/tool-names.js'; function fileReadCall(path: string): Content { return { @@ -1234,6 +1235,12 @@ describe('composePostCompactHistory — plan-mode reminder', () => { .join('\n'); expect(flat).toContain(''); expect(flat).toMatch(/may not execute modification/i); + // Tool names must come from the ToolNames constant source, not stale + // string literals — assert the actual current names appear so a rename + // that updates ToolNames keeps this reminder in sync. + expect(flat).toContain(ToolNames.WRITE_FILE); + expect(flat).toContain(ToolNames.EDIT); + expect(flat).toContain(ToolNames.SHELL); }); it('omits the plan-mode reminder when planModeActive is false or unset', async () => { @@ -1393,6 +1400,34 @@ describe('composePostCompactHistory — subagent snapshot', () => { expect(flat).toContain('<evil>'); }); + it('escapes XML-sensitive characters in the subagent id, not just the description', async () => { + // Ids derive from a user-configurable subagentConfig.name, so a `<`/`&` + // there must be escaped too — escaping only the description would still + // let the id close the wrapper or forge markup. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'u' }] }, + { role: 'model', parts: [{ text: 'm' }] }, + ]; + const result = await composePostCompactHistory(history, 'SUMMARY', { + runningSubagents: [ + { + id: 'agent&', + description: 'safe description', + status: 'running', + startTime: 1, + }, + ], + }); + const flat = result + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + // Only our own wrapper close-tag may appear unescaped. + const closes = flat.match(/<\/background-tasks>/g) ?? []; + expect(closes.length).toBe(1); + expect(flat).toContain('agent</background-tasks>&<inject>'); + }); + it('caps the snapshot at MAX_SUBAGENT_SNAPSHOT_COUNT and notes the overflow', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'u' }] }, diff --git a/packages/core/src/services/postCompactAttachments.ts b/packages/core/src/services/postCompactAttachments.ts index d9f462e933..e5d7020907 100644 --- a/packages/core/src/services/postCompactAttachments.ts +++ b/packages/core/src/services/postCompactAttachments.ts @@ -21,6 +21,8 @@ import { realpathSync } from 'node:fs'; import { resolve as resolvePath, sep as pathSep } from 'node:path'; import { CHARS_PER_TOKEN } from './tokenEstimation.js'; import { getFunctionResponseParts } from './compactionInputSlimming.js'; +import { escapeXml } from '../utils/xml.js'; +import { ToolNames } from '../tools/tool-names.js'; export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5; @@ -660,17 +662,17 @@ function safeRealpath(p: string): string { } } -// Tool names listed verbatim from tool-names.ts so a future rename / removal -// surfaces during a global grep instead of leaving stale guidance in this -// reminder. Keep the list small (the most common modification tools) — an -// exhaustive enumeration would drift faster than it helps. +// Tool names are pulled from the `ToolNames` constant source rather than +// retyped, so a future rename updates this reminder automatically instead of +// leaving stale guidance. Keep the list small (the most common modification +// tools) — an exhaustive enumeration would drift faster than it helps. const PLAN_MODE_REMINDER_TEXT = '\n' + 'You are currently in PLAN mode. You may research, read files, and ' + - 'propose plans, but you may not execute modification tools (write_file, ' + - 'edit, run_shell_command, etc.) until the user exits plan mode. The ' + - 'summary above may not reflect this constraint — honor plan mode ' + - 'regardless.\n' + + 'propose plans, but you may not execute modification tools (' + + `${ToolNames.WRITE_FILE}, ${ToolNames.EDIT}, ${ToolNames.SHELL}, etc.) ` + + 'until the user exits plan mode. The summary above may not reflect this ' + + 'constraint — honor plan mode regardless.\n' + ''; /** Cap per-task description text in the snapshot block. Prevents a @@ -691,19 +693,6 @@ function buildPlanModeReminderPart(): Part { return { text: PLAN_MODE_REMINDER_TEXT }; } -/** - * Escape the three XML-significant characters in subagent descriptions - * before splicing them inside our `` wrapper. Without - * this, a description containing `` could close the - * wrapper tag and inject arbitrary structure into the model's view. - */ -function escapeForXmlText(text: string): string { - return text - .replace(/&/g, '&') - .replace(//g, '>'); -} - /** * Collapse interior whitespace (newlines, tabs, carriage returns) to single * spaces before the description goes into a single bullet line. A raw `\n` @@ -733,7 +722,12 @@ function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { flattened.length > MAX_SUBAGENT_DESC_CHARS ? flattened.slice(0, MAX_SUBAGENT_DESC_CHARS) + '…' : flattened; - return `- [${s.status}] ${s.id}: ${escapeForXmlText(truncated)}`; + // Escape EVERY interpolated field (id and status as well as the + // description) with the shared 5-char escaper. Subagent ids derive from + // a user-configurable `subagentConfig.name`, so a `<`/`&` there could + // otherwise close the `` wrapper or forge sibling + // markup the model would treat as trusted metadata. + return `- [${escapeXml(s.status)}] ${escapeXml(s.id)}: ${escapeXml(truncated)}`; }); if (overflow > 0) { lines.push( @@ -752,6 +746,29 @@ function buildSubagentSnapshotPart(snaps: SubagentSnapshot[]): Part | null { }; } +/** + * Build the mid-session state-reminder parts (plan-mode banner + background + * subagent snapshot) that lead the post-compact attachment. Extracted as a + * single source of truth so BOTH the normal `composePostCompactHistory` + * path AND its catch-fallback emit the same blocks — otherwise the fallback + * silently drops plan-mode enforcement and the subagent roster (the exact + * drift PR #4688 review caught). Pure: no I/O, safe to call from a catch. + */ +export function buildStateReminderParts(options: { + planModeActive?: boolean; + runningSubagents?: SubagentSnapshot[]; +}): Part[] { + const parts: Part[] = []; + if (options.planModeActive) { + parts.push(buildPlanModeReminderPart()); + } + if (options.runningSubagents && options.runningSubagents.length > 0) { + const snap = buildSubagentSnapshotPart(options.runningSubagents); + if (snap) parts.push(snap); + } + return parts; +} + export async function composePostCompactHistory( history: Content[], summary: string, @@ -789,14 +806,9 @@ export async function composePostCompactHistory( // Reminder text comes first so a token-conservative model that skims the // attachment still sees the plan-mode constraint and task pointers before // it gets to file bodies. - const postAckParts: Part[] = []; - if (planModeActive) { - postAckParts.push(buildPlanModeReminderPart()); - } - if (runningSubagents && runningSubagents.length > 0) { - const snap = buildSubagentSnapshotPart(runningSubagents); - if (snap) postAckParts.push(snap); - } + const postAckParts: Part[] = [ + ...buildStateReminderParts({ planModeActive, runningSubagents }), + ]; for (const block of fileBlocks) { for (const part of block.parts ?? []) postAckParts.push(part); } From 23f5a40d74c7ee524c8eff6a81e8c5314fe8249c Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 18:22:04 +0800 Subject: [PATCH 11/12] fix(core): scope subagent snapshot to backgrounded tasks; restore reminders on fallback; cap hook context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three code-review fixes in the compaction service: - collectActiveSubagents now also requires isBackgrounded — foreground agents are the parent's synchronously-awaited tool call and don't belong in a roster. Mirrors getRunningBackgroundCount. - The composePostCompactHistory catch-fallback re-applies the plan-mode + subagent reminders via the shared buildStateReminderParts (pure, no I/O), so a restoration failure no longer silently drops plan-mode enforcement and the subagent roster. - The PreCompact hook's additionalContext is capped at MAX_HOOK_INSTRUCTIONS_CHARS before entering the side-query prompt, closing the unbounded-input hole the user-text cap was meant to prevent. The fallback and hook-cap fixes have RED-verified regression tests. --- .../services/chatCompressionService.test.ts | 120 +++++++++++++++++- .../src/services/chatCompressionService.ts | 44 ++++++- 2 files changed, 156 insertions(+), 8 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index f584b9ff43..1317106817 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -9,6 +9,7 @@ import { ChatCompressionService, computeThresholds, MAX_CONSECUTIVE_FAILURES, + MAX_HOOK_INSTRUCTIONS_CHARS, } from './chatCompressionService.js'; import type { Content } from '@google/genai'; import { CompressionStatus } from '../core/turn.js'; @@ -2516,6 +2517,43 @@ describe('ChatCompressionService.compress — customInstructions plumbing', () = const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; expect(passed.systemInstruction).not.toContain('Additional Instructions:'); }); + + it('caps hook additionalContext at MAX_HOOK_INSTRUCTIONS_CHARS', async () => { + const spy = vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + totalTokenCount: 1500, + }, + } as never); + // A pathological hook returns far more context than the cap. It must be + // clipped before entering the side-query prompt, mirroring the user-text + // cap — otherwise an unbounded payload could trigger an unrecoverable PTL. + const longCtx = 'H'.repeat(MAX_HOOK_INSTRUCTIONS_CHARS + 1500); + const { mockChat, mockConfig } = setup({ + hookSystem: { + firePreCompactEvent: vi + .fn() + .mockResolvedValue(makeHookOutput({ additionalContext: longCtx })), + firePostCompactEvent: vi.fn().mockResolvedValue(undefined), + }, + }); + + await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 180_000, + }); + + const passed = spy.mock.calls[0]![1] as { systemInstruction: string }; + const hCount = (passed.systemInstruction.match(/H/g) ?? []).length; + expect(hCount).toBe(MAX_HOOK_INSTRUCTIONS_CHARS); + expect(hCount).toBeLessThan(longCtx.length); + }); }); describe('ChatCompressionService.compress — plan-mode + subagent attachment wiring', () => { @@ -2534,6 +2572,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi description: string; status: string; startTime: number; + isBackgrounded?: boolean; }>; }) { const history: Content[] = [ @@ -2634,7 +2673,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi expect(opts.planModeActive).toBe(false); }); - it('filters background tasks to running/paused agent tasks only', async () => { + it('filters background tasks to backgrounded running/paused agent tasks only', async () => { stubSideQuery(); const composeSpy = vi .spyOn(postCompactModule, 'composePostCompactHistory') @@ -2650,6 +2689,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi description: 'd', status: 'running', startTime: 1, + isBackgrounded: true, }, { id: 'p', @@ -2657,27 +2697,42 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi description: 'd', status: 'paused', startTime: 2, + isBackgrounded: true, + }, + // Foreground agent (isBackgrounded: false): the parent is + // synchronously awaiting it, so it does NOT belong in a + // roster even though it is running. + { + id: 'fg', + kind: 'agent', + description: 'd', + status: 'running', + startTime: 3, + isBackgrounded: false, }, { id: 'c', kind: 'agent', description: 'd', status: 'completed', - startTime: 3, + startTime: 4, + isBackgrounded: true, }, { id: 'f', kind: 'agent', description: 'd', status: 'failed', - startTime: 4, + startTime: 5, + isBackgrounded: true, }, { id: 'x', kind: 'agent', description: 'd', status: 'cancelled', - startTime: 5, + startTime: 6, + isBackgrounded: true, }, // Non-agent kinds (shell, monitor) must also be excluded — they // do not have a "task" the post-compact agent should send_message @@ -2687,7 +2742,8 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi kind: 'shell', description: 'd', status: 'running', - startTime: 6, + startTime: 7, + isBackgrounded: true, }, ], }); @@ -2704,6 +2760,7 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi const opts = composeSpy.mock.calls[0]![2] as { runningSubagents?: Array<{ id: string; status: string }>; }; + // 'fg' is excluded by the isBackgrounded gate; c/f/x by status; s1 by kind. expect(opts.runningSubagents?.map((t) => t.id)).toEqual(['r', 'p']); }); @@ -2756,4 +2813,57 @@ describe('ChatCompressionService.compress — plan-mode + subagent attachment wi }; expect(opts.runningSubagents).toEqual([]); }); + + it('fallback path still injects plan-mode reminder + subagent snapshot when composePostCompactHistory throws', async () => { + // Regression guard: the catch-fallback used to rebuild extraHistory by + // hand with only summary+ack, silently dropping plan-mode enforcement and + // the subagent roster. Both reminder builders are pure (no I/O), so the + // failure that took out composePostCompactHistory must not take them out. + // Use a large input / small output so the token-math lands COMPRESSED + // (newToken = original - (input-1000) + output) rather than tripping the + // inflation guard — we want to assert the fallback's *content*, not status. + vi.spyOn(sideQueryModule, 'runSideQuery').mockResolvedValue({ + text: 's', + usage: { + promptTokenCount: 49_000, + candidatesTokenCount: 1_500, + totalTokenCount: 50_500, + }, + } as never); + vi.spyOn(postCompactModule, 'composePostCompactHistory').mockRejectedValue( + new Error('EACCES: simulated restoration failure'), + ); + const { mockChat, mockConfig } = setupWithAppState({ + approvalMode: ApprovalMode.PLAN, + backgroundTasks: [ + { + id: 'agent-bg', + kind: 'agent', + description: 'long-running background task', + status: 'running', + startTime: 1, + isBackgrounded: true, + }, + ], + }); + + const result = await new ChatCompressionService().compress(mockChat, { + promptId: 'p', + force: true, + model: 'qwen-test', + config: mockConfig, + consecutiveFailures: 0, + originalTokenCount: 100_000, + }); + + // Degraded success — not a failure (summary still reduces context). + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + const flat = (result.newHistory ?? []) + .flatMap((c) => c.parts ?? []) + .map((p) => (p as { text?: string }).text ?? '') + .join('\n'); + expect(flat).toContain(''); + expect(flat).toContain(''); + expect(flat).toContain('agent-bg'); + }); }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index d017e29dc8..cd59820a96 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -27,6 +27,7 @@ import { } from './compactionInputSlimming.js'; import { CHARS_PER_TOKEN, estimatePromptTokens } from './tokenEstimation.js'; import { + buildStateReminderParts, composePostCompactHistory, countToolResponseImages, postProcessSummary, @@ -90,6 +91,18 @@ export const HARD_BUFFER = 3_000; */ export const MAX_CONSECUTIVE_FAILURES = 3; +/** + * Hard cap on the PreCompact hook's `additionalContext` once it is merged + * into the side-query system prompt. The user-supplied `/compress` text is + * already capped at `MAX_COMPRESS_INSTRUCTIONS_CHARS` (2000) in + * compressCommand.ts for exactly this reason — the side-query has no + * input-truncation retry, so an unbounded hook payload could inflate the + * prompt and trigger a PTL the compaction path can't recover from. Hooks + * may legitimately concatenate context across several scripts, so this cap + * is set higher than the user-text cap. + */ +export const MAX_HOOK_INSTRUCTIONS_CHARS = 4000; + export interface CompactionThresholds { /** Token count at which UI warn tier triggers. */ readonly warn: number; @@ -211,7 +224,15 @@ function collectActiveSubagents(config: Config): SubagentSnapshot[] { .getAll() .filter( (t) => - t.kind === 'agent' && (t.status === 'running' || t.status === 'paused'), + t.kind === 'agent' && + // Only TRUE background subagents belong in a `` + // block. Foreground entries (`isBackgrounded: false`) are the + // parent's synchronously-awaited tool call — their pending + // functionCall is still in history and resolves through the normal + // tool-result channel, so a reminder would mislabel them. Mirrors + // the `getRunningBackgroundCount` filter in background-tasks.ts. + t.isBackgrounded && + (t.status === 'running' || t.status === 'paused'), ) .map((t) => ({ id: t.id, @@ -394,7 +415,12 @@ export class ChatCompressionService { // client.ts) — keep it consistent. const merged = result?.getAdditionalContext(); if (merged && merged.trim().length > 0) { - hookExtraInstructions = merged.trim(); + // Cap like the user-text path: an unbounded hook payload would + // otherwise bypass MAX_COMPRESS_INSTRUCTIONS_CHARS and inflate the + // side-query prompt past a recoverable size. + hookExtraInstructions = merged + .trim() + .slice(0, MAX_HOOK_INSTRUCTIONS_CHARS); } } catch (err) { config.getDebugLogger().warn(`PreCompact hook failed: ${err}`); @@ -574,8 +600,20 @@ export class ChatCompressionService { trailingFc?.role === 'model' ? (trailingFc.parts ?? []).filter((p) => !!p.functionCall) : []; + // Re-apply the SAME plan-mode + subagent reminders the normal path + // injects. These builders are pure (no disk I/O / history walking), + // so the failure that took out composePostCompactHistory can't take + // them out too — and dropping them here would silently lose plan-mode + // enforcement and the subagent roster on the degraded path. + const reminderParts = buildStateReminderParts({ + planModeActive: config.getApprovalMode?.() === ApprovalMode.PLAN, + runningSubagents: collectActiveSubagents(config), + }); extraHistory = [ - { role: 'user', parts: [{ text: postProcessSummary(summary) }] }, + { + role: 'user', + parts: [{ text: postProcessSummary(summary) }, ...reminderParts], + }, { role: 'model', parts: [ From 2af00221b210f094ef65f097d181152c36c82e7b Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 2 Jun 2026 18:22:19 +0800 Subject: [PATCH 12/12] feat(cli): warn on /compress instruction truncation; fix integration-test pty typing - /compress now emits an INFO notice (interactive), a stream message (acp), and a prefixed return message (non-interactive) when the instruction string exceeds MAX_COMPRESS_INSTRUCTIONS_CHARS, so the silent 2000-char clip is no longer invisible to the user. - Annotate the three `ptyProcess.onData((data: string) => ...)` callbacks in the compress integration test to clear the TS7006 implicit-any the reviewer's typecheck flagged (fixed all three occurrences, not only the one inside this PR's diff). --- .../context-compress-interactive.test.ts | 6 +-- .../src/ui/commands/compressCommand.test.ts | 51 +++++++++++++++++++ .../cli/src/ui/commands/compressCommand.ts | 23 ++++++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/integration-tests/interactive/context-compress-interactive.test.ts b/integration-tests/interactive/context-compress-interactive.test.ts index 8e9655f6ed..208690959e 100644 --- a/integration-tests/interactive/context-compress-interactive.test.ts +++ b/integration-tests/interactive/context-compress-interactive.test.ts @@ -34,7 +34,7 @@ describe('Interactive Mode', () => { const { ptyProcess } = rig.runInteractive(); let fullOutput = ''; - ptyProcess.onData((data) => (fullOutput += data)); + ptyProcess.onData((data: string) => (fullOutput += data)); // Wait for the app to be ready const isReady = await rig.waitForText('Type your message', 15000); @@ -80,7 +80,7 @@ describe('Interactive Mode', () => { const { ptyProcess } = rig.runInteractive(); let fullOutput = ''; - ptyProcess.onData((data) => (fullOutput += data)); + ptyProcess.onData((data: string) => (fullOutput += data)); // Wait for the app to be ready const isReady = await rig.waitForText('Type your message', 25000); @@ -122,7 +122,7 @@ describe('Interactive Mode', () => { const { ptyProcess } = rig.runInteractive(); let fullOutput = ''; - ptyProcess.onData((data) => (fullOutput += data)); + ptyProcess.onData((data: string) => (fullOutput += data)); const isReady = await rig.waitForText('Type your message', 15000); expect( diff --git a/packages/cli/src/ui/commands/compressCommand.test.ts b/packages/cli/src/ui/commands/compressCommand.test.ts index e83a9fe74a..7fd7881703 100644 --- a/packages/cli/src/ui/commands/compressCommand.test.ts +++ b/packages/cli/src/ui/commands/compressCommand.test.ts @@ -211,5 +211,56 @@ describe('compressCommand', () => { expect(call[3]).toBeDefined(); expect((call[3] as string).length).toBe(2000); }); + + it('surfaces an INFO notice to the user when instructions are truncated', async () => { + const long = 'x'.repeat(3000); + const ctx = createMockCommandContext({ + services: { + config: { + getGeminiClient: () => + ({ + tryCompressChat: mockTryCompressChat, + }) as unknown as GeminiClient, + }, + }, + invocation: { raw: `/compress ${long}`, name: 'compress', args: long }, + }); + await compressCommand.action!(ctx, ''); + expect(ctx.ui.addItem).toHaveBeenCalledWith( + expect.objectContaining({ + type: MessageType.INFO, + text: expect.stringContaining('truncated'), + }), + expect.any(Number), + ); + }); + + it('does NOT show a truncation notice when instructions fit under the cap', async () => { + const ctx = createMockCommandContext({ + services: { + config: { + getGeminiClient: () => + ({ + tryCompressChat: mockTryCompressChat, + }) as unknown as GeminiClient, + }, + }, + invocation: { + raw: '/compress short', + name: 'compress', + args: 'short', + }, + }); + await compressCommand.action!(ctx, ''); + const infoCalls = ( + ctx.ui.addItem as ReturnType + ).mock.calls.filter( + (c) => + (c[0] as { type?: MessageType }).type === MessageType.INFO && + typeof (c[0] as { text?: string }).text === 'string' && + (c[0] as { text: string }).text.includes('truncated'), + ); + expect(infoCalls).toHaveLength(0); + }); }); }); diff --git a/packages/cli/src/ui/commands/compressCommand.ts b/packages/cli/src/ui/commands/compressCommand.ts index 79980af762..d800f81207 100644 --- a/packages/cli/src/ui/commands/compressCommand.ts +++ b/packages/cli/src/ui/commands/compressCommand.ts @@ -62,9 +62,18 @@ export const compressCommand: SlashCommand = { } const rawArgs = context.invocation?.args?.trim() ?? ''; + const wasTruncated = rawArgs.length > MAX_COMPRESS_INSTRUCTIONS_CHARS; const customInstructions = rawArgs ? rawArgs.slice(0, MAX_COMPRESS_INSTRUCTIONS_CHARS) : undefined; + // Surface the silent cap so a user pasting an over-long focus directive + // knows their instructions were clipped mid-text rather than silently + // changing the summary's behaviour. + const truncationNotice = wasTruncated + ? t('Compression instructions were truncated to {{max}} characters.', { + max: String(MAX_COMPRESS_INSTRUCTIONS_CHARS), + }) + : undefined; const doCompress = async () => { const promptId = `compress-${Date.now()}`; @@ -79,6 +88,12 @@ export const compressCommand: SlashCommand = { if (executionMode === 'acp') { const messages = async function* () { try { + if (truncationNotice) { + yield { + messageType: 'info' as const, + content: truncationNotice, + }; + } yield { messageType: 'info' as const, content: 'Compressing context...', @@ -110,6 +125,12 @@ export const compressCommand: SlashCommand = { try { if (executionMode === 'interactive') { + if (truncationNotice) { + ui.addItem( + { type: MessageType.INFO, text: truncationNotice }, + Date.now(), + ); + } ui.setPendingItem(pendingMessage); } @@ -157,7 +178,7 @@ export const compressCommand: SlashCommand = { return { type: 'message', messageType: 'info', - content: `Context compressed (${compressed.originalTokenCount} -> ${compressed.newTokenCount}).`, + content: `${truncationNotice ? `${truncationNotice} ` : ''}Context compressed (${compressed.originalTokenCount} -> ${compressed.newTokenCount}).`, }; } catch (e) { // If cancelled via ESC, don't show error — cancelSlashCommand already handled UI