diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index 9389ba8f5f..f27140fecc 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s | `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` | | `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` | | `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` | +| `context.gapThresholdMinutes` | number | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL. | `5` | #### Troubleshooting File Search Performance diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 3d4ed84e29..2b64685f79 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1069,6 +1069,7 @@ export async function loadCliConfig( telemetry: telemetrySettings, usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true, fileFiltering: settings.context?.fileFiltering, + thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes, checkpointing: argv.checkpointing || settings.general?.checkpointing?.enabled, proxy: diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index e765dd8014..4645d5803b 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -914,6 +914,16 @@ const SETTINGS_SCHEMA = { }, }, }, + gapThresholdMinutes: { + type: 'number', + label: 'Thinking Block Idle Threshold (minutes)', + category: 'Context', + requiresRestart: false, + default: 5, + description: + 'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.', + showInDialog: false, + }, }, }, diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index c54b557052..a9e47f99f8 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -370,6 +370,8 @@ export interface ConfigParameters { model?: string; outputLanguageFilePath?: string; maxSessionTurns?: number; + /** Minutes of inactivity before clearing retained thinking blocks. */ + thinkingIdleThresholdMinutes?: number; sessionTokenLimit?: number; experimentalZedIntegration?: boolean; cronEnabled?: boolean; @@ -557,6 +559,7 @@ export class Config { private ideMode: boolean; private readonly maxSessionTurns: number; + private readonly thinkingIdleThresholdMs: number; private readonly sessionTokenLimit: number; private readonly listExtensions: boolean; private readonly overrideExtensions?: string[]; @@ -683,6 +686,8 @@ export class Config { this.fileDiscoveryService = params.fileDiscoveryService ?? null; this.bugCommand = params.bugCommand; this.maxSessionTurns = params.maxSessionTurns ?? -1; + this.thinkingIdleThresholdMs = + (params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000; this.sessionTokenLimit = params.sessionTokenLimit ?? -1; this.experimentalZedIntegration = params.experimentalZedIntegration ?? false; @@ -1329,6 +1334,10 @@ export class Config { return this.maxSessionTurns; } + getThinkingIdleThresholdMs(): number { + return this.thinkingIdleThresholdMs; + } + getSessionTokenLimit(): number { return this.sessionTokenLimit; } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 19dd88bcfe..9f7ead5c67 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => { getWorkingDir: vi.fn().mockReturnValue('/test/dir'), getFileService: vi.fn().mockReturnValue(fileService), getMaxSessionTurns: vi.fn().mockReturnValue(0), + getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000), getSessionTokenLimit: vi.fn().mockReturnValue(32000), getNoBrowser: vi.fn().mockReturnValue(false), getUsageStatisticsEnabled: vi.fn().mockReturnValue(true), @@ -427,6 +428,119 @@ describe('Gemini Client (client.ts)', () => { }); }); + describe('thinking block idle cleanup and latch', () => { + let mockChat: Partial; + + beforeEach(() => { + const mockStream = (async function* () { + yield { + type: GeminiEventType.Content, + value: 'response', + }; + })(); + mockTurnRunFn.mockReturnValue(mockStream); + + mockChat = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), + }; + client['chat'] = mockChat as GeminiChat; + }); + + it('should not strip thoughts on active session (< 5min idle)', async () => { + // Simulate a recent API completion (2 minutes ago — within default 5 min threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000; + client['thinkingClearLatched'] = false; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-1', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect( + mockChat.stripThoughtsFromHistoryKeepRecent, + ).not.toHaveBeenCalled(); + }); + + it('should latch and strip thoughts after > 5min idle', async () => { + // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000; + client['thinkingClearLatched'] = false; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-2', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['thinkingClearLatched']).toBe(true); + expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith( + 1, + ); + }); + + it('should keep stripping once latched even if idle < 5min', async () => { + // Pre-set latch with a recent timestamp (2 minutes ago — within threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000; + client['thinkingClearLatched'] = true; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-3', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['thinkingClearLatched']).toBe(true); + expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith( + 1, + ); + }); + + it('should update lastApiCompletionTimestamp after API call', async () => { + client['lastApiCompletionTimestamp'] = null; + + const before = Date.now(); + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-4', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual( + before, + ); + }); + + it('should reset latch and timestamp on resetChat', async () => { + client['lastApiCompletionTimestamp'] = Date.now(); + client['thinkingClearLatched'] = true; + + await client.resetChat(); + + expect(client['thinkingClearLatched']).toBe(false); + expect(client['lastApiCompletionTimestamp']).toBeNull(); + }); + }); + describe('tryCompressChat', () => { const mockGetHistory = vi.fn(); @@ -436,6 +550,7 @@ describe('Gemini Client (client.ts)', () => { addHistory: vi.fn(), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), } as unknown as GeminiChat; }); @@ -457,6 +572,7 @@ describe('Gemini Client (client.ts)', () => { getHistory: vi.fn((_curated?: boolean) => chatHistory), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockOriginalChat as GeminiChat; @@ -1149,6 +1265,7 @@ describe('Gemini Client (client.ts)', () => { addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), } as unknown as GeminiChat; client['chat'] = mockChat; @@ -1204,6 +1321,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1260,6 +1378,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1326,6 +1445,7 @@ hello addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1365,6 +1485,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1410,6 +1531,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1498,6 +1620,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1555,6 +1678,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1636,6 +1760,7 @@ Other open files: { role: 'user', parts: [{ text: 'previous message' }] }, ]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; }); @@ -1889,6 +2014,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), // Default empty history setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2228,6 +2354,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2265,6 +2392,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2305,6 +2433,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2329,6 +2458,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), stripOrphanedUserEntriesFromHistory: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2361,6 +2491,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), stripOrphanedUserEntriesFromHistory: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2405,6 +2536,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; }); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 0f985364bf..13fc86aaa9 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -126,6 +126,25 @@ export class GeminiClient { */ private hasFailedCompressionAttempt = false; + /** + * Timestamp (epoch ms) of the last completed API call. + * Used to detect idle periods for thinking block cleanup. + * Starts as null — on the first query there is no prior thinking to clean, + * so the idle check is skipped until the first API call completes. + */ + private lastApiCompletionTimestamp: number | null = null; + + /** + * Sticky-on latch for clearing thinking blocks from prior turns. + * Triggered when idle exceeds the configured threshold (default 5 min, + * aligned with provider prompt-cache TTL). Once latched, stays true to + * prevent oscillation: without it, thinking would accumulate → get + * stripped → accumulate again, causing the message prefix to change + * repeatedly (bad for provider-side prompt caching and wastes context). + * Reset on /clear (resetChat). + */ + private thinkingClearLatched = false; + constructor(private readonly config: Config) { this.loopDetector = new LoopDetectionService(config); } @@ -199,6 +218,9 @@ export class GeminiClient { } async resetChat(): Promise { + // Reset thinking clear latch — fresh chat, no prior thinking to clean up + this.thinkingClearLatched = false; + this.lastApiCompletionTimestamp = null; await this.startChat(); } @@ -537,8 +559,27 @@ export class GeminiClient { // record user message for session management this.config.getChatRecordingService()?.recordUserMessage(request); - // strip thoughts from history before sending the message - this.stripThoughtsFromHistory(); + // Thinking block cross-turn retention with idle cleanup: + // - Active session (< threshold idle): keep thinking blocks for reasoning coherence + // - Idle > threshold: clear old thinking, keep only last 1 turn to free context + // - Latch: once triggered, never revert — prevents oscillation + if ( + !this.thinkingClearLatched && + this.lastApiCompletionTimestamp !== null + ) { + const thresholdMs = this.config.getThinkingIdleThresholdMs(); + const idleMs = Date.now() - this.lastApiCompletionTimestamp; + if (idleMs > thresholdMs) { + this.thinkingClearLatched = true; + debugLogger.debug( + `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`, + ); + } + } + if (this.thinkingClearLatched) { + this.getChat().stripThoughtsFromHistoryKeepRecent(1); + debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)'); + } } if (messageType !== SendMessageType.Retry) { this.sessionTurnCount++; @@ -680,6 +721,7 @@ export class GeminiClient { if (arenaAgentClient) { await arenaAgentClient.reportError('Loop detected'); } + this.lastApiCompletionTimestamp = Date.now(); return turn; } } @@ -698,9 +740,14 @@ export class GeminiClient { : 'Unknown error'; await arenaAgentClient.reportError(errorMsg); } + this.lastApiCompletionTimestamp = Date.now(); return turn; } } + + // Track API completion time for thinking block idle cleanup + this.lastApiCompletionTimestamp = Date.now(); + // Fire Stop hook through MessageBus (only if hooks are enabled and registered) // This must be done before any early returns to ensure hooks are always triggered if ( diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 4a47813881..9d9b45caf0 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -1767,6 +1767,150 @@ describe('GeminiChat', async () => { }); }); + describe('stripThoughtsFromHistoryKeepRecent', () => { + it('should keep the most recent N model turns with thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [ + { text: 'old thinking', thought: true }, + { text: 'response1' }, + ], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'mid thinking', thought: true }, + { text: 'response2' }, + ], + }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response3' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // First two model turns should have thoughts stripped + expect(history[1]!.parts).toEqual([{ text: 'response1' }]); + expect(history[3]!.parts).toEqual([{ text: 'response2' }]); + // Last model turn should keep thoughts + expect(history[5]!.parts).toEqual([ + { text: 'recent thinking', thought: true }, + { text: 'response3' }, + ]); + }); + + it('should not strip anything when keepTurns >= model turns with thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'thinking', thought: true }, { text: 'response' }], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + expect(history[1]!.parts).toEqual([ + { text: 'thinking', thought: true }, + { text: 'response' }, + ]); + }); + + it('should remove model content objects that become empty after stripping', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'only thinking', thought: true }], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // The first model turn (only thoughts) should be removed entirely + expect(history).toHaveLength(3); + expect(history[0]!.parts).toEqual([{ text: 'msg1' }]); + expect(history[1]!.parts).toEqual([{ text: 'msg2' }]); + expect(history[2]!.parts).toEqual([ + { text: 'recent thinking', thought: true }, + { text: 'response' }, + ]); + }); + + it('should also strip thoughtSignature from stripped turns', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [ + { text: 'old thinking', thought: true }, + { + text: 'with sig', + thoughtSignature: 'sig1', + } as unknown as { text: string; thoughtSignature: string }, + { text: 'response1' }, + ], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response2' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // First model turn: thought stripped, thoughtSignature stripped + expect(history[1]!.parts).toEqual([ + { text: 'with sig' }, + { text: 'response1' }, + ]); + expect( + (history[1]!.parts![0] as { thoughtSignature?: string }) + .thoughtSignature, + ).toBeUndefined(); + }); + + it('should handle keepTurns=0 by stripping all thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'thinking', thought: true }, { text: 'response' }], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(0); + + const history = chat.getHistory(); + expect(history[1]!.parts).toEqual([{ text: 'response' }]); + }); + }); + describe('stripOrphanedUserEntriesFromHistory', () => { it('should pop a single trailing user entry', () => { chat.setHistory([ diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 12dfcb0809..5fd6caf030 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -576,6 +576,89 @@ export class GeminiChat { .filter((content) => content.parts && content.parts.length > 0); } + /** + * Strip thought parts from history, keeping the most recent `keepTurns` + * model turns that contain thinking blocks intact. + * + * Selection is based on thought-containing turns specifically (not all + * model turns) so the most recent reasoning chain is always preserved + * even if later model turns happen to have no thinking. + * + * Used for idle cleanup: after exceeding the configured idle threshold + * the old thinking blocks are no longer useful for reasoning coherence + * but still consume context tokens. + */ + stripThoughtsFromHistoryKeepRecent(keepTurns: number): void { + keepTurns = Number.isFinite(keepTurns) + ? Math.max(0, Math.floor(keepTurns)) + : 0; + + // Find indices of model turns that contain thought parts + const modelTurnIndices: number[] = []; + for (let i = 0; i < this.history.length; i++) { + const content = this.history[i]; + if ( + content.role === 'model' && + content.parts?.some( + (part) => + part && + typeof part === 'object' && + 'thought' in part && + part.thought, + ) + ) { + modelTurnIndices.push(i); + } + } + + // Determine which model turns to keep (the most recent `keepTurns`) + const turnsToStrip = new Set( + modelTurnIndices.slice( + 0, + Math.max(0, modelTurnIndices.length - keepTurns), + ), + ); + + if (turnsToStrip.size === 0) return; + + this.history = this.history + .map((content, index) => { + if (!turnsToStrip.has(index) || !content.parts) return content; + + // Strip thought parts from this turn + const filteredParts = content.parts + .filter( + (part) => + !( + part && + typeof part === 'object' && + 'thought' in part && + part.thought + ), + ) + .map((part) => { + if ( + part && + typeof part === 'object' && + 'thoughtSignature' in part + ) { + const newPart = { ...part }; + delete (newPart as { thoughtSignature?: string }) + .thoughtSignature; + return newPart; + } + return part; + }); + + return { + ...content, + parts: filteredParts, + }; + }) + // Remove Content objects that have no parts left after filtering + .filter((content) => content.parts && content.parts.length > 0); + } + /** * Pop all orphaned trailing user entries from chat history. * In a valid conversation the last entry is always a model response; diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index 4f92b74d7d..fdd4fbbb17 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -383,6 +383,11 @@ "default": true } } + }, + "gapThresholdMinutes": { + "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.", + "type": "number", + "default": 5 } } },