Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ import {
import { OpenAILogger } from '../../utils/openaiLogger.js';
import type OpenAI from 'openai';

vi.mock('../../telemetry/loggers.js', () => ({
logApiRequest: vi.fn(),
logApiResponse: vi.fn(),
logApiError: vi.fn(),
}));
vi.mock('../../telemetry/loggers.js', async (importOriginal) => {
  // Partial mock: preserve the real module's exports, but swap the three
  // API logging entry points for spies so tests can assert on their calls.
  const actualModule =
    await importOriginal<typeof import('../../telemetry/loggers.js')>();
  const loggerSpies = {
    logApiRequest: vi.fn(),
    logApiResponse: vi.fn(),
    logApiError: vi.fn(),
  };
  return { ...actualModule, ...loggerSpies };
});

vi.mock('../../utils/openaiLogger.js', () => ({
OpenAILogger: vi.fn().mockImplementation(() => ({
Expand Down Expand Up @@ -474,4 +479,45 @@ describe('LoggingContentGenerator', () => {
},
]);
});

it.each(['prompt_suggestion', 'forked_query'])(
  'skips logApiRequest and OpenAI logging for internal promptId %s (generateContent)',
  async (promptId) => {
    // Minimal successful response so the wrapped generator resolves and the
    // logging wrapper reaches its response-side logging branch.
    const mockResponse = {
      responseId: 'internal-resp',
      modelVersion: 'test-model',
      candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
      usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
    } as unknown as GenerateContentResponse;

    // Wrapped generator stub: only generateContent is exercised here.
    const mockWrapped = {
      generateContent: vi.fn().mockResolvedValue(mockResponse),
      generateContentStream: vi.fn(),
    } as unknown as ContentGenerator;

    // OpenAI logging is enabled on purpose — the test must show it is
    // still skipped for internal prompt IDs despite being switched on.
    const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
      enableOpenAILogging: true,
      openAILoggingDir: '/tmp/test-logs',
    });

    const request = {
      model: 'test-model',
      contents: [{ role: 'user', parts: [{ text: 'test' }] }],
    } as unknown as GenerateContentParameters;

    await gen.generateContent(request, promptId);

    // logApiRequest should NOT be called for internal prompts
    expect(logApiRequest).not.toHaveBeenCalled();
    // logApiResponse SHOULD be called (for /stats token tracking)
    expect(logApiResponse).toHaveBeenCalled();
    // OpenAI logger should NOT be called
    // NOTE: if the mocked OpenAILogger constructor was never invoked,
    // results[0] is undefined and the guard below makes this check a no-op —
    // which is itself the desired "no logging happened" outcome.
    const loggerInstance = (
      OpenAILogger as unknown as ReturnType<typeof vi.fn>
    ).mock.results[0]?.value;
    if (loggerInstance) {
      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
    }
  },
);
});
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
logApiRequest,
logApiResponse,
} from '../../telemetry/loggers.js';
import { isInternalPromptId } from '../../utils/internalPromptIds.js';
import type {
ContentGenerator,
ContentGeneratorConfig,
Expand Down Expand Up @@ -143,8 +144,17 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId: string,
): Promise<GenerateContentResponse> {
const startTime = Date.now();
this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
const openaiRequest = await this.buildOpenAIRequestForLogging(req);
const isInternal = isInternalPromptId(userPromptId);
if (!isInternal) {
this.logApiRequest(
this.toContents(req.contents),
req.model,
userPromptId,
);
}
const openaiRequest = isInternal
? undefined
: await this.buildOpenAIRequestForLogging(req);
try {
const response = await this.wrapped.generateContent(req, userPromptId);
const durationMs = Date.now() - startTime;
Expand All @@ -155,12 +165,16 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId,
response.usageMetadata,
);
await this.logOpenAIInteraction(openaiRequest, response);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, response);
}
return response;
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError('', durationMs, error, req.model, userPromptId);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}
}
Expand All @@ -170,16 +184,27 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId: string,
): Promise<AsyncGenerator<GenerateContentResponse>> {
const startTime = Date.now();
this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
const openaiRequest = await this.buildOpenAIRequestForLogging(req);
const isInternal = isInternalPromptId(userPromptId);
if (!isInternal) {
this.logApiRequest(
this.toContents(req.contents),
req.model,
userPromptId,
);
}
const openaiRequest = isInternal
? undefined
: await this.buildOpenAIRequestForLogging(req);

let stream: AsyncGenerator<GenerateContentResponse>;
try {
stream = await this.wrapped.generateContentStream(req, userPromptId);
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError('', durationMs, error, req.model, userPromptId);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}

Expand All @@ -199,12 +224,17 @@ export class LoggingContentGenerator implements ContentGenerator {
model: string,
openaiRequest?: OpenAI.Chat.ChatCompletionCreateParams,
): AsyncGenerator<GenerateContentResponse> {
const isInternal = isInternalPromptId(userPromptId);
// For internal prompts we only need the last usage metadata (for /stats);
// skip collecting full responses to avoid unnecessary memory overhead.
const responses: GenerateContentResponse[] = [];

let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
try {
for await (const response of stream) {
responses.push(response);
if (!isInternal) {
responses.push(response);
}
if (response.usageMetadata) {
lastUsageMetadata = response.usageMetadata;
}
Expand All @@ -219,9 +249,11 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId,
lastUsageMetadata,
);
const consolidatedResponse =
this.consolidateGeminiResponsesForLogging(responses);
await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
if (!isInternal) {
const consolidatedResponse =
this.consolidateGeminiResponsesForLogging(responses);
await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
}
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError(
Expand All @@ -231,7 +263,9 @@ export class LoggingContentGenerator implements ContentGenerator {
responses[0]?.modelVersion || model,
userPromptId,
);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}
}
Expand Down
29 changes: 22 additions & 7 deletions packages/core/src/followup/forkedQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
* Forked Query Infrastructure
*
* Enables cache-aware secondary LLM calls that share the main conversation's
* prompt prefix (systemInstruction + tools + history) for cache hits.
* prompt prefix (systemInstruction + history) for cache hits.
*
* DashScope already enables cache_control via X-DashScope-CacheControl header.
* By constructing the forked GeminiChat with identical generationConfig and
* history prefix, the fork automatically benefits from prefix caching.
*
* Note: `runForkedQuery` overrides `tools: []` at the per-request level so the
* model cannot produce function calls. `createForkedChat` retains the full
* generationConfig (including tools) for callers like speculation that need them.
*/

import type {
Expand All @@ -21,6 +25,11 @@ import type {
import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
import type { Config } from '../config/config.js';

/** Per-request config that strips tools so the model never produces function calls. */
// Frozen at both levels (the object and the inner array) so this shared
// constant cannot be mutated by any request that spreads or references it.
const NO_TOOLS: GenerateContentConfig = Object.freeze({
  tools: Object.freeze([]),
});

/**
* Snapshot of the main conversation's cache-critical parameters.
* Captured after each successful main turn so forked queries share the same prefix.
Expand Down Expand Up @@ -111,9 +120,13 @@ export function clearCacheSafeParams(): void {
// ---------------------------------------------------------------------------

/**
* Create an isolated GeminiChat that shares the same cache prefix as the main
* conversation. The fork uses identical generationConfig (systemInstruction +
* tools) and history, so DashScope's cache_control mechanism produces cache hits.
* Create an isolated GeminiChat that shares the main conversation's
* generationConfig (including systemInstruction, tools, and history).
*
* The full config is retained so that callers like `runSpeculativeLoop`
* can execute tool calls during speculation. For pure-text callers like
* `runForkedQuery`, tools are stripped at the per-request level via
* `NO_TOOLS` — see {@link runForkedQuery}.
*
* The fork does NOT have chatRecordingService or telemetryService to avoid
* polluting the main session's recordings and token counts.
Expand Down Expand Up @@ -165,7 +178,7 @@ function extractUsage(

/**
* Run a forked query using a GeminiChat that shares the main conversation's
* cache prefix. This is a single-turn request (no tool execution loop).
* cache prefix. This is a single-turn, tool-free request (no function calls).
*
* @param config - App config
* @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
Expand All @@ -191,8 +204,10 @@ export async function runForkedQuery(
const model = options?.model ?? params.model;
const chat = createForkedChat(config, params);

// Build per-request config overrides for JSON schema if needed
const requestConfig: GenerateContentConfig = {};
// Build per-request config overrides.
// NO_TOOLS prevents the model from producing function calls — forked
// queries are pure text completion and must not appear in tool-call UI.
const requestConfig: GenerateContentConfig = { ...NO_TOOLS };
if (options?.abortSignal) {
requestConfig.abortSignal = options.abortSignal;
}
Expand Down
97 changes: 97 additions & 0 deletions packages/core/src/telemetry/loggers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import {
logExtensionInstallEvent,
logExtensionUninstall,
logHookCall,
logApiError,
} from './loggers.js';
import * as metrics from './metrics.js';
import { QwenLogger } from './qwen-logger/qwen-logger.js';
Expand All @@ -77,6 +78,7 @@ import {
ExtensionInstallEvent,
ExtensionUninstallEvent,
HookCallEvent,
ApiErrorEvent,
} from './types.js';
import { FileOperation } from './metrics.js';
import type {
Expand Down Expand Up @@ -359,6 +361,101 @@ describe('loggers', () => {
});
});

describe('logApiResponse skips chatRecordingService for internal prompt IDs', () => {
  /**
   * Builds a Config stub whose chat recording service forwards
   * recordUiTelemetryEvent to the supplied spy. Extracted because both
   * tests below previously duplicated this construction verbatim.
   */
  const makeConfigWithRecording = (recordSpy: ReturnType<typeof vi.fn>) =>
    ({
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => false,
      getChatRecordingService: () => ({
        recordUiTelemetryEvent: recordSpy,
      }),
    }) as unknown as Config;

  it.each(['prompt_suggestion', 'forked_query'])(
    'should not record to chatRecordingService when prompt_id is %s',
    (promptId) => {
      const mockRecordUiTelemetryEvent = vi.fn();
      const configWithRecording = makeConfigWithRecording(
        mockRecordUiTelemetryEvent,
      );

      const event = new ApiResponseEvent(
        'resp-id',
        'test-model',
        50,
        promptId,
      );
      logApiResponse(configWithRecording, event);

      // Internal prompts must stay out of the session's chat recording…
      expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
      // …but still reach the UI telemetry bus (token tracking for /stats).
      expect(mockUiEvent.addEvent).toHaveBeenCalled();
    },
  );

  it('should record to chatRecordingService for normal prompt IDs', () => {
    const mockRecordUiTelemetryEvent = vi.fn();
    const configWithRecording = makeConfigWithRecording(
      mockRecordUiTelemetryEvent,
    );

    const event = new ApiResponseEvent(
      'resp-id',
      'test-model',
      50,
      'user_query',
    );
    logApiResponse(configWithRecording, event);

    expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
  });
});

describe('logApiError skips chatRecordingService for internal prompt IDs', () => {
  /**
   * Builds a Config stub whose chat recording service forwards
   * recordUiTelemetryEvent to the supplied spy. Extracted because both
   * tests below previously duplicated this construction verbatim.
   */
  const makeConfigWithRecording = (recordSpy: ReturnType<typeof vi.fn>) =>
    ({
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => false,
      getChatRecordingService: () => ({
        recordUiTelemetryEvent: recordSpy,
      }),
    }) as unknown as Config;

  it.each(['prompt_suggestion', 'forked_query'])(
    'should not record to chatRecordingService when prompt_id is %s',
    (promptId) => {
      const mockRecordUiTelemetryEvent = vi.fn();
      const configWithRecording = makeConfigWithRecording(
        mockRecordUiTelemetryEvent,
      );

      const event = new ApiErrorEvent({
        model: 'test-model',
        durationMs: 100,
        promptId,
        errorMessage: 'test error',
      });
      logApiError(configWithRecording, event);

      // Errors from internal prompts must not pollute the session recording.
      expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
    },
  );

  it('should record to chatRecordingService for normal prompt IDs', () => {
    const mockRecordUiTelemetryEvent = vi.fn();
    const configWithRecording = makeConfigWithRecording(
      mockRecordUiTelemetryEvent,
    );

    const event = new ApiErrorEvent({
      model: 'test-model',
      durationMs: 100,
      promptId: 'user_query',
      errorMessage: 'test error',
    });
    logApiError(configWithRecording, event);

    expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
  });
});

describe('logApiRequest', () => {
const mockConfig = {
getSessionId: () => 'test-session-id',
Expand Down
Loading
Loading