feat: add Vercel AI Gateway provider with pricing support #689
Open
naaa760 wants to merge 1 commit into Merit-Systems:master from naaa760:ai-gateway-provider
+257 −0
187 changes: 187 additions & 0 deletions
187
packages/app/server/src/providers/VercelAIGatewayProvider.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```ts
import { LlmTransactionMetadata, Transaction } from '../types';
import { getCostPerToken, getModelPrice, isValidModel } from '../services/AccountingService';
import { BaseProvider } from './BaseProvider';
import { ProviderType } from './ProviderType';
import logger from '../logger';
import { env } from '../env';
import { parseSSEGPTFormat, type CompletionStateBody } from './GPTProvider';
import { Decimal } from '@prisma/client/runtime/library';

export class VercelAIGatewayProvider extends BaseProvider {
  private readonly VERCEL_AI_GATEWAY_BASE_URL = 'https://ai-gateway.vercel.sh/v1';

  getType(): ProviderType {
    return ProviderType.VERCEL_AI_GATEWAY;
  }

  getBaseUrl(): string {
    return this.VERCEL_AI_GATEWAY_BASE_URL;
  }

  getApiKey(): string | undefined {
    return env.VERCEL_AI_GATEWAY_API_KEY;
  }

  async handleBody(
    data: string,
    requestBody?: Record<string, unknown>
  ): Promise<Transaction> {
    try {
      const model = this.getModel().toLowerCase();
      const isTranscriptionModel = model.includes('whisper') || model.includes('transcription');
      const isSpeechModel = model.includes('tts') || model.includes('speech');

      // A transcription response is a JSON body with a top-level string `text` field.
      let isTranscriptionResponse = false;
      try {
        const parsed = JSON.parse(data);
        if (parsed.text !== undefined && typeof parsed.text === 'string') {
          isTranscriptionResponse = true;
        }
      } catch {
        // Not JSON (e.g. an SSE stream); fall through to chat-completion handling.
      }

      if (isTranscriptionModel || isTranscriptionResponse) {
        return this.handleAudioResponse(data, requestBody, 'transcription');
      }

      if (isSpeechModel) {
        return this.handleAudioResponse(data, requestBody, 'speech');
      }

      return this.handleChatCompletionResponse(data);
    } catch (error) {
      logger.error(`Error processing Vercel AI Gateway response: ${error}`);
      throw error;
    }
  }

  private handleChatCompletionResponse(data: string): Transaction {
    let prompt_tokens = 0;
    let completion_tokens = 0;
    let total_tokens = 0;
    let providerId = 'null';

    if (this.getIsStream()) {
      const chunks = parseSSEGPTFormat(data);

      // Accumulate usage across all SSE chunks that report it.
      for (const chunk of chunks) {
        if (chunk.usage && chunk.usage !== null) {
          prompt_tokens += chunk.usage.prompt_tokens;
          completion_tokens += chunk.usage.completion_tokens;
          total_tokens += chunk.usage.total_tokens;
        }
        providerId = chunk.id || 'null';
      }
    } else {
      const parsed = JSON.parse(data) as CompletionStateBody;
      prompt_tokens += parsed.usage.prompt_tokens;
      completion_tokens += parsed.usage.completion_tokens;
      total_tokens += parsed.usage.total_tokens;
      providerId = parsed.id || 'null';
    }

    const cost = getCostPerToken(
      this.getModel(),
      prompt_tokens,
      completion_tokens
    );

    const metadata: LlmTransactionMetadata = {
      providerId: providerId,
      provider: this.getType(),
      model: this.getModel(),
      inputTokens: prompt_tokens,
      outputTokens: completion_tokens,
      totalTokens: total_tokens,
    };

    return {
      rawTransactionCost: cost,
      metadata: metadata,
      status: 'success',
    };
  }

  private handleAudioResponse(
    data: string,
    requestBody: Record<string, unknown> | undefined,
    endpointType: 'transcription' | 'speech'
  ): Transaction {
    let cost = new Decimal(0);
    let metadata: LlmTransactionMetadata;
    const model = this.getModel();

    const modelPrice = getModelPrice(model);

    if (endpointType === 'transcription') {
      try {
        const transcriptionData = JSON.parse(data);
        const text = transcriptionData.text || '';

        if (modelPrice && isValidModel(model)) {
          // Rough token estimate: ~4 characters per token.
          const textTokens = Math.ceil(text.length / 4);
          cost = getCostPerToken(model, 0, textTokens);
        } else {
          cost = new Decimal(0.01);
        }

        metadata = {
          providerId: 'transcription',
          provider: this.getType(),
          model: model,
          inputTokens: 0,
          outputTokens: text.length,
          totalTokens: text.length,
        };
      } catch (error) {
        logger.error(`Error parsing transcription response: ${error}`);
        cost = modelPrice && isValidModel(model) ? new Decimal(0) : new Decimal(0.01);
        metadata = {
          providerId: 'transcription',
          provider: this.getType(),
          model: model,
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
        };
      }
    } else if (endpointType === 'speech') {
      const inputText = (requestBody?.input as string) || '';
      const characterCount = inputText.length;

      if (modelPrice && isValidModel(model)) {
        const inputTokens = Math.ceil(characterCount / 4);
        cost = getCostPerToken(model, inputTokens, 0);
      } else {
        // Fallback flat rate when the model has no pricing entry.
        const costPerCharacter = new Decimal(0.000015);
        cost = costPerCharacter.mul(characterCount);
      }

      metadata = {
        providerId: 'speech',
        provider: this.getType(),
        model: model,
        inputTokens: characterCount,
        outputTokens: 0,
        totalTokens: characterCount,
      };
    } else {
      cost = modelPrice && isValidModel(model) ? new Decimal(0) : new Decimal(0.01);
      metadata = {
        providerId: 'audio',
        provider: this.getType(),
        model: model,
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
      };
    }

    return {
      rawTransactionCost: cost,
      metadata: metadata,
      status: 'success',
    };
  }
}
```
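To make the fallback pricing heuristics above concrete, here is a small illustrative calculation (not part of the diff; the character counts are examples only):

```ts
// Transcription heuristic: ~4 characters per token, so a 1,000-character
// transcript is billed as ceil(1000 / 4) = 250 output tokens.
const transcriptTokens = Math.ceil(1000 / 4); // 250

// Speech fallback when the model has no pricing entry: $0.000015 per input
// character, so a 2,000-character input costs 2000 * 0.000015 = $0.03.
const speechFallbackCost = 2000 * 0.000015; // 0.03
```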
A second new file (27 additions, 0 deletions) adds createEchoVercelAIGateway:
```ts
import {
  createOpenAI as createOpenAIBase,
  OpenAIProvider,
} from '@ai-sdk/openai';
import { ROUTER_BASE_URL } from 'config';
import { EchoConfig } from '../types';
import { validateAppId } from '../utils/validation';
import { echoFetch } from './index';

export function createEchoVercelAIGateway(
  { appId, baseRouterUrl = ROUTER_BASE_URL }: EchoConfig,
  getTokenFn: (appId: string) => Promise<string | null>,
  onInsufficientFunds?: () => void
): OpenAIProvider {
  validateAppId(appId, 'createEchoVercelAIGateway');

  return createOpenAIBase({
    baseURL: baseRouterUrl,
    // The real credential is injected per-request by echoFetch.
    apiKey: 'placeholder_replaced_by_echoFetch',
    fetch: echoFetch(
      fetch,
      async () => await getTokenFn(appId),
      onInsufficientFunds
    ),
  });
}
```
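For orientation, a hedged usage sketch (not from the PR): it assumes EchoConfig only requires appId here, and the app id, token lookup, and model id are illustrative placeholders.

```ts
import { generateText } from 'ai';
// createEchoVercelAIGateway imported from wherever the SDK package exports it
// (the file path is not shown in this diff excerpt).

// Hypothetical wiring; replace the app id and token lookup with your own.
const gateway = createEchoVercelAIGateway(
  { appId: 'my-echo-app-id' },
  async () => process.env.ECHO_ACCESS_TOKEN ?? null, // however tokens are obtained
  () => console.warn('Insufficient funds')
);

// The returned OpenAIProvider plugs into the AI SDK like any other provider;
// the model id here assumes the gateway's "creator/model" naming.
const { text } = await generateText({
  model: gateway('openai/gpt-4o-mini'),
  prompt: 'Hello from the Vercel AI Gateway!',
});
console.log(text);
```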
Review comment

The handleChatCompletionResponse method will throw an UnknownModelError when processing responses for provider-prefixed models (e.g., "openai/gpt-4", "anthropic/claude-3"), because it looks up pricing for the full prefixed model name, which does not exist in the pricing database.

Analysis

VercelAIGatewayProvider throws UnknownModelError for provider-prefixed models.

What fails: VercelAIGatewayProvider.handleChatCompletionResponse() calls getCostPerToken(this.getModel(), ...) with provider-prefixed model names like "openai/gpt-4", but the pricing database only contains base model names like "gpt-4".

How to reproduce: route a chat completion through the Vercel AI Gateway provider with a prefixed model id such as "openai/gpt-4" and let handleBody process the response.

Result: getCostPerToken() calls isValidModel("openai/gpt-4"), which returns false, and it then throws UnknownModelError: Invalid model: openai/gpt-4.

Expected: the provider should extract the base model name ("gpt-4") for the pricing lookup; the Vercel AI Gateway docs confirm that "creator/model-name" is the correct identifier format.

Note: the same issue affects the handleAudioResponse() method for the transcription and speech endpoints.
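A minimal sketch of one possible fix, assuming getCostPerToken, getModelPrice, and isValidModel keep their current signatures; the helper name normalizeGatewayModel is hypothetical, not part of the PR:

```ts
// Hypothetical helper: strip the "creator/" prefix used by the Vercel AI
// Gateway, e.g. "openai/gpt-4" -> "gpt-4", before any pricing lookup.
function normalizeGatewayModel(model: string): string {
  const slashIndex = model.indexOf('/');
  return slashIndex === -1 ? model : model.slice(slashIndex + 1);
}

// Sketch of how handleChatCompletionResponse could use it (an assumption,
// not the author's implementation):
//   const baseModel = normalizeGatewayModel(this.getModel());
//   const cost = getCostPerToken(baseModel, prompt_tokens, completion_tokens);
// The same normalization would apply in handleAudioResponse before calling
// getModelPrice(model) and isValidModel(model).
```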