diff --git a/.github/workflows/update-model-costs.yml b/.github/workflows/update-model-costs.yml new file mode 100644 index 00000000..ee2271b2 --- /dev/null +++ b/.github/workflows/update-model-costs.yml @@ -0,0 +1,59 @@ +# This workflow updates the file in /model_cost_data/model_prices_and_context_window.json +name: Update model prices and context window JSON file + +on: + workflow_call: + schedule: + - cron: '0 2 * * 0' # Run every Sunday at 2:00 AM + +jobs: + update_model_prices: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: Get the latest file + run: | + curl -sSf 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json' > model_cost_data/model_prices_and_context_window.json + + - name: Check if file changed + id: check-model-prices + run: | + if ! git diff --quiet model_cost_data/model_prices_and_context_window.json ; then + echo "changed=true" >> "$GITHUB_OUTPUT" + else + echo "changed=false" >> "$GITHUB_OUTPUT" + fi + + - name: Set git config + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + + - name: Get current date + id: date + run: | + echo "CURRENT_DATETIME=$(date +%Y-%m-%d)" >> "$GITHUB_ENV" + + - name: Generate PR if needed + if: steps.check-model-prices.outputs.changed == 'true' + run: | + git checkout -b update-model-prices-$GITHUB_SHA + + git add model_cost_data/model_prices_and_context_window.json + git commit -m "Update model_prices_and_context_window.json to version generated on ${{ env.CURRENT_DATETIME }}" + + echo "Pushing branch so we can create a PR..."
+ git push --set-upstream origin update-model-prices-$GITHUB_SHA + + gh pr create --title "Update model_prices_and_context_window.json" \ + --body "This PR updates the model_prices_and_context_window.json definition to the version generated on ${{ env.CURRENT_DATETIME }}" \ + --repo "$GITHUB_REPOSITORY" \ + --base main \ + --head update-model-prices-$GITHUB_SHA + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/migrations/versions/2025_01_28_0915-0c3539f66339_add_token_usage_columns.py b/migrations/versions/2025_01_28_0915-0c3539f66339_add_token_usage_columns.py new file mode 100644 index 00000000..e6b43250 --- /dev/null +++ b/migrations/versions/2025_01_28_0915-0c3539f66339_add_token_usage_columns.py @@ -0,0 +1,47 @@ +"""add token usage columns + +Revision ID: 0c3539f66339 +Revises: 0f9b8edc8e46 +Create Date: 2025-01-28 09:15:54.767311+00:00 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "0c3539f66339" +down_revision: Union[str, None] = "0f9b8edc8e46" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Begin transaction + op.execute("BEGIN TRANSACTION;") + + # We add the columns to the outputs table + # Add the columns with default values to avoid issues with the existing data + # The prices of the tokens may change in the future, + # so we need to store the cost of the tokens at the time of the request + op.execute("ALTER TABLE outputs ADD COLUMN input_tokens INT DEFAULT NULL;") + op.execute("ALTER TABLE outputs ADD COLUMN output_tokens INT DEFAULT NULL;") + op.execute("ALTER TABLE outputs ADD COLUMN input_cost FLOAT DEFAULT NULL;") + op.execute("ALTER TABLE outputs ADD COLUMN output_cost FLOAT DEFAULT NULL;") + + # Finish transaction + op.execute("COMMIT;") + + +def downgrade() -> None: + # Begin transaction + op.execute("BEGIN TRANSACTION;") + + op.execute("ALTER TABLE outputs DROP 
COLUMN input_tokens;") + op.execute("ALTER TABLE outputs DROP COLUMN output_tokens;") + op.execute("ALTER TABLE outputs DROP COLUMN input_cost;") + op.execute("ALTER TABLE outputs DROP COLUMN output_cost;") + + # Finish transaction + op.execute("COMMIT;") diff --git a/model_cost_data/model_prices_and_context_window.json b/model_cost_data/model_prices_and_context_window.json new file mode 100644 index 00000000..e6842a41 --- /dev/null +++ b/model_cost_data/model_prices_and_context_window.json @@ -0,0 +1,8173 @@ +{ + "sample_spec": { + "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", + "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", + "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", + "input_cost_per_token": 0.0000, + "output_cost_per_token": 0.000, + "litellm_provider": "one of https://docs.litellm.ai/docs/providers", + "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true + }, + "omni-moderation-latest": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "openai", + "mode": "moderation" + }, + "omni-moderation-latest-intents": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "openai", + "mode": "moderation" + }, + "omni-moderation-2024-09-26": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + 
"input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "openai", + "mode": "moderation" + }, + "gpt-4": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-audio-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.0001, + "output_cost_per_token": 0.000010, + "output_cost_per_audio_token": 0.0002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-audio-preview-2024-12-17": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "output_cost_per_token": 0.000010, + "output_cost_per_audio_token": 0.00008, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + 
"supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-audio-preview-2024-10-01": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.0001, + "output_cost_per_token": 0.000010, + "output_cost_per_audio_token": 0.0002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-mini-audio-preview-2024-12-17": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "input_cost_per_audio_token": 0.00001, + "output_cost_per_token": 0.0000006, + "output_cost_per_audio_token": 0.00002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-mini": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-mini-2024-07-18": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + 
"cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "o1": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "o1-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true + }, + "o1-mini-2024-09-12": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true + }, + "o1-preview": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true + }, + "o1-preview-2024-09-12": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + 
"cache_read_input_token_cost": 0.0000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true + }, + "o1-2024-12-17": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "chatgpt-4o-latest": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-2024-05-13": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000025, + "output_cost_per_token_batches": 0.0000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-2024-08-06": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.0000050, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-2024-11-20": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.0000050, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4o-realtime-preview-2024-10-01": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.0001, + "cache_read_input_token_cost": 0.0000025, + "cache_creation_input_audio_token_cost": 0.00002, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.0002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-realtime-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-realtime-preview-2024-12-17": { + "max_tokens": 4096, + 
"max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-mini-realtime-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000006, + "input_cost_per_audio_token": 0.00001, + "cache_read_input_token_cost": 0.0000003, + "cache_creation_input_audio_token_cost": 0.0000003, + "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4o-mini-realtime-preview-2024-12-17": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000006, + "input_cost_per_audio_token": 0.00001, + "cache_read_input_token_cost": 0.0000003, + "cache_creation_input_audio_token_cost": 0.0000003, + "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_system_messages": true + }, + "gpt-4-turbo-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-0314": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-0613": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-32k": { + "max_tokens": 4096, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-32k-0314": { + "max_tokens": 4096, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-32k-0613": { + "max_tokens": 4096, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-turbo": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + 
"supports_system_messages": true + }, + "gpt-4-turbo-2024-04-09": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-1106-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-0125-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-vision-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-4-1106-vision-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo": { + "max_tokens": 4097, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + 
"input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-0301": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-0613": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-1106": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000010, + "output_cost_per_token": 0.0000020, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-0125": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-16k": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "gpt-3.5-turbo-16k-0613": { + 
"max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "openai", + "mode": "chat", + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "ft:gpt-3.5-turbo": { + "max_tokens": 4096, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, + "input_cost_per_token_batches": 0.0000015, + "output_cost_per_token_batches": 0.000003, + "litellm_provider": "openai", + "mode": "chat", + "supports_system_messages": true + }, + "ft:gpt-3.5-turbo-0125": { + "max_tokens": 4096, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, + "litellm_provider": "openai", + "mode": "chat", + "supports_system_messages": true + }, + "ft:gpt-3.5-turbo-1106": { + "max_tokens": 4096, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, + "litellm_provider": "openai", + "mode": "chat", + "supports_system_messages": true + }, + "ft:gpt-3.5-turbo-0613": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, + "litellm_provider": "openai", + "mode": "chat", + "supports_system_messages": true + }, + "ft:gpt-4-0613": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. 
Defaulting to base model pricing", + "supports_system_messages": true + }, + "ft:gpt-4o-2024-08-06": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000375, + "output_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.000001875, + "output_cost_per_token_batches": 0.000007500, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_system_messages": true + }, + "ft:gpt-4o-2024-11-20": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000375, + "cache_creation_input_token_cost": 0.000001875, + "output_cost_per_token": 0.000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "ft:gpt-4o-mini-2024-07-18": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000012, + "input_cost_per_token_batches": 0.000000150, + "output_cost_per_token_batches": 0.000000600, + "cache_read_input_token_cost": 0.00000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true + }, + "ft:davinci-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000001, + "litellm_provider": 
"text-completion-openai", + "mode": "completion" + }, + "ft:babbage-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "input_cost_per_token_batches": 0.0000002, + "output_cost_per_token_batches": 0.0000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "text-embedding-3-large": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "output_vector_size": 3072, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "input_cost_per_token_batches": 0.000000065, + "output_cost_per_token_batches": 0.000000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-3-small": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "output_vector_size": 1536, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "input_cost_per_token_batches": 0.000000010, + "output_cost_per_token_batches": 0.000000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-ada-002": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-ada-002-v2": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "input_cost_per_token_batches": 0.000000050, + "output_cost_per_token_batches": 0.000000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-moderation-stable": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, + "text-moderation-007": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + 
"input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, + "text-moderation-latest": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, + "256-x-256/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000024414, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "512-x-512/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000686, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "1024-x-1024/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.000000019, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "whisper-1": { + "mode": "audio_transcription", + "input_cost_per_second": 0, + 
"output_cost_per_second": 0.0001, + "litellm_provider": "openai" + }, + "tts-1": { + "mode": "audio_speech", + "input_cost_per_character": 0.000015, + "litellm_provider": "openai" + }, + "tts-1-hd": { + "mode": "audio_speech", + "input_cost_per_character": 0.000030, + "litellm_provider": "openai" + }, + "azure/tts-1": { + "mode": "audio_speech", + "input_cost_per_character": 0.000015, + "litellm_provider": "azure" + }, + "azure/tts-1-hd": { + "mode": "audio_speech", + "input_cost_per_character": 0.000030, + "litellm_provider": "azure" + }, + "azure/whisper-1": { + "mode": "audio_transcription", + "input_cost_per_second": 0, + "output_cost_per_second": 0.0001, + "litellm_provider": "azure" + }, + "azure/o1-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_prompt_caching": true + }, + "azure/o1-mini-2024-09-12": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_prompt_caching": true + }, + "azure/o1": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-preview": { + 
"max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_prompt_caching": true + }, + "azure/o1-preview-2024-09-12": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false, + "supports_prompt_caching": true + }, + "azure/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/gpt-4o-2024-08-06": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000275, + "output_cost_per_token": 0.000011, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/gpt-4o-2024-11-20": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000275, + "output_cost_per_token": 0.000011, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_response_schema": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-05-13": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/global-standard/gpt-4o-2024-08-06": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/global-standard/gpt-4o-2024-11-20": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "azure/global-standard/gpt-4o-mini": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "azure/gpt-4o-mini": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000000165, + "output_cost_per_token": 0.00000066, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "azure", + "mode": "chat", + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/gpt-4o-mini-2024-07-18": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000000165, + "output_cost_per_token": 0.00000066, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/gpt-4-turbo-2024-04-09": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "azure/gpt-4-0125-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-4-1106-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-4-0613": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/gpt-4-32k-0613": { + "max_tokens": 4096, + "max_input_tokens": 32768, + 
"max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k": { + "max_tokens": 4096, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/gpt-4-turbo": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-4-turbo-vision-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_vision": true + }, + "azure/gpt-35-turbo-16k-0613": { + "max_tokens": 4096, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/gpt-35-turbo-1106": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-35-turbo-0613": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 
0.000002, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-35-turbo-0301": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-35-turbo-0125": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-35-turbo-16k": { + "max_tokens": 4096, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo": { + "max_tokens": 4096, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/gpt-3.5-turbo-instruct-0914": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure_text", + "mode": "completion" + }, + "azure/gpt-35-turbo-instruct": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure_text", + "mode": "completion" + }, + "azure/gpt-35-turbo-instruct-0914": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure_text", + "mode": "completion" + }, + 
"azure/mistral-large-latest": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/mistral-large-2402": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/command-r-plus": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/ada": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, + "azure/text-embedding-ada-002": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, + "azure/text-embedding-3-large": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, + "azure/text-embedding-3-small": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, + "azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + 
"azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure_ai/jamba-instruct": { + "max_tokens": 4096, + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000007, + "litellm_provider": "azure_ai", + "mode": "chat" + }, + "azure_ai/mistral-large": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000004, + "output_cost_per_token": 0.000012, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true + }, + "azure_ai/mistral-small": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat" + }, + "azure_ai/mistral-large-2407": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": 
"https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview" + }, + "azure_ai/ministral-3b": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000004, + "output_cost_per_token": 0.00000004, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-11B-Vision-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000037, + "output_cost_per_token": 0.00000037, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview" + }, + "azure_ai/Llama-3.3-70B-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000071, + "output_cost_per_token": 0.00000071, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-90B-Vision-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000204, + "output_cost_per_token": 0.00000204, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview" + }, + "azure_ai/Meta-Llama-3-70B-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + 
"input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.00000037, + "litellm_provider": "azure_ai", + "mode": "chat" + }, + "azure_ai/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.00000061, + "litellm_provider": "azure_ai", + "mode": "chat", + "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice" + }, + "azure_ai/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000268, + "output_cost_per_token": 0.00000354, + "litellm_provider": "azure_ai", + "mode": "chat", + "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice" + }, + "azure_ai/Meta-Llama-3.1-405B-Instruct": { + "max_tokens": 2048, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000533, + "output_cost_per_token": 0.000016, + "litellm_provider": "azure_ai", + "mode": "chat", + "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice" + }, + "azure_ai/Phi-3.5-mini-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-vision-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": true, + "source": 
"https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-MoE-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000016, + "output_cost_per_token": 0.00000064, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-128k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-8k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-128k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + 
"output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-128k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + "output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/cohere-rerank-v3-multilingual": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, + "azure_ai/cohere-rerank-v3-english": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "rerank" + }, + "azure_ai/Cohere-embed-v3-english": { + "max_tokens": 512, + "max_input_tokens": 512, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "embedding", + "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice" + }, + "azure_ai/Cohere-embed-v3-multilingual": { + "max_tokens": 512, + "max_input_tokens": 512, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "mode": "embedding", + "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice" + }, + "babbage-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + 
"max_output_tokens": 4096, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "davinci-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "gpt-3.5-turbo-instruct": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "gpt-3.5-turbo-instruct-0914": { + "max_tokens": 4097, + "max_input_tokens": 8192, + "max_output_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + + }, + "claude-instant-1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000163, + "output_cost_per_token": 0.00000551, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "mistral/mistral-tiny": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000025, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-small": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "mistral", + "supports_function_calling": true, + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-small-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": 
"mistral", + "supports_function_calling": true, + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-medium": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-medium-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-medium-2312": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/mistral-large-latest": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/mistral-large-2411": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/mistral-large-2402": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000004, + "output_cost_per_token": 0.000012, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/mistral-large-2407": { + "max_tokens": 128000, + "max_input_tokens": 
128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/pixtral-large-latest": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_vision": true + }, + "mistral/pixtral-large-2411": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_vision": true + }, + "mistral/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_vision": true + }, + "mistral/open-mistral-7b": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000025, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/open-mixtral-8x7b": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000007, + "output_cost_per_token": 0.0000007, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/open-mixtral-8x22b": { + "max_tokens": 8191, + "max_input_tokens": 64000, + "max_output_tokens": 
8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "mistral/codestral-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/codestral-2405": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "mistral", + "mode": "chat", + "supports_assistant_prefill": true + }, + "mistral/open-mistral-nemo": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000003, + "litellm_provider": "mistral", + "mode": "chat", + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true + }, + "mistral/open-mistral-nemo-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000003, + "litellm_provider": "mistral", + "mode": "chat", + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true + }, + "mistral/open-codestral-mamba": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000025, + "litellm_provider": "mistral", + "mode": "chat", + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true + }, + "mistral/codestral-mamba-latest": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000025, + "litellm_provider": 
"mistral", + "mode": "chat", + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true + }, + "mistral/mistral-embed": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 0.0000001, + "litellm_provider": "mistral", + "mode": "embedding" + }, + "deepseek/deepseek-reasoner": { + "max_tokens": 8192, + "max_input_tokens": 64000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000055, + "input_cost_per_token_cache_hit": 0.00000014, + "output_cost_per_token": 0.00000219, + "litellm_provider": "deepseek", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_tool_choice": true, + "supports_prompt_caching": true + }, + "deepseek/deepseek-chat": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "input_cost_per_token_cache_hit": 0.000000014, + "cache_read_input_token_cost": 0.000000014, + "cache_creation_input_token_cost": 0.0, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_tool_choice": true, + "supports_prompt_caching": true + }, + "codestral/codestral-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "codestral", + "mode": "chat", + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true + }, + "codestral/codestral-2405": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "codestral", + "mode": "chat", + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true + }, + 
"text-completion-codestral/codestral-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "text-completion-codestral", + "mode": "completion", + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + "text-completion-codestral/codestral-2405": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "text-completion-codestral", + "mode": "completion", + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + "xai/grok-beta": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "deepseek/deepseek-coder": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "input_cost_per_token_cache_hit": 0.000000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_tool_choice": true, + "supports_prompt_caching": true + }, + "groq/llama-3.3-70b-versatile": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.3-70b-specdec": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000099, + "litellm_provider": "groq", + "mode": "chat" + }, + 
"groq/llama2-70b-4096": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000080, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama3-8b-8192": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000008, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.2-1b-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000004, + "output_cost_per_token": 0.00000004, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.2-3b-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000006, + "output_cost_per_token": 0.00000006, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.2-11b-text-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.2-11b-vision-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "groq/llama-3.2-90b-text-preview": { + "max_tokens": 8192, + "max_input_tokens": 
8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.2-90b-vision-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "groq/llama3-70b-8192": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.1-8b-instant": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000008, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.1-70b-versatile": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama-3.1-405b-reasoning": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/mixtral-8x7b-32768": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.00000024, + 
"output_cost_per_token": 0.00000024, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/gemma-7b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000007, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/gemma2-9b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000020, + "output_cost_per_token": 0.00000020, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama3-groq-70b-8192-tool-use-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000089, + "output_cost_per_token": 0.00000089, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "groq/llama3-groq-8b-8192-tool-use-preview": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000019, + "output_cost_per_token": 0.00000019, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "cerebras/llama3.1-8b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "cerebras", + "mode": "chat", + "supports_function_calling": true + }, + "cerebras/llama3.1-70b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000006, + "litellm_provider": "cerebras", + "mode": "chat", + "supports_function_calling": true + }, + 
"friendliai/meta-llama-3.1-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "friendliai/meta-llama-3.1-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000006, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "claude-instant-1.2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000000163, + "output_cost_per_token": 0.000000551, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-2.1": { + "max_tokens": 8191, + "max_input_tokens": 200000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-haiku-20240307": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "cache_creation_input_token_cost": 0.0000003, + "cache_read_input_token_cost": 0.00000003, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264, + "supports_assistant_prefill": true, 
+ "supports_prompt_caching": true, + "supports_response_schema": true + }, + "claude-3-5-haiku-20241022": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 0.0000001, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "tool_use_system_prompt_tokens": 264, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "claude-3-opus-20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 395, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "claude-3-sonnet-20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "claude-3-5-sonnet-20240620": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 0.0000003, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, 
+ "tool_use_system_prompt_tokens": 159, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "claude-3-5-sonnet-20241022": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 0.0000003, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_assistant_prefill": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "text-bison": { + "max_tokens": 2048, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + 
"input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-unicorn": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-unicorn@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison@001": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 
0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison@002": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" 
+ }, + "code-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison-32k@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko@001": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": 
"vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko@002": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-gecko-latest": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@latest": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": 
"chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison-32k": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-pro": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + 
"input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini-1.0-pro": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" + }, + "gemini-1.0-pro-001": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-ultra": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. 
There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-ultra-001": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro-002": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + 
"input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_vision": true, + "supports_pdf_input": true, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-002": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_vision": true, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro" + }, + "gemini-1.5-pro-001": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + 
"input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_vision": true, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-preview-0514": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": 
true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-preview-0215": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-preview-0409": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + 
"output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-flash": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000075, + "input_cost_per_character": 0.00000001875, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 0.00000025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-flash-exp-0827": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + 
"max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 0.00000025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-flash-002": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000075, + "input_cost_per_character": 0.00000001875, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 0.00000025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + 
"output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash" + }, + "gemini-1.5-flash-001": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000075, + "input_cost_per_character": 0.00000001875, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 0.00000025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-flash-preview-0514": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + 
"max_pdf_size_mb": 30, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000075, + "input_cost_per_character": 0.00000001875, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 0.00000025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-pro-experimental": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "input_cost_per_character": 0, + "output_cost_per_character": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": false, + "supports_tool_choice": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental" + }, + "gemini-flash-experimental": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "input_cost_per_character": 0, + "output_cost_per_character": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": false, + "supports_tool_choice": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental" + }, + 
"gemini-pro-vision": { + "max_tokens": 2048, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro-vision": { + "max_tokens": 2048, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro-vision-001": { + "max_tokens": 2048, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "medlm-medium": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_character": 0.0000005, + "output_cost_per_character": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "medlm-large": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 
0.000005, + "output_cost_per_character": 0.000015, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-2.0-flash-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, + "gemini-2.0-flash-thinking-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + 
"input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, + "gemini/gemini-2.0-flash-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "tpm": 4000000, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, + 
"gemini/gemini-2.0-flash-thinking-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "tpm": 4000000, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, + "vertex_ai/claude-3-sonnet": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-sonnet@20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + 
"vertex_ai/claude-3-5-sonnet": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-5-sonnet@20240620": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-5-sonnet-v2": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-5-sonnet-v2@20241022": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-haiku": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-haiku@20240307": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 
0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-5-haiku": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-5-haiku@20241022": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-opus": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/claude-3-opus@20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, + "vertex_ai/meta/llama3-405b-instruct-maas": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" + }, + 
"vertex_ai/meta/llama3-70b-instruct-maas": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" + }, + "vertex_ai/meta/llama3-8b-instruct-maas": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" + }, + "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "mode": "chat", + "supports_system_messages": true, + "supports_vision": true, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas" + }, + "vertex_ai/mistral-large@latest": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/mistral-large@2411-001": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/mistral-large-2411": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "vertex_ai-mistral_models", + 
"mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/mistral-large@2407": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/mistral-nemo@latest": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/jamba-1.5-mini@001": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "vertex_ai-ai21_models", + "mode": "chat" + }, + "vertex_ai/jamba-1.5-large@001": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "litellm_provider": "vertex_ai-ai21_models", + "mode": "chat" + }, + "vertex_ai/jamba-1.5": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "vertex_ai-ai21_models", + "mode": "chat" + }, + "vertex_ai/jamba-1.5-mini": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "vertex_ai-ai21_models", + "mode": "chat" + }, + "vertex_ai/jamba-1.5-large": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "litellm_provider": "vertex_ai-ai21_models", + "mode": "chat" + }, + 
"vertex_ai/mistral-nemo@2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/codestral@latest": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/codestral@2405": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, + "vertex_ai/imagegeneration@006": { + "output_cost_per_image": 0.020, + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-generate-001": { + "output_cost_per_image": 0.04, + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-fast-generate-001": { + "output_cost_per_image": 0.02, + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "text-embedding-004": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-005": { + 
"max_tokens": 2048, + "max_input_tokens": 2048, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-multilingual-embedding-002": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "textembedding-gecko": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@001": { + "max_tokens": 
3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@003": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_character": 0.000000025, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-embedding-preview-0409": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "input_cost_per_token_batch_requests": 0.000000005, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "text-multilingual-embedding-preview-0409":{ + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/chat-bison": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/chat-bison-001": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000125, + 
"output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-off": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-recitation-off": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini/gemini-1.5-flash-002": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "cache_read_input_token_cost": 0.00000001875, + "cache_creation_input_token_cost": 0.000001, + "input_cost_per_token": 
0.000000075, + "input_cost_per_token_above_128k_tokens": 0.00000015, + "output_cost_per_token": 0.0000003, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-001": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "cache_read_input_token_cost": 0.00000001875, + "cache_creation_input_token_cost": 0.000001, + "input_cost_per_token": 0.000000075, + "input_cost_per_token_above_128k_tokens": 0.00000015, + "output_cost_per_token": 0.0000003, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0.000000075, + "input_cost_per_token_above_128k_tokens": 0.00000015, + "output_cost_per_token": 0.0000003, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + 
"supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-latest": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0.000000075, + "input_cost_per_token_above_128k_tokens": 0.00000015, + "output_cost_per_token": 0.0000003, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-8b": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 4000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-8b-exp-0924": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + 
"input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 4000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-exp-1114": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro." 
+ } + }, + "gemini/gemini-exp-1206": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1206. Assuming same as gemini-1.5-pro." + } + }, + "gemini/gemini-1.5-flash-exp-0827": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-8b-exp-0827": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + 
"output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-pro": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000035, + "input_cost_per_token_above_128k_tokens": 0.0000007, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "rpd": 30000, + "tpm": 120000, + "rpm": 360, + "source": "https://ai.google.dev/gemini-api/docs/models/gemini" + }, + "gemini/gemini-1.5-pro": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-pro-002": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, + 
"source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-pro-001": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-pro-exp-0801": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-pro-exp-0827": { + "max_tokens": 8192, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-pro-latest": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + 
"input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-pro-vision": { + "max_tokens": 2048, + "max_input_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000035, + "input_cost_per_token_above_128k_tokens": 0.0000007, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "rpd": 30000, + "tpm": 120000, + "rpm": 360, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini/gemini-gemma-2-27b-it": { + "max_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000105, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini/gemini-gemma-2-9b-it": { + "max_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000105, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "command-r": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + 
"litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true + }, + "command-r-08-2024": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true + }, + "command-r7b-12-2024": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000000375, + "litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true, + "source": "https://docs.cohere.com/v2/docs/command-r7b" + }, + "command-light": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "cohere_chat", + "mode": "chat" + }, + "command-r-plus": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true + }, + "command-r-plus-08-2024": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true + }, + "command-nightly": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, + "litellm_provider": "cohere", + "mode": "completion" + }, + "command": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, + "litellm_provider": "cohere", + "mode": "completion" + }, + "rerank-v3.5": { + "max_tokens": 
4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-english-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v3.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-english-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "rerank-multilingual-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "input_cost_per_token": 0.0, + "input_cost_per_query": 0.002, + "output_cost_per_token": 0.0, + "litellm_provider": "cohere", + "mode": "rerank" + }, + "embed-english-light-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding" + }, + "embed-multilingual-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding" + }, + "embed-english-v2.0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 0.00000, + "litellm_provider": 
"cohere", + "mode": "embedding" + }, + "embed-english-light-v2.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding" + }, + "embed-multilingual-v2.0": { + "max_tokens": 768, + "max_input_tokens": 768, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding" + }, + "embed-english-v3.0": { + "max_tokens": 1024, + "max_input_tokens": 1024, + "input_cost_per_token": 0.00000010, + "input_cost_per_image": 0.0001, + "output_cost_per_token": 0.00000, + "litellm_provider": "cohere", + "mode": "embedding", + "supports_image_input": true, + "supports_embedding_image_input": true, + "metadata": { + "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." + } + }, + "replicate/meta/llama-2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000005, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-2-13b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000005, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000065, + "output_cost_per_token": 0.00000275, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000065, + "output_cost_per_token": 0.00000275, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + 
"max_output_tokens": 4096, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-2-7b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-3-70b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000065, + "output_cost_per_token": 0.00000275, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000065, + "output_cost_per_token": 0.00000275, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-3-8b": { + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/meta/llama-3-8b-instruct": { + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/mistralai/mistral-7b-v0.1": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/mistralai/mistral-7b-instruct-v0.2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000005, + "output_cost_per_token": 0.00000025, + "litellm_provider": "replicate", + "mode": "chat" + }, + "replicate/mistralai/mixtral-8x7b-instruct-v0.1": { + 
"max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.000001, + "litellm_provider": "replicate", + "mode": "chat" + }, + "openrouter/deepseek/deepseek-chat": { + "max_tokens": 8192, + "max_input_tokens": 66000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "openrouter", + "supports_prompt_caching": true, + "mode": "chat" + }, + "openrouter/deepseek/deepseek-coder": { + "max_tokens": 8192, + "max_input_tokens": 66000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "openrouter", + "supports_prompt_caching": true, + "mode": "chat" + }, + "openrouter/microsoft/wizardlm-2-8x22b:nitro": { + "max_tokens": 65536, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/google/gemini-pro-1.5": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.0000075, + "input_cost_per_image": 0.00265, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "openrouter/mistralai/mixtral-8x22b-instruct": { + "max_tokens": 65536, + "input_cost_per_token": 0.00000065, + "output_cost_per_token": 0.00000065, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/cohere/command-r-plus": { + "max_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/databricks/dbrx-instruct": { + "max_tokens": 32768, + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000006, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/anthropic/claude-3-haiku": { + 
"max_tokens": 200000, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "input_cost_per_image": 0.0004, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "openrouter/anthropic/claude-3-5-haiku": { + "max_tokens": 200000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true + }, + "openrouter/anthropic/claude-3-haiku-20240307": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264 + }, + "openrouter/anthropic/claude-3-5-haiku-20241022": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "tool_use_system_prompt_tokens": 264 + }, + "openrouter/anthropic/claude-3.5-sonnet": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_assistant_prefill": true + }, + "openrouter/anthropic/claude-3.5-sonnet:beta": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + 
"openrouter/anthropic/claude-3-sonnet": { + "max_tokens": 200000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "input_cost_per_image": 0.0048, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "openrouter/mistralai/mistral-large": { + "max_tokens": 32000, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { + "max_tokens": 32769, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/google/gemini-pro-vision": { + "max_tokens": 45875, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000375, + "input_cost_per_image": 0.0025, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "openrouter/fireworks/firellava-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-3-8b-instruct:free": { + "max_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-3-8b-instruct:extended": { + "max_tokens": 16384, + "input_cost_per_token": 0.000000225, + "output_cost_per_token": 0.00000225, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-3-70b-instruct:nitro": { + "max_tokens": 8192, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-3-70b-instruct": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + 
"litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/openai/o1": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "openrouter/openai/o1-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "openrouter/openai/o1-mini-2024-09-12": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "openrouter/openai/o1-preview": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "openrouter/openai/o1-preview-2024-09-12": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_vision": false + }, + "openrouter/openai/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o-2024-05-13": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4-vision-preview": { + "max_tokens": 130000, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "input_cost_per_image": 0.01445, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "openrouter/openai/gpt-3.5-turbo": { + "max_tokens": 4095, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/openai/gpt-3.5-turbo-16k": { + "max_tokens": 16383, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/openai/gpt-4": { + "max_tokens": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/anthropic/claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000163, + "output_cost_per_token": 0.00000551, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/anthropic/claude-2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00001102, + 
"output_cost_per_token": 0.00003268, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/anthropic/claude-3-opus": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 395 + }, + "openrouter/google/palm-2-chat-bison": { + "max_tokens": 25804, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/google/palm-2-codechat-bison": { + "max_tokens": 20070, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-2-13b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/llama-2-70b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/meta-llama/codellama-34b-instruct": { + "max_tokens": 8192, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/nousresearch/nous-hermes-llama2-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/mancer/weaver": { + "max_tokens": 8000, + "input_cost_per_token": 0.000005625, + "output_cost_per_token": 0.000005625, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/gryphe/mythomax-l2-13b": { + "max_tokens": 8192, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 
0.000001875, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/jondurbin/airoboros-l2-70b-2.1": { + "max_tokens": 4096, + "input_cost_per_token": 0.000013875, + "output_cost_per_token": 0.000013875, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/undi95/remm-slerp-l2-13b": { + "max_tokens": 6144, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/pygmalionai/mythalion-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/mistralai/mistral-7b-instruct": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/mistralai/mistral-7b-instruct:free": { + "max_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/qwen/qwen-2.5-coder-32b-instruct": { + "max_tokens": 33792, + "max_input_tokens": 33792, + "max_output_tokens": 33792, + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "j2-ultra": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000015, + "litellm_provider": "ai21", + "mode": "completion" + }, + "jamba-1.5-mini@001": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "ai21", + "mode": "chat" + }, + "jamba-1.5-large@001": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 
0.000008, + "litellm_provider": "ai21", + "mode": "chat" + }, + "jamba-1.5": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "ai21", + "mode": "chat" + }, + "jamba-1.5-mini": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000004, + "litellm_provider": "ai21", + "mode": "chat" + }, + "jamba-1.5-large": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "litellm_provider": "ai21", + "mode": "chat" + }, + "j2-mid": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00001, + "litellm_provider": "ai21", + "mode": "completion" + }, + "j2-light": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "ai21", + "mode": "completion" + }, + "dolphin": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "nlp_cloud", + "mode": "completion" + }, + "chatdolphin": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "nlp_cloud", + "mode": "chat" + }, + "luminous-base": { + "max_tokens": 2048, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.000033, + "litellm_provider": "aleph_alpha", + "mode": "completion" + }, + "luminous-base-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.0000375, + "output_cost_per_token": 0.00004125, + 
"litellm_provider": "aleph_alpha", + "mode": "chat" + }, + "luminous-extended": { + "max_tokens": 2048, + "input_cost_per_token": 0.000045, + "output_cost_per_token": 0.0000495, + "litellm_provider": "aleph_alpha", + "mode": "completion" + }, + "luminous-extended-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.00005625, + "output_cost_per_token": 0.000061875, + "litellm_provider": "aleph_alpha", + "mode": "chat" + }, + "luminous-supreme": { + "max_tokens": 2048, + "input_cost_per_token": 0.000175, + "output_cost_per_token": 0.0001925, + "litellm_provider": "aleph_alpha", + "mode": "completion" + }, + "luminous-supreme-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.00021875, + "output_cost_per_token": 0.000240625, + "litellm_provider": "aleph_alpha", + "mode": "chat" + }, + "ai21.j2-mid-v1": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000125, + "output_cost_per_token": 0.0000125, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "ai21.j2-ultra-v1": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000188, + "output_cost_per_token": 0.0000188, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "ai21.jamba-instruct-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_system_messages": true + }, + "amazon.titan-text-lite-v1": { + "max_tokens": 4000, + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000004, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "amazon.titan-text-express-v1": { + "max_tokens": 8000, + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "input_cost_per_token": 0.0000013, + "output_cost_per_token": 0.0000017, + 
"litellm_provider": "bedrock", + "mode": "chat" + }, + "amazon.titan-text-premier-v1:0": { + "max_tokens": 32000, + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "amazon.titan-embed-text-v1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "amazon.titan-embed-text-v2:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "amazon.titan-embed-image-v1": { + "max_tokens": 128, + "max_input_tokens": 128, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000008, + "input_cost_per_image": 0.00006, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "supports_image_input": true, + "supports_embedding_image_input": true, + "mode": "embedding", + "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", + "metadata": { + "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
+ } + }, + "mistral.mistral-7b-instruct-v0:2": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000002, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "mistral.mixtral-8x7b-instruct-v0:1": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000045, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "mistral.mistral-large-2402-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "mistral.mistral-large-2407-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "mistral.mistral-small-2402-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000045, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000045, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { + "max_tokens": 8191, + 
"max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000091, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000002, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000002, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.00000026, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000104, + "output_cost_per_token": 0.0000312, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true + }, + "amazon.nova-micro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + 
"max_output_tokens": 4096, + "input_cost_per_token": 0.000000035, + "output_cost_per_token": 0.00000014, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true + }, + "us.amazon.nova-micro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000035, + "output_cost_per_token": 0.00000014, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true + }, + "amazon.nova-lite-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000006, + "output_cost_per_token": 0.00000024, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true + }, + "us.amazon.nova-lite-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000006, + "output_cost_per_token": 0.00000024, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true + }, + "amazon.nova-pro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000032, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true + }, + "us.amazon.nova-pro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000032, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + 
"supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true + }, + "anthropic.claude-3-sonnet-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "anthropic.claude-3-5-sonnet-20240620-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "anthropic.claude-3-5-sonnet-20241022-v2:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "anthropic.claude-3-haiku-20240307-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "anthropic.claude-3-5-haiku-20241022-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true + }, + "anthropic.claude-3-opus-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + 
"input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "us.anthropic.claude-3-sonnet-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "us.anthropic.claude-3-haiku-20240307-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-haiku-20241022-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true + }, + 
"us.anthropic.claude-3-opus-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-sonnet-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "eu.anthropic.claude-3-haiku-20240307-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "bedrock", + "mode": "chat", + 
"supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "eu.anthropic.claude-3-opus-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": 
"bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 
100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + 
"mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 
8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + 
"litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000163, + "output_cost_per_token": 0.00000551, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + 
"bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000223, + "output_cost_per_token": 0.00000755, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01475, + "output_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.008194, + "output_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000248, + "output_cost_per_token": 0.00000838, + "litellm_provider": "bedrock", + "mode": "chat" + }, + 
"bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01635, + "output_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.009083, + "output_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.0000020, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_second": 0.0066027, + "output_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-light-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_second": 0.001902, + "output_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "max_input_tokens": 4096, + 
"max_output_tokens": 4096, + "input_cost_per_second": 0.0011416, + "output_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-r-plus-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000030, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-r-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.embed-english-v3": { + "max_tokens": 512, + "max_input_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "cohere.embed-multilingual-v3": { + "max_tokens": 512, + "max_input_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "meta.llama3-3-70b-instruct-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000072, + "output_cost_per_token": 0.00000072, + "litellm_provider": "bedrock_converse", + "mode": "chat" + }, + "meta.llama2-13b-chat-v1": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000075, + "output_cost_per_token": 0.000001, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "meta.llama2-70b-chat-v1": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000195, + "output_cost_per_token": 0.00000256, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 
0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000036, + "output_cost_per_token": 0.00000072, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000069, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000032, + "output_cost_per_token": 0.00000065, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000039, + "output_cost_per_token": 0.00000078, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.00000101, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + 
"max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000318, + "output_cost_per_token": 0.0000042, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000305, + "output_cost_per_token": 0.00000403, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000286, + "output_cost_per_token": 0.00000378, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000345, + "output_cost_per_token": 0.00000455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000445, + "output_cost_per_token": 0.00000588, + 
"litellm_provider": "bedrock", + "mode": "chat" + }, + "meta.llama3-1-8b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-8b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-70b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000099, + "output_cost_per_token": 0.00000099, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-70b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000099, + "output_cost_per_token": 0.00000099, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-405b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-405b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + 
}, + "meta.llama3-2-1b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-1b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.meta.llama3-2-1b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-3b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-3b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.meta.llama3-2-3b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000019, + "output_cost_per_token": 0.00000019, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-11b-instruct-v1:0": { + 
"max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000035, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-11b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000035, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-90b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-90b-instruct-v1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.018, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + 
"output_cost_per_image": 0.072, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.sd3-large-v1:0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.sd3-5-large-v1:0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.stable-image-core-v1:0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.stable-image-core-v1:1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.stable-image-ultra-v1:0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.14, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "stability.stable-image-ultra-v1:1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.14, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "sagemaker/meta-textgeneration-llama-2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "completion" + }, + "sagemaker/meta-textgeneration-llama-2-7b-f": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 
4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "chat" + }, + "sagemaker/meta-textgeneration-llama-2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "completion" + }, + "sagemaker/meta-textgeneration-llama-2-13b-f": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "chat" + }, + "sagemaker/meta-textgeneration-llama-2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "completion" + }, + "sagemaker/meta-textgeneration-llama-2-70b-b-f": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000, + "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", + "mode": "chat" + }, + "together-ai-up-to-4b": { + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "together_ai", + "mode": "chat" + }, + "together-ai-4.1b-8b": { + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "together_ai", + "mode": "chat" + }, + "together-ai-8.1b-21b": { + "max_tokens": 1000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000003, + "litellm_provider": "together_ai", + "mode": "chat" + }, + "together-ai-21.1b-41b": { + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000008, + "litellm_provider": "together_ai", + "mode": "chat" + }, + "together-ai-41.1b-80b": { + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "together_ai", + "mode": "chat" + }, + 
"together-ai-81.1b-110b": { + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.0000018, + "litellm_provider": "together_ai", + "mode": "chat" + }, + "together-ai-embedding-up-to-150m": { + "input_cost_per_token": 0.000000008, + "output_cost_per_token": 0.0, + "litellm_provider": "together_ai", + "mode": "embedding" + }, + "together-ai-embedding-151m-to-350m": { + "input_cost_per_token": 0.000000016, + "output_cost_per_token": 0.0, + "litellm_provider": "together_ai", + "mode": "embedding" + }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "mode": "chat" + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_response_schema": true, + "mode": "chat" + }, + "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000006, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "mode": "chat" + }, + "together_ai/mistralai/Mistral-7B-Instruct-v0.1": { + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "mode": "chat" + }, + "together_ai/togethercomputer/CodeLlama-34b-Instruct": { + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "ollama/codegemma": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "ollama/codegeex4": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": false + }, + "ollama/deepseek-coder-v2-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-base": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion", + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + 
"output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-base": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion", + "supports_function_calling": true + }, + "ollama/internlm2_5-20b-chat": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, + "ollama/llama2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama2:7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama2:13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama2:70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama2-uncensored": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "ollama/llama3": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + 
}, + "ollama/llama3:8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama3:70b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama3.1": { + "max_tokens": 32768, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat", + "supports_function_calling": true + }, + "ollama/mistral-large-instruct-2407": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mistral": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "ollama/mistral-7B-Instruct-v0.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mistral-7B-Instruct-v0.2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 
65536, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/codellama": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "ollama/orca-mini": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "ollama/vicuna": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "completion" + }, + "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-70b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, + 
"input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/deepinfra/mixtral": { + "max_tokens": 4096, + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/Phind/Phind-CodeLlama-34B-v2": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/deepinfra/airoboros-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/01-ai/Yi-34B-Chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/01-ai/Yi-6B-200K": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + 
"max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/amazon/MistralLite": { + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000020, + "output_cost_per_token": 0.00000020, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000008, + "output_cost_per_token": 0.00000008, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000059, + "output_cost_per_token": 0.00000079, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "deepinfra/01-ai/Yi-34B-200K": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + 
"input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/openchat/openchat_3.5": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "perplexity/codellama-34b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000140, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/codellama-70b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-8b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-huge-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-large-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + 
"perplexity/llama-3.1-sonar-large-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-online": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000000, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-70b-online": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000000, + "output_cost_per_token": 0.00000280, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 
0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/mistral-7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/mixtral-8x7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-small-chat": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-small-online": { + "max_tokens": 12000, + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "input_cost_per_token": 0, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-medium-chat": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000018, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-medium-online": { + "max_tokens": 12000, + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "input_cost_per_token": 0, + "output_cost_per_token": 0.0000018, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + 
"supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/firefunction-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": 
"fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/yi-large": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, + 
"litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_response_schema": true, + "source": "https://fireworks.ai/pricing" + }, + + "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 0.000000008, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models", + "mode": "embedding", + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 0.000000008, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models", + "mode": "embedding", + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/WhereIsAI/UAE-Large-V1": { + "max_tokens": 512, + "max_input_tokens": 512, + "input_cost_per_token": 0.000000016, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models", + "mode": "embedding", + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-large": { + "max_tokens": 512, + "max_input_tokens": 512, + "input_cost_per_token": 0.000000016, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models", + "mode": "embedding", + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-base": { + "max_tokens": 512, + "max_input_tokens": 512, + "input_cost_per_token": 0.000000008, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models", + "mode": "embedding", + "source": "https://fireworks.ai/pricing" 
+ }, + "fireworks-ai-up-to-16b": { + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "fireworks_ai" + }, + "fireworks-ai-16.1b-to-80b": { + "input_cost_per_token": 0.0000009, + "output_cost_per_token": 0.0000009, + "litellm_provider": "fireworks_ai" + }, + "fireworks-ai-moe-up-to-56b": { + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "fireworks_ai" + }, + "fireworks-ai-56b-to-176b": { + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, + "litellm_provider": "fireworks_ai" + }, + "fireworks-ai-default": { + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai" + }, + "fireworks-ai-embedding-up-to-150m": { + "input_cost_per_token": 0.000000008, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models" + }, + "fireworks-ai-embedding-150m-to-350m": { + "input_cost_per_token": 0.000000016, + "output_cost_per_token": 0.000000, + "litellm_provider": "fireworks_ai-embedding-models" + }, + "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat", + "supports_function_calling": true, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mistral-7B-Instruct-v0.1" + }, + "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat", + "supports_function_calling": true, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x7B-Instruct-v0.1" + }, + 
"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 0.00000090, + "output_cost_per_token": 0.00000090, + "litellm_provider": "anyscale", + "mode": "chat", + "supports_function_calling": true, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x22B-Instruct-v0.1" + }, + "anyscale/HuggingFaceH4/zephyr-7b-beta": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat" + }, + "anyscale/google/gemma-7b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat", + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it" + }, + "anyscale/meta-llama/Llama-2-7b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat" + }, + "anyscale/meta-llama/Llama-2-13b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000025, + "litellm_provider": "anyscale", + "mode": "chat" + }, + "anyscale/meta-llama/Llama-2-70b-chat-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "anyscale", + "mode": "chat" + }, + "anyscale/codellama/CodeLlama-34b-Instruct-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + 
"output_cost_per_token": 0.000001, + "litellm_provider": "anyscale", + "mode": "chat" + }, + "anyscale/codellama/CodeLlama-70b-Instruct-hf": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "anyscale", + "mode": "chat", + "source" : "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" + }, + "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat", + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct" + }, + "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000100, + "output_cost_per_token": 0.00000100, + "litellm_provider": "anyscale", + "mode": "chat", + "source" : "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "max_output_tokens": 3072, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 
0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "voyage/voyage-01": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-lite-01": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-large-2": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "input_cost_per_token": 0.00000012, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-finance-2": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000012, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-lite-02-instruct": { + "max_tokens": 4000, + "max_input_tokens": 4000, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-law-2": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "input_cost_per_token": 0.00000012, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-code-2": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "input_cost_per_token": 0.00000012, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-2": { + "max_tokens": 4000, + "max_input_tokens": 4000, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + 
"litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-3-large": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-3": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000006, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-3-lite": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-code-3": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-multimodal-3": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.00000012, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/rerank-2": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_query_tokens": 16000, + "input_cost_per_token": 0.00000005, + "input_cost_per_query": 0.00000005, + "output_cost_per_token": 0.0, + "litellm_provider": "voyage", + "mode": "rerank" + }, + "voyage/rerank-2-lite": { + "max_tokens": 8000, + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "max_query_tokens": 8000, + "input_cost_per_token": 0.00000002, + "input_cost_per_query": 0.00000002, + "output_cost_per_token": 0.0, + "litellm_provider": "voyage", + "mode": "rerank" + }, + "databricks/databricks-meta-llama-3-1-405b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000005, + "input_dbu_cost_per_token": 0.000071429, + "output_cost_per_token": 0.00001500002, + 
"output_db_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-meta-llama-3-1-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/meta-llama-3.3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-dbrx-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.00000074998, + "input_dbu_cost_per_token": 0.000010714, + "output_cost_per_token": 0.00000224901, + "output_dbu_cost_per_token": 0.000032143, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-meta-llama-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000050001, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 0.0000015, + "output_dbu_cost_per_token": 0.000021429, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-mixtral-8x7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000050001, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 0.00000099902, + "output_dbu_cost_per_token": 0.000014286, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-mpt-30b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000099902, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000099902, + "output_dbu_cost_per_token": 0.000014286, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-mpt-7b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000050001, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-bge-large-en": { + "max_tokens": 512, + "max_input_tokens": 512, + "output_vector_size": 1024, + "input_cost_per_token": 0.00000010003, + "input_dbu_cost_per_token": 0.000001429, + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "litellm_provider": "databricks", + "mode": "embedding", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "databricks/databricks-gte-large-en": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "output_vector_size": 1024, + "input_cost_per_token": 0.00000012999, + "input_dbu_cost_per_token": 0.000001857, + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "litellm_provider": "databricks", + "mode": "embedding", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} + }, + "sambanova/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000002, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000012, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Meta-Llama-3.1-405B-Instruct": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000010, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Meta-Llama-3.2-1B-Instruct": { + "max_tokens": 16000, + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000008, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Meta-Llama-3.2-3B-Instruct": { + "max_tokens": 4000, + "max_input_tokens": 4000, + "max_output_tokens": 4000, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000016, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Qwen2.5-Coder-32B-Instruct": { + "max_tokens": 8000, + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000003, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + }, + "sambanova/Qwen2.5-72B-Instruct": { + "max_tokens": 8000, + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "input_cost_per_token": 
0.000002, + "output_cost_per_token": 0.000004, + "litellm_provider": "sambanova", + "supports_function_calling": true, + "mode": "chat" + } +} diff --git a/src/codegate/api/v1.py b/src/codegate/api/v1.py index d4695ffd..0e5e601e 100644 --- a/src/codegate/api/v1.py +++ b/src/codegate/api/v1.py @@ -531,18 +531,21 @@ def version_check(): tags=["Workspaces", "Token Usage"], generate_unique_id_function=uniq_name, ) -async def get_workspace_token_usage(workspace_name: str) -> v1_models.TokenUsage: +async def get_workspace_token_usage(workspace_name: str) -> v1_models.TokenUsageAggregate: """Get the token usage of a workspace.""" - # TODO: This is a dummy implementation. In the future, we should have a proper - # implementation that fetches the token usage from the database. - return v1_models.TokenUsage( - used_tokens=50, - tokens_by_model=[ - v1_models.TokenUsageByModel( - provider_type="openai", model="gpt-4o-mini", used_tokens=20 - ), - v1_models.TokenUsageByModel( - provider_type="anthropic", model="claude-3-5-sonnet-20241022", used_tokens=30 - ), - ], - ) + + try: + ws = await wscrud.get_workspace_by_name(workspace_name) + except crud.WorkspaceDoesNotExistError: + raise HTTPException(status_code=404, detail="Workspace does not exist") + except Exception: + logger.exception("Error while getting workspace") + raise HTTPException(status_code=500, detail="Internal server error") + + try: + prompts_outputs = await dbreader.get_prompts_with_output(ws.id) + ws_token_usage = await v1_processing.parse_workspace_token_usage(prompts_outputs) + return ws_token_usage + except Exception: + logger.exception("Error while getting messages") + raise HTTPException(status_code=500, detail="Internal server error") diff --git a/src/codegate/api/v1_models.py b/src/codegate/api/v1_models.py index 3f1f37a6..e68e21f1 100644 --- a/src/codegate/api/v1_models.py +++ b/src/codegate/api/v1_models.py @@ -1,6 +1,6 @@ import datetime from enum import Enum -from typing import Any, List, Optional, 
Union +from typing import Any, Dict, List, Optional, Union import pydantic @@ -107,15 +107,6 @@ class PartialQuestions(pydantic.BaseModel): type: QuestionType -class PartialQuestionAnswer(pydantic.BaseModel): - """ - Represents a partial conversation. - """ - - partial_questions: PartialQuestions - answer: Optional[ChatMessage] - - class ProviderType(str, Enum): """ Represents the different types of providers we support. @@ -126,6 +117,7 @@ class ProviderType(str, Enum): vllm = "vllm" ollama = "ollama" lm_studio = "lm_studio" + llamacpp = "llamacpp" class TokenUsageByModel(pydantic.BaseModel): @@ -135,17 +127,47 @@ class TokenUsageByModel(pydantic.BaseModel): provider_type: ProviderType model: str - used_tokens: int + token_usage: db_models.TokenUsage -class TokenUsage(pydantic.BaseModel): +class TokenUsageAggregate(pydantic.BaseModel): """ Represents the tokens used. Includes the information of the tokens used by model. `used_tokens` are the total tokens used in the `tokens_by_model` list. """ - tokens_by_model: List[TokenUsageByModel] - used_tokens: int + tokens_by_model: Dict[str, TokenUsageByModel] + token_usage: db_models.TokenUsage + + def add_model_token_usage(self, model_token_usage: TokenUsageByModel) -> None: + # Copilot doesn't have a model name and we cannot obtain the tokens used. Skip it. + if model_token_usage.model == "": + return + + # Skip if the model has not used any tokens. + if ( + model_token_usage.token_usage.input_tokens == 0 + and model_token_usage.token_usage.output_tokens == 0 + ): + return + + if model_token_usage.model in self.tokens_by_model: + self.tokens_by_model[ + model_token_usage.model + ].token_usage += model_token_usage.token_usage + else: + self.tokens_by_model[model_token_usage.model] = model_token_usage + self.token_usage += model_token_usage.token_usage + + +class PartialQuestionAnswer(pydantic.BaseModel): + """ + Represents a partial conversation. 
+ """ + + partial_questions: PartialQuestions + answer: Optional[ChatMessage] + model_token_usage: TokenUsageByModel class Conversation(pydantic.BaseModel): @@ -158,7 +180,7 @@ class Conversation(pydantic.BaseModel): type: QuestionType chat_id: str conversation_timestamp: datetime.datetime - token_usage: Optional[TokenUsage] + token_usage_agg: Optional[TokenUsageAggregate] class AlertConversation(pydantic.BaseModel): diff --git a/src/codegate/api/v1_processing.py b/src/codegate/api/v1_processing.py index f69e6a8f..e1c5aab9 100644 --- a/src/codegate/api/v1_processing.py +++ b/src/codegate/api/v1_processing.py @@ -14,9 +14,11 @@ PartialQuestionAnswer, PartialQuestions, QuestionAnswer, + TokenUsageAggregate, + TokenUsageByModel, ) from codegate.db.connection import alert_queue -from codegate.db.models import Alert, GetPromptWithOutputsRow +from codegate.db.models import Alert, GetPromptWithOutputsRow, TokenUsage logger = structlog.get_logger("codegate") @@ -57,16 +59,17 @@ async def _is_system_prompt(message: str) -> bool: return False -async def parse_request(request_str: str) -> Optional[str]: +async def parse_request(request_str: str) -> Tuple[Optional[List[str]], str]: """ - Parse the request string from the pipeline and return the message. + Parse the request string from the pipeline and return the message and the model. """ try: request = json.loads(request_str) except Exception as e: logger.warning(f"Error parsing request: {request_str}. 
{e}") - return None + return None, "" + model = request.get("model", "") messages = [] for message in request.get("messages", []): role = message.get("role") @@ -91,12 +94,12 @@ async def parse_request(request_str: str) -> Optional[str]: if message_prompt and not await _is_system_prompt(message_prompt): messages.append(message_prompt) - # If still we don't have anything, return empty string + # If still we don't have anything, return None string if not messages: - return None + return None, model - # Only respond with the latest message - return messages + # Respond with the messages and the model + return messages, model async def parse_output(output_str: str) -> Optional[str]: @@ -144,7 +147,9 @@ def _parse_single_output(single_output: dict) -> str: return full_output_message -async def _get_question_answer(row: GetPromptWithOutputsRow) -> Optional[PartialQuestionAnswer]: +async def _get_partial_question_answer( + row: GetPromptWithOutputsRow, +) -> Optional[PartialQuestionAnswer]: """ Parse a row from the get_prompt_with_outputs query and return a PartialConversation @@ -154,7 +159,7 @@ async def _get_question_answer(row: GetPromptWithOutputsRow) -> Optional[Partial request_task = tg.create_task(parse_request(row.request)) output_task = tg.create_task(parse_output(row.output)) - request_user_msgs = request_task.result() + request_user_msgs, model = request_task.result() output_msg_str = output_task.result() # If we couldn't parse the request, return None @@ -176,7 +181,28 @@ async def _get_question_answer(row: GetPromptWithOutputsRow) -> Optional[Partial ) else: output_message = None - return PartialQuestionAnswer(partial_questions=request_message, answer=output_message) + + token_usage = TokenUsage.from_db( + input_cost=row.input_cost, + input_tokens=row.input_tokens, + output_tokens=row.output_tokens, + output_cost=row.output_cost, + ) + # Use the model to update the token cost + provider = row.provider + # TODO: This should come from the database. 
For now, we are manually changing copilot to openai + # Change copilot provider to openai + if provider == "copilot": + provider = "openai" + model_token_usage = TokenUsageByModel( + model=model, token_usage=token_usage, provider_type=provider + ) + + return PartialQuestionAnswer( + partial_questions=request_message, + answer=output_message, + model_token_usage=model_token_usage, + ) def parse_question_answer(input_text: str) -> str: @@ -285,7 +311,8 @@ def _get_question_answer_from_partial( partial_question_answer: PartialQuestionAnswer, ) -> QuestionAnswer: """ - Get a QuestionAnswer object from a PartialQuestionAnswer object. + Get a QuestionAnswer object from a PartialQuestionAnswer object. PartialQuestionAnswer + contains a list of messages as question. QuestionAnswer contains a single message as question. """ # Get the last user message as the question question = ChatMessage( @@ -303,11 +330,8 @@ async def match_conversations( """ Match partial conversations to form a complete conversation. 
""" - valid_partial_qas = [ - partial_qas for partial_qas in partial_question_answers if partial_qas is not None - ] grouped_partial_questions = _group_partial_messages( - [partial_qs_a.partial_questions for partial_qs_a in valid_partial_qas] + [partial_qs_a.partial_questions for partial_qs_a in partial_question_answers] ) # Create the conversation objects @@ -315,12 +339,13 @@ async def match_conversations( map_q_id_to_conversation = {} for group in grouped_partial_questions: questions_answers: List[QuestionAnswer] = [] + token_usage_agg = TokenUsageAggregate(tokens_by_model={}, token_usage=TokenUsage()) first_partial_qa = None for partial_question in sorted(group, key=lambda x: x.timestamp): # Partial questions don't contain the answer, so we need to find the corresponding # valid partial question answer selected_partial_qa = None - for partial_qa in valid_partial_qas: + for partial_qa in partial_question_answers: if partial_question.message_id == partial_qa.partial_questions.message_id: selected_partial_qa = partial_qa break @@ -333,16 +358,19 @@ async def match_conversations( qa = _get_question_answer_from_partial(selected_partial_qa) qa.question.message = parse_question_answer(qa.question.message) questions_answers.append(qa) + token_usage_agg.add_model_token_usage(selected_partial_qa.model_token_usage) # only add conversation if we have some answers if len(questions_answers) > 0 and first_partial_qa is not None: + if token_usage_agg.token_usage.input_tokens == 0: + token_usage_agg = None conversation = Conversation( question_answers=questions_answers, provider=first_partial_qa.partial_questions.provider, type=first_partial_qa.partial_questions.type, chat_id=first_partial_qa.partial_questions.message_id, conversation_timestamp=first_partial_qa.partial_questions.timestamp, - token_usage=None, + token_usage_agg=token_usage_agg, ) for qa in questions_answers: map_q_id_to_conversation[qa.question.message_id] = conversation @@ -351,17 +379,25 @@ async def 
match_conversations( return conversations, map_q_id_to_conversation +async def _process_prompt_output_to_partial_qa( + prompts_outputs: List[GetPromptWithOutputsRow], +) -> List[PartialQuestionAnswer]: + """ + Process the prompts and outputs to PartialQuestionAnswer objects. + """ + # Parse the prompts and outputs in parallel + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(_get_partial_question_answer(row)) for row in prompts_outputs] + return [task.result() for task in tasks if task.result() is not None] + + async def parse_messages_in_conversations( prompts_outputs: List[GetPromptWithOutputsRow], ) -> Tuple[List[Conversation], Dict[str, Conversation]]: """ Get all the messages from the database and return them as a list of conversations. """ - - # Parse the prompts and outputs in parallel - async with asyncio.TaskGroup() as tg: - tasks = [tg.create_task(_get_question_answer(row)) for row in prompts_outputs] - partial_question_answers = [task.result() for task in tasks] + partial_question_answers = await _process_prompt_output_to_partial_qa(prompts_outputs) conversations, map_q_id_to_conversation = await match_conversations(partial_question_answers) return conversations, map_q_id_to_conversation @@ -414,3 +450,16 @@ async def parse_get_alert_conversation( for row in alerts ] return [task.result() for task in tasks if task.result() is not None] + + +async def parse_workspace_token_usage( + prompts_outputs: List[GetPromptWithOutputsRow], +) -> TokenUsageAggregate: + """ + Parse the token usage from the workspace. 
+ """ + partial_question_answers = await _process_prompt_output_to_partial_qa(prompts_outputs) + token_usage_agg = TokenUsageAggregate(tokens_by_model={}, token_usage=TokenUsage()) + for p_qa in partial_question_answers: + token_usage_agg.add_model_token_usage(p_qa.model_token_usage) + return token_usage_agg diff --git a/src/codegate/db/connection.py b/src/codegate/db/connection.py index 10c1c81f..b821f4d6 100644 --- a/src/codegate/db/connection.py +++ b/src/codegate/db/connection.py @@ -28,6 +28,7 @@ WorkspaceRow, WorkspaceWithSessionInfo, ) +from codegate.db.token_usage import TokenUsageParser from codegate.pipeline.base import PipelineContext logger = structlog.get_logger("codegate") @@ -174,15 +175,34 @@ async def record_outputs( # Just store the model respnses in the list of JSON objects. for output in outputs: full_outputs.append(output.output) + + # Parse the token usage from the outputs + token_parser = TokenUsageParser() + full_token_usage = await token_parser.parse_outputs(outputs) + output_db.output = json.dumps(full_outputs) + output_db.input_tokens = full_token_usage.input_tokens + output_db.output_tokens = full_token_usage.output_tokens + output_db.input_cost = full_token_usage.input_cost + output_db.output_cost = full_token_usage.output_cost sql = text( """ - INSERT INTO outputs (id, prompt_id, timestamp, output) - VALUES (:id, :prompt_id, :timestamp, :output) + INSERT INTO outputs ( + id, prompt_id, timestamp, output, input_tokens, output_tokens, input_cost, + output_cost + ) + VALUES ( + :id, :prompt_id, :timestamp, :output, :input_tokens, :output_tokens, + :input_cost, :output_cost + ) ON CONFLICT (id) DO UPDATE SET - timestamp = excluded.timestamp, output = excluded.output, - prompt_id = excluded.prompt_id + timestamp = excluded.timestamp, + output = excluded.output, + input_tokens = excluded.input_tokens, + output_tokens = excluded.output_tokens, + input_cost = excluded.input_cost, + output_cost = excluded.output_cost RETURNING * """ ) @@ -491,7 
+511,11 @@ async def get_prompts_with_output(self, workpace_id: str) -> List[GetPromptWithO p.id, p.timestamp, p.provider, p.request, p.type, o.id as output_id, o.output, - o.timestamp as output_timestamp + o.timestamp as output_timestamp, + o.input_tokens, + o.output_tokens, + o.input_cost, + o.output_cost FROM prompts p LEFT JOIN outputs o ON p.id = o.prompt_id WHERE p.workspace_id = :workspace_id diff --git a/src/codegate/db/models.py b/src/codegate/db/models.py index 2a6434ef..c2a5ce8a 100644 --- a/src/codegate/db/models.py +++ b/src/codegate/db/models.py @@ -1,5 +1,5 @@ import datetime -from typing import Annotated, Any, Optional +from typing import Annotated, Any, Dict, Optional from pydantic import BaseModel, StringConstraints @@ -19,6 +19,10 @@ class Output(BaseModel): prompt_id: Any timestamp: Any output: Any + input_tokens: Optional[int] = None + output_tokens: Optional[int] = None + input_cost: Optional[float] = None + output_cost: Optional[float] = None class Prompt(BaseModel): @@ -30,6 +34,51 @@ class Prompt(BaseModel): workspace_id: Optional[str] +class TokenUsage(BaseModel): + """ + TokenUsage it's not a table, it's a model to represent the token usage. + The data is stored in the outputs table. 
+ """ + + input_tokens: int = 0 + output_tokens: int = 0 + input_cost: float = 0 + output_cost: float = 0 + + @classmethod + def from_dict(cls, usage_dict: Dict) -> "TokenUsage": + return cls( + input_tokens=usage_dict.get("prompt_tokens", 0) or usage_dict.get("input_tokens", 0), + output_tokens=usage_dict.get("completion_tokens", 0) + or usage_dict.get("output_tokens", 0), + input_cost=0, + output_cost=0, + ) + + @classmethod + def from_db( + cls, + input_tokens: Optional[int], + output_tokens: Optional[int], + input_cost: Optional[float], + output_cost: Optional[float], + ) -> "TokenUsage": + return cls( + input_tokens=0 if not input_tokens else input_tokens, + output_tokens=0 if not output_tokens else output_tokens, + input_cost=0 if not input_cost else input_cost, + output_cost=0 if not output_cost else output_cost, + ) + + def __add__(self, other: "TokenUsage") -> "TokenUsage": + return TokenUsage( + input_tokens=self.input_tokens + other.input_tokens, + output_tokens=self.output_tokens + other.output_tokens, + input_cost=self.input_cost + other.input_cost, + output_cost=self.output_cost + other.output_cost, + ) + + WorkspaceNameStr = Annotated[ str, StringConstraints( @@ -76,6 +125,10 @@ class GetPromptWithOutputsRow(BaseModel): output_id: Optional[Any] output: Optional[Any] output_timestamp: Optional[Any] + input_tokens: Optional[int] + output_tokens: Optional[int] + input_cost: Optional[float] + output_cost: Optional[float] class WorkspaceWithSessionInfo(BaseModel): diff --git a/src/codegate/db/token_usage.py b/src/codegate/db/token_usage.py new file mode 100644 index 00000000..3d55cafb --- /dev/null +++ b/src/codegate/db/token_usage.py @@ -0,0 +1,96 @@ +import asyncio +import json +from pathlib import Path +from typing import Dict, List, Tuple + +import structlog + +from codegate.db import models as db_models + +logger = structlog.get_logger("codegate") + + +class TokenUsageError(Exception): + pass + + +class TokenUsageParser: + + def __init__(self): + 
current_dir = Path(__file__).parent + filemodel_path = ( + current_dir.parent.parent.parent + / "model_cost_data" + / "model_prices_and_context_window.json" + ) + with open(filemodel_path) as file: + self.model_cost_mapping: Dict[str, Dict] = json.load(file) + + if not self.model_cost_mapping or not isinstance(self.model_cost_mapping, dict): + raise TokenUsageError("Failed to load model prices and context window.") + + @property + def mapping_model_to_model_cost(self) -> dict: + """ + Maps the model name to the model cost name. The model cost name should + exist in the URL above (model_prices_and_context_window.json). + """ + return { + "claude-3-5-sonnet-latest": "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-latest": "claude-3-5-haiku-20241022", + "claude-3-opus-latest": "claude-3-opus-20240229", + } + + async def _parse_usage_dict(self, usage_dict: dict) -> db_models.TokenUsage: + return db_models.TokenUsage.from_dict(usage_dict) + + async def _get_model_cost(self, model: str) -> Tuple[float, float]: + """ + Get the cost of the tokens for the model. + """ + if not model: + return 0, 0 + + model_cost_name = self.mapping_model_to_model_cost.get(model, model) + model_cost = self.model_cost_mapping.get(model_cost_name, {}) + # If the model is not found, return 0. Keys found in the URL above. + input_cost_per_token = model_cost.get("input_cost_per_token", 0) + output_cost_per_token = model_cost.get("output_cost_per_token", 0) + + return input_cost_per_token, output_cost_per_token + + async def _get_usage_from_output(self, output: db_models.Output) -> db_models.TokenUsage: + """ + Parse from an output chunk the token usage. 
+ """ + try: + output_dict = json.loads(output.output) + except json.JSONDecodeError: + logger.error(f"Failed to decode output: {output.output}") + return db_models.TokenUsage() + + if not isinstance(output_dict, dict): + logger.error(f"Output is not a dictionary: {output_dict}") + return db_models.TokenUsage() + + token_usage = await self._parse_usage_dict(output_dict.get("usage", {})) + input_token_cost, output_token_cost = await self._get_model_cost( + output_dict.get("model", "") + ) + + token_usage.input_cost = token_usage.input_tokens * input_token_cost + token_usage.output_cost = token_usage.output_tokens * output_token_cost + + return token_usage + + async def parse_outputs(self, outputs: List[db_models.Output]) -> db_models.TokenUsage: + """ + Parse the token usage from the output chunks. + """ + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(self._get_usage_from_output(output)) for output in outputs] + + token_usage = db_models.TokenUsage() + for task in tasks: + token_usage += task.result() + return token_usage diff --git a/src/codegate/inference/inference_engine.py b/src/codegate/inference/inference_engine.py index a69b7e13..9433a345 100644 --- a/src/codegate/inference/inference_engine.py +++ b/src/codegate/inference/inference_engine.py @@ -35,7 +35,7 @@ def _close_models(self): model._sampler.close() model.close() - async def __get_model(self, model_path, embedding=False, n_ctx=512, n_gpu_layers=0): + async def __get_model(self, model_path, embedding=False, n_ctx=512, n_gpu_layers=0) -> Llama: """ Returns Llama model object from __models if present. Otherwise, the model is loaded and added to __models and returned. 
diff --git a/src/codegate/pipeline/output.py b/src/codegate/pipeline/output.py index f5bb716a..76895120 100644 --- a/src/codegate/pipeline/output.py +++ b/src/codegate/pipeline/output.py @@ -1,3 +1,4 @@ +import asyncio from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import AsyncIterator, List, Optional @@ -113,8 +114,16 @@ def _store_chunk_content(self, chunk: ModelResponse) -> None: if choice.delta is not None and choice.delta.content is not None: self._context.processed_content.append(choice.delta.content) - async def _record_to_db(self): - await self._db_recorder.record_context(self._input_context) + def _record_to_db(self) -> None: + """ + Record the context to the database + + Important: We cannot use `await` in the finally statement. Otherwise, the stream + will not be transmitted properly. Hence we get the running loop and create a task to + record the context. + """ + loop = asyncio.get_running_loop() + loop.create_task(self._db_recorder.record_context(self._input_context)) async def process_stream( self, stream: AsyncIterator[ModelResponse], cleanup_sensitive: bool = True @@ -144,13 +153,6 @@ async def process_stream( current_chunks = processed_chunks - # **Needed for Copilot**. This is a hacky way of recording in DB the context - # when we see the last chunk.
Ideally this should be done in a `finally` or - # `StopAsyncIteration` but Copilot streams in an infite while loop so is not - # possible - if len(chunk.choices) > 0 and chunk.choices[0].get("finish_reason", "") == "stop": - await self._record_to_db() - # Yield all processed chunks for c in current_chunks: self._store_chunk_content(c) @@ -162,14 +164,16 @@ async def process_stream( logger.error(f"Error processing stream: {e}") raise e finally: + # NOTE: Don't use await in finally block, it will break the stream # Don't flush the buffer if we assume we'll call the pipeline again if cleanup_sensitive is False: + self._record_to_db() return # Process any remaining content in buffer when stream ends if self._context.buffer: final_content = "".join(self._context.buffer) - yield ModelResponse( + chunk = ModelResponse( id=self._buffered_chunk.id, choices=[ StreamingChoices( @@ -185,8 +189,11 @@ async def process_stream( model=self._buffered_chunk.model, object="chat.completion.chunk", ) + self._input_context.add_output(chunk) + yield chunk self._context.buffer.clear() + self._record_to_db() # Cleanup sensitive data through the input context if cleanup_sensitive and self._input_context and self._input_context.sensitive: self._input_context.sensitive.secure_cleanup() diff --git a/src/codegate/providers/base.py b/src/codegate/providers/base.py index 1ab055ea..8e9a4d40 100644 --- a/src/codegate/providers/base.py +++ b/src/codegate/providers/base.py @@ -197,8 +197,6 @@ async def _cleanup_after_streaming( yield item finally: if context: - # Record to DB the objects captured during the stream - await self._db_recorder.record_context(context) # Ensure sensitive data is cleaned up after the stream is consumed if context.sensitive: context.sensitive.secure_cleanup() diff --git a/src/codegate/providers/copilot/pipeline.py b/src/codegate/providers/copilot/pipeline.py index f040179a..e21d9a49 100644 --- a/src/codegate/providers/copilot/pipeline.py +++ 
b/src/codegate/providers/copilot/pipeline.py @@ -173,7 +173,15 @@ class CopilotChatNormalizer: def normalize(self, body: bytes) -> ChatCompletionRequest: json_body = json.loads(body) - return ChatCompletionRequest(**json_body) + normalized_data = ChatCompletionRequest(**json_body) + + # This would normally be the required to get the token usage with OpenAI models. + # However the response comes back empty with Copilot. Commenting for the moment. + # It's not critical since Copilot charges a fixed rate and not based in tokens. + # if normalized_data.get("stream", False): + # normalized_data["stream_options"] = {"include_usage": True} + + return normalized_data def denormalize(self, request_from_pipeline: ChatCompletionRequest) -> bytes: return json.dumps(request_from_pipeline).encode() diff --git a/src/codegate/providers/copilot/provider.py b/src/codegate/providers/copilot/provider.py index 71dec90e..35b26ae4 100644 --- a/src/codegate/providers/copilot/provider.py +++ b/src/codegate/providers/copilot/provider.py @@ -1,5 +1,4 @@ import asyncio -import contextlib import datetime import os import re diff --git a/src/codegate/providers/litellmshim/adapter.py b/src/codegate/providers/litellmshim/adapter.py index d3084184..8b53fb02 100644 --- a/src/codegate/providers/litellmshim/adapter.py +++ b/src/codegate/providers/litellmshim/adapter.py @@ -58,6 +58,9 @@ def normalize(self, data: Dict) -> ChatCompletionRequest: # so let's just pretend they doesn't exist if ret.get("tools") is not None: ret["tools"] = [] + + if ret.get("stream", False): + ret["stream_options"] = {"include_usage": True} return ret def denormalize(self, data: ChatCompletionRequest) -> Dict: diff --git a/src/codegate/providers/normalizer/completion.py b/src/codegate/providers/normalizer/completion.py index fc9518b8..c4cc6306 100644 --- a/src/codegate/providers/normalizer/completion.py +++ b/src/codegate/providers/normalizer/completion.py @@ -21,7 +21,10 @@ def normalize(self, data: Dict) -> 
ChatCompletionRequest: # We can add as many parameters as we like to data. ChatCompletionRequest is not strict. data["had_prompt_before"] = True try: - return ChatCompletionRequest(**data) + normalized_data = ChatCompletionRequest(**data) + if normalized_data.get("stream", False): + normalized_data["stream_options"] = {"include_usage": True} + return normalized_data except Exception as e: raise ValueError(f"Invalid completion parameters: {str(e)}") diff --git a/src/codegate/providers/ollama/adapter.py b/src/codegate/providers/ollama/adapter.py index 44d51092..32f0c8bc 100644 --- a/src/codegate/providers/ollama/adapter.py +++ b/src/codegate/providers/ollama/adapter.py @@ -27,6 +27,13 @@ def normalize(self, data: Dict) -> ChatCompletionRequest: # if we have the stream flag in data we set it, otherwise defaults to true normalized_data["stream"] = data.get("stream", True) + + # This would normally be the required to get the token usage. + # However Ollama python client doesn't support it. We would be able to get the response + # with a direct HTTP request. Since Ollama is local this is not critical. 
+ # if normalized_data.get("stream", False): + # normalized_data["stream_options"] = {"include_usage": True} + return ChatCompletionRequest(**normalized_data) def denormalize(self, data: ChatCompletionRequest) -> Dict: diff --git a/src/codegate/providers/openai/adapter.py b/src/codegate/providers/openai/adapter.py index 43baf88d..3e8583f5 100644 --- a/src/codegate/providers/openai/adapter.py +++ b/src/codegate/providers/openai/adapter.py @@ -14,6 +14,8 @@ def normalize(self, data: Dict) -> ChatCompletionRequest: No normalizing needed, already OpenAI format """ normalized_data = self._normalize_content_messages(data) + if normalized_data.get("stream", False): + normalized_data["stream_options"] = {"include_usage": True} return ChatCompletionRequest(**normalized_data) def denormalize(self, data: ChatCompletionRequest) -> Dict: diff --git a/src/codegate/providers/vllm/adapter.py b/src/codegate/providers/vllm/adapter.py index ebd92d22..0391708b 100644 --- a/src/codegate/providers/vllm/adapter.py +++ b/src/codegate/providers/vllm/adapter.py @@ -128,6 +128,8 @@ def normalize(self, data: Dict) -> ChatCompletionRequest: ret_data = self._chat_ml_normalizer.normalize(normalized_data) else: ret_data = ChatCompletionRequest(**normalized_data) + if ret_data.get("stream", False): + ret_data["stream_options"] = {"include_usage": True} return ret_data def denormalize(self, data: ChatCompletionRequest) -> Dict: diff --git a/tests/api/test_v1_processing.py b/tests/api/test_v1_processing.py index 20598c67..28d70fe1 100644 --- a/tests/api/test_v1_processing.py +++ b/tests/api/test_v1_processing.py @@ -4,11 +4,9 @@ import pytest -from codegate.api.v1_models import ( - PartialQuestions, -) +from codegate.api.v1_models import PartialQuestions from codegate.api.v1_processing import ( - _get_question_answer, + _get_partial_question_answer, _group_partial_messages, _is_system_prompt, parse_output, @@ -77,7 +75,7 @@ async def test_is_system_prompt(message, expected_bool): ) async def 
test_parse_request(request_dict, expected_str_list): request_str = json.dumps(request_dict) - result = await parse_request(request_str) + result, _ = await parse_request(request_str) assert result == expected_str_list @@ -151,12 +149,16 @@ async def test_parse_output(output_dict, expected_str): GetPromptWithOutputsRow( id="1", timestamp=timestamp_now, - provider="provider", + provider="openai", request="foo", type="chat", output_id="2", output="bar", output_timestamp=timestamp_now, + input_tokens=None, + output_tokens=None, + input_cost=None, + output_cost=None, ) ], ) @@ -168,9 +170,9 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row): "codegate.api.v1_processing.parse_output", new_callable=AsyncMock ) as mock_parse_output: # Set return values for the mocks - mock_parse_request.return_value = request_msg_list + mock_parse_request.return_value = request_msg_list, "openai" mock_parse_output.return_value = output_msg_str - result = await _get_question_answer(row) + result = await _get_partial_question_answer(row) mock_parse_request.assert_called_once() mock_parse_output.assert_called_once() @@ -181,7 +183,7 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row): assert result.partial_questions.messages == request_msg_list if output_msg_str is not None: assert result.answer.message == output_msg_str - assert result.partial_questions.provider == "provider" + assert result.partial_questions.provider == "openai" assert result.partial_questions.type == "chat" diff --git a/tests/pipeline/test_output.py b/tests/pipeline/test_output.py index c700b3cd..07bc8cee 100644 --- a/tests/pipeline/test_output.py +++ b/tests/pipeline/test_output.py @@ -1,5 +1,5 @@ from typing import List -from unittest.mock import MagicMock +from unittest.mock import AsyncMock import pytest from litellm import ModelResponse @@ -95,7 +95,7 @@ async def test_single_step_processing(self): """Test processing a stream through a single step""" step = 
MockOutputPipelineStep("test_step", modify_content=True) context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance([step], context, db_recorder) async def mock_stream(): @@ -120,7 +120,7 @@ async def test_multiple_steps_processing(self): MockOutputPipelineStep("step2", modify_content=True), ] context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance(steps, context, db_recorder) async def mock_stream(): @@ -144,7 +144,7 @@ async def test_step_pausing(self): MockOutputPipelineStep("step2", modify_content=True), ] context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance(steps, context, db_recorder) async def mock_stream(): @@ -201,7 +201,7 @@ async def process_chunk( return [] context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance([ReplacementStep()], context, db_recorder) async def mock_stream(): @@ -226,7 +226,7 @@ async def test_buffer_processing(self): """Test that content is properly buffered and cleared""" step = MockOutputPipelineStep("test_step") context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance([step], context, db_recorder) async def mock_stream(): @@ -248,7 +248,7 @@ async def test_empty_stream(self): """Test handling of an empty stream""" step = MockOutputPipelineStep("test_step") context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance([step], context, db_recorder) async def mock_stream(): @@ -282,7 +282,7 @@ async def process_chunk( assert input_context.metadata["test"] == "value" return [chunk] - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance( [ContextCheckingStep()], input_context=input_context, db_recorder=db_recorder ) @@ -298,7 +298,7 @@ async def 
test_buffer_flush_on_stream_end(self): """Test that buffer is properly flushed when stream ends""" step = MockOutputPipelineStep("test_step", should_pause=True) context = MockContext() - db_recorder = MagicMock() + db_recorder = AsyncMock() instance = OutputPipelineInstance([step], context, db_recorder) async def mock_stream(): diff --git a/tests/providers/anthropic/test_adapter.py b/tests/providers/anthropic/test_adapter.py index 69735aa3..ba920e64 100644 --- a/tests/providers/anthropic/test_adapter.py +++ b/tests/providers/anthropic/test_adapter.py @@ -42,6 +42,7 @@ def test_normalize_anthropic_input(input_normalizer): ], "model": "claude-3-haiku-20240307", "stream": True, + "stream_options": {"include_usage": True}, } # Get translation