fix(utils.py): fix openai-like api response format parsing #7273

Merged: 3 commits, Dec 17, 2024
32 changes: 25 additions & 7 deletions litellm/cost_calculator.py
@@ -111,6 +111,7 @@ def cost_per_token( # noqa: PLR0915
usage_object: Optional[Usage] = None, # just read the usage object if provided
### CALL TYPE ###
call_type: CallTypesLiteral = "completion",
audio_transcription_file_duration: float = 0.0, # for audio transcription calls - the file time in seconds
) -> Tuple[float, float]: # type: ignore
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -236,6 +237,12 @@ def cost_per_token( # noqa: PLR0915
model=model,
custom_llm_provider=custom_llm_provider,
)
elif call_type == "atranscription" or call_type == "transcription":
return openai_cost_per_second(
model=model,
custom_llm_provider=custom_llm_provider,
duration=audio_transcription_file_duration,
)
elif custom_llm_provider == "vertex_ai":
cost_router = google_cost_router(
model=model_without_prefix,
@@ -261,13 +268,7 @@ def cost_per_token( # noqa: PLR0915
elif custom_llm_provider == "anthropic":
return anthropic_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "openai":
openai_cost_route = openai_cost_router(call_type=CallTypes(call_type))
if openai_cost_route == "cost_per_token":
return openai_cost_per_token(model=model, usage=usage_block)
elif openai_cost_route == "cost_per_second":
return openai_cost_per_second(
model=model, usage=usage_block, response_time_ms=response_time_ms
)
return openai_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "databricks":
return databricks_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "fireworks_ai":
@@ -484,6 +485,7 @@ def completion_cost( # noqa: PLR0915
completion_characters: Optional[int] = None
cache_creation_input_tokens: Optional[int] = None
cache_read_input_tokens: Optional[int] = None
audio_transcription_file_duration: float = 0.0
cost_per_token_usage_object: Optional[Usage] = _get_usage_object(
completion_response=completion_response
)
@@ -632,6 +634,13 @@ def completion_cost( # noqa: PLR0915
call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
):
prompt_characters = litellm.utils._count_characters(text=prompt)
elif (
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
audio_transcription_file_duration = getattr(
completion_response, "duration", 0.0
)
elif (
call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
):
@@ -708,6 +717,7 @@ def completion_cost( # noqa: PLR0915
cache_read_input_tokens=cache_read_input_tokens,
usage_object=cost_per_token_usage_object,
call_type=call_type,
audio_transcription_file_duration=audio_transcription_file_duration,
)
_final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar

@@ -814,3 +824,11 @@ def rerank_cost(
)
except Exception as e:
raise e


def transcription_cost(
model: str, custom_llm_provider: Optional[str], duration: float
) -> Tuple[float, float]:
return openai_cost_per_second(
model=model, custom_llm_provider=custom_llm_provider, duration=duration
)
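
A rough usage sketch (not part of the diff) of the new transcription pricing path: the cost is now derived from the audio file's duration, mirroring the updated whisper tests further down in this PR. It assumes litellm's cost map prices whisper-1 per second of input audio; the TranscriptionResponse import path is an assumption.

# Sketch only: price a transcription by its audio duration via completion_cost.
import litellm
from litellm.utils import TranscriptionResponse  # import path assumed

transcription = TranscriptionResponse(text="hello world")
setattr(transcription, "duration", 3)  # seconds of audio, read via getattr in completion_cost
transcription._hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}

cost = litellm.completion_cost(model="whisper-1", completion_response=transcription)
print(cost)  # expected: per-second input price * 3
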
24 changes: 16 additions & 8 deletions litellm/llms/openai/cost_calculation.py
@@ -78,36 +78,44 @@


def cost_per_second(
model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
model: str, custom_llm_provider: Optional[str], duration: float = 0.0
) -> Tuple[float, float]:
"""
Calculates the cost per second for a given model, prompt tokens, and completion tokens.

Input:
- model: str, the model name without provider prefix
- custom_llm_provider: str, the custom llm provider
- duration: float, the duration of the response in seconds

Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
## GET MODEL INFO
model_info = get_model_info(model=model, custom_llm_provider="openai")
model_info = get_model_info(
model=model, custom_llm_provider=custom_llm_provider or "openai"
)
prompt_cost = 0.0
completion_cost = 0.0
## Speech / Audio cost calculation
if (
"output_cost_per_second" in model_info
and model_info["output_cost_per_second"] is not None
and response_time_ms is not None
):
verbose_logger.debug(
f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
)
## COST PER SECOND ##
completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
completion_cost = model_info["output_cost_per_second"] * duration
elif (
"input_cost_per_second" in model_info
and model_info["input_cost_per_second"] is not None
and response_time_ms is not None
):
verbose_logger.debug(
f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information (High): these debug statements log sensitive data (secret, password) as clear text.

Copilot Autofix (AI, 6 months ago)

To fix the problem, we should avoid logging sensitive information directly. Instead, we can log a generic message that does not include sensitive details. This way, we maintain the ability to debug without exposing sensitive data.

  • Replace the log statements that include sensitive information with more generic messages.
  • Specifically, update the log statements on lines 106 and 115 in litellm/llms/openai/cost_calculation.py to remove sensitive data.
Suggested changeset 1: litellm/llms/openai/cost_calculation.py

Autofix patch. Run the following command in your local git repository to apply this patch:

cat << 'EOF' | git apply
diff --git a/litellm/llms/openai/cost_calculation.py b/litellm/llms/openai/cost_calculation.py
--- a/litellm/llms/openai/cost_calculation.py
+++ b/litellm/llms/openai/cost_calculation.py
@@ -105,3 +105,3 @@
         verbose_logger.debug(
-            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
+            "Calculating cost per second for the model; duration: {}".format(duration)
         )
@@ -114,3 +114,3 @@
         verbose_logger.debug(
-            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
+            "Calculating cost per second for the model; duration: {}".format(duration)
         )
EOF
)
## COST PER SECOND ##
prompt_cost = model_info["input_cost_per_second"] * response_time_ms / 1000
prompt_cost = model_info["input_cost_per_second"] * duration
completion_cost = 0.0

return prompt_cost, completion_cost
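
A minimal sketch (not part of the diff) of calling the reworked helper directly: the cost is now computed from the audio duration in seconds rather than from response_time_ms. It assumes the cost map carries a per-second price for the model.

from litellm.llms.openai.cost_calculation import cost_per_second

prompt_cost, completion_cost = cost_per_second(
    model="whisper-1",
    custom_llm_provider="openai",
    duration=3.0,  # seconds of audio in the uploaded file
)
print(prompt_cost, completion_cost)
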
54 changes: 11 additions & 43 deletions litellm/utils.py
@@ -3612,53 +3612,21 @@ def _map_and_modify_arg(supported_params: dict, provider: str, model: str):
else False
),
)
else: # assume passing in params for text-completion openai
else: # assume passing in params for openai-like api
supported_params = get_supported_openai_params(
model=model, custom_llm_provider="custom_openai"
)
_check_valid_arg(supported_params=supported_params)
if functions is not None:
optional_params["functions"] = functions
if function_call is not None:
optional_params["function_call"] = function_call
if temperature is not None:
optional_params["temperature"] = temperature
if top_p is not None:
optional_params["top_p"] = top_p
if n is not None:
optional_params["n"] = n
if stream is not None:
optional_params["stream"] = stream
if stream_options is not None:
optional_params["stream_options"] = stream_options
if stop is not None:
optional_params["stop"] = stop
if max_tokens is not None:
optional_params["max_tokens"] = max_tokens
if presence_penalty is not None:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty is not None:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias is not None:
optional_params["logit_bias"] = logit_bias
if user is not None:
optional_params["user"] = user
if response_format is not None:
optional_params["response_format"] = response_format
if seed is not None:
optional_params["seed"] = seed
if tools is not None:
optional_params["tools"] = tools
if tool_choice is not None:
optional_params["tool_choice"] = tool_choice
if max_retries is not None:
optional_params["max_retries"] = max_retries
if logprobs is not None:
optional_params["logprobs"] = logprobs
if top_logprobs is not None:
optional_params["top_logprobs"] = top_logprobs
if extra_headers is not None:
optional_params["extra_headers"] = extra_headers
optional_params = litellm.OpenAILikeChatConfig().map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
model=model,
drop_params=(
drop_params
if drop_params is not None and isinstance(drop_params, bool)
else False
),
)
if (
custom_llm_provider
in ["openai", "azure", "text-completion-openai"]
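
A rough illustration (not part of the diff) of the new mapping path: openai-like providers now delegate to OpenAILikeChatConfig().map_openai_params instead of copying each supported parameter by hand. The parameter values below are placeholders.

import litellm

# Sketch only: map a couple of OpenAI-style params for an openai-like provider.
optional_params = litellm.OpenAILikeChatConfig().map_openai_params(
    non_default_params={"temperature": 0.2, "response_format": {"type": "json_object"}},
    optional_params={},
    model="my-openai-like-model",  # placeholder model name
    drop_params=False,
)
print(optional_params)  # expected to carry temperature / response_format through
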
68 changes: 40 additions & 28 deletions tests/local_testing/test_audio_speech.py
@@ -138,10 +138,14 @@ def return_val():
mock_async_post.return_value = mock_response
model = "vertex_ai/test"

response = await litellm.aspeech(
model=model,
input="async hello what llm guardrail do you have",
)
try:
response = await litellm.aspeech(
model=model,
input="async hello what llm guardrail do you have",
)
except litellm.APIConnectionError as e:
if "Your default credentials were not found" in str(e):
pytest.skip("skipping test, credentials not found")

# Assert asynchronous call
mock_async_post.assert_called_once()
@@ -181,18 +185,22 @@ def return_val():
mock_async_post.return_value = mock_response
model = "vertex_ai/test"

response = await litellm.aspeech(
model=model,
input="async hello what llm guardrail do you have",
voice={
"languageCode": "en-UK",
"name": "en-UK-Studio-O",
},
audioConfig={
"audioEncoding": "LINEAR22",
"speakingRate": "10",
},
)
try:
response = await litellm.aspeech(
model=model,
input="async hello what llm guardrail do you have",
voice={
"languageCode": "en-UK",
"name": "en-UK-Studio-O",
},
audioConfig={
"audioEncoding": "LINEAR22",
"speakingRate": "10",
},
)
except litellm.APIConnectionError as e:
if "Your default credentials were not found" in str(e):
pytest.skip("skipping test, credentials not found")

# Assert asynchronous call
mock_async_post.assert_called_once()
@@ -239,18 +247,22 @@ def return_val():
mock_async_post.return_value = mock_response
model = "vertex_ai/test"

response = await litellm.aspeech(
input=ssml,
model=model,
voice={
"languageCode": "en-UK",
"name": "en-UK-Studio-O",
},
audioConfig={
"audioEncoding": "LINEAR22",
"speakingRate": "10",
},
)
try:
response = await litellm.aspeech(
input=ssml,
model=model,
voice={
"languageCode": "en-UK",
"name": "en-UK-Studio-O",
},
audioConfig={
"audioEncoding": "LINEAR22",
"speakingRate": "10",
},
)
except litellm.APIConnectionError as e:
if "Your default credentials were not found" in str(e):
pytest.skip("skipping test, credentials not found")

# Assert asynchronous call
mock_async_post.assert_called_once()
37 changes: 37 additions & 0 deletions tests/local_testing/test_completion.py
@@ -1819,6 +1819,43 @@ async def test_litellm_gateway_from_sdk():
assert "hello" in mock_call.call_args.kwargs["extra_body"]


@pytest.mark.asyncio
async def test_litellm_gateway_from_sdk_structured_output():
from pydantic import BaseModel

class Result(BaseModel):
answer: str

litellm.set_verbose = True
from openai import OpenAI

openai_client = OpenAI(api_key="fake-key")

with patch.object(
openai_client.chat.completions, "create", new=MagicMock()
) as mock_call:
try:
litellm.completion(
model="litellm_proxy/openai/gpt-4o",
messages=[
{"role": "user", "content": "What is the capital of France?"}
],
api_key="my-test-api-key",
user="test",
response_format=Result,
base_url="https://litellm.ml-serving-internal.scale.com",
client=openai_client,
)
except Exception as e:
print(e)

mock_call.assert_called_once()

print("Call KWARGS - {}".format(mock_call.call_args.kwargs))
json_schema = mock_call.call_args.kwargs["response_format"]
assert "json_schema" in json_schema
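
For context, a rough sketch of the shape the Pydantic Result model is expected to be translated into before it reaches the OpenAI client; the test above only asserts that "json_schema" appears in the forwarded response_format, so the field layout below is an assumption.

# Illustrative only - the exact payload is produced by litellm's response_format handling.
expected_shape = {
    "type": "json_schema",
    "json_schema": {
        "name": "Result",
        "schema": {
            "type": "object",
            "properties": {"answer": {"type": "string"}},
            "required": ["answer"],
        },
    },
}
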


# ################### Hugging Face Conversational models ########################
# def hf_test_completion_conv():
# try:
9 changes: 5 additions & 4 deletions tests/local_testing/test_completion_cost.py
@@ -393,6 +393,8 @@ def test_whisper_openai():
transcription = TranscriptionResponse(
text="Four score and seven years ago, our fathers brought forth on this continent a new nation, conceived in liberty and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure."
)

setattr(transcription, "duration", 3)
transcription._hidden_params = {
"model": "whisper-1",
"custom_llm_provider": "openai",
@@ -401,7 +403,6 @@
}
_total_time_in_seconds = 3

transcription._response_ms = _total_time_in_seconds * 1000
cost = litellm.completion_cost(model="whisper-1", completion_response=transcription)

print(f"cost: {cost}")
@@ -411,7 +412,7 @@
* _total_time_in_seconds,
5,
)
assert cost == expected_cost
assert round(cost, 5) == round(expected_cost, 5)


def test_whisper_azure():
@@ -426,8 +427,8 @@
"model_id": None,
}
_total_time_in_seconds = 3
setattr(transcription, "duration", _total_time_in_seconds)

transcription._response_ms = _total_time_in_seconds * 1000
cost = litellm.completion_cost(
model="azure/azure-whisper", completion_response=transcription
)
@@ -439,7 +440,7 @@
* _total_time_in_seconds,
5,
)
assert cost == expected_cost
assert round(cost, 5) == round(expected_cost, 5)
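
As a back-of-the-envelope check on the expected values in these tests (assuming the cost map prices Whisper at OpenAI's published $0.006 per minute, i.e. $0.0001 per second; the real figures come from litellm.model_cost):

input_cost_per_second = 0.006 / 60   # assumed per-second price
duration_seconds = 3
expected_cost = round(input_cost_per_second * duration_seconds, 5)  # 0.0003
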


def test_dalle_3_azure_cost_tracking():