3030)
3131
3232EVAL_WEATHER = EvalConfig (
33- prompt = "What's the weather in San Francisco? Temperature should be in fahrenheits ." ,
33+ prompt = "What's the weather in San Francisco? Temperature should be in Fahrenheit ." ,
3434 eval = "The user talks about the weather in San Francisco, including the degrees." ,
3535)
3636
37+ EVAL_WEATHER_AND_RESTAURANT = EvalConfig (
38+ prompt = "What's the weather in San Francisco, and what's a good restaurant there? Temperature should be in Fahrenheit." ,
39+ eval = "The user talks about the weather in San Francisco, including the degrees, and provides a restaurant recommendation." ,
40+ )
41+
3742EVAL_ONLINE_SEARCH = EvalConfig (
3843 prompt = "What's the current date in UTC?" ,
3944 eval = f"Current date in UTC is { datetime .now (timezone .utc ).strftime ('%A, %B %d, %Y' )} ." ,
@@ -145,10 +150,16 @@ def EVAL_VISION_IMAGE(*, eval_speaks_first: bool = False):
145150 ("12d-describe-image-moondream.py" , EVAL_VISION_IMAGE ()),
146151]
147152
153+ # For a few major services, we also test parallel function calling.
154+ # (We don't bother doing this with every single service, as it's expensive and
155+ # most rely on the same OpenAI-compatible implementation.)
148156TESTS_14 = [
149157 ("14-function-calling.py" , EVAL_WEATHER ),
158+ ("14-function-calling.py" , EVAL_WEATHER_AND_RESTAURANT ),
150159 ("14a-function-calling-anthropic.py" , EVAL_WEATHER ),
160+ ("14a-function-calling-anthropic.py" , EVAL_WEATHER_AND_RESTAURANT ),
151161 ("14e-function-calling-google.py" , EVAL_WEATHER ),
162+ ("14e-function-calling-google.py" , EVAL_WEATHER_AND_RESTAURANT ),
152163 ("14f-function-calling-groq.py" , EVAL_WEATHER ),
153164 ("14g-function-calling-grok.py" , EVAL_WEATHER ),
154165 ("14h-function-calling-azure.py" , EVAL_WEATHER ),
@@ -160,6 +171,7 @@ def EVAL_VISION_IMAGE(*, eval_speaks_first: bool = False):
160171 ("14p-function-calling-gemini-vertex-ai.py" , EVAL_WEATHER ),
161172 ("14q-function-calling-qwen.py" , EVAL_WEATHER ),
162173 ("14r-function-calling-aws.py" , EVAL_WEATHER ),
174+ ("14r-function-calling-aws.py" , EVAL_WEATHER_AND_RESTAURANT ),
163175 ("14v-function-calling-openai.py" , EVAL_WEATHER ),
164176 ("14w-function-calling-mistral.py" , EVAL_WEATHER ),
165177 ("14x-function-calling-openpipe.py" , EVAL_WEATHER ),
0 commit comments