@@ -67,8 +67,8 @@ def create_server():
6767
6868
6969def do_test_completion_with_required_tool_tiny (template_name : str , tool : dict , argument_key : str | None ):
70- n_predict = 512
7170 global server
71+ n_predict = 512
7272 # server = ServerPreset.stories15m_moe()
7373 server .jinja = True
7474 server .n_predict = n_predict
@@ -139,40 +139,62 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
139139@pytest .mark .parametrize ("tool,argument_key,hf_repo,template_override" , [
140140 (TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
141141 (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
142+ (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
143+
144+ # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
142145 (TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
143146 (PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
147+
144148 (TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
145149 (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
150+ (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
151+
146152 (TEST_TOOL , "success" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
147153 (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
154+ (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
155+
148156 (TEST_TOOL , "success" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
149157 (PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
158+ (PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
159+
150160 (TEST_TOOL , "success" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
151161 (PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
162+ (PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
163+
152164 (TEST_TOOL , "success" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
153165 (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
154- (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
155- (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
166+ (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
167+
168+ (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
169+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
170+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , "chatml" ),
171+
156172 (TEST_TOOL , "success" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
157173 (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
174+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
175+
158176 (TEST_TOOL , "success" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
159177 (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
178+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
160179 # TODO: fix these
161180 # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
162181 # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
163182])
164- def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
183+ def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
184+ global server
165185 n_predict = 512
166186 server .n_slots = 1
167187 server .jinja = True
168188 server .n_ctx = 8192
169189 server .n_predict = n_predict
170190 server .model_hf_repo = hf_repo
171191 server .model_hf_file = None
172- if template_override :
192+ if isinstance ( template_override , tuple ) :
173193 (template_hf_repo , template_variant ) = template_override
174194 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
175195 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
196+ elif isinstance (template_override , str ):
197+ server .chat_template = template_override
176198 server .start (timeout_seconds = TIMEOUT_SERVER_START )
177199 res = server .make_request ("POST" , "/chat/completions" , data = {
178200 "max_tokens" : n_predict ,
@@ -252,29 +274,49 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
252274@pytest .mark .slow
253275@pytest .mark .parametrize ("hf_repo,template_override" , [
254276 ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
255- ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
277+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
278+
256279 ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
280+ ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
281+
257282 ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
258- ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
259- ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
283+ ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
284+
285+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
286+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
287+
288+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
289+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
290+
260291 ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
292+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
293+
261294 ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
295+ ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
296+
262297 ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
298+ ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
299+
300+ # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
301+ ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
302+
263303 # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
264304 # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
265305])
266- def test_weather_tool_call (hf_repo : str , template_override : Tuple [str , str | None ] | None ):
306+ def test_weather_tool_call (hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
267307 global server
268308 server .n_slots = 1
269309 server .jinja = True
270310 server .n_ctx = 8192
271311 server .n_predict = 512
272312 server .model_hf_repo = hf_repo
273313 server .model_hf_file = None
274- if template_override :
314+ if isinstance ( template_override , tuple ) :
275315 (template_hf_repo , template_variant ) = template_override
276316 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
277317 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
318+ elif isinstance (template_override , str ):
319+ server .chat_template = template_override
278320 server .start (timeout_seconds = TIMEOUT_SERVER_START )
279321 res = server .make_request ("POST" , "/chat/completions" , data = {
280322 "max_tokens" : 256 ,
@@ -298,30 +340,52 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
298340
299341@pytest .mark .slow
300342@pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
301- (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
302343 (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
344+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
345+
303346 (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
304- ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
305- (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
347+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
348+
349+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
350+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
351+
352+ ('{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
353+ (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
354+
306355 ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
356+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
357+
307358 (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
308- (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
309- (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
359+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
360+
361+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
362+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
363+
364+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
365+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
366+
310367 (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
368+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
369+
370+ # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
371+ (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
372+
311373 # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
312374])
313- def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
375+ def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
314376 global server
315377 server .n_slots = 1
316378 server .jinja = True
317379 server .n_ctx = 8192
318380 server .n_predict = 128
319381 server .model_hf_repo = hf_repo
320382 server .model_hf_file = None
321- if template_override :
383+ if isinstance ( template_override , tuple ) :
322384 (template_hf_repo , template_variant ) = template_override
323385 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
324386 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
387+ elif isinstance (template_override , str ):
388+ server .chat_template = template_override
325389 server .start (timeout_seconds = TIMEOUT_SERVER_START )
326390 res = server .make_request ("POST" , "/chat/completions" , data = {
327391 "max_tokens" : 256 ,
0 commit comments