@@ -67,8 +67,8 @@ def create_server():
67
67
68
68
69
69
def do_test_completion_with_required_tool_tiny (template_name : str , tool : dict , argument_key : str | None ):
70
- n_predict = 512
71
70
global server
71
+ n_predict = 512
72
72
# server = ServerPreset.stories15m_moe()
73
73
server .jinja = True
74
74
server .n_predict = n_predict
@@ -139,40 +139,62 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
139
139
@pytest .mark .parametrize ("tool,argument_key,hf_repo,template_override" , [
140
140
(TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
141
141
(PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
142
+ (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
143
+
144
+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested here.
142
145
(TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
143
146
(PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
147
+
144
148
(TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
145
149
(PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
150
+ (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
151
+
146
152
(TEST_TOOL , "success" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
147
153
(PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
154
+ (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
155
+
148
156
(TEST_TOOL , "success" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
149
157
(PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
158
+ (PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
159
+
150
160
(TEST_TOOL , "success" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
151
161
(PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
162
+ (PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
163
+
152
164
(TEST_TOOL , "success" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
153
165
(PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
154
- (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
155
- (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
166
+ (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
167
+
168
+ (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
169
+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
170
+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , "chatml" ),
171
+
156
172
(TEST_TOOL , "success" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
157
173
(PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
174
+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
175
+
158
176
(TEST_TOOL , "success" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
159
177
(PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
178
+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
160
179
# TODO: fix these
161
180
# (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
162
181
# (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
163
182
])
164
- def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
183
+ def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
184
+ global server
165
185
n_predict = 512
166
186
server .n_slots = 1
167
187
server .jinja = True
168
188
server .n_ctx = 8192
169
189
server .n_predict = n_predict
170
190
server .model_hf_repo = hf_repo
171
191
server .model_hf_file = None
172
- if template_override :
192
+ if isinstance ( template_override , tuple ) :
173
193
(template_hf_repo , template_variant ) = template_override
174
194
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
175
195
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
196
+ elif isinstance (template_override , str ):
197
+ server .chat_template = template_override
176
198
server .start (timeout_seconds = TIMEOUT_SERVER_START )
177
199
res = server .make_request ("POST" , "/chat/completions" , data = {
178
200
"max_tokens" : n_predict ,
@@ -252,29 +274,49 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
252
274
@pytest .mark .slow
253
275
@pytest .mark .parametrize ("hf_repo,template_override" , [
254
276
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
255
- ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
277
+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
278
+
256
279
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
280
+ ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
281
+
257
282
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
258
- ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
259
- ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
283
+ ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
284
+
285
+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
286
+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
287
+
288
+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
289
+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
290
+
260
291
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
292
+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
293
+
261
294
("bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
295
+ ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
296
+
262
297
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
298
+ ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
299
+
300
+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested here.
301
+ ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
302
+
263
303
# ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
264
304
# ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
265
305
])
266
- def test_weather_tool_call (hf_repo : str , template_override : Tuple [str , str | None ] | None ):
306
+ def test_weather_tool_call (hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
267
307
global server
268
308
server .n_slots = 1
269
309
server .jinja = True
270
310
server .n_ctx = 8192
271
311
server .n_predict = 512
272
312
server .model_hf_repo = hf_repo
273
313
server .model_hf_file = None
274
- if template_override :
314
+ if isinstance ( template_override , tuple ) :
275
315
(template_hf_repo , template_variant ) = template_override
276
316
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
277
317
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
318
+ elif isinstance (template_override , str ):
319
+ server .chat_template = template_override
278
320
server .start (timeout_seconds = TIMEOUT_SERVER_START )
279
321
res = server .make_request ("POST" , "/chat/completions" , data = {
280
322
"max_tokens" : 256 ,
@@ -298,30 +340,52 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
298
340
299
341
@pytest .mark .slow
300
342
@pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
301
- (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
302
343
(None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
344
+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
345
+
303
346
(None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
304
- ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
305
- (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
347
+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
348
+
349
+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
350
+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
351
+
352
+ ('{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
353
+ (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
354
+
306
355
('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
356
+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
357
+
307
358
(None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
308
- (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
309
- (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
359
+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
360
+
361
+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
362
+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
363
+
364
+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
365
+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
366
+
310
367
(None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
368
+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
369
+
370
+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested here.
371
+ (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
372
+
311
373
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
312
374
])
313
- def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
375
+ def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
314
376
global server
315
377
server .n_slots = 1
316
378
server .jinja = True
317
379
server .n_ctx = 8192
318
380
server .n_predict = 128
319
381
server .model_hf_repo = hf_repo
320
382
server .model_hf_file = None
321
- if template_override :
383
+ if isinstance ( template_override , tuple ) :
322
384
(template_hf_repo , template_variant ) = template_override
323
385
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
324
386
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
387
+ elif isinstance (template_override , str ):
388
+ server .chat_template = template_override
325
389
server .start (timeout_seconds = TIMEOUT_SERVER_START )
326
390
res = server .make_request ("POST" , "/chat/completions" , data = {
327
391
"max_tokens" : 256 ,
0 commit comments