
Commit 1a9d85d

minleminzui authored and lifuhuang committed
feat: append more comprehensive fields in messages instead of merely role and content (sgl-project#5996)
1 parent 1e1ee78 commit 1a9d85d

File tree

3 files changed: +65 −58 lines changed
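
In short: chat messages forwarded to the backend now retain fields such as `tool_call_id`, `name`, `reasoning_content`, and `tool_calls` instead of being reduced to `role` and `content`. A minimal sketch of the kind of tool-result message this commit preserves (field values here are hypothetical, not taken from the diff):

```python
# Hypothetical example of the richer message shape this commit keeps intact.
tool_result_message = {
    "role": "tool",
    "tool_call_id": "call_Zz9mK1TQQkOaYhXw3v1bng",  # echoed from the assistant's tool call
    "name": "get_current_weather",  # tool that produced the result
    "content": '{"temperature": 20, "unit": "celsius"}',
}
```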

docs/backend/function_calling.ipynb

Lines changed: 23 additions & 28 deletions

```diff
@@ -38,7 +38,9 @@
 "    from patch import launch_server_cmd\n",
 "else:\n",
 "    from sglang.utils import launch_server_cmd\n",
+"    import nest_asyncio\n",
 "\n",
+"    nest_asyncio.apply()\n",
 "\n",
 "server_process, port = launch_server_cmd(\n",
 "    \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0\"  # qwen25\n",
@@ -164,7 +166,7 @@
 "response_non_stream = client.chat.completions.create(\n",
 "    model=model_name,\n",
 "    messages=messages,\n",
-"    temperature=0.1,\n",
+"    temperature=0,\n",
 "    top_p=0.95,\n",
 "    max_tokens=1024,\n",
 "    stream=False,  # Non-streaming\n",
@@ -219,7 +221,7 @@
 "response_stream = client.chat.completions.create(\n",
 "    model=model_name,\n",
 "    messages=messages,\n",
-"    temperature=0.1,\n",
+"    temperature=0,\n",
 "    top_p=0.95,\n",
 "    max_tokens=1024,\n",
 "    stream=True,  # Enable streaming\n",
@@ -309,23 +311,24 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"call_data = json.loads(full_arguments)\n",
+"messages.append(response_non_stream.choices[0].message)\n",
 "\n",
+"# Call the corresponding tool function\n",
+"tool_call = messages[-1].tool_calls[0]\n",
+"tool_name = tool_call.function.name\n",
+"tool_to_call = available_tools[tool_name]\n",
+"result = tool_to_call(**(json.loads(tool_call.function.arguments)))\n",
+"print_highlight(f\"Function call result: {result}\")\n",
+"# messages.append({\"role\": \"tool\", \"content\": result, \"name\": tool_name})\n",
 "messages.append(\n",
 "    {\n",
-"        \"role\": \"user\",\n",
-"        \"content\": \"\",\n",
-"        \"tool_calls\": {\"name\": \"get_current_weather\", \"arguments\": full_arguments},\n",
+"        \"role\": \"tool\",\n",
+"        \"tool_call_id\": tool_call.id,\n",
+"        \"content\": str(result),\n",
+"        \"name\": tool_name,\n",
 "    }\n",
 ")\n",
 "\n",
-"# Call the corresponding tool function\n",
-"tool_name = messages[-1][\"tool_calls\"][\"name\"]\n",
-"tool_to_call = available_tools[tool_name]\n",
-"result = tool_to_call(**call_data)\n",
-"print_highlight(f\"Function call result: {result}\")\n",
-"messages.append({\"role\": \"tool\", \"content\": result, \"name\": tool_name})\n",
-"\n",
 "print_highlight(f\"Updated message history: {messages}\")"
 ]
 },
@@ -345,7 +348,7 @@
 "final_response = client.chat.completions.create(\n",
 "    model=model_name,\n",
 "    messages=messages,\n",
-"    temperature=0.1,\n",
+"    temperature=0,\n",
 "    top_p=0.95,\n",
 "    stream=False,\n",
 "    tools=tools,\n",
@@ -391,7 +394,7 @@
 "    \"sampling_params\": {\n",
 "        \"skip_special_tokens\": False,\n",
 "        \"max_new_tokens\": 1024,\n",
-"        \"temperature\": 0.1,\n",
+"        \"temperature\": 0,\n",
 "        \"top_p\": 0.95,\n",
 "    },\n",
 "}\n",
@@ -452,7 +455,7 @@
 "\n",
 "sampling_params = {\n",
 "    \"max_new_tokens\": 1024,\n",
-"    \"temperature\": 0.1,\n",
+"    \"temperature\": 0,\n",
 "    \"top_p\": 0.95,\n",
 "    \"skip_special_tokens\": False,\n",
 "}\n",
@@ -540,14 +543,6 @@
 "outputs": [],
 "source": [
 "import openai\n",
-"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
-"from sglang.test.test_utils import is_in_ci\n",
-"\n",
-"\n",
-"if is_in_ci():\n",
-"    from patch import launch_server_cmd\n",
-"else:\n",
-"    from sglang.utils import launch_server_cmd\n",
 "\n",
 "server_process, port = launch_server_cmd(\n",
 "    \" python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --tool-call-parser pythonic --tp 1\"  # llama-3.2-1b-instruct\n",
@@ -624,8 +619,8 @@
 "response_non_stream = client.chat.completions.create(\n",
 "    model=model_name,\n",
 "    messages=messages,\n",
-"    temperature=0.8,\n",
-"    top_p=0.8,\n",
+"    temperature=0,\n",
+"    top_p=0.9,\n",
 "    stream=False,  # Non-streaming\n",
 "    tools=tools,\n",
 ")\n",
@@ -635,8 +630,8 @@
 "response_stream = client.chat.completions.create(\n",
 "    model=model_name,\n",
 "    messages=messages,\n",
-"    temperature=0.8,\n",
-"    top_p=0.8,\n",
+"    temperature=0,\n",
+"    top_p=0.9,\n",
 "    stream=True,\n",
 "    tools=tools,\n",
 ")\n",
```

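For context, the corrected notebook flow above appends the assistant's full message object (including its `tool_calls`) to the history, executes the requested tool, and then appends a `tool` message linked back via `tool_call_id`. A condensed sketch of that round trip, assuming the `client`, `model_name`, `messages`, `tools`, and `available_tools` objects defined earlier in the notebook:

```python
import json

# Ask the model; with tools supplied it may respond with tool_calls
# instead of plain text content.
response = client.chat.completions.create(
    model=model_name, messages=messages, temperature=0, tools=tools
)
assistant_msg = response.choices[0].message
messages.append(assistant_msg)  # keep the full message, tool_calls included

# Execute the first requested tool and append its result as a "tool"
# message, tied back to the request through tool_call_id.
tool_call = assistant_msg.tool_calls[0]
result = available_tools[tool_call.function.name](
    **json.loads(tool_call.function.arguments)
)
messages.append(
    {
        "role": "tool",
        "tool_call_id": tool_call.id,
        "name": tool_call.function.name,
        "content": str(result),
    }
)
# A follow-up create() call with this history lets the model use the result.
```
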
python/sglang/srt/openai_api/adapter.py

Lines changed: 22 additions & 14 deletions

```diff
@@ -14,6 +14,7 @@
 """Conversion between OpenAI APIs and native SRT APIs"""
 
 import asyncio
+import base64
 import json
 import logging
 import os
@@ -970,17 +971,19 @@ def v1_chat_generate_request(
                for message in request.messages:
                    if message.content is None:
                        message.content = ""
-                   if isinstance(message.content, str):
-                       openai_compatible_messages.append(
-                           {"role": message.role, "content": message.content}
-                       )
+                   msg_dict = message.dict()
+                   if isinstance(msg_dict.get("content"), list):
+                       for chunk in msg_dict["content"]:
+                           if isinstance(chunk, dict) and chunk.get("type") == "text":
+                               new_msg = msg_dict.copy()
+                               new_msg["content"] = chunk["text"]
+                               new_msg = {
+                                   k: v for k, v in new_msg.items() if v is not None
+                               }
+                               openai_compatible_messages.append(new_msg)
                    else:
-                       content_list = message.dict()["content"]
-                       for content in content_list:
-                           if content["type"] == "text":
-                               openai_compatible_messages.append(
-                                   {"role": message.role, "content": content["text"]}
-                               )
+                       msg_dict = {k: v for k, v in msg_dict.items() if v is not None}
+                       openai_compatible_messages.append(msg_dict)
                if (
                    openai_compatible_messages
                    and openai_compatible_messages[-1]["role"] == "assistant"
@@ -1290,7 +1293,8 @@ def v1_chat_generate_response(
            text, call_info_list = parser.parse_non_stream(text)
            tool_calls = [
                ToolCall(
-                   id=str(call_info.tool_index),
+                   id=f"call_{base64.urlsafe_b64encode(uuid.uuid4().bytes).rstrip(b'=').decode()}",
+                   index=call_info.tool_index,
                    function=FunctionResponse(
                        name=call_info.name, arguments=call_info.parameters
                    ),
@@ -1406,6 +1410,7 @@ async def v1_chat_completions(
    reasoning_parser_dict = {}
 
    async def generate_stream_resp():
+       tool_call_first = True
        is_firsts = {}
        stream_buffers = {}
        n_prev_tokens = {}
@@ -1572,7 +1577,6 @@ async def generate_stream_resp():
                    # 2) if we found calls, we output them as separate chunk(s)
                    for call_item in calls:
                        # transform call_item -> FunctionResponse + ToolCall
-
                        if finish_reason_type == "stop":
                            latest_delta_len = 0
                            if isinstance(call_item.parameters, str):
@@ -1595,15 +1599,19 @@
                            call_item.parameters = remaining_call
 
                        finish_reason_type = "tool_calls"
-
                        tool_call = ToolCall(
-                           id=str(call_item.tool_index),
+                           id=(
+                               f"call_{base64.urlsafe_b64encode(uuid.uuid4().bytes).rstrip(b'=').decode()}"
+                               if tool_call_first
+                               else None
+                           ),
                            index=call_item.tool_index,
                            function=FunctionResponse(
                                name=call_item.name,
                                arguments=call_item.parameters,
                            ),
                        )
+                       tool_call_first = False
                        choice_data = ChatCompletionResponseStreamChoice(
                            index=index,
                            delta=DeltaMessage(tool_calls=[tool_call]),
```
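
Two behavioral notes on the change above. First, tool-call ids are no longer the stringified tool index but opaque OpenAI-style `call_...` strings: 16 random UUID bytes, URL-safe base64 encoded, with the `=` padding stripped. Second, in the streaming path the new `tool_call_first` flag means only the first streamed tool-call chunk carries a freshly generated id; subsequent delta chunks leave `id` as `None`. A standalone sketch of the id expression (the `new_tool_call_id` wrapper is a hypothetical name; only the f-string comes from the diff):

```python
import base64
import uuid


def new_tool_call_id() -> str:
    # 16 random bytes -> 24 base64 chars -> 22 chars once '=' padding is stripped.
    return f"call_{base64.urlsafe_b64encode(uuid.uuid4().bytes).rstrip(b'=').decode()}"


print(new_tool_call_id())  # e.g. call_Zz9mK1TQQkOaYhXw3v1bng (random each time)
```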

python/sglang/srt/openai_api/protocol.py

Lines changed: 20 additions & 16 deletions

```diff
@@ -250,9 +250,29 @@ class ChatCompletionMessageContentAudioPart(BaseModel):
 ]
 
 
+class FunctionResponse(BaseModel):
+    """Function response."""
+
+    name: Optional[str] = None
+    arguments: Optional[str] = None
+
+
+class ToolCall(BaseModel):
+    """Tool call response."""
+
+    id: Optional[str] = None
+    index: Optional[int] = None
+    type: Literal["function"] = "function"
+    function: FunctionResponse
+
+
 class ChatCompletionMessageGenericParam(BaseModel):
     role: Literal["system", "assistant", "tool"]
     content: Union[str, List[ChatCompletionMessageContentTextPart], None]
+    tool_call_id: Optional[str] = None
+    name: Optional[str] = None
+    reasoning_content: Optional[str] = None
+    tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
 
 
 class ChatCompletionMessageUserParam(BaseModel):
@@ -378,22 +398,6 @@ def set_tool_choice_default(cls, values):
     bootstrap_room: Optional[int] = None
 
 
-class FunctionResponse(BaseModel):
-    """Function response."""
-
-    name: Optional[str] = None
-    arguments: Optional[str] = None
-
-
-class ToolCall(BaseModel):
-    """Tool call response."""
-
-    id: str
-    index: Optional[int] = None
-    type: Literal["function"] = "function"
-    function: FunctionResponse
-
-
 class ChatMessage(BaseModel):
     role: Optional[str] = None
     content: Optional[str] = None
```
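
The `FunctionResponse` and `ToolCall` models move above `ChatCompletionMessageGenericParam` because the new `tool_calls` field references `ToolCall`, and Pydantic needs that name defined when the class body is evaluated; `ToolCall.id` also becomes `Optional[str]` so the streaming path can emit id-less delta chunks. A small sketch (hypothetical field values) of an assistant history message that now validates against the extended model:

```python
# Assumes FunctionResponse, ToolCall, and ChatCompletionMessageGenericParam
# from python/sglang/srt/openai_api/protocol.py; the values are made up.
assistant_msg = ChatCompletionMessageGenericParam(
    role="assistant",
    content="",
    tool_calls=[
        ToolCall(
            id="call_Zz9mK1TQQkOaYhXw3v1bng",
            index=0,
            function=FunctionResponse(
                name="get_current_weather",
                arguments='{"city": "Paris", "unit": "celsius"}',
            ),
        )
    ],
)
```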
