Commit a21ef36
Add support for the DeepSeek model by enabling streaming response parsing (#5592)
1 parent 3c4dc38 commit a21ef36

File tree

3 files changed, +100 -9 lines changed

docs/references/deepseek.md

Lines changed: 23 additions & 2 deletions

@@ -193,10 +193,31 @@ Expected Response
 {"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\\"city\\": \\"Guangzhou\\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}}

 ```
-
+Sample Streaming Request:
+```
+curl "http://127.0.0.1:30000/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324","stream":true,"tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "Hows the weather like in Qingdao today"}]}'
+```
+Expected Streamed Chunks (simplified for clarity):
+```
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"city"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\":\""}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"Q"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"ing"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"dao"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":null}}], "finish_reason": "tool_calls"}
+data: [DONE]
+```
+The client needs to concatenate all argument fragments to reconstruct the complete tool call:
+```
+{"city": "Qingdao"}
+```
 Important Notes:
 1. Use a lower `"temperature"` value for better results.
-2. Currently, the function calling implementation for deepseek is incompatible with streaming requests.
+


 ## FAQ
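For context on the documentation added above: the streamed chunks carry the tool-call arguments as string fragments, so a client has to accumulate them per tool call before parsing. Below is a minimal client-side sketch of that concatenation step. It is not part of this commit; it assumes the `openai` Python package (v1+), an SGLang server at the address used in the sample request, and reuses the sample tool schema purely for illustration.

```python
# Client-side accumulation sketch (not part of this commit).
# Assumptions: `pip install openai` (v1+), an SGLang server at 127.0.0.1:30000,
# and the query_weather tool schema from the sample request above.
import json

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")  # placeholder key

tools = [{
    "type": "function",
    "function": {
        "name": "query_weather",
        "description": "Get weather of an city, the user should supply a city first",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "The city, e.g. Beijing"}
            },
            "required": ["city"],
        },
    },
}]

stream = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V3-0324",
    messages=[{"role": "user", "content": "Hows the weather like in Qingdao today"}],
    tools=tools,
    stream=True,
    temperature=0,
    max_tokens=100,
)

# Accumulate argument fragments per tool-call index until the stream ends.
pending: dict[int, dict] = {}
for chunk in stream:
    if not chunk.choices:
        continue
    for tc in chunk.choices[0].delta.tool_calls or []:
        slot = pending.setdefault(tc.index or 0, {"name": None, "arguments": ""})
        if tc.function and tc.function.name:
            slot["name"] = tc.function.name
        if tc.function and tc.function.arguments:
            slot["arguments"] += tc.function.arguments  # concatenate fragments

for slot in pending.values():
    # Arguments should reassemble to {"city": "Qingdao"} once the stream finishes.
    print(slot["name"], json.loads(slot["arguments"]))
```

Run against a server started as described in the document, this should print something like `query_weather {'city': 'Qingdao'}`.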

python/sglang/srt/function_call_parser.py

Lines changed: 77 additions & 5 deletions

@@ -491,6 +491,7 @@ def __init__(self):
         self.eot_token = "<|tool▁calls▁end|>"
         self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
         self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>"
+        self._last_arguments = ""

     def has_tool_call(self, text: str) -> bool:
         """Check if the text contains a deepseek format tool call."""
@@ -528,13 +529,84 @@ def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult

     def structure_info(self) -> _GetInfoFunc:
         return lambda name: StructureInfo(
-            begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
-            + name
-            + "\n```json\n",
-            end="\n```<|tool▁call▁end|><|tool▁calls▁end|>",
-            trigger="<|tool▁calls▁begin|>",
+            begin=">" + name + "\n```json\n",
+            end="\n```<",
+            trigger=">" + name + "\n```json\n",
         )

+    def parse_streaming_increment(
+        self, new_text: str, tools: List[Tool]
+    ) -> StreamingParseResult:
+        """
+        Streaming incremental parsing tool calls for DeepSeekV3 format.
+        """
+        self._buffer += new_text
+        current_text = self._buffer
+
+        if self.bot_token not in current_text:
+            self._buffer = ""
+            for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]:
+                if e_token in new_text:
+                    new_text = new_text.replace(e_token, "")
+            return StreamingParseResult(normal_text=new_text)
+
+        if not hasattr(self, "_tool_indices"):
+            self._tool_indices = {
+                tool.function.name: i
+                for i, tool in enumerate(tools)
+                if tool.function and tool.function.name
+            }
+
+        calls: list[ToolCallItem] = []
+        try:
+            partial_match = re.search(
+                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)",
+                string=current_text,
+                flags=re.DOTALL,
+            )
+            if partial_match:
+                func_name = partial_match.group(2).strip()
+                func_args_raw = partial_match.group(3).strip()
+
+                if not self.current_tool_name_sent:
+                    calls.append(
+                        ToolCallItem(
+                            tool_index=self._tool_indices.get(func_name, 0),
+                            name=func_name,
+                            parameters="",
+                        )
+                    )
+                    self.current_tool_name_sent = True
+                else:
+                    argument_diff = (
+                        func_args_raw[len(self._last_arguments) :]
+                        if func_args_raw.startswith(self._last_arguments)
+                        else func_args_raw
+                    )
+
+                    if argument_diff:
+                        calls.append(
+                            ToolCallItem(
+                                tool_index=self._tool_indices.get(func_name, 0),
+                                name=None,
+                                parameters=argument_diff,
+                            )
+                        )
+                        self._last_arguments += argument_diff
+
+                if _is_complete_json(func_args_raw):
+                    result = StreamingParseResult(normal_text="", calls=calls)
+                    self._buffer = ""
+                    self._last_arguments = ""
+                    self.current_tool_name_sent = False
+                    return result
+
+            return StreamingParseResult(normal_text="", calls=calls)
+
+        except Exception as e:
+            logger.error(f"Error in parse_streaming_increment: {e}")
+            return StreamingParseResult(normal_text=current_text)
+

 class MultiFormatParser:
     def __init__(self, detectors: List[BaseFormatDetector]):
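The heart of the new `parse_streaming_increment` is a prefix diff: the detector re-runs its partial regex over the whole buffer, emits only the suffix of the arguments string that has not been sent yet, and resets its state once `_is_complete_json` (a helper defined elsewhere in this file) reports a complete JSON object. Below is a standalone sketch of that idea, not the actual sglang classes; the `_is_complete_json` shown here is an assumed stand-in for the real helper.

```python
# Standalone sketch of the prefix-diff logic used by parse_streaming_increment
# (illustrative only; _is_complete_json is an assumed stand-in for the helper
# the real detector calls in function_call_parser.py).
import json


def _is_complete_json(text: str) -> bool:
    """Assumed behaviour: True once the text parses as a full JSON value."""
    try:
        json.loads(text)
        return True
    except json.JSONDecodeError:
        return False


class ArgumentStreamer:
    """Emit only the not-yet-sent suffix of an accumulating arguments string."""

    def __init__(self) -> None:
        self._last_arguments = ""

    def feed(self, accumulated_args: str) -> str:
        # Mirrors `argument_diff`: send the new suffix if the buffer still
        # extends what was already emitted, otherwise resend everything.
        if accumulated_args.startswith(self._last_arguments):
            diff = accumulated_args[len(self._last_arguments):]
        else:
            diff = accumulated_args
        self._last_arguments += diff
        if _is_complete_json(accumulated_args):
            self._last_arguments = ""  # arguments finished; reset for the next tool call
        return diff


streamer = ArgumentStreamer()
for snapshot in ['{"', '{"city', '{"city": "', '{"city": "Qingdao', '{"city": "Qingdao"}']:
    print(repr(streamer.feed(snapshot)))
# Prints: '{"', 'city', '": "', 'Qingdao', '"}'
```

The fragments printed here differ from the token-level splits in the documentation example only because the demo feeds coarser snapshots of the accumulated arguments; the diffing behaviour is the same.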

python/sglang/srt/openai_api/adapter.py

Lines changed: 0 additions & 2 deletions

@@ -966,8 +966,6 @@ def v1_chat_generate_request(
                     ),
                 }
             )
-            # TODO fix the compatible issues with xgrammar
-            strict_tag = None

            for message in request.messages:
                if isinstance(message.content, str):
