Skip to content

add function call parser for DeepSeek V3 #5224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 21, 2025
29 changes: 29 additions & 0 deletions docs/references/deepseek.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,35 @@ When using FlashInfer MLA wrapper (`--attention-backend flashinfer`) with specul

See [Separate Reasoning](https://docs.sglang.ai/backend/separate_reasoning.html).


### Function calling for DeepSeek Models

Add the argument `--tool-call-parser deepseekv3` to enable this feature. For example (running on a single H20 node):

```
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --port 30000 --host 0.0.0.0 --mem-fraction-static 0.9 --disable-cuda-graph --tool-call-parser deepseekv3
```

Sample Request:

```
curl "http://127.0.0.1:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324", "tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "Hows the weather like in Qingdao today"}]}'
```

Expected Response:

```
{"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\"city\": \"Qingdao\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}}

```

Important Notes:
1. Use a lower `"temperature"` value for better results.
2. Currently, the function calling implementation for DeepSeek models does not support streaming requests.


## FAQ

1. **Question**: What should I do if model loading takes too long and NCCL timeout occurs?
Expand Down
60 changes: 60 additions & 0 deletions python/sglang/srt/function_call_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"<tool_call>",
"<|python_tag|>",
"[TOOL_CALLS]",
"<|tool▁calls▁begin|>",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

]


Expand Down Expand Up @@ -477,6 +478,64 @@ def structure_info(self) -> _GetInfoFunc:
)


class DeepSeekV3Detector(BaseFormatDetector):
    """
    Detector for the DeepSeek V3 tool-call format.

    Assumes function call format:
      '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>'

    Each individual call is wrapped in a call-begin/call-end pair and carries
    the function name followed by a fenced JSON block with its arguments.
    """

    def __init__(self):
        super().__init__()
        self.bot_token = "<|tool▁calls▁begin|>"
        self.eot_token = "<|tool▁calls▁end|>"

        # The delimiters are embedded in regular expressions below, so they
        # are escaped first: any regex metacharacter they contain (notably
        # `|`, which would otherwise be parsed as alternation) must be
        # matched literally.
        call_begin = re.escape("<|tool▁call▁begin|>")
        call_end = re.escape("<|tool▁call▁end|>")
        call_sep = re.escape("<|tool▁sep|>")

        # Matches one complete call, delimiters included (non-greedy so that
        # consecutive calls are returned separately).
        self.func_call_regex = call_begin + r".*?" + call_end
        # Groups: (1) call type, (2) function name, (3) JSON argument payload.
        self.func_detail_regex = (
            call_begin + r"(.*)" + call_sep + r"(.*)\n```json\n(.*)\n```" + call_end
        )

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a deepseek format tool call."""
        return self.bot_token in text

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: A StreamingParseResult. When the start token is absent, the
            whole text is returned as normal text with no calls. Otherwise the
            (stripped) text preceding the start token is the normal text and
            `calls` holds the result of `parse_base_json` for each call found.
            If any call fails to parse, the error is logged and the entire
            input is returned as normal text.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            return StreamingParseResult(normal_text=text, calls=[])

        normal_text = text[:idx].strip()
        calls = []
        try:
            for raw_call in re.findall(self.func_call_regex, text, re.DOTALL):
                func_detail = re.search(self.func_detail_regex, raw_call, re.DOTALL)
                if func_detail is None:
                    # Delimiters were present but the name/JSON layout was not;
                    # report it explicitly instead of failing on `None.group`.
                    raise ValueError(f"malformed tool call: {raw_call!r}")
                func_name = func_detail.group(2)
                func_args = json.loads(func_detail.group(3))
                # construct the action dict expected by parse_base_json
                action = {"name": func_name, "parameters": func_args}
                calls.extend(self.parse_base_json(action, tools))
            return StreamingParseResult(normal_text=normal_text, calls=calls)
        except Exception as e:
            logger.error(f"Error in detect_and_parse: {e}")
            # return the normal text if parsing fails
            return StreamingParseResult(normal_text=text)

    def structure_info(self) -> _GetInfoFunc:
        """Return a factory mapping a function name to the StructureInfo
        (begin text, end text and trigger token) for a call to that function."""
        return lambda name: StructureInfo(
            begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
            + name
            + "\n```json\n",
            end="\n```<|tool▁call▁end|><|tool▁calls▁end|>",
            trigger="<|tool▁calls▁begin|>",
        )


class MultiFormatParser:
def __init__(self, detectors: List[BaseFormatDetector]):
"""
Expand Down Expand Up @@ -543,6 +602,7 @@ class FunctionCallParser:
"llama3": Llama32Detector,
"qwen25": Qwen25Detector,
"mistral": MistralDetector,
"deepseekv3": DeepSeekV3Detector,
}

def __init__(self, tools: List[Tool], tool_call_parser: str):
Expand Down
29 changes: 29 additions & 0 deletions python/sglang/srt/openai_api/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,35 @@ def v1_chat_generate_request(

if chat_template_name is None:
openai_compatible_messages = []
if (
tools
and tokenizer_manager.server_args.tool_call_parser == "deepseekv3"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we not add a chat template for this? this kind of prompt engineering is more suitable to add into a chat template.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we not add a chat template for this? this kind of prompt engineering is more suitable to add into a chat template.

Agree to update a new built-in template

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we not add a chat template for this? this kind of prompt engineering is more suitable to add into a chat template.

Agree to update a new built-in template

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, we can create a new PR by using custom chat template

):
# add function call prompt to deepseekv3
openai_compatible_messages.append(
{
"role": "system",
"content": """You are a helpful Assistant.
## Tools
### Function
You have the following functions available:
"""
+ "".join(
[
f"""
- `{tool['name']}`:
```json
{json.dumps(tool)}
```
"""
for tool in tools
]
),
}
)
# TODO fix the compatible issues with xgrammar
strict_tag = None

for message in request.messages:
if isinstance(message.content, str):
openai_compatible_messages.append(
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--tool-call-parser",
type=str,
choices=["qwen25", "mistral", "llama3"],
choices=["qwen25", "mistral", "llama3", "deepseekv3"],
default=ServerArgs.tool_call_parser,
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
)
Expand Down
Loading