Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastdeploy/engine/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def from_dict(cls, d: dict):
guided_grammar=d.get("guided_grammar", None),
structural_tag=d.get("structural_tag", None),
guided_json_object=d.get("guided_json_object", None),
- enable_thinking=d.get("enable_thinking", False),
+ enable_thinking=d.get("enable_thinking", None),
reasoning_max_tokens=d.get("reasoning_max_tokens", None),
trace_carrier=d.get("trace_carrier", {}),
chat_template=d.get("chat_template", None),
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ async def _create_chat_completion_choice(

if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
work_process_metrics.e2e_request_latency.observe(
- time.time() - output.get("metrics").get("request_start_time")
+ time.time() - data.get("metrics").get("request_start_time")
)
message = ChatMessage(
role="assistant",
Expand Down Expand Up @@ -655,7 +655,7 @@ async def _create_chat_completion_choice(
finish_reason = "tool_calls"
else:
finish_reason = "length"
- if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
+ if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
finish_reason = "recover_stop"

return ChatCompletionResponseChoice(
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/input/ernie4_5_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
- if k not in task:
+ if k not in task or task[k] is None:
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/input/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
- if k not in task:
+ if k not in task or task[k] is None:
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
Expand Down
6 changes: 3 additions & 3 deletions tests/entrypoints/openai/test_max_streaming_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ async def test_create_chat_completion_choice(self):
"test_data": {
"request_id": "test_1",
"outputs": {
"token_ids": [789],
"token_ids": [123, 456, 789],
"text": "Edge case response",
"reasoning_content": None,
"tool_call": None,
Expand All @@ -424,7 +424,7 @@ async def test_create_chat_completion_choice(self):
"previous_num_tokens": 1,
},
"mock_request": ChatCompletionRequest(
- model="test", messages=[], return_token_ids=True, max_tokens=5, n=2
+ model="test", messages=[], return_token_ids=True, max_tokens=1, n=2
),
"expected": {
"index": 1,
Expand All @@ -434,7 +434,7 @@ async def test_create_chat_completion_choice(self):
"raw_prediction": None,
"num_cached_tokens": 0,
"num_image_tokens": 0,
"finish_reason": "stop",
"finish_reason": "length",
},
},
]
Expand Down