Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fastdeploy/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ async def add_request(

try:
request = Request.from_dict(prompt)
request.llm_engine_recv_req_timestamp = time.time()

# Check if already preprocessed by AsyncEngineClient
is_preprocessed = prompt.get("_preprocessed", False)
Expand Down
1 change: 1 addition & 0 deletions fastdeploy/engine/common_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ def _insert_zmq_task_to_scheduler(self):
err_msg = None
try:
request = Request.from_dict(data)
request.llm_engine_recv_req_timestamp = time.time()
start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
main_process_metrics.requests_number.inc()
self.llm_logger.debug(f"Receive request: {request}")
Expand Down
1 change: 1 addition & 0 deletions fastdeploy/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def add_requests(self, task, sampling_params=None, **kwargs):
if sampling_params is not None:
task.update(asdict(sampling_params))
request = Request.from_dict(task)
request.llm_engine_recv_req_timestamp = time.time()
llm_logger.info(f"Receive request {request}")
if sampling_params is not None:
request.sampling_params = sampling_params
Expand Down
6 changes: 6 additions & 0 deletions fastdeploy/engine/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ class RequestMetrics:
model_forward_time: Optional[float] = None
model_execute_time: Optional[float] = None
request_start_time: Optional[float] = None
llm_engine_recv_req_timestamp: Optional[float] = None
llm_engine_send_req_to_engine_timestamp: Optional[float] = None
llm_engine_recv_token_timestamp: Optional[float] = None

def to_dict(self):
"""
Expand All @@ -413,6 +416,9 @@ def to_dict(self):
"model_forward_time": self.model_forward_time,
"model_execute_time": self.model_execute_time,
"request_start_time": self.request_start_time,
"llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp,
"llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp,
"llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp,
}

@classmethod
Expand Down
7 changes: 6 additions & 1 deletion fastdeploy/output/token_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,15 +630,20 @@ def _process_batch_output(self):
time_in_queue=task.schedule_start_time - task.preprocess_end_time,
preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
request_start_time=task.arrival_time,
llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
llm_engine_recv_token_timestamp=time.time(),
)

self._record_first_token_metrics(task, current_time)

else:
metrics = RequestMetrics(
arrival_time=time.time(),
request_start_time=task.arrival_time,
model_execute_time=time.time() - task.inference_start_time,
llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
llm_engine_recv_token_timestamp=time.time(),
)
self.number_of_output_tokens += len(token_ids)
self._record_metrics(task, current_time, token_ids)
Expand Down
1 change: 1 addition & 0 deletions tests/output/test_get_save_output_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(self):
self.prefill_chunk_info = None
self.prefill_chunk_num = 0
self.pooling_params = None
self.llm_engine_recv_req_timestamp = time.time()

def get(self, key: str, default_value=None):
if hasattr(self, key):
Expand Down
1 change: 1 addition & 0 deletions tests/output/test_process_batch_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self):
self.disaggregate_info = None
self.prefill_chunk_info = None
self.prefill_chunk_num = 0
self.llm_engine_recv_req_timestamp = time.time()

def get(self, key: str, default_value=None):
if hasattr(self, key):
Expand Down
Loading