Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions fastdeploy/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,12 @@ def __init__(
self.speculative_method = self.fd_config.speculative_config.method
self.speculative_decoding = self.speculative_method is not None
self.enable_logprob = fd_config.model_config.enable_logprob
self.max_logprobs = fd_config.model_config.max_logprobs
self.enable_early_stop = self.fd_config.early_stop_config.enable_early_stop
self.is_pooling_model = self.fd_config.model_config.runner_type == "pooling"
self.vocal_size = self.fd_config.model_config.vocab_size
self.ori_vocab_size = self.fd_config.model_config.ori_vocab_size
self.max_logprobs = (
self.ori_vocab_size if fd_config.model_config.max_logprobs == -1 else fd_config.model_config.max_logprobs
)
self.prompt_logprobs_reqs: dict[str, Request] = {}
self.in_progress_prompt_logprobs: dict[str, LogprobsTensors] = {}

Expand Down Expand Up @@ -2722,7 +2724,7 @@ def _get_prompt_logprobs_list(
if request.prompt_token_ids is None or num_prompt_logprobs is None:
continue
if num_prompt_logprobs == -1:
num_prompt_logprobs = self.vocal_size
num_prompt_logprobs = self.ori_vocab_size

num_tokens = request.prefill_end_index - request.prefill_start_index
num_prompt_tokens = len(request.prompt_token_ids)
Expand Down
Loading