diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 94245d3b216f..c5f674d8ccc4 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3084,6 +3084,8 @@ def _preprocess( positions = self.xdrope_positions.gpu[:, :num_input_tokens] else: positions = self.positions.gpu[:num_input_tokens] + if num_input_tokens > num_scheduled_tokens: + self.positions.gpu[num_scheduled_tokens:num_input_tokens].zero_() if is_first_rank: intermediate_tensors = None