File "/vllm-workspace/vllm/worker/model_runner_base.py", line 116, in _wrapper
return func(*args, **kwargs)
File "/vllm-workspace/vllm/worker/model_runner.py", line 1590, in execute_model
hidden_or_intermediate_states = model_executable(
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1735, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1746, in _call_impl
return forward_call(*args, **kwargs)
File "/vllm-workspace/vllm/model_executor/models/internvl.py", line 488, in forward
vision_embeddings = self._process_image_input(image_input)
File "/vllm-workspace/vllm/model_executor/models/internvl.py", line 471, in _process_image_input
image_embeds = self.extract_feature(image_input["data"])
File "/vllm-workspace/vllm/model_executor/models/internvl.py", line 395, in extract_feature
vit_embeds = self.vision_model(pixel_values=pixel_values)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1735, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1746, in _call_impl
return forward_call(*args, **kwargs)
File "/vllm-workspace/vllm/model_executor/models/intern_vit.py", line 356, in forward
encoder_outputs = self.encoder(inputs_embeds=hidden_states)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1735, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1746, in _call_impl
return forward_call(*args, **kwargs)
File "/vllm-workspace/vllm/model_executor/models/intern_vit.py", line 298, in forward
hidden_states = encoder_layer(hidden_states)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1735, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1746, in _call_impl
return forward_call(*args, **kwargs)
File "/vllm-workspace/vllm/model_executor/models/intern_vit.py", line 267, in forward
hidden_states = hidden_states + self.attn(
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1735, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1746, in _call_impl
return forward_call(*args, **kwargs)
File "/vllm-workspace/vllm/model_executor/models/intern_vit.py", line 203, in forward
x = x.transpose(1, 2).view(B, N, -1)
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/conda/envs/py_3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/opt/conda/envs/py_3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/vllm-workspace/vllm/engine/multiprocessing/engine.py", line 318, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
File "/vllm-workspace/vllm/engine/multiprocessing/engine.py", line 113, in from_engine_args
return cls(
File "/vllm-workspace/vllm/engine/multiprocessing/engine.py", line 69, in __init__
self.engine = LLMEngine(*args, **kwargs)
File "/vllm-workspace/vllm/engine/llm_engine.py", line 331, in __init__
self._initialize_kv_caches()
File "/vllm-workspace/vllm/engine/llm_engine.py", line 465, in _initialize_kv_caches
self.model_executor.determine_num_available_blocks())
File "/vllm-workspace/vllm/executor/distributed_gpu_executor.py", line 39, in determine_num_available_blocks
num_blocks = self._run_workers("determine_num_available_blocks", )
File "/vllm-workspace/vllm/executor/multiproc_gpu_executor.py", line 185, in _run_workers
driver_worker_output = driver_worker_method(*args, **kwargs)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/vllm-workspace/vllm/worker/worker.py", line 223, in determine_num_available_blocks
self.model_runner.profile_run()
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/vllm-workspace/vllm/worker/model_runner.py", line 1236, in profile_run
self.execute_model(model_input, kv_caches, intermediate_tensors)
File "/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/vllm-workspace/vllm/worker/model_runner_base.py", line 144, in _wrapper
raise type(err)(
RuntimeError: Error in model execution (input dumped to /tmp/err_execute_model_input_20240919-094504.pkl): view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
```
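For reference, the `RuntimeError` originates in PyTorch itself, not in vLLM: after `transpose(1, 2)`, the tensor at `intern_vit.py:203` is no longer contiguous, so `view(B, N, -1)` cannot merge the last two dimensions without copying memory. A minimal sketch that reproduces the error in isolation (the shapes here are illustrative, not InternViT's actual dimensions):

```python
import torch

# B=batch, H=heads, N=tokens, D=head_dim -- illustrative values only.
B, N, H, D = 2, 4, 8, 16
x = torch.randn(B, H, N, D)  # typical attention-output layout

try:
    # Fails: after transpose the strides span two contiguous subspaces,
    # so view() cannot reinterpret the memory without a copy.
    x.transpose(1, 2).view(B, N, -1)
except RuntimeError as e:
    print(e)

# reshape() returns a view when possible and copies when it must.
y = x.transpose(1, 2).reshape(B, N, -1)
print(y.shape)  # torch.Size([2, 4, 128])
```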
### Your current environment

The output of `python collect_env.py`

### Model Input Dumps

err_execute_model_input_20240919-094504.pkl.zip

### 🐛 Describe the bug
When I start the model via `vllm serve OpenGVLab/InternVL2-Llama3-76B --tensor-parallel-size 8 --max-model-len 8000`, I get the traceback shown above.
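The error message itself suggests the likely one-line fix at `intern_vit.py` line 203, where `.view()` is called on the non-contiguous result of `.transpose(1, 2)`. A sketch of the change, based only on the line shown in the traceback (untested against the vLLM source):

```python
# vllm/model_executor/models/intern_vit.py, line 203 (per the traceback above)

# Before: raises RuntimeError when x is non-contiguous after the transpose
x = x.transpose(1, 2).view(B, N, -1)

# After: reshape() behaves like view() on contiguous inputs and
# falls back to a copy when the strides do not permit a view
x = x.transpose(1, 2).reshape(B, N, -1)
```

Since `reshape()` returns a view whenever the strides allow it, this change should be behavior-preserving apart from an extra copy on non-contiguous inputs.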