Description
System Info
----------Python Info----------
Version : 3.12.3
Compiler : GCC 13.3.0
Build : ('main', 'Feb 4 2025 14:48:35')
Arch : ('64bit', 'ELF')
------------Pip Info-----------
Version : 25.2
Directory : /usr/local/lib/python3.12/dist-packages/pip
vllm : 0.11.0
sglang : not found.
ray : 2.49.2
torch : 2.8.0
----------verl Info-----------
Version : 0.5.0.dev
Directory : /data/dbn-ceph/verl-251007/verl/verl
Commit Hash : ab10eb26711d7e63351d66c69254133c1d41998e
----------Platform Info----------
Platform : Linux-5.4.0-42-generic-x86_64-with-glibc2.39
system : Linux
node : 251008-copy-copy-rc-hr5qb-h-zc626
release : 5.4.0-42-generic
version : #46-Ubuntu SMP Fri Jul 10 00:24:02 UTC 2020
----------Environment----------
CUDA Runtime : 12.8
CUDA Compiler : Cuda compilation tools, release 12.9, V12.9.41
Information
- The official example scripts
- My own modified scripts
Tasks
- An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
- My own task or dataset (give details below)
Reproduction
The reproduction script is:
#!/usr/bin/env bash
set -xeuo pipefail
cd /data/dbn-ceph/verl-251007/verl
project_name='DAPO'
exp_name='qwen3vl'
export VLLM_USE_V1=1
adv_estimator=grpo
use_kl_in_reward=False
kl_coef=0.0
use_kl_loss=False
kl_loss_coef=0.0
clip_ratio_low=0.2
clip_ratio_high=0.28
max_prompt_length=$((1024 * 1))
max_response_length=$((1024 * 4))
enable_overlong_buffer=True
overlong_buffer_len=$((1024 * 1))
overlong_penalty_factor=1.0
loss_agg_mode="token-mean"
train_prompt_bsz=32
n_resp_per_prompt=4
train_prompt_mini_bsz=16
# Ray
# RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
# WORKING_DIR=${WORKING_DIR:-"${PWD}"}
# RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/verl/trainer/runtime_env.yaml"}
NNODES=${NNODES:-1}
NGPUS_PER_NODE=${NGPUS_PER_NODE:-8}
# Paths
MODEL_PATH=/data/dbn-ceph/models/huggingface/Qwen/Qwen3-VL-30B-A3B-Instruct
# MODEL_PATH=/data/dbn-ceph/models/huggingface/Qwen/Qwen2___5-VL-3B-Instruct
CKPTS_DIR=/data/dbn-ceph/exp/qwen3vl/ckpt/${exp_name}
TRAIN_FILE=/data/dbn-ceph/datasets/data/geo3k/train.parquet
TEST_FILE=/data/dbn-ceph/datasets/data/geo3k/test.parquet
# Algorithm
temperature=1.0
top_p=1.0
top_k=-1 # 0 for HF rollout, -1 for vLLM rollout
val_top_p=0.7
# Performance Related Parameter
sp_size=4
use_dynamic_bsz=True
actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 1))
infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 1))
offload=True
gen_tp=4
fsdp_size=8
export HYDRA_FULL_ERROR=1
# ray job submit \
# --runtime-env=verl/trainer/runtime_env.yaml \
# --no-wait \
# -- \
python3 -m verl.trainer.main_ppo \
data.train_files="${TRAIN_FILE}" \
data.val_files="${TEST_FILE}" \
data.prompt_key=prompt \
data.truncation='left' \
data.max_prompt_length=${max_prompt_length} \
data.max_response_length=${max_response_length} \
data.train_batch_size=${train_prompt_bsz} \
actor_rollout_ref.rollout.n=${n_resp_per_prompt} \
algorithm.adv_estimator=${adv_estimator} \
algorithm.use_kl_in_reward=${use_kl_in_reward} \
algorithm.kl_ctrl.kl_coef=${kl_coef} \
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \
actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \
actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \
actor_rollout_ref.actor.clip_ratio_c=10.0 \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} \
actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
actor_rollout_ref.model.path="${MODEL_PATH}" \
actor_rollout_ref.model.enable_gradient_checkpointing=True \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.optim.lr_warmup_steps=10 \
actor_rollout_ref.actor.optim.weight_decay=0.1 \
actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \
actor_rollout_ref.actor.fsdp_config.param_offload=${offload} \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} \
actor_rollout_ref.actor.entropy_coeff=0 \
actor_rollout_ref.actor.grad_clip=1.0 \
actor_rollout_ref.actor.loss_agg_mode=${loss_agg_mode} \
actor_rollout_ref.actor.ulysses_sequence_parallel_size=${sp_size} \
actor_rollout_ref.rollout.gpu_memory_utilization=0.80 \
actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \
actor_rollout_ref.rollout.enable_chunked_prefill=True \
actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \
actor_rollout_ref.rollout.temperature=${temperature} \
actor_rollout_ref.rollout.top_p=${top_p} \
actor_rollout_ref.rollout.top_k=${top_k} \
actor_rollout_ref.rollout.val_kwargs.temperature=${temperature} \
actor_rollout_ref.rollout.val_kwargs.top_p=${val_top_p} \
actor_rollout_ref.rollout.val_kwargs.top_k=${top_k} \
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
actor_rollout_ref.rollout.val_kwargs.n=1 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.ref.fsdp_config.param_offload=${offload} \
actor_rollout_ref.ref.ulysses_sequence_parallel_size=${sp_size} \
actor_rollout_ref.ref.strategy=fsdp2 \
actor_rollout_ref.actor.fsdp_config.fsdp_size=${fsdp_size} \
actor_rollout_ref.actor.strategy=fsdp2 \
reward_model.reward_manager=dapo \
+reward_model.reward_kwargs.overlong_buffer_cfg.enable=${enable_overlong_buffer} \
+reward_model.reward_kwargs.overlong_buffer_cfg.len=${overlong_buffer_len} \
+reward_model.reward_kwargs.overlong_buffer_cfg.penalty_factor=${overlong_penalty_factor} \
+reward_model.reward_kwargs.overlong_buffer_cfg.log=False \
+reward_model.reward_kwargs.max_resp_len=${max_response_length} \
trainer.logger='["console","wandb"]' \
trainer.project_name="${project_name}" \
trainer.experiment_name="${exp_name}" \
trainer.n_gpus_per_node="${NGPUS_PER_NODE}" \
trainer.nnodes="${NNODES}" \
trainer.val_before_train=False \
trainer.test_freq=5 \
trainer.save_freq=-1 \
trainer.total_epochs=10 \
trainer.total_training_steps=300 \
trainer.default_local_dir="${CKPTS_DIR}" \
trainer.resume_mode=auto \
trainer.log_val_generations=10 \
| tee /data/dbn-ceph/exp/qwen3vl/log/${project_name}_${exp_name}_$(date +'%Y%m%d_%H%M%S').log
The error is:
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/data/dbn-ceph/verl-251007/verl/verl/trainer/main_ppo.py", line 412, in <module>
main()
File "/usr/local/lib/python3.12/dist-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
^^^^^^
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/utils.py", line 458, in <lambda>
lambda: hydra.run(
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/usr/local/lib/python3.12/dist-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/trainer/main_ppo.py", line 42, in main
run_ppo(config)
File "/data/dbn-ceph/verl-251007/verl/verl/trainer/main_ppo.py", line 85, in run_ppo
ray.get(runner.run.remote(config))
File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2882, in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 968, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(IndexError): ray::TaskRunner.run() (pid=137435, ip=10.178.138.183, actor_id=f2327b910f62332d6b2eb7e005000000, repr=<main_ppo.TaskRunner object at 0x7f48fa9c8140>)
File "/data/dbn-ceph/verl-251007/verl/verl/trainer/main_ppo.py", line 317, in run
trainer.fit()
File "/data/dbn-ceph/verl-251007/verl/verl/trainer/ppo/ray_trainer.py", line 1060, in fit
old_log_prob = self.actor_rollout_wg.compute_log_prob(batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/single_controller/ray/base.py", line 48, in __call__
output = ray.get(output)
^^^^^^^^^^^^^^^
ray.exceptions.RayTaskError(IndexError): ray::WorkerDict.actor_rollout_compute_log_prob() (pid=103552, ip=10.178.143.81, actor_id=c672e00638c34463b51e646605000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f28f4af4f20>)
File "/data/dbn-ceph/verl-251007/verl/verl/single_controller/ray/base.py", line 700, in func
return getattr(self.worker_dict[key], name)(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/single_controller/base/decorator.py", line 433, in inner
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/utils/profiler/profile.py", line 256, in wrapper
return func(self_instance, *args, **kwargs_inner)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/workers/fsdp_workers.py", line 958, in compute_log_prob
output, entropys = self.actor.compute_log_prob(data=data, calculate_entropy=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/utils/profiler/performance.py", line 105, in f
return self.log(decorated_function, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/utils/profiler/performance.py", line 118, in log
output = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/workers/actor/dp_actor.py", line 339, in compute_log_prob
entropy, log_probs = self._forward_micro_batch(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/workers/actor/dp_actor.py", line 170, in _forward_micro_batch
output = self.actor_module(
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1879, in _call_impl
return inner()
^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1827, in inner
result = forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/models/transformers/qwen3_vl.py", line 259, in forward_with_normal_backend
outputs = self.model(input_ids, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/models/transformers/qwen3_vl.py", line 246, in qwen3_vl_base_forward
return self.language_model(
^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/dbn-ceph/verl-251007/verl/verl/models/transformers/monkey_patch.py", line 170, in ulysses_wrapped_decoder_forward
return original_forward(self, *args, **call_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/transformers/utils/generic.py", line 927, in wrapper
outputs = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 941, in forward
hidden_states = self._deepstack_process(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 959, in _deepstack_process
local_this = hidden_states[visual_pos_masks, :].clone() + visual_embeds
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
IndexError: The shape of the mask [1, 19320] at index 1 does not match the shape of the indexed tensor [1, 4830, 2048] at index 1
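One observation: the mask length 19320 is exactly 4 × 4830, and 4 is the ulysses_sequence_parallel_size (sp_size) set in the script above. My unconfirmed guess is that Ulysses sequence parallelism shards hidden_states along the sequence dimension while visual_pos_masks still covers the full sequence. A minimal sketch that reproduces the same error under that assumption (shapes taken from the error message, not from the actual verl code path):
import torch
# Assumed values, taken from the error message and the script above:
sp_size = 4                               # ulysses_sequence_parallel_size
full_seq_len = 19320                      # length of visual_pos_masks
local_seq_len = full_seq_len // sp_size   # 4830, the per-rank sequence shard
hidden_dim = 2048
# hidden_states is sharded along the sequence dim; the mask is not.
hidden_states = torch.zeros(1, local_seq_len, hidden_dim)
visual_pos_masks = torch.zeros(1, full_seq_len, dtype=torch.bool)
# Raises: IndexError: The shape of the mask [1, 19320] at index 1 does not
# match the shape of the indexed tensor [1, 4830, 2048] at index 1
local_this = hidden_states[visual_pos_masks, :]
If that guess is right, _deepstack_process would need visual_pos_masks (and visual_embeds) sliced to the local sequence shard before indexing; setting sp_size=1 might avoid the crash, though I have not verified either.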
Expected behavior
Training should run without this IndexError. How can this be solved?