update test case

A1waysBeenHere · A1waysBeenHere · commit e985c6bfcf69 · 2026-01-23T15:20:34.000+08:00
diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml
@@ -123,10 +123,18 @@ jobs:
         run: |
           ray stop --force
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+      - name: Prepare GEO3K dataset
+        run: |
+          ray stop --force
+          python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/geo3k
       - name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
         run: |
           ray stop --force
-          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/run_ppo_trainer_veomni.sh
+          VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
+      - name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
+        run: |
+          ray stop --force
+          MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/geo3k/test.parquet VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen3-2b-vl-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
 
   cleanup:
     runs-on: ubuntu-latest
diff --git a/tests/special_e2e/run_ppo_trainer_veomni.sh b/tests/special_e2e/run_ppo_trainer_veomni.sh
@@ -8,8 +8,15 @@ MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
 
 TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
 VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}
+VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-False}
+NUM_GPUS=${NUM_GPUS:-8}
+FSDP_SIZE=${FSDP_SIZE:-4}
+SP_SIZE=${SP_SIZE:-2}
+EP_SIZE=${EP_SIZE:-2}
+VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size${FSDP_SIZE}}
 
-python3 -m verl.trainer.main_ppo \
+python3 -m verl.trainer.main_ppo --config-path=config \
+    --config-name="ppo_veomni_trainer.yaml" \
     algorithm.adv_estimator=grpo \
     data.train_files="${TRAIN_FILES}" \
     data.val_files="${VAL_FILES}" \
@@ -19,35 +26,43 @@ python3 -m verl.trainer.main_ppo \
     data.filter_overlong_prompts=True \
     data.truncation='error' \
     actor_rollout_ref.model.path="${MODEL_PATH}" \
+    actor_rollout_ref.actor.optim.lr=5e-7 \
     actor_rollout_ref.model.use_remove_padding=True \
     actor_rollout_ref.model.enable_gradient_checkpointing=True \
-    actor_rollout_ref.actor.optim.lr=5e-7 \
+    actor_rollout_ref.actor.veomni.param_offload=True \
+    actor_rollout_ref.actor.veomni.optimizer_offload=True \
+    actor_rollout_ref.actor.veomni.data_parallel_mode=fsdp2 \
     actor_rollout_ref.actor.ppo_mini_batch_size=8 \
     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
     actor_rollout_ref.actor.use_kl_loss=True \
     actor_rollout_ref.actor.kl_loss_coef=0.001 \
     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
-    actor_rollout_ref.actor.veomni.param_offload=False \
-    actor_rollout_ref.actor.veomni.optimizer_offload=False \
-    actor_rollout_ref.actor.veomni.data_parallel_mode=fsdp2 \
-    actor_rollout_ref.actor.veomni.data_parallel_size=4 \
-    actor_rollout_ref.actor.veomni.ulysses_parallel_size=2 \
+    actor_rollout_ref.actor.entropy_coeff=0 \
     actor_rollout_ref.actor.use_torch_compile=False \
+    actor_rollout_ref.actor.veomni.data_parallel_size="${FSDP_SIZE}" \
+    actor_rollout_ref.actor.veomni.ulysses_parallel_size="${SP_SIZE}" \
+    actor_rollout_ref.actor.veomni.expert_parallel_size="${EP_SIZE}" \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
     actor_rollout_ref.ref.veomni.param_offload=True \
+    actor_rollout_ref.ref.veomni.optimizer_offload=True \
     actor_rollout_ref.ref.use_torch_compile=False \
     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
-    actor_rollout_ref.rollout.enable_chunked_prefill=False \
     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
+    actor_rollout_ref.rollout.enable_chunked_prefill=False \
     actor_rollout_ref.rollout.name=vllm \
-    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
+    +actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
+    actor_rollout_ref.rollout.free_cache_engine=True \
+    actor_rollout_ref.rollout.enforce_eager=True \
     actor_rollout_ref.rollout.n=2 \
     algorithm.kl_ctrl.kl_coef=0.001 \
+    trainer.use_legacy_worker_impl=disable \
     trainer.critic_warmup=0 \
     trainer.logger=console \
     trainer.project_name='verl_grpo_example_gsm8k' \
-    trainer.experiment_name='qwen2_7b_function_rm' \
-    trainer.n_gpus_per_node=8 \
+    trainer.experiment_name="${VERL_EXP_NAME}" \
+    trainer.n_gpus_per_node="${NUM_GPUS}" \
+    trainer.val_before_train="${VAL_BEFORE_TRAIN}" \
     trainer.nnodes=1 \
     trainer.save_freq=-1 \
     trainer.test_freq=-1 \