Update veomni e2e test: add GEO3K case and parameterize run_ppo_trainer_veomni.sh via env vars

A1waysBeenHere · A1waysBeenHere · commit c9e37bc86bfa · 2026-01-23T15:07:22.000+08:00
diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml
@@ -123,10 +123,18 @@ jobs:
         run: |
           ray stop --force
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+      - name: Prepare GEO3K dataset
+        run: |
+          ray stop --force
+          python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/geo3k
       - name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
         run: |
           ray stop --force
-          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/run_ppo_trainer_veomni.sh
+          VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
+      - name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
+        run: |
+          ray stop --force
+          MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/geo3k/test.parquet VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen3-vl-2b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
 
   cleanup:
     runs-on: ubuntu-latest
diff --git a/tests/special_e2e/run_ppo_trainer_veomni.sh b/tests/special_e2e/run_ppo_trainer_veomni.sh
@@ -8,6 +8,12 @@ MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
 
 TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
 VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}
+VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-False}  # forwarded to trainer.val_before_train
+NUM_GPUS=${NUM_GPUS:-8}  # forwarded to trainer.n_gpus_per_node
+FSDP_SIZE=${FSDP_SIZE:-4}  # veomni data_parallel_size (previously hard-coded 4)
+SP_SIZE=${SP_SIZE:-2}  # veomni ulysses_parallel_size (previously hard-coded 2)
+EP_SIZE=${EP_SIZE:-1}  # veomni expert_parallel_size; 1 matches what the CI steps pass explicitly
+VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size${FSDP_SIZE}}  # suffix tracks FSDP_SIZE
 
 python3 -m verl.trainer.main_ppo --config-path=config\
     --config-name="ppo_veomni_trainer.yaml" \
@@ -33,8 +39,9 @@ python3 -m verl.trainer.main_ppo --config-path=config\
     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
     actor_rollout_ref.actor.entropy_coeff=0 \
     actor_rollout_ref.actor.use_torch_compile=False \
-    actor_rollout_ref.actor.veomni.data_parallel_size=4 \
-    actor_rollout_ref.actor.veomni.ulysses_parallel_size=2 \
+    actor_rollout_ref.actor.veomni.data_parallel_size="${FSDP_SIZE}" \
+    actor_rollout_ref.actor.veomni.ulysses_parallel_size="${SP_SIZE}" \
+    actor_rollout_ref.actor.veomni.expert_parallel_size="${EP_SIZE}" \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
     actor_rollout_ref.ref.veomni.param_offload=True \
     actor_rollout_ref.ref.use_torch_compile=False \
@@ -54,8 +61,9 @@ python3 -m verl.trainer.main_ppo --config-path=config\
     trainer.critic_warmup=0 \
     trainer.logger=console \
     trainer.project_name='verl_grpo_example_gsm8k' \
-    trainer.experiment_name='qwen25_05b_function_rm' \
-    trainer.n_gpus_per_node=8 \
+    trainer.experiment_name="${VERL_EXP_NAME}" \
+    trainer.n_gpus_per_node="${NUM_GPUS}" \
+    trainer.val_before_train="${VAL_BEFORE_TRAIN}" \
     trainer.nnodes=1 \
     trainer.save_freq=-1 \
     trainer.test_freq=-1 \