@@ -8,8 +8,15 @@ MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
88
99TRAIN_FILES=${TRAIN_FILES:- ${HOME} / data/ gsm8k/ train.parquet}
1010VAL_FILES=${VAL_FILES:- ${HOME} / data/ gsm8k/ test.parquet}
11+ VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:- False}
12+ NUM_GPUS=${NUM_GPUS:- 8}
13+ FSDP_SIZE=${FSDP_SIZE:- 4}
14+ SP_SIZE=${SP_SIZE:- 2}
15+ EP_SIZE=${EP_SIZE:- 2}
16+ VERL_EXP_NAME=${VERL_EXP_NAME:- qwen2.5-0.5b-function-reward-minimal-fsdp-size8}
1117
12- python3 -m verl.trainer.main_ppo \
18+ python3 -m verl.trainer.main_ppo --config-path=config\
19+ --config-name=" ppo_veomni_trainer.yaml" \
1320 algorithm.adv_estimator=grpo \
1421 data.train_files=" ${TRAIN_FILES} " \
1522 data.val_files=" ${VAL_FILES} " \
@@ -19,35 +26,44 @@ python3 -m verl.trainer.main_ppo \
1926 data.filter_overlong_prompts=True \
2027 data.truncation=' error' \
2128 actor_rollout_ref.model.path=" ${MODEL_PATH} " \
29+ actor_rollout_ref.actor.optim.lr=5e-7 \
2230 actor_rollout_ref.model.use_remove_padding=True \
2331 actor_rollout_ref.model.enable_gradient_checkpointing=True \
24- actor_rollout_ref.actor.optim.lr=5e-7 \
32+ actor_rollout_ref.actor.veomni.param_offload=True \
33+ actor_rollout_ref.actor.veomni.optimizer_offload=True \
34+ actor_rollout_ref.actor.veomni.data_parallel_mode=fsdp2 \
2535 actor_rollout_ref.actor.ppo_mini_batch_size=8 \
2636 actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
2737 actor_rollout_ref.actor.use_kl_loss=True \
2838 actor_rollout_ref.actor.kl_loss_coef=0.001 \
2939 actor_rollout_ref.actor.kl_loss_type=low_var_kl \
30- actor_rollout_ref.actor.veomni.param_offload=False \
31- actor_rollout_ref.actor.veomni.optimizer_offload=False \
32- actor_rollout_ref.actor.veomni.data_parallel_mode=fsdp2 \
33- actor_rollout_ref.actor.veomni.data_parallel_size=4 \
34- actor_rollout_ref.actor.veomni.ulysses_parallel_size=2 \
40+ actor_rollout_ref.actor.entropy_coeff=0 \
3541 actor_rollout_ref.actor.use_torch_compile=False \
42+ actor_rollout_ref.actor.veomni.data_parallel_size=" ${FSDP_SIZE} " \
43+ actor_rollout_ref.actor.veomni.ulysses_parallel_size=" ${SP_SIZE} " \
44+ actor_rollout_ref.actor.veomni.expert_parallel_size=" ${EP_SIZE} " \
3645 actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
3746 actor_rollout_ref.ref.veomni.param_offload=True \
3847 actor_rollout_ref.ref.use_torch_compile=False \
3948 actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
40- actor_rollout_ref.rollout.enable_chunked_prefill=False \
4149 actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
50+ actor_rollout_ref.rollout.enable_chunked_prefill=False \
4251 actor_rollout_ref.rollout.name=vllm \
43- actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
52+ +actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \
53+ actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
54+ actor_rollout_ref.rollout.free_cache_engine=True \
55+ actor_rollout_ref.rollout.enforce_eager=True \
56+ actor_rollout_ref.rollout.free_cache_engine=True \
4457 actor_rollout_ref.rollout.n=2 \
58+ actor_rollout_ref.ref.veomni.optimizer_offload=True \
4559 algorithm.kl_ctrl.kl_coef=0.001 \
60+ trainer.use_legacy_worker_impl=disable \
4661 trainer.critic_warmup=0 \
4762 trainer.logger=console \
4863 trainer.project_name=' verl_grpo_example_gsm8k' \
49- trainer.experiment_name=' qwen2_7b_function_rm' \
50- trainer.n_gpus_per_node=8 \
64+ trainer.experiment_name=" ${VERL_EXP_NAME} " \
65+ trainer.n_gpus_per_node=" ${NUM_GPUS} " \
66+ trainer.val_before_train=" ${VAL_BEFORE_TRAIN} " \
5167 trainer.nnodes=1 \
5268 trainer.save_freq=-1 \
5369 trainer.test_freq=-1 \
0 commit comments