Skip to content

Commit 1d5eb96

Browse files
committed
merge ppo_veomni_trainer.yaml back to ppo_trainer.yaml
1 parent cf7797f commit 1d5eb96

File tree

8 files changed

+33
-215
lines changed

8 files changed

+33
-215
lines changed

.github/workflows/e2e_ppo_trainer_veomni_vllm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ on:
6969
- "examples/data_preprocess/geo3k.py"
7070
- "tests/special_e2e/run_ppo_trainer_veomni.sh"
7171
- "verl/trainer/main_ppo.py"
72-
- "verl/trainer/config/ppo_veomni_trainer.yaml"
72+
- "verl/trainer/config/ppo_trainer.yaml"
7373

7474
# Cancel jobs on the same ref if a new one is triggered
7575
concurrency:

scripts/generate_trainer_config.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ set -euox pipefail
66
CONFIG_SPECS=(
77
"ppo_trainer:_generated_ppo_trainer.yaml:"
88
"ppo_megatron_trainer:_generated_ppo_megatron_trainer.yaml:--config-name=ppo_megatron_trainer.yaml"
9-
"ppo_veomni_trainer:_generated_ppo_veomni_trainer.yaml:--config-name=ppo_veomni_trainer.yaml"
9+
"ppo_trainer:_generated_ppo_veomni_trainer.yaml:model_engine=veomni"
1010
)
1111

1212
generate_config() {

tests/special_e2e/run_ppo_trainer_veomni.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ SP_SIZE=${SP_SIZE:-2}
1515
EP_SIZE=${EP_SIZE:-2}
1616
VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size8}
1717

18-
python3 -m verl.trainer.main_ppo --config-path=config\
19-
--config-name="ppo_veomni_trainer.yaml" \
18+
python3 -m verl.trainer.main_ppo \
19+
model_engine=veomni \
2020
algorithm.adv_estimator=grpo \
2121
data.train_files="${TRAIN_FILES}" \
2222
data.val_files="${VAL_FILES}" \

verl/trainer/config/_generated_ppo_veomni_trainer.yaml

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This reference configuration yaml is automatically generated via 'scripts/generate_trainer_config.sh'
2-
# in which it invokes 'python3 scripts/print_cfg.py --cfg job --config-name=ppo_veomni_trainer.yaml' to flatten the 'verl/trainer/config/ppo_veomni_trainer.yaml' config fields into a single file.
2+
# in which it invokes 'python3 scripts/print_cfg.py --cfg job model_engine=veomni' to flatten the 'verl/trainer/config/ppo_trainer.yaml' config fields into a single file.
33
# Do not modify this file directly.
44
# The file is usually only for reference and never used.
55

@@ -93,6 +93,7 @@ actor_rollout_ref:
9393
- extra
9494
load_contents: ${.save_contents}
9595
async_save: false
96+
mbridge_config: {}
9697
use_fused_kernels: ${oc.select:actor_rollout_ref.model.use_fused_kernels,false}
9798
profiler:
9899
_target_: verl.utils.profiler.ProfilerConfig
@@ -303,9 +304,7 @@ actor_rollout_ref:
303304
quantization: null
304305
quantization_config_file: null
305306
mtp: ${oc.select:actor_rollout_ref.model.mtp, null}
306-
layer_name_map:
307-
qkv_layer_name: qkv
308-
gate_proj_layer_name: gate_up
307+
layered_summon: false
309308
model:
310309
_target_: verl.workers.config.HFModelConfig
311310
path: ~/models/deepseek-llm-7b-chat
@@ -315,13 +314,10 @@ actor_rollout_ref:
315314
trust_remote_code: false
316315
custom_chat_template: null
317316
external_lib: null
318-
override_config:
319-
model_config: {}
320-
moe_config:
321-
freeze_moe_router: false
317+
override_config: {}
322318
enable_gradient_checkpointing: true
323319
enable_activation_offload: false
324-
use_remove_padding: false
320+
use_remove_padding: true
325321
lora_rank: 0
326322
lora_alpha: 16
327323
target_modules: all-linear
@@ -389,14 +385,6 @@ data:
389385
path: null
390386
name: null
391387
apply_chat_template_kwargs: {}
392-
reward_manager:
393-
_target_: verl.trainer.config.config.RewardManagerConfig
394-
source: register
395-
name: ${oc.select:reward_model.reward_manager,naive}
396-
module:
397-
_target_: verl.trainer.config.config.ModuleConfig
398-
path: null
399-
name: custom_reward_manager
400388
critic:
401389
optim:
402390
_target_: verl.workers.config.VeOmniOptimizerConfig
@@ -473,6 +461,7 @@ critic:
473461
- extra
474462
load_contents: ${.save_contents}
475463
async_save: false
464+
mbridge_config: {}
476465
profiler:
477466
_target_: verl.utils.profiler.ProfilerConfig
478467
tool: ${oc.select:global_profiler.tool,null}
@@ -502,14 +491,17 @@ reward_model:
502491
enable: false
503492
use_reward_loop: true
504493
num_workers: 8
505-
reward_manager: naive
506494
enable_resource_pool: false
507495
n_gpus_per_node: 8
508496
nnodes: 0
509-
reward_loop_source: register
510-
reward_loop_module_path: null
511-
reward_loop_class_name: null
512-
launch_reward_fn_async: false
497+
reward_manager:
498+
_target_: verl.workers.config.reward_model.RewardManagerConfig
499+
source: register
500+
name: naive
501+
module:
502+
_target_: verl.trainer.config.config.ModuleConfig
503+
path: null
504+
name: custom_reward_manager
513505
model_path: null
514506
rollout:
515507
_target_: verl.workers.config.RolloutConfig
@@ -577,24 +569,26 @@ trainer:
577569
- console
578570
- wandb
579571
log_val_generations: 0
572+
rollout_data_dir: null
573+
validation_data_dir: null
580574
nnodes: 1
581575
n_gpus_per_node: 8
582576
save_freq: -1
583577
esi_redundant_time: 0
584578
resume_mode: auto
585579
resume_from_path: null
586-
del_local_ckpt_after_load: false
587580
val_before_train: true
581+
val_only: false
588582
test_freq: -1
589583
critic_warmup: 0
590584
default_hdfs_dir: null
585+
del_local_ckpt_after_load: false
591586
default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
592587
max_actor_ckpt_to_keep: null
593588
max_critic_ckpt_to_keep: null
594589
ray_wait_register_center_timeout: 300
595590
device: cuda
596-
rollout_data_dir: null
597-
use_legacy_worker_impl: disable
591+
use_legacy_worker_impl: auto
598592
global_profiler:
599593
_target_: verl.utils.profiler.ProfilerConfig
600594
tool: null
@@ -603,6 +597,7 @@ global_profiler:
603597
save_path: outputs/profile
604598
global_tool_config:
605599
nsys:
600+
_target_: verl.utils.profiler.config.NsightToolConfig
606601
discrete: false
607602
controller_nsight_options:
608603
trace: cuda,nvtx,cublas,ucx

verl/trainer/config/model_engine/dp.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# @package _global_
2+
model_engine: dp

verl/trainer/config/model_engine/veomni.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# @package _global_
2+
model_engine: veomni

verl/trainer/config/ppo_trainer.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,18 @@
77
# specify the default per-component configs
88
defaults:
99

10+
- model_engine: dp
11+
1012
# <folder_name>@<field_name>.<field_name>: <yaml_file_name>
1113
# actor_rollout_ref.actor: trainer/config/actor/dp_actor.yaml
12-
- actor@actor_rollout_ref.actor: dp_actor
14+
- actor@actor_rollout_ref.actor: ${model_engine}_actor
1315

1416
# data: trainer/config/data/legacy_data.yaml
1517
- data@data: legacy_data
1618

1719
# Reference model config.
1820
# Reference model will be enabled when actor.use_kl_loss or/and algorithm.use_kl_in_reward is/are True.
19-
- ref@actor_rollout_ref.ref: dp_ref
21+
- ref@actor_rollout_ref.ref: ${model_engine}_ref
2022

2123
# Rollout model config.
2224
- rollout@actor_rollout_ref.rollout: rollout
@@ -25,7 +27,7 @@ defaults:
2527
- model@actor_rollout_ref.model: hf_model
2628

2729
# Critic model config.
28-
- critic@critic: dp_critic
30+
- critic@critic: ${model_engine}_critic
2931

3032
# Reward model config.
3133
- reward_model@reward_model: reward_model

verl/trainer/config/ppo_veomni_trainer.yaml

Lines changed: 0 additions & 183 deletions
This file was deleted.

0 commit comments

Comments
 (0)