Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions examples/grpo_trainer/run_qwen3_235b_256k_megatron_npu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@
ulimit -n 32768

# Project Configuration
project_name='GRPO-Qwen3-235B-A22B-BASE-MATH'
exp_name='GRPO-Qwen3-235B-A22B-BASE-Megatron-vLLM'
project_name='GRPO-Qwen3-235B-A22B-Instruct-MATH'
exp_name='GRPO-Qwen3-235B-A22B-Instruct-Megatron-vLLM'

# Node Info
NNODES=${NNODES:-16}
NPUS_PER_NODE=${NPUS_PER_NODE:-16}

# Model Weights Paths
MODEL_PATH=${WORK_DIR}/Qwen3-235B-A22B-Thinking-2507
# MODEL_PATH=/mnt/weight/Qwen3-235B-A22B
MODEL_PATH=${WORK_DIR}/Qwen3-235B-A22B-Instruct-2507
MCORE_MODEL_PATH=${WORK_DIR}/Qwen3-235B-A22B-Instruct-2507-Mcore
RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}

# File System Paths
TRAIN_FILE=${WORK_DIR}/dapo-math-17k.parquet
TEST_FILE=${WORK_DIR}/dapo-math-17k.parquet
TRAIN_FILE=${WORK_DIR}/gsm8k/train.parquet
TEST_FILE=${WORK_DIR}/gsm8k/test.parquet

# Data Configuration
max_prompt_length=$((1024 * 1))
Expand All @@ -48,10 +49,10 @@ infer_ppo_max_token_len=$((max_prompt_length + max_response_length))
optimizer_offload_fraction=1

# Megatron Configuration
train_tp=4
train_ep=32
train_tp=2
train_ep=16
train_etp=1
train_pp=8
train_pp=16
train_cp=8

# vLLM Configuration
Expand All @@ -63,8 +64,8 @@ max_model_len=$((max_prompt_length + max_response_length))
max_num_batched_tokens=2048

# Pipeline Layer Configuration
first_layer=11
last_layer=11
first_layer=5
last_layer=5

# Data Configuration
DATA_ARGS=(
Expand Down Expand Up @@ -93,7 +94,6 @@ ALGORITHM_ARGS=(

# Actor Model Configuration
ACTOR_ARGS=(
actor_rollout_ref.actor.strategy=megatron
actor_rollout_ref.actor.use_torch_compile=False
actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz}
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss}
Expand All @@ -104,6 +104,9 @@ ACTOR_ARGS=(
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len}
actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz}
actor_rollout_ref.actor.kl_loss_type=low_var_kl
actor_rollout_ref.actor.optim.clip_grad=1.0
actor_rollout_ref.actor.optim.lr_warmup_steps=10
actor_rollout_ref.actor.optim.weight_decay=0.1
actor_rollout_ref.actor.optim.lr=1e-6
+actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=${optimizer_offload_fraction}
+actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True
Expand Down Expand Up @@ -157,6 +160,7 @@ REF_ARGS=(
ROLLOUT_ARGS=(
actor_rollout_ref.rollout.name=vllm
actor_rollout_ref.rollout.n=${n_resp_per_prompt}
actor_rollout_ref.rollout.max_num_seqs=16
actor_rollout_ref.rollout.top_p=1.0
actor_rollout_ref.rollout.top_k=-1
actor_rollout_ref.rollout.temperature=1.0
Expand Down
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_diffusion_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ actor_rollout_ref:
backend: naive
update_weights_bucket_megabytes: 2048
engine_kwargs: {}
custom_backend_module: null
trace:
_target_: verl.workers.config.TraceConfig
project_name: ${oc.select:trainer.project_name,null}
Expand Down
Loading