Skip to content

Commit c204eae

Browse files
add rl support for veomni backend
Co-authored-by: A1waysBeenHere <moyicong1999@163.com>
1 parent b53f0f1 commit c204eae

File tree

14 files changed

+638
-12
lines changed

14 files changed

+638
-12
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# # Tests layout
2+
3+
# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4+
# - `tests/trainer` for testing functionality related to `verl/trainer`
5+
# - `tests/models` for testing functionality related to `verl/models`
6+
# - ...
7+
8+
# There are a few folders with `special_` prefix, created for special purposes:
9+
# - `special_distributed`: unit tests that must run with multiple GPUs
10+
# - `special_e2e`: end-to-end tests with training/generation scripts
11+
# - `special_npu`: tests for NPUs
12+
# - `special_sanity`: a suite of quick sanity tests
13+
# - `special_standalone`: a set of tests that are designed to run in dedicated environments
14+
15+
# Accelerators for tests
16+
# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17+
# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
18+
19+
# # Workflow layout
20+
21+
# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22+
# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23+
# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24+
# 3. End-to-end tests: `e2e_*.yml`
25+
# 4. Unit tests
26+
# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27+
# - `gpu_unit_tests.yml`, run pytest on all scripts whose file names do not end with the `on_cpu.py` suffix.
28+
# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29+
# - new workflow yaml is added to `.github/workflows`
30+
# - new tests are added to workflow mentioned in 2.
31+
32+
name: e2e_ppo_trainer_veomni_vllm
33+
34+
on:
35+
# Trigger the workflow on push or pull request,
36+
# but only for the main branch.
37+
# For push, for now only anti-patterns are specified so it is more conservative
38+
# and achieves higher coverage.
39+
push:
40+
branches:
41+
- main
42+
- v0.*
43+
paths:
44+
- "**/*.py"
45+
# Other entrypoints
46+
- "!verl/trainer/fsdp_sft_trainer.py"
47+
# Megatron
48+
- "!verl/workers/**/megatron_*.py"
49+
pull_request:
50+
branches:
51+
- main
52+
- v0.*
53+
paths:
54+
- "**/*.py"
55+
# Other entrypoints
56+
- "!docker/**"
57+
# Docs
58+
- "!**/*.md"
59+
- "!docs/**"
60+
- "!examples/**"
61+
- "!tests/**"
62+
- "!verl/trainer/main_*.py"
63+
- "!verl/trainer/fsdp_sft_trainer.py"
64+
# Megatron
65+
- "!verl/workers/**/megatron_*.py"
66+
# Entrypoints
67+
- ".github/workflows/e2e_ppo_trainer_veomni_vllm.yml"
68+
- "examples/data_preprocess/gsm8k.py"
69+
- "examples/data_preprocess/geo3k.py"
70+
- "tests/special_e2e/run_ppo_trainer_veomni.sh"
71+
- "verl/trainer/main_ppo.py"
72+
- "verl/trainer/config/ppo_veomni_trainer.yaml"
73+
74+
# Cancel jobs on the same ref if a new one is triggered
75+
concurrency:
76+
group: ${{ github.workflow }}-${{ github.ref }}
77+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
78+
79+
# Declare permissions just read content.
80+
permissions:
81+
contents: read
82+
83+
env:
84+
IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm012.dev3"
85+
DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
86+
87+
jobs:
88+
setup:
89+
if: github.repository_owner == 'volcengine'
90+
runs-on: ubuntu-latest
91+
outputs:
92+
runner-label: ${{ steps.create-runner.outputs.runner-label }}
93+
mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
94+
steps:
95+
- uses: actions/checkout@v4
96+
- id: create-runner
97+
uses: volcengine/vemlp-github-runner@v1
98+
with:
99+
mode: "create"
100+
faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
101+
mlp-image: "${{ env.IMAGE }}"
102+
103+
e2e_ppo_trainer_veomni_vllm:
104+
needs: setup
105+
runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
106+
timeout-minutes: 60 # Increase this timeout value as needed
107+
env:
108+
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
109+
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
110+
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
111+
HF_ENDPOINT: "https://hf-mirror.com"
112+
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
113+
steps:
114+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
115+
with:
116+
fetch-depth: 0
117+
- name: Install the current repository
118+
run: |
119+
pip3 install -r requirements-test.txt
120+
pip3 install --no-deps -e .
121+
pip3 install git+https://github.com/ByteDance-Seed/VeOmni.git@v0.1.4
122+
- name: Prepare GSM8K dataset
123+
run: |
124+
ray stop --force
125+
python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
126+
- name: Prepare GEO3K dataset
127+
run: |
128+
ray stop --force
129+
python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/geo3k
130+
- name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
131+
run: |
132+
ray stop --force
133+
VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
134+
- name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=8, USP=1)
135+
run: |
136+
ray stop --force
137+
MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/geo3k/test.parquet VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=8 SP_SIZE=1 EP_SIZE=1 VERL_EXP_NAME="qwen3-2b-vl-function-reward-minimal-fsdp-size8" bash tests/special_e2e/run_ppo_trainer_veomni.sh
138+
139+
cleanup:
140+
runs-on: ubuntu-latest
141+
needs:
142+
[
143+
setup,
144+
e2e_ppo_trainer_veomni_vllm,
145+
]
146+
if: always()
147+
steps:
148+
- id: destroy-runner
149+
uses: volcengine/vemlp-github-runner@v1
150+
with:
151+
mode: "destroy"
152+
faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
153+
mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env bash
2+
set -xeuo pipefail
3+
4+
# Download model if not exists
5+
MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
6+
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
7+
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
8+
9+
TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
10+
VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}
11+
VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-False}
12+
NUM_GPUS=${NUM_GPUS:-8}
13+
FSDP_SIZE=${FSDP_SIZE:-4}
14+
SP_SIZE=${SP_SIZE:-2}
15+
EP_SIZE=${EP_SIZE:-2}
16+
VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size8}
17+
18+
python3 -m verl.trainer.main_ppo --config-path=config \
19+
--config-name="ppo_veomni_trainer.yaml" \
20+
algorithm.adv_estimator=grpo \
21+
data.train_files="${TRAIN_FILES}" \
22+
data.val_files="${VAL_FILES}" \
23+
data.train_batch_size=16 \
24+
data.max_prompt_length=512 \
25+
data.max_response_length=128 \
26+
data.filter_overlong_prompts=True \
27+
data.truncation='error' \
28+
actor_rollout_ref.model.path="${MODEL_PATH}" \
29+
actor_rollout_ref.actor.optim.lr=5e-7 \
30+
actor_rollout_ref.model.use_remove_padding=True \
31+
actor_rollout_ref.model.enable_gradient_checkpointing=True \
32+
actor_rollout_ref.actor.veomni.param_offload=True \
33+
actor_rollout_ref.actor.veomni.optimizer_offload=True \
34+
actor_rollout_ref.actor.ppo_mini_batch_size=8 \
35+
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
36+
actor_rollout_ref.actor.use_kl_loss=True \
37+
actor_rollout_ref.actor.kl_loss_coef=0.001 \
38+
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
39+
actor_rollout_ref.actor.entropy_coeff=0 \
40+
actor_rollout_ref.actor.use_torch_compile=False \
41+
actor_rollout_ref.actor.veomni.data_parallel_size="${FSDP_SIZE}" \
42+
actor_rollout_ref.actor.veomni.ulysses_parallel_size="${SP_SIZE}" \
43+
actor_rollout_ref.actor.veomni.expert_parallel_size="${EP_SIZE}" \
44+
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
45+
actor_rollout_ref.ref.veomni.param_offload=True \
46+
actor_rollout_ref.ref.use_torch_compile=False \
47+
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
48+
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
49+
actor_rollout_ref.rollout.enable_chunked_prefill=False \
50+
actor_rollout_ref.rollout.name=vllm \
51+
+actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \
52+
actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
53+
actor_rollout_ref.rollout.free_cache_engine=True \
54+
actor_rollout_ref.rollout.enforce_eager=True \
55+
56+
actor_rollout_ref.rollout.n=2 \
57+
actor_rollout_ref.ref.veomni.optimizer_offload=True \
58+
algorithm.kl_ctrl.kl_coef=0.001 \
59+
trainer.use_legacy_worker_impl=disable \
60+
trainer.critic_warmup=0 \
61+
trainer.logger=console \
62+
trainer.project_name='verl_grpo_example_gsm8k' \
63+
trainer.experiment_name="${VERL_EXP_NAME}" \
64+
trainer.n_gpus_per_node="${NUM_GPUS}" \
65+
trainer.val_before_train="${VAL_BEFORE_TRAIN}" \
66+
trainer.nnodes=1 \
67+
trainer.save_freq=-1 \
68+
trainer.test_freq=-1 \
69+
trainer.total_epochs=1 \
70+
trainer.total_training_steps=1 $@

tests/special_e2e/sft/run_sft_engine.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ VEOMNI_ENGINE_CONFIG="\
6767
optim.lr_min=1e-6 \
6868
optim.lr_scheduler_type=cosine \
6969
engine.ulysses_parallel_size=${SP_SIZE} \
70-
engine.data_parallel_mode=${FSDP_STRATEGY} \
7170
engine.data_parallel_size=${FSDP_SIZE}"
7271

7372

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# veomni actor config, inheriting from trainer/config/actor/actor.yaml
2+
defaults:
3+
# veomni optimizer config
4+
- ../optim@optim: veomni
5+
6+
# veomni engine config
7+
- ../engine@veomni: veomni
8+
9+
- actor
10+
11+
# load the reference default config, then apply the fields in the current yaml
12+
- _self_
13+
14+
_target_: verl.workers.config.VeOmniActorConfig
15+
16+
strategy: veomni
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# defaults specify the default config from each component
2+
defaults:
3+
4+
# veomni optimizer config
5+
- ../optim@optim: veomni
6+
7+
# veomni engine config
8+
- ../engine@veomni: veomni
9+
10+
# critic config, inheriting from trainer/config/critic/critic.yaml
11+
- critic
12+
13+
# load the reference default config, then apply the fields in the current yaml
14+
- _self_
15+
16+
# Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
17+
_target_: verl.workers.config.VeOmniCriticConfig
18+
19+
strategy: veomni
20+
21+
# model config for the critic
22+
model:
23+
24+
# Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
25+
_target_: verl.trainer.config.BaseModelConfig
26+
27+
# seed for data loader
28+
data_loader_seed: ${oc.select:actor_rollout_ref.actor.data_loader_seed,null}
29+

verl/trainer/config/engine/veomni.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ param_offload: False
77
# Whether to offload optimizer state to CPU
88
optimizer_offload: False
99

10-
# fsdp or fsdp2
11-
data_parallel_mode: fsdp2
12-
1310
data_parallel_size: 1
1411

1512
data_parallel_replicate_size: 1

0 commit comments

Comments
 (0)