File tree Expand file tree Collapse file tree 4 files changed +2
-23
lines changed
Expand file tree Collapse file tree 4 files changed +2
-23
lines changed Original file line number Diff line number Diff line change @@ -131,16 +131,12 @@ jobs:
131131 run : |
132132 ray stop --force
133133 OPTIM_MEMORY_EFFICIENT=True ENGINE=sglang SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
134- with :
135- max_attempts : 3
136134 - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
137135 run : |
138136 ray stop --force
139137 export VLLM_USE_V1=1
140138 ray start --head
141139 ENGINE=sglang MODE=async RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 bash tests/special_e2e/run_ppo_trainer_megatron.sh
142- with :
143- max_attempts : 3
144140 - name : Profiling GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Deepseek)
145141 run : |
146142 ray stop --force
@@ -151,8 +147,6 @@ jobs:
151147 else
152148 echo "[SUCCESS] profile success"
153149 fi
154- with :
155- max_attempts : 3
156150 - name : clean up
157151 run : |
158152 rm -rf checkpoints
Original file line number Diff line number Diff line change @@ -153,15 +153,11 @@ jobs:
153153 ray stop --force
154154 ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=4 LORA_RANK=8 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False USE_DIST_CKPT=False \
155155 bash tests/special_e2e/run_ppo_trainer_megatron.sh
156- with :
157- max_attempts : 3
158156 - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use Megatron-Bridge LoRA e2e to pre-load and save (Deepseek)
159157 run : |
160158 ray stop --force
161159 RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 SAVE_FREQ=1 COMMON_PP=4 LORA_RANK=8 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False USE_DIST_CKPT=False \
162160 bash tests/special_e2e/run_ppo_trainer_megatron.sh
163- with :
164- max_attempts : 3
165161 - name : clean up
166162 run : |
167163 rm -rf checkpoints
@@ -193,15 +189,11 @@ jobs:
193189 run : |
194190 ray stop --force
195191 ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
196- with :
197- max_attempts : 3
198192 - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with FP8 rollout
199193 run : |
200194 ray stop --force
201195 export VLLM_USE_V1=1
202196 ROLLOUT_QUANTIZATION=fp8 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
203- with :
204- max_attempts : 3
205197 - name : clean up
206198 run : |
207199 rm -rf checkpoints
Original file line number Diff line number Diff line change 8888 mode : " create"
8989 faas-url : " ${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
9090 mlp-image : " ${{ env.IMAGE }}"
91- max_attempts : 3
9291
9392 gpu_unit_tests :
9493 if : github.repository_owner == 'volcengine'
@@ -106,8 +105,6 @@ jobs:
106105 fetch-depth : 0
107106 - name : Install the current repository
108107 run : |
109- pip3 list | grep cupy || true
110- pip3 list | grep cupy | xargs pip3 uninstall -y || true
111108 pip3 install hf_transfer
112109 pip3 install -r requirements-test.txt
113110 pip3 install --no-deps -e .
@@ -117,8 +114,6 @@ jobs:
117114 - name : Run all GPU unit tests
118115 run : |
119116 pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" tests/
120- with :
121- max_attempts : 3
122117 - name : Testing LinearCrossEntropyTP Correctness, Computation Time and Memory Consumption
123118 run : |
124119 LOW_MEMORY=True torchrun --standalone --nnodes=1 --nproc-per-node=8 tests/utils/test_special_linear_cross_entropy_tp.py
Original file line number Diff line number Diff line change @@ -113,7 +113,7 @@ jobs:
113113 fetch-depth : 0
114114 - name : Install the current repository
115115 run : |
116- pip3 install hf_transfer fastmcp pytest-asyncio pytest-retry
116+ pip3 install hf_transfer fastmcp pytest-asyncio
117117 pip3 install -r requirements-test.txt
118118 pip3 install --no-deps -e .
119119 - name : Prepare gsm8k dataset
@@ -122,9 +122,7 @@ jobs:
122122 python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
123123 - name : Test the latest SGLang Rollout async with agent loop
124124 run : |
125- ROLLOUT_NAME=sglang pytest -svvv tests/experimental/agent_loop --retries 3 --retry-delay 5
126- with :
127- max_attempts : 3
125+ ROLLOUT_NAME=sglang pytest -svvv tests/experimental/agent_loop
128126
129127 cleanup :
130128 runs-on : ubuntu-latest
You can’t perform that action at this time.
0 commit comments