3636 main_package : ${{ steps.filter.outputs.main_package || steps.scheduled.outputs.main_package }}
3737 sgl_kernel : ${{ steps.filter.outputs.sgl_kernel || steps.scheduled.outputs.sgl_kernel }}
3838 multimodal_gen : ${{ steps.filter.outputs.multimodal_gen || steps.scheduled.outputs.multimodal_gen }}
39+ max_parallel : ${{ steps.set-parallel.outputs.max_parallel }}
3940 steps :
4041 - name : Checkout code
4142 uses : actions/checkout@v4
6869 echo "sgl_kernel=false" >> $GITHUB_OUTPUT
6970 echo "multimodal_gen=true" >> $GITHUB_OUTPUT
7071
# Decide how many matrix jobs may run concurrently and publish the value as
# the `max_parallel` step output: 15 for pull requests carrying the
# 'high priority' label, 8 in every other case.
- name: Set max-parallel based on high-priority label
  id: set-parallel
  env:
    # Evaluated by the expression engine before the shell starts; rendered as
    # the string "true" or "false".
    HIGH_PRIORITY_PR: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'high priority') }}
  run: |
    if [[ "$HIGH_PRIORITY_PR" == "true" ]]; then
      limit=15
      note="High priority PR detected, setting max_parallel to 15"
    else
      limit=8
      note="Using default max_parallel of 8"
    fi
    # Write the step output first, then log the decision.
    echo "max_parallel=${limit}" >> $GITHUB_OUTPUT
    echo "${note}"
82+
7183 - name : Show filter results in summary (table)
7284 run : |
7385 {
7890 echo "| main_package | ${{ steps.filter.outputs.main_package || steps.scheduled.outputs.main_package }} |"
7991 echo "| sgl_kernel | ${{ steps.filter.outputs.sgl_kernel || steps.scheduled.outputs.sgl_kernel }} |"
8092 echo "| multimodal_gen | ${{ steps.filter.outputs.multimodal_gen || steps.scheduled.outputs.multimodal_gen }} |"
93+ echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |"
8194 } >> $GITHUB_STEP_SUMMARY
8295
8396 # =============================================== PR Gate ====================================================
@@ -382,6 +395,46 @@ jobs:
382395 # temporarily put backend-independent cpu tests here
383396 python3 run_suite.py --hw cpu --suite default
384397
# Stage B: runs the `stage-b-test-small-1-gpu` suite on a single-GPU runner,
# split into four auto-partitions that execute as separate matrix jobs.
stage-b-test-small-1-gpu:
  needs:
    - check-changes
    - call-gate
    - stage-a-test-1
    - sgl-kernel-build-wheels
  # always() makes this condition decide on its own instead of the implicit
  # success() check on the needed jobs. The job runs when either:
  #   * this stage was requested explicitly through inputs.target_stage, or
  #   * no target stage was requested, the run is scheduled (or nothing has
  #     failed / been cancelled), and check-changes reported main_package or
  #     sgl_kernel as 'true'.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-small-1-gpu' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  strategy:
    # Let the remaining partitions finish even if one of them fails.
    fail-fast: false
    matrix:
      # Must stay in sync with --auto-partition-size in the "Run test" step.
      partition:
        - 0
        - 1
        - 2
        - 3
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Only fetch wheel artifacts when check-changes flagged sgl_kernel.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: 'wheel-python3.10-cuda12.9'

    # CUSTOM_BUILD_SGL_KERNEL receives the sgl_kernel change flag verbatim.
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    # Each matrix job runs one of the four partitions of the suite.
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4
385438
386439 multimodal-gen-test-1-gpu :
387440 needs : [check-changes, call-gate, sgl-kernel-build-wheels]
@@ -529,7 +582,7 @@ jobs:
529582 RUNNER_LABELS : 1-gpu-runner
530583 strategy :
531584 fail-fast : false
532- max-parallel : 8
585+ max-parallel : ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
533586 matrix :
534587 part : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
535588 steps :
@@ -1143,7 +1196,7 @@ jobs:
11431196 strategy :
11441197 fail-fast : false
11451198 matrix :
1146- part : [0, 1]
1199+ part : [0, 1, 2 ]
11471200
11481201 steps :
11491202 - name : Checkout code
@@ -1165,7 +1218,7 @@ jobs:
11651218 timeout-minutes : 30
11661219 run : |
11671220 cd test/srt
1168- python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
1221+ python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800
11691222
11701223 unit-test-backend-4-gpu-gb200 :
11711224 needs : [check-changes, call-gate, unit-test-backend-2-gpu, sgl-kernel-build-wheels-arm]
@@ -1209,11 +1262,14 @@ jobs:
12091262 unit-test-backend-8-gpu-b200 :
12101263 needs : [check-changes, call-gate, unit-test-backend-2-gpu]
12111264 if : |
1212- (inputs.target_stage == 'unit-test-backend-8-gpu-b200') ||
1265+ always() &&
12131266 (
1214- always() &&
1215- (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
1216- ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
1267+ (inputs.target_stage == 'unit-test-backend-8-gpu-b200') ||
1268+ (
1269+ !inputs.target_stage &&
1270+ (github.event_name == 'schedule' || (!failure() && !cancelled())) &&
1271+ ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
1272+ )
12171273 )
12181274 runs-on : 8-gpu-b200
12191275 env :
@@ -1226,11 +1282,11 @@ jobs:
12261282 run : |
12271283 IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
12281284
1229- - name : Run test
1230- timeout-minutes : 45
1231- run : |
1232- cd test/srt
1233- python3 run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800
1285+ # - name: Run test
1286+ # timeout-minutes: 45
1287+ # run: |
1288+ # cd test/srt
1289+ # python3 run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800
12341290
12351291 pr-test-finish :
12361292 needs :
@@ -1248,6 +1304,7 @@ jobs:
12481304 multimodal-gen-test-2-gpu,
12491305
12501306 stage-a-test-1,
1307+ stage-b-test-small-1-gpu,
12511308 quantization-test,
12521309 unit-test-backend-1-gpu,
12531310 unit-test-backend-2-gpu,
@@ -1264,7 +1321,7 @@ jobs:
12641321 unit-test-deepep-8-gpu,
12651322 unit-test-backend-4-gpu-b200,
12661323 unit-test-backend-4-gpu-gb200,
1267- unit-test-backend-8-gpu-b200,
1324+ # unit-test-backend-8-gpu-b200, # Moved to nightly - large models only
12681325 ]
12691326 if : always()
12701327 runs-on : ubuntu-latest
0 commit comments