Skip to content

Commit 999585a

Browse files
authored
Merge branch 'main' into main
2 parents a2d2915 + ab00487 commit 999585a

207 files changed

Lines changed: 8438 additions & 2848 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
/python/sglang/srt/mem_cache @merrymercy @Ying1123 @hnyls2002 @xiezhq-hermann @hanming-lu @yizhang2077
2626
/python/sglang/srt/model_executor @merrymercy @Ying1123 @hnyls2002 @Fridge003 @ispobock
2727
/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py @hebiao064
28+
/python/sglang/srt/models/deepseek_v2.py @fzyzcjy @zhyncs @ispobock @ch-wan @merrymercy @Fridge003
2829
/python/sglang/srt/multimodal @mickqian @JustinTong0323 @yhyang201 @yuan-luo
2930
/python/sglang/srt/speculative @Ying1123 @merrymercy @hnyls2002
3031
/sgl-kernel @zhyncs @ispobock @BBuf @yizhang2077 @merrymercy @FlamingoPg @HaiShaw

.github/workflows/nightly-test-npu.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
matrix:
2424
part: [0, 1]
2525
container:
26-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
26+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
2727
steps:
2828
- name: Checkout code
2929
uses: actions/checkout@v4
@@ -69,7 +69,7 @@ jobs:
6969
matrix:
7070
part: [0]
7171
container:
72-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
72+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
7373
steps:
7474
- name: Checkout code
7575
uses: actions/checkout@v4
@@ -115,7 +115,7 @@ jobs:
115115
matrix:
116116
part: [0]
117117
container:
118-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
118+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
119119
steps:
120120
- name: Checkout code
121121
uses: actions/checkout@v4

.github/workflows/pr-test-npu.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
if: needs.check-changes.outputs.main_package == 'true'
4646
runs-on: linux-arm64-npu-1
4747
container:
48-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
48+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
4949
steps:
5050
- name: Checkout code
5151
uses: actions/checkout@v4
@@ -88,7 +88,7 @@ jobs:
8888
matrix:
8989
part: [0, 1, 2]
9090
container:
91-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
91+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
9292
steps:
9393
- name: Checkout code
9494
uses: actions/checkout@v4
@@ -127,7 +127,7 @@ jobs:
127127
if: needs.check-changes.outputs.main_package == 'true'
128128
runs-on: linux-arm64-npu-4
129129
container:
130-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
130+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
131131
steps:
132132
- name: Checkout code
133133
uses: actions/checkout@v4
@@ -170,7 +170,7 @@ jobs:
170170
matrix:
171171
part: [0, 1]
172172
container:
173-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
173+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
174174
steps:
175175
- name: Checkout code
176176
uses: actions/checkout@v4

.github/workflows/pr-test-rust.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,31 @@ jobs:
330330
docker rm oracle-db || true
331331
332332
333+
docker-build-test:
334+
if: |
335+
github.event_name != 'pull_request' ||
336+
(github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
337+
(github.event.action == 'labeled' && github.event.label.name == 'run-ci')
338+
runs-on: ubuntu-24.04
339+
steps:
340+
- name: Checkout repository
341+
uses: actions/checkout@v4
342+
343+
- name: Set up Docker Buildx
344+
uses: docker/setup-buildx-action@v3
345+
346+
- name: Build Docker image (no push)
347+
uses: docker/build-push-action@v5
348+
with:
349+
context: .
350+
file: docker/gateway.Dockerfile
351+
push: false
352+
tags: sglang-router:test
353+
cache-from: type=gha
354+
cache-to: type=gha,mode=max
355+
333356
finish:
334-
needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
357+
needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests, docker-build-test]
335358
runs-on: ubuntu-latest
336359
steps:
337360
- name: Finish

.github/workflows/pr-test.yml

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ jobs:
3636
main_package: ${{ steps.filter.outputs.main_package || steps.scheduled.outputs.main_package }}
3737
sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.scheduled.outputs.sgl_kernel }}
3838
multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.scheduled.outputs.multimodal_gen }}
39+
max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
3940
steps:
4041
- name: Checkout code
4142
uses: actions/checkout@v4
@@ -68,6 +69,17 @@ jobs:
6869
echo "sgl_kernel=false" >> $GITHUB_OUTPUT
6970
echo "multimodal_gen=true" >> $GITHUB_OUTPUT
7071
72+
- name: Set max-parallel based on high-priority label
73+
id: set-parallel
74+
run: |
75+
if [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
76+
echo "max_parallel=15" >> $GITHUB_OUTPUT
77+
echo "High priority PR detected, setting max_parallel to 15"
78+
else
79+
echo "max_parallel=8" >> $GITHUB_OUTPUT
80+
echo "Using default max_parallel of 8"
81+
fi
82+
7183
- name: Show filter results in summary (table)
7284
run: |
7385
{
@@ -78,6 +90,7 @@ jobs:
7890
echo "| main_package | ${{ steps.filter.outputs.main_package || steps.scheduled.outputs.main_package }} |"
7991
echo "| sgl_kernel | ${{ steps.filter.outputs.sgl_kernel || steps.scheduled.outputs.sgl_kernel }} |"
8092
echo "| multimodal_gen | ${{ steps.filter.outputs.multimodal_gen || steps.scheduled.outputs.multimodal_gen }} |"
93+
echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |"
8194
} >> $GITHUB_STEP_SUMMARY
8295
8396
# =============================================== PR Gate ====================================================
@@ -382,6 +395,46 @@ jobs:
382395
# temporarily put backend-independent cpu tests here
383396
python3 run_suite.py --hw cpu --suite default
384397
398+
stage-b-test-small-1-gpu:
399+
needs: [check-changes, call-gate, stage-a-test-1, sgl-kernel-build-wheels]
400+
if: |
401+
always() &&
402+
(
403+
(inputs.target_stage == 'stage-b-test-small-1-gpu') ||
404+
(
405+
!inputs.target_stage &&
406+
(github.event_name == 'schedule' || (!failure() && !cancelled())) &&
407+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
408+
)
409+
)
410+
runs-on: 1-gpu-runner
411+
env:
412+
RUNNER_LABELS: 1-gpu-runner
413+
strategy:
414+
fail-fast: false
415+
matrix:
416+
partition: [0, 1, 2, 3]
417+
steps:
418+
- name: Checkout code
419+
uses: actions/checkout@v4
420+
421+
- name: Download artifacts
422+
if: needs.check-changes.outputs.sgl_kernel == 'true'
423+
uses: actions/download-artifact@v4
424+
with:
425+
path: sgl-kernel/dist/
426+
merge-multiple: true
427+
pattern: wheel-python3.10-cuda12.9
428+
429+
- name: Install dependencies
430+
run: |
431+
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
432+
433+
- name: Run test
434+
timeout-minutes: 30
435+
run: |
436+
cd test/
437+
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4
385438
386439
multimodal-gen-test-1-gpu:
387440
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
@@ -529,7 +582,7 @@ jobs:
529582
RUNNER_LABELS: 1-gpu-runner
530583
strategy:
531584
fail-fast: false
532-
max-parallel: 8
585+
max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
533586
matrix:
534587
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
535588
steps:
@@ -1143,7 +1196,7 @@ jobs:
11431196
strategy:
11441197
fail-fast: false
11451198
matrix:
1146-
part: [0, 1]
1199+
part: [0, 1, 2]
11471200

11481201
steps:
11491202
- name: Checkout code
@@ -1165,7 +1218,7 @@ jobs:
11651218
timeout-minutes: 30
11661219
run: |
11671220
cd test/srt
1168-
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
1221+
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800
11691222
11701223
unit-test-backend-4-gpu-gb200:
11711224
needs: [check-changes, call-gate, unit-test-backend-2-gpu, sgl-kernel-build-wheels-arm]
@@ -1209,11 +1262,14 @@ jobs:
12091262
unit-test-backend-8-gpu-b200:
12101263
needs: [check-changes, call-gate, unit-test-backend-2-gpu]
12111264
if: |
1212-
(inputs.target_stage == 'unit-test-backend-8-gpu-b200') ||
1265+
always() &&
12131266
(
1214-
always() &&
1215-
(github.event_name == 'schedule' || (!failure() && !cancelled())) &&
1216-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
1267+
(inputs.target_stage == 'unit-test-backend-8-gpu-b200') ||
1268+
(
1269+
!inputs.target_stage &&
1270+
(github.event_name == 'schedule' || (!failure() && !cancelled())) &&
1271+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
1272+
)
12171273
)
12181274
runs-on: 8-gpu-b200
12191275
env:
@@ -1226,11 +1282,11 @@ jobs:
12261282
run: |
12271283
IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
12281284
1229-
- name: Run test
1230-
timeout-minutes: 45
1231-
run: |
1232-
cd test/srt
1233-
python3 run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800
1285+
# - name: Run test
1286+
# timeout-minutes: 45
1287+
# run: |
1288+
# cd test/srt
1289+
# python3 run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800
12341290

12351291
pr-test-finish:
12361292
needs:
@@ -1248,6 +1304,7 @@ jobs:
12481304
multimodal-gen-test-2-gpu,
12491305

12501306
stage-a-test-1,
1307+
stage-b-test-small-1-gpu,
12511308
quantization-test,
12521309
unit-test-backend-1-gpu,
12531310
unit-test-backend-2-gpu,
@@ -1264,7 +1321,7 @@ jobs:
12641321
unit-test-deepep-8-gpu,
12651322
unit-test-backend-4-gpu-b200,
12661323
unit-test-backend-4-gpu-gb200,
1267-
unit-test-backend-8-gpu-b200,
1324+
# unit-test-backend-8-gpu-b200, # Moved to nightly - large models only
12681325
]
12691326
if: always()
12701327
runs-on: ubuntu-latest

.github/workflows/release-docker-gateway.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
branches:
55
- main
66
paths:
7-
- "sgl-model-gateway/bindings/python/sglang_router/version.py"
7+
- sgl-model-gateway/bindings/python/pyproject.toml
88
workflow_dispatch:
99

1010
jobs:

.github/workflows/release-docker-npu-nightly.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
runs-on: ubuntu-22.04-arm
2020
strategy:
2121
matrix:
22-
cann_version: ["8.3.rc1"]
22+
cann_version: ["8.3.rc2"]
2323
device_type: ["910b", "a3"]
2424
steps:
2525
- name: Checkout repository
@@ -73,6 +73,6 @@ jobs:
7373
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
7474
provenance: false
7575
build-args: |
76-
SGLANG_KERNEL_NPU_TAG=20251128
76+
SGLANG_KERNEL_NPU_TAG=20251206
7777
CANN_VERSION=${{ matrix.cann_version }}
7878
DEVICE_TYPE=${{ matrix.device_type }}

.github/workflows/release-docker-npu.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ubuntu-22.04-arm
1818
strategy:
1919
matrix:
20-
cann_version: ["8.3.rc1"]
20+
cann_version: ["8.3.rc2"]
2121
device_type: ["910b", "a3"]
2222
steps:
2323
- name: Checkout repository
@@ -70,6 +70,6 @@ jobs:
7070
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
7171
provenance: false
7272
build-args: |
73-
SGLANG_KERNEL_NPU_TAG=20251128
73+
SGLANG_KERNEL_NPU_TAG=20251206
7474
CANN_VERSION=${{ matrix.cann_version }}
7575
DEVICE_TYPE=${{ matrix.device_type }}

Makefile

Lines changed: 0 additions & 49 deletions
This file was deleted.

benchmark/deepseek_v3/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as nee
3333

3434
```bash
3535
# Installation
36-
pip install "sglang[all]>=0.5.6"
36+
pip install "sglang[all]>=0.5.6.post1"
3737

3838
# Launch
3939
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code

0 commit comments

Comments
 (0)