Skip to content

Nightly Test (AMD ROCm 7.2) #105

Nightly Test (AMD ROCm 7.2)

Nightly Test (AMD ROCm 7.2) #105

name: Nightly Test (AMD ROCm 7.2)
on:
schedule:
- cron: '30 17 * * *'
push:
branches:
- main
paths:
- "python/sglang/version.py"
workflow_dispatch:
inputs:
aiter_ref:
description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
required: false
type: string
default: ''
continue_on_error:
description: 'Continue on error (do not fail the workflow on test failures)'
required: false
type: boolean
default: true
job_select:
description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
required: false
type: choice
default: 'all'
options:
- 'all'
- nightly-test-1-gpu-unit-rocm720
- nightly-accuracy-2-gpu-rocm720
- nightly-accuracy-2-gpu-vlm-rocm720
- nightly-perf-2-gpu-text-rocm720
- nightly-perf-2-gpu-vlm-rocm720
- nightly-accuracy-8-gpu-rocm720
- nightly-8-gpu-grok1-int4-rocm720
- nightly-8-gpu-grok2-rocm720
- nightly-8-gpu-deepseek-v31-rocm720
- nightly-8-gpu-deepseek-v32-rocm720
- nightly-8-gpu-deepseek-v32-mtp-rocm720
- nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
- nightly-8-gpu-kimi-k25-rocm720
- nightly-8-gpu-qwen3-235b-rocm720
- nightly-8-gpu-qwen35-rocm720
- nightly-8-gpu-glm5-rocm720
- nightly-8-gpu-minimax-m25-rocm720
- nightly-1-gpu-zimage-turbo-rocm720
- nightly-test-1-gpu-mi35x-rocm720
- nightly-accuracy-8-gpu-mi35x-rocm720
- nightly-8-gpu-mi35x-grok1-int4-rocm720
- nightly-8-gpu-mi35x-grok2-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
- nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
- nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
- nightly-8-gpu-mi35x-kimi-k25-rocm720
- nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
- nightly-8-gpu-mi35x-qwen35-rocm720
- nightly-8-gpu-mi35x-glm5-rocm720
- nightly-8-gpu-mi35x-minimax-m25-rocm720
job_filter:
description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
required: false
type: string
default: ''
workflow_call:
inputs:
ref:
description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
required: false
type: string
default: ''
aiter_ref:
description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
required: false
type: string
default: ''
job_filter:
description: 'Select which job to run (leave empty or "all" to run all jobs)'
required: false
type: string
default: 'all'
continue_on_error:
description: 'Continue on error (do not fail the workflow on test failures)'
required: false
type: boolean
default: true
env:
AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}
concurrency:
# When called via workflow_call with ref set, use a unique group per caller run to avoid
# collisions with direct schedule/push triggers. We use inputs.ref (not github.event_name)
# to detect this, because github.event_name inherits from the caller in workflow_call.
group: nightly-test-amd-rocm720-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}
jobs:
# ============================================== MI30x ROCm 7.2 Unit Tests ==============================================
# 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x ROCm 7.2)
nightly-test-1-gpu-unit-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit-rocm720,'))
runs-on: linux-mi325-1gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Unit Test ROCm 7.2 (1-GPU)
timeout-minutes: 90
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Accuracy Tests ==============================================
# 2-GPU Accuracy Tests - GSM8K eval (MI30x ROCm 7.2)
nightly-accuracy-2-gpu-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-rocm720,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Test ROCm 7.2 (2-GPU)
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation (ROCm 7.2)
nightly-accuracy-2-gpu-vlm-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm-rocm720,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Accuracy Test ROCm 7.2 (2-GPU VLM MMMU)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU Text Models Performance Tests (ROCm 7.2)
nightly-perf-2-gpu-text-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text-rocm720,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Performance Test ROCm 7.2 (2-GPU Text Models)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Performance Tests (ROCm 7.2)
nightly-perf-2-gpu-vlm-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm-rocm720,'))
runs-on: linux-mi325-2gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Performance Test ROCm 7.2 (2-GPU VLM Models)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (ROCm 7.2)
nightly-accuracy-8-gpu-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU GPT-OSS)
timeout-minutes: 180
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Accuracy Test ROCm 7.2 (8-GPU Grok1-FP8)
timeout-minutes: 60
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Combined Accuracy + Performance Tests ==============================================
# 8-GPU Grok1-INT4 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-grok1-int4-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU Grok1-INT4)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test ROCm 7.2 (8-GPU Grok1-INT4)
timeout-minutes: 60
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Grok2 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-grok2-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU Grok2)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test ROCm 7.2 (8-GPU Grok2)
timeout-minutes: 60
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.1 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-deepseek-v31-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
timeout-minutes: 300
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_ROCM700A=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 (Basic Accuracy + Perf) ROCm 7.2
nightly-8-gpu-deepseek-v32-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 150
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 MTP (MTP Accuracy + Perf) ROCm 7.2
nightly-8-gpu-deepseek-v32-mtp-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 180
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3) ROCm 7.2
nightly-8-gpu-deepseek-v3-kv-fp8-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: DeepSeek-V3 KV FP8 Test ROCm 7.2 (8-GPU Basic + MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Kimi-K2.5 (Accuracy) ROCm 7.2
nightly-8-gpu-kimi-k25-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU Kimi-K2.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Qwen3-235B (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-qwen3-235b-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test + Performance Test ROCm 7.2 (8-GPU Qwen3)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU Qwen 3.5 (Accuracy) ROCm 7.2
nightly-8-gpu-qwen35-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
- name: Accuracy Test ROCm 7.2 (8-GPU Qwen 3.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU GLM-5 (Accuracy) ROCm 7.2
nightly-8-gpu-glm5-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test ROCm 7.2 (8-GPU GLM-5 NSA)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
nightly-8-gpu-minimax-m25-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
- name: Accuracy Test ROCm 7.2 (8-GPU MiniMax-M2.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Diffusion Tests ==============================================
# 1-GPU Z-Image-Turbo (Diffusion T2I) ROCm 7.2
nightly-1-gpu-zimage-turbo-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo-rocm720,'))
runs-on: linux-mi325-1gpu-sglang
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Z-Image-Turbo Diffusion Test ROCm 7.2 (1-GPU)
timeout-minutes: 45
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
-e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Upload generated images
if: always()
uses: actions/upload-artifact@v4
with:
name: zimage-turbo-outputs-rocm720
path: diffusion-artifacts/
if-no-files-found: ignore
retention-days: 30
# ============================================== MI35x ROCm 7.2 Tests ==============================================
# MI35x 1-GPU ROCm 7.2 tests
nightly-test-1-gpu-mi35x-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x-rocm720,'))
runs-on: linux-mi35x-gpu-1
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh
- name: Nightly Test MI35x ROCm 7.2 (1-GPU)
timeout-minutes: 90
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Accuracy Tests - GPT-OSS (ROCm 7.2)
nightly-accuracy-8-gpu-mi35x-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU GPT-OSS)
timeout-minutes: 180
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok1-INT4 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-grok1-int4-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
timeout-minutes: 60
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok2 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-grok2-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok2)
timeout-minutes: 60
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x ROCm 7.2 (8-GPU Grok2)
timeout-minutes: 60
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e RCCL_MSCCL_ENABLE=0 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
timeout-minutes: 300
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 KV FP8 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
timeout-minutes: 300
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
- name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
timeout-minutes: 300
continue-on-error: true
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Accuracy Test (ROCm 7.2)
nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 TP+MTP Accuracy Test (ROCm 7.2)
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 TP+MTP)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Performance Test (Basic) ROCm 7.2
nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
timeout-minutes: 150
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Kimi-K2.5 (Accuracy) ROCm 7.2
nightly-8-gpu-mi35x-kimi-k25-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU Kimi-K2.5)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen3-235B-MXFP4 (Accuracy + Performance) ROCm 7.2
nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test + Performance Test MI35x ROCm 7.2 (8-GPU Qwen3-235B-MXFP4)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen 3.5 (Accuracy) ROCm 7.2
nightly-8-gpu-mi35x-qwen35-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU Qwen 3.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-mi35x-glm5-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5 NSA)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
nightly-8-gpu-mi35x-minimax-m25-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Accuracy Test MI35x ROCm 7.2 (8-GPU MiniMax-M2.5)
timeout-minutes: 120
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP) ROCm 7.2
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
runs-on: linux-mi35x-gpu-8
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.ref || github.ref }}
- name: Setup docker (ROCm 7.2)
run: |
touch github_summary.md
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
# Install tabulate for run_suite.py (missing in MI35x container)
bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
- name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
timeout-minutes: 180
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}
check-all-jobs:
if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
needs:
# MI30x ROCm 7.2 Unit Tests
- nightly-test-1-gpu-unit-rocm720
# MI30x ROCm 7.2 Accuracy Tests
- nightly-accuracy-2-gpu-rocm720
- nightly-accuracy-2-gpu-vlm-rocm720
# MI30x ROCm 7.2 Performance Tests
- nightly-perf-2-gpu-text-rocm720
- nightly-perf-2-gpu-vlm-rocm720
- nightly-accuracy-8-gpu-rocm720
# MI30x ROCm 7.2 Combined Accuracy + Performance Tests
- nightly-8-gpu-grok1-int4-rocm720
- nightly-8-gpu-grok2-rocm720
- nightly-8-gpu-deepseek-v31-rocm720
- nightly-8-gpu-deepseek-v32-rocm720
- nightly-8-gpu-deepseek-v32-mtp-rocm720
- nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
- nightly-8-gpu-kimi-k25-rocm720
- nightly-8-gpu-qwen3-235b-rocm720
- nightly-8-gpu-qwen35-rocm720
- nightly-8-gpu-glm5-rocm720
- nightly-8-gpu-minimax-m25-rocm720
# MI30x ROCm 7.2 Diffusion Tests
- nightly-1-gpu-zimage-turbo-rocm720
# MI35x ROCm 7.2 jobs
- nightly-test-1-gpu-mi35x-rocm720
- nightly-accuracy-8-gpu-mi35x-rocm720
- nightly-8-gpu-mi35x-grok1-int4-rocm720
- nightly-8-gpu-mi35x-grok2-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
- nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
- nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
- nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
- nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
- nightly-8-gpu-mi35x-kimi-k25-rocm720
- nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
- nightly-8-gpu-mi35x-qwen35-rocm720
- nightly-8-gpu-mi35x-glm5-rocm720
- nightly-8-gpu-mi35x-minimax-m25-rocm720
runs-on: ubuntu-latest
steps:
- name: Check if any job failed
run: |
if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
echo "One or more ROCm 7.2 nightly test jobs failed"
exit 1
fi
if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
echo "One or more ROCm 7.2 nightly test jobs were cancelled"
exit 1
fi
echo "All ROCm 7.2 nightly test jobs passed"