Nightly Test (AMD ROCm 7.2) #105
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Nightly Test (AMD ROCm 7.2) | |
| on: | |
| # Daily scheduled run at 17:30 (GitHub evaluates cron schedules in UTC). | |
| schedule: | |
| - cron: '30 17 * * *' | |
| # Pushes to main that touch the version file also trigger the workflow. | |
| push: | |
| branches: | |
| - main | |
| paths: | |
| - 'python/sglang/version.py' | |
| # Manual trigger: choose one job from the dropdown, or type a comma-separated list in job_filter. | |
| workflow_dispatch: | |
| inputs: | |
| aiter_ref: | |
| type: string | |
| required: false | |
| default: '' | |
| description: 'Override AITER commit (optional, leave empty to use Dockerfile default)' | |
| continue_on_error: | |
| type: boolean | |
| required: false | |
| default: true | |
| description: 'Continue on error (do not fail the workflow on test failures)' | |
| job_select: | |
| type: choice | |
| required: false | |
| default: 'all' | |
| description: 'Select a job to run from dropdown (choose "all" to run all jobs)' | |
| options: | |
| - 'all' | |
| - nightly-test-1-gpu-unit-rocm720 | |
| - nightly-accuracy-2-gpu-rocm720 | |
| - nightly-accuracy-2-gpu-vlm-rocm720 | |
| - nightly-perf-2-gpu-text-rocm720 | |
| - nightly-perf-2-gpu-vlm-rocm720 | |
| - nightly-accuracy-8-gpu-rocm720 | |
| - nightly-8-gpu-grok1-int4-rocm720 | |
| - nightly-8-gpu-grok2-rocm720 | |
| - nightly-8-gpu-deepseek-v31-rocm720 | |
| - nightly-8-gpu-deepseek-v32-rocm720 | |
| - nightly-8-gpu-deepseek-v32-mtp-rocm720 | |
| - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720 | |
| - nightly-8-gpu-kimi-k25-rocm720 | |
| - nightly-8-gpu-qwen3-235b-rocm720 | |
| - nightly-8-gpu-qwen35-rocm720 | |
| - nightly-8-gpu-glm5-rocm720 | |
| - nightly-8-gpu-minimax-m25-rocm720 | |
| - nightly-1-gpu-zimage-turbo-rocm720 | |
| - nightly-test-1-gpu-mi35x-rocm720 | |
| - nightly-accuracy-8-gpu-mi35x-rocm720 | |
| - nightly-8-gpu-mi35x-grok1-int4-rocm720 | |
| - nightly-8-gpu-mi35x-grok2-rocm720 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720 | |
| - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720 | |
| - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720 | |
| - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720 | |
| - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720 | |
| - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720 | |
| - nightly-8-gpu-mi35x-kimi-k25-rocm720 | |
| - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720 | |
| - nightly-8-gpu-mi35x-qwen35-rocm720 | |
| - nightly-8-gpu-mi35x-glm5-rocm720 | |
| - nightly-8-gpu-mi35x-minimax-m25-rocm720 | |
| job_filter: | |
| type: string | |
| required: false | |
| default: '' | |
| description: 'Or type comma-separated job names (overrides dropdown if non-empty)' | |
| # Reusable-workflow entry point; the caller may pass the git ref to test. | |
| workflow_call: | |
| inputs: | |
| ref: | |
| type: string | |
| required: false | |
| default: '' | |
| description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.' | |
| aiter_ref: | |
| type: string | |
| required: false | |
| default: '' | |
| description: 'Override AITER commit (optional, leave empty to use Dockerfile default)' | |
| job_filter: | |
| type: string | |
| required: false | |
| default: 'all' | |
| description: 'Select which job to run (leave empty or "all" to run all jobs)' | |
| continue_on_error: | |
| type: boolean | |
| required: false | |
| default: true | |
| description: 'Continue on error (do not fail the workflow on test failures)' | |
| env: | |
| AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }} | |
| concurrency: | |
| # Runs started via workflow_call with `ref` set get a per-run group (keyed on the run id), so they never | |
| # collide with direct schedule/push runs, which are grouped by github.ref; runs with `ref` set are also never cancelled. | |
| # inputs.ref is the discriminator because github.event_name is inherited from the caller. NOTE(review): that makes the event_name check below look redundant - confirm. | |
| group: nightly-test-amd-rocm720-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }} | |
| cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }} | |
| jobs: | |
| # ============================================== MI30x ROCm 7.2 Unit Tests ============================================== | |
| # Nightly 1-GPU unit tests (LoRA, debug utils, scheduler, etc.) on MI30x with ROCm 7.2. | |
| # The test step keeps the suite's exit status in TEST_EXIT_CODE so the summary is published before the step exits with it. | |
| nightly-test-1-gpu-unit-rocm720: | |
| runs-on: linux-mi325-1gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| - name: Nightly Unit Test ROCm 7.2 (1-GPU) | |
| timeout-minutes: 90 | |
| run: | | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x ROCm 7.2 Accuracy Tests ============================================== | |
| # Nightly 2-GPU accuracy tests (GSM8K eval) on MI30x with ROCm 7.2. | |
| # NOTE(review): this is the only test step in view without timeout-minutes - confirm that is intended. | |
| nightly-accuracy-2-gpu-rocm720: | |
| runs-on: linux-mi325-2gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| - name: Nightly Test ROCm 7.2 (2-GPU) | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # Nightly 2-GPU VLM accuracy tests (Vision-Language Models, MMMU evaluation) with ROCm 7.2. | |
| # The summary file is truncated before the suite runs and appended to the step summary afterwards. | |
| nightly-accuracy-2-gpu-vlm-rocm720: | |
| runs-on: linux-mi325-2gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| - name: Nightly Accuracy Test ROCm 7.2 (2-GPU VLM MMMU) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # Nightly 2-GPU performance tests for text models with ROCm 7.2. | |
| # The suite is launched with SGLANG_USE_AITER=1 passed through amd_ci_exec.sh. | |
| nightly-perf-2-gpu-text-rocm720: | |
| runs-on: linux-mi325-2gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| - name: Performance Test ROCm 7.2 (2-GPU Text Models) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # Nightly 2-GPU performance tests for VLM models with ROCm 7.2. | |
| # The suite is launched with SGLANG_USE_AITER=1 passed through amd_ci_exec.sh. | |
| nightly-perf-2-gpu-vlm-rocm720: | |
| runs-on: linux-mi325-2gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| - name: Performance Test ROCm 7.2 (2-GPU VLM Models) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (ROCm 7.2) | |
| # Both test steps publish github_summary.md to the step summary, so each one truncates the file first; | |
| # otherwise the Grok1-FP8 step would republish whatever the GPT-OSS step left in it. | |
| # Each step keeps the suite's exit status in TEST_EXIT_CODE so the summary is published before the step exits with it. | |
| nightly-accuracy-8-gpu-rocm720: | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU GPT-OSS) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-FP8) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x ROCm 7.2 Combined Accuracy + Performance Tests ============================================== | |
| # 8-GPU Grok1-INT4: accuracy suite followed by performance suite (ROCm 7.2). | |
| # The performance step is marked continue-on-error, so its failure does not fail the job. | |
| nightly-8-gpu-grok1-int4-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-INT4) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test ROCm 7.2 (8-GPU Grok1-INT4) | |
| continue-on-error: true | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Grok2: accuracy suite followed by performance suite (ROCm 7.2). | |
| # The performance step is marked continue-on-error, so its failure does not fail the job. | |
| nightly-8-gpu-grok2-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Grok2) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test ROCm 7.2 (8-GPU Grok2) | |
| continue-on-error: true | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.1: accuracy suite followed by performance suite (ROCm 7.2). | |
| # The performance step is marked continue-on-error, so its failure does not fail the job. | |
| # NOTE(review): the performance step passes SGLANG_USE_ROCM700A=1 although this workflow targets rocm720 - confirm it is intended. | |
| nightly-8-gpu-deepseek-v31-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.1) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.1) | |
| continue-on-error: true | |
| timeout-minutes: 300 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_ROCM700A=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.2 basic configuration: accuracy suite followed by performance suite (ROCm 7.2). | |
| # The performance step is marked continue-on-error, so its failure does not fail the job. | |
| nightly-8-gpu-deepseek-v32-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| continue-on-error: true | |
| timeout-minutes: 150 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3.2 MTP: MTP accuracy suite followed by MTP performance suite (ROCm 7.2). | |
| # The performance step is marked continue-on-error, so its failure does not fail the job. | |
| nightly-8-gpu-deepseek-v32-mtp-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| continue-on-error: true | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU DeepSeek-V3 KV FP8: Basic + MTP runs with --kv-cache-dtype fp8_e4m3 (ROCm 7.2). | |
| # A single test step runs the nightly-amd-8-gpu-deepseek-v3-kv-fp8 suite and publishes its summary. | |
| nightly-8-gpu-deepseek-v3-kv-fp8-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: DeepSeek-V3 KV FP8 Test ROCm 7.2 (8-GPU Basic + MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Kimi-K2.5 accuracy suite (ROCm 7.2). | |
| # The test step keeps the suite's exit status in TEST_EXIT_CODE so the summary is published before the step exits with it. | |
| nightly-8-gpu-kimi-k25-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Kimi-K2.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Qwen3-235B: accuracy and performance run as one suite, nightly-8-gpu-qwen3-235b (ROCm 7.2). | |
| # The test step keeps the suite's exit status in TEST_EXIT_CODE so the summary is published before the step exits with it. | |
| nightly-8-gpu-qwen3-235b-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test + Performance Test ROCm 7.2 (8-GPU Qwen3) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU Qwen 3.5 (Accuracy) ROCm 7.2 | |
| # Dependencies are installed with --skip-aiter-build, then mistral-common and lm-eval[api] are added via pip. | |
| # --continue-on-error is derived from the continue_on_error input, like the other jobs in this workflow, | |
| # instead of being hard-coded, so the input is honoured here as well. | |
| nightly-8-gpu-qwen35-rocm720: | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]" | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Qwen 3.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU GLM-5 accuracy suite (ROCm 7.2). | |
| # After the regular dependency install, transformers is installed from GitHub at commit 96f807a33b75. | |
| nightly-8-gpu-glm5-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75 | |
| - name: Accuracy Test ROCm 7.2 (8-GPU GLM-5 NSA) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # 8-GPU MiniMax-M2.5 accuracy suite (ROCm 7.2). | |
| # The suite is launched with SGLANG_USE_AITER=1 passed through amd_ci_exec.sh. | |
| nightly-8-gpu-minimax-m25-rocm720: | |
| runs-on: linux-mi325-8gpu-sglang | |
| # Gate: upstream repository (or a pull_request event) AND the job filter is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,')) | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.ref || github.ref }} | |
| - name: Setup docker (ROCm 7.2) | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU MiniMax-M2.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # ============================================== MI30x ROCm 7.2 Diffusion Tests ============================================== | |
| # 1-GPU Z-Image-Turbo (Diffusion T2I) ROCm 7.2 | |
| # Runs test/registered/amd/test_zimage_turbo.py with pytest on the linux-mi325-1gpu-sglang | |
| # runner and uploads whatever is found under diffusion-artifacts/. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-1-gpu-zimage-turbo-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo-rocm720,')) | |
| runs-on: linux-mi325-1gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # The summary file is truncated first, as in the other test steps of this workflow, | |
| # so only output from this run is appended to $GITHUB_STEP_SUMMARY. | |
| # When the continue_on_error input is truthy the expression expands to the literal | |
| # shell text `|| true`, which masks a pytest failure; otherwise the failing exit | |
| # status is saved in TEST_EXIT_CODE and returned after the summary is published. | |
| - name: Z-Image-Turbo Diffusion Test ROCm 7.2 (1-GPU) | |
| timeout-minutes: 45 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \ | |
| pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # always() runs the upload even when the test step failed; a missing directory is ignored. | |
| - name: Upload generated images | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: zimage-turbo-outputs-rocm720 | |
| path: diffusion-artifacts/ | |
| if-no-files-found: ignore | |
| retention-days: 30 | |
| # ============================================== MI35x ROCm 7.2 Tests ============================================== | |
| # MI35x 1-GPU ROCm 7.2 tests | |
| # Runs suite nightly-amd-1-gpu-mi35x on the linux-mi35x-gpu-1 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-test-1-gpu-mi35x-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x-rocm720,')) | |
| runs-on: linux-mi35x-gpu-1 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh | |
| # The summary file is truncated first, as in the other test steps of this workflow, | |
| # so only output from this run is appended to $GITHUB_STEP_SUMMARY. | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early; the | |
| # saved code is returned after the summary has been published. | |
| - name: Nightly Test MI35x ROCm 7.2 (1-GPU) | |
| timeout-minutes: 90 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Accuracy Tests - GPT-OSS (ROCm 7.2) | |
| # Runs suite nightly-amd-8-gpu-mi35x on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-accuracy-8-gpu-mi35x-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # The summary file is truncated first, as in the other test steps of this workflow, | |
| # so only output from this run is appended to $GITHUB_STEP_SUMMARY. | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early; the | |
| # saved code is returned after the summary has been published. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GPT-OSS) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Grok1-INT4 (Accuracy + Performance) ROCm 7.2 | |
| # Runs suites nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 and nightly-perf-8-gpu-mi35x-grok1-int4 | |
| # as two steps on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-grok1-int4-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # NOTE(review): the 60-minute step timeout equals --timeout-per-file 3600 (seconds), so the | |
| # step limit would be reached no later than the per-file limit - confirm this is intended. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok1-INT4) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # continue-on-error: a failure of this performance step does not fail the job. | |
| # The summary file is cleared again so it holds only this step's output. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok1-INT4) | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Grok2 (Accuracy + Performance) ROCm 7.2 | |
| # Runs suites nightly-amd-accuracy-8-gpu-mi35x-grok2 and nightly-perf-8-gpu-mi35x-grok2 | |
| # as two steps on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-grok2-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # NOTE(review): the 60-minute step timeout equals --timeout-per-file 3600 (seconds), so the | |
| # step limit would be reached no later than the per-file limit - confirm this is intended. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok2) | |
| timeout-minutes: 60 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # continue-on-error: a failure of this performance step does not fail the job. | |
| # The summary file is cleared again so it holds only this step's output. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok2) | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e RCCL_MSCCL_ENABLE=0 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-R1-MXFP4 (Accuracy + Performance) ROCm 7.2 | |
| # Accuracy runs suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 through run_suite.py; the | |
| # performance step invokes its test file directly. Runner: linux-mi35x-gpu-8. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # continue-on-error: a failure of this performance step does not fail the job. | |
| # The test file is run directly with python3, so no --continue-on-error flag is passed. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4) | |
| timeout-minutes: 300 | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-R1-MXFP4 KV FP8 (Accuracy + Performance) ROCm 7.2 | |
| # Accuracy runs suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 through run_suite.py; | |
| # the performance step invokes its test file directly. Runner: linux-mi35x-gpu-8. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # continue-on-error: a failure of this performance step does not fail the job. | |
| # The test file is run directly with python3, so no --continue-on-error flag is passed. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8) | |
| timeout-minutes: 300 | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion (Accuracy + Performance) ROCm 7.2 | |
| # Accuracy runs suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion through run_suite.py; | |
| # the performance step invokes its test file directly. Runner: linux-mi35x-gpu-8. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # continue-on-error: a failure of this performance step does not fail the job. | |
| # The test file is run directly with python3, so no --continue-on-error flag is passed. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion) | |
| timeout-minutes: 300 | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-V3.2 Accuracy Test (ROCm 7.2) | |
| # Runs suite nightly-amd-8-gpu-mi35x-deepseek-v32 on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # --continue-on-error is passed only when the continue_on_error input is truthy. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-V3.2 TP+MTP Accuracy Test (ROCm 7.2) | |
| # Runs suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # --continue-on-error is passed only when the continue_on_error input is truthy. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 TP+MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-V3.2 Performance Test (Basic) ROCm 7.2 | |
| # Runs suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # Unlike the performance steps of the combined accuracy+performance jobs, this step has | |
| # no continue-on-error key, so a non-zero exit code fails the job. | |
| # `|| TEST_EXIT_CODE=$?` defers that exit until the summary has been published. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 150 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Kimi-K2.5 (Accuracy) ROCm 7.2 | |
| # Runs suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-kimi-k25-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # --continue-on-error is passed only when the continue_on_error input is truthy. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Kimi-K2.5) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Qwen3-235B-MXFP4 (Accuracy + Performance) ROCm 7.2 | |
| # Runs the single suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 in one step on the | |
| # linux-mi35x-gpu-8 runner (the step name labels it as accuracy plus performance). | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # --continue-on-error is passed only when the continue_on_error input is truthy. | |
| - name: Accuracy Test + Performance Test MI35x ROCm 7.2 (8-GPU Qwen3-235B-MXFP4) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU Qwen 3.5 (Accuracy) ROCm 7.2 | |
| # Runs suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-qwen35-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| # The dependency script is called with --skip-aiter-build in addition to | |
| # --skip-test-time-deps; tabulate, mistral-common and lm-eval[api] are installed with pip. | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]" | |
| # --continue-on-error follows the continue_on_error input, matching the other jobs in | |
| # this workflow, so a dispatcher can turn it off. | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Qwen 3.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU GLM-5 (Accuracy) ROCm 7.2 | |
| # Runs suite nightly-amd-8-gpu-mi35x-glm5 on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-glm5-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| # In addition to tabulate, installs transformers from upstream git at commit 96f807a33b75. | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75 | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| # --continue-on-error is passed only when the continue_on_error input is truthy. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5 NSA) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2 | |
| # Runs suite nightly-amd-8-gpu-mi35x-minimax-m25 on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-8-gpu-mi35x-minimax-m25-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # Passes SGLANG_USE_AITER=1 through -e (presumably an env var for the executed command; verify in amd_ci_exec.sh). | |
| # `|| TEST_EXIT_CODE=$?` keeps a failing suite from ending the script early, so the | |
| # summary file is still appended to $GITHUB_STEP_SUMMARY before the saved code is returned. | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU MiniMax-M2.5) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
| # MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP) ROCm 7.2 | |
| # Runs suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp on the linux-mi35x-gpu-8 runner. | |
| # Gate: repository is sgl-project/sglang or the event is pull_request, and the job | |
| # filter/select input is empty, 'all', or contains this job name as a whole | |
| # comma-delimited entry (the input is wrapped in commas before contains()). | |
| nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720: | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Use inputs.ref when provided, otherwise the ref that triggered the run. | |
| ref: ${{ inputs.ref || github.ref }} | |
| # Create the local summary file, then run the container start script for rocm720. | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| # Unlike the performance steps of the combined accuracy+performance jobs, this step has | |
| # no continue-on-error key, so a non-zero exit code fails the job. | |
| # `|| TEST_EXIT_CODE=$?` defers that exit until the summary has been published. | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true | |
| exit ${TEST_EXIT_CODE:-0} | |
# Aggregation gate for the ROCm 7.2 nightly run: fails when any job listed in
# `needs` ended with result 'failure' or 'cancelled'. Other results (for
# example a job skipped by its own `if:`) do not fail the gate.
check-all-jobs:
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
  runs-on: ubuntu-latest
  needs:
    # MI30x ROCm 7.2 unit tests
    - nightly-test-1-gpu-unit-rocm720
    # MI30x ROCm 7.2 accuracy tests
    - nightly-accuracy-2-gpu-rocm720
    - nightly-accuracy-2-gpu-vlm-rocm720
    - nightly-accuracy-8-gpu-rocm720
    # MI30x ROCm 7.2 performance tests
    - nightly-perf-2-gpu-text-rocm720
    - nightly-perf-2-gpu-vlm-rocm720
    # MI30x ROCm 7.2 combined accuracy + performance tests
    - nightly-8-gpu-grok1-int4-rocm720
    - nightly-8-gpu-grok2-rocm720
    - nightly-8-gpu-deepseek-v31-rocm720
    - nightly-8-gpu-deepseek-v32-rocm720
    - nightly-8-gpu-deepseek-v32-mtp-rocm720
    - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
    - nightly-8-gpu-kimi-k25-rocm720
    - nightly-8-gpu-qwen3-235b-rocm720
    - nightly-8-gpu-qwen35-rocm720
    - nightly-8-gpu-glm5-rocm720
    - nightly-8-gpu-minimax-m25-rocm720
    # MI30x ROCm 7.2 diffusion tests
    - nightly-1-gpu-zimage-turbo-rocm720
    # MI35x ROCm 7.2 jobs
    - nightly-test-1-gpu-mi35x-rocm720
    - nightly-accuracy-8-gpu-mi35x-rocm720
    - nightly-8-gpu-mi35x-grok1-int4-rocm720
    - nightly-8-gpu-mi35x-grok2-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    - nightly-8-gpu-mi35x-kimi-k25-rocm720
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
    - nightly-8-gpu-mi35x-qwen35-rocm720
    - nightly-8-gpu-mi35x-glm5-rocm720
    - nightly-8-gpu-mi35x-minimax-m25-rocm720
  steps:
    - name: Check if any job failed
      run: |
        # Both expressions are rendered by the workflow engine before the
        # script runs, so each variable holds the text "true" or "false".
        any_failed="${{ contains(needs.*.result, 'failure') }}"
        any_cancelled="${{ contains(needs.*.result, 'cancelled') }}"

        # A failure is reported in preference to a cancellation.
        if [[ "$any_failed" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs failed"
          exit 1
        elif [[ "$any_cancelled" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs were cancelled"
          exit 1
        fi

        echo "All ROCm 7.2 nightly test jobs passed"