Nightly Test (AMD ROCm 7.2) #129
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Nightly Test (AMD ROCm 7.2)

# Triggers: a daily schedule, pushes to main that touch the version file,
# manual dispatch, and invocation from another workflow (workflow_call).
on:
  schedule:
    # GitHub evaluates cron expressions in UTC: every day at 17:30.
    - cron: "30 17 * * *"

  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'

  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ""
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
      # Dropdown selector; job_filter below takes precedence when it is non-empty.
      job_select:
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        type: choice
        required: false
        default: "all"
        options:
          - "all"
          # 1-GPU Unit Tests (MI30x + MI35x)
          - nightly-test-1-gpu-unit-rocm720
          - nightly-test-1-gpu-mi35x-rocm720
          # 2-GPU and 4-GPU Tests (MI30x + MI35x)
          - nightly-accuracy-2-gpu-rocm720
          - nightly-accuracy-2-gpu-vlm-rocm720
          - nightly-perf-2-gpu-text-rocm720
          - nightly-perf-2-gpu-vlm-rocm720
          - nightly-4-gpu-rocm720
          # 2-GPU GLM-5.1-MXFP4 (MI35x only)
          - nightly-2-gpu-mi35x-glm51-mxfp4-rocm720
          # 2-GPU DeepSeek-R1-MXFP4 TP2 (MI35x only)
          - nightly-2-gpu-mi35x-deepseek-r1-mxfp4-tp2-rocm720
          # 8-GPU GPT-OSS (MI30x mixes Grok1-FP8; MI35x mixes Qwen3-Coder-Next)
          - nightly-accuracy-8-gpu-rocm720
          - nightly-accuracy-8-gpu-mi35x-rocm720
          # 8-GPU Grok1-INT4 (MI30x + MI35x)
          - nightly-8-gpu-grok1-int4-rocm720
          - nightly-8-gpu-mi35x-grok1-int4-rocm720
          # 8-GPU Grok2 (MI30x + MI35x)
          - nightly-8-gpu-grok2-rocm720
          - nightly-8-gpu-mi35x-grok2-rocm720
          # 8-GPU DeepSeek-V3.x (MI30x)
          - nightly-8-gpu-deepseek-v31-rocm720
          - nightly-8-gpu-deepseek-v32-rocm720
          - nightly-8-gpu-deepseek-v32-mtp-rocm720
          - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
          # 8-GPU DeepSeek-V3.2 (MI35x)
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          # 8-GPU DeepSeek-R1 (MI35x only)
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-tp4-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-hicache-rocm720
          # 8-GPU DeepSeek-V4 (MI35x only)
          - nightly-8-gpu-mi35x-deepseek-v4-flash-rocm720
          - nightly-8-gpu-mi35x-deepseek-v4-pro-rocm720
          # 8-GPU Kimi-K2.6 (MI30x + MI35x)
          - nightly-8-gpu-kimi-k26-rocm720
          - nightly-8-gpu-mi35x-kimi-k26-rocm720
          # 8-GPU Qwen3-235B (MI30x + MI35x MXFP4)
          - nightly-8-gpu-qwen3-235b-rocm720
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
          # 8-GPU Qwen 3.5 (MI30x + MI35x)
          - nightly-8-gpu-qwen35-rocm720
          - nightly-8-gpu-mi35x-qwen35-rocm720
          # 8-GPU GLM-5.1 (MI30x + MI35x)
          - nightly-8-gpu-glm51-rocm720
          - nightly-8-gpu-mi35x-glm51-rocm720
          # 8-GPU GLM-5-MXFP4 (MI35x only)
          - nightly-8-gpu-mi35x-glm5-mxfp4-rocm720
          # 4-GPU MiniMax-M2.5 (MI35x)
          - nightly-4-gpu-mi35x-minimax-m25-rocm720
          # 8-GPU MiniMax-M2.7 (MI30x only)
          - nightly-8-gpu-minimax-m27-rocm720
          # Diffusion (MI30x)
          - nightly-1-gpu-zimage-turbo-rocm720
      job_filter:
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
        type: string
        required: false
        default: ""

  # Reusable-workflow entry point. There is no job_select here; callers pass
  # job_filter directly, and it defaults to 'all'.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ""
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ""
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: "all"
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
# Workflow-wide environment, visible to every job and step below.
env:
  # Empty unless the aiter_ref input is supplied.
  AITER_COMMIT_OVERRIDE: "${{ inputs.aiter_ref }}"
  # Docker Hub credentials, read from repository secrets.
  DOCKERHUB_AMD_USERNAME: "${{ secrets.DOCKERHUB_AMD_USERNAME }}"
  DOCKERHUB_AMD_TOKEN: "${{ secrets.DOCKERHUB_AMD_TOKEN }}"
concurrency:
  # Group selection, first match wins:
  #   1. Manual dispatch           -> "manual-<run_id>": unique, so manual runs never cancel each other.
  #   2. workflow_call with a ref  -> "caller-<run_id>": unique per caller run, so it cannot collide
  #                                   with direct schedule/push triggers.
  #   3. Anything else             -> github.ref: shared per branch.
  # inputs.ref (not github.event_name) is used to detect a reusable-workflow call, because
  # github.event_name inherits from the caller in workflow_call.
  group: nightly-test-amd-rocm720-${{ github.event_name == 'workflow_dispatch' && format('manual-{0}', github.run_id) || inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
  # Only runs in the shared per-ref group cancel an in-progress predecessor.
  # The former `github.event_name != 'workflow_call'` clause was removed: as noted above,
  # github.event_name never equals 'workflow_call' here, so the clause was always true.
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_dispatch' }}
| jobs: | |
| # ============================================== MI30x ROCm 7.2 Unit Tests ============================================== | |
| # 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x ROCm 7.2) | |
| # ============================================================================== | |
| # 1-GPU Unit Tests (MI30x + MI35x) | |
| # ============================================================================== | |
nightly-test-1-gpu-unit-rocm720:
  # Runs the nightly-amd-1-gpu suite on a linux-mi325-1gpu-sglang runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected,
  # i.e. no filter was given, the filter is 'all', or the comma-separated filter
  # contains this job's name. job_filter wins over job_select when both are set.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit-rocm720,'))
  runs-on: linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # A failing suite exit status is held in TEST_EXIT_CODE so the summary is
    # still appended to the step summary before the step exits with that status.
    # --continue-on-error is passed on scheduled runs or when the input is true.
    - name: Nightly Unit Test ROCm 7.2 (1-GPU)
      timeout-minutes: 90
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-test-1-gpu-mi35x-rocm720:
  # Runs the nightly-amd-1-gpu-mi35x suite on a linux-mi35x-gpu-1 runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x-rocm720,'))
  runs-on: linux-mi35x-gpu-1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh

    # The suite's failing exit status is kept in TEST_EXIT_CODE so the summary
    # can be published first; the step then exits with that status.
    - name: Nightly Test MI35x ROCm 7.2 (1-GPU)
      timeout-minutes: 90
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 2-GPU and 4-GPU Tests (MI30x + MI35x) | |
| # ============================================================================== | |
nightly-accuracy-2-gpu-rocm720:
  # Runs the nightly-amd suite on a linux-mi325-2gpu-sglang runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # NOTE(review): unlike the sibling test steps in this workflow, this step sets
    # no timeout-minutes, so only the job-level limit applies. Confirm whether that
    # is intentional before adding one (per-file timeout here is 7200 s).
    - name: Nightly Test ROCm 7.2 (2-GPU)
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-2-gpu-mi35x-glm51-mxfp4-rocm720:
  # Runs the nightly-amd-2-gpu-mi35x-glm51-mxfp4 suite on a linux-mi35x-gpu-2 runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-2-gpu-mi35x-glm51-mxfp4-rocm720,'))
  runs-on: linux-mi35x-gpu-2
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    # tabulate is installed separately inside the container after the main script.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Truncate the summary file, run the suite, publish the summary, then exit
    # with the suite's status (0 when it succeeded).
    - name: Accuracy Test MI35x ROCm 7.2 (2-GPU GLM-5.1-MXFP4 GSM8K)
      timeout-minutes: 120
      run: |
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-2-gpu-mi35x-glm51-mxfp4 --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-2-gpu-mi35x-deepseek-r1-mxfp4-tp2-rocm720:
  # Runs the nightly-amd-2-gpu-mi35x-deepseek-r1-mxfp4-tp2 suite on a linux-mi35x-gpu-2 runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-2-gpu-mi35x-deepseek-r1-mxfp4-tp2-rocm720,'))
  runs-on: linux-mi35x-gpu-2
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    # tabulate is installed separately inside the container after the main script.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Truncate the summary file, run the suite, publish the summary, then exit
    # with the suite's status (0 when it succeeded).
    - name: Accuracy Test MI35x ROCm 7.2 (2-GPU DeepSeek-R1-MXFP4 TP2)
      timeout-minutes: 180
      run: |
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-2-gpu-mi35x-deepseek-r1-mxfp4-tp2 --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-accuracy-2-gpu-vlm-rocm720:
  # Runs the nightly-amd-accuracy-2-gpu-vlm suite on a linux-mi325-2gpu-sglang runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # The suite's failing exit status is kept in TEST_EXIT_CODE so the summary
    # can be published first; the step then exits with that status.
    - name: Nightly Accuracy Test ROCm 7.2 (2-GPU VLM MMMU)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-perf-2-gpu-text-rocm720:
  # Runs the nightly-amd-perf-text-2-gpu suite on a linux-mi325-2gpu-sglang runner,
  # with SGLANG_USE_AITER=1 passed to the container command.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # The suite's failing exit status is kept in TEST_EXIT_CODE so the summary
    # can be published first; the step then exits with that status.
    - name: Performance Test ROCm 7.2 (2-GPU Text Models)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-perf-2-gpu-vlm-rocm720:
  # Runs the nightly-amd-perf-vlm-2-gpu suite on a linux-mi325-2gpu-sglang runner,
  # with SGLANG_USE_AITER=1 passed to the container command.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # The suite's failing exit status is kept in TEST_EXIT_CODE so the summary
    # can be published first; the step then exits with that status.
    - name: Performance Test ROCm 7.2 (2-GPU VLM Models)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-4-gpu-rocm720:
  # Runs the nightly-amd-4-gpu suite on a linux-mi325-4gpu-sglang runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu-rocm720,'))
  runs-on: linux-mi325-4gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # Truncate the summary file, run the suite, publish the summary, then exit
    # with the suite's status (0 when it succeeded).
    - name: Nightly Test ROCm 7.2 (4-GPU)
      timeout-minutes: 120
      run: |
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU GPT-OSS (MI30x mixes Grok1-FP8; MI35x mixes Qwen3-Coder-Next) | |
| # ============================================================================== | |
nightly-accuracy-8-gpu-rocm720:
  # Runs two accuracy suites back to back on a linux-mi325-8gpu-sglang runner:
  # nightly-amd-accuracy-8-gpu-gpt-oss, then nightly-amd-accuracy-8-gpu-grok1-fp8.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Both test steps share github_summary.md. Each one truncates it first so a
    # step publishes only its own summary; without this the second step would
    # append the first step's content to the step summary a second time.
    - name: Accuracy Test ROCm 7.2 (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-FP8)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file (drops the GPT-OSS output already published)
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-accuracy-8-gpu-mi35x-rocm720:
  # Runs the nightly-amd-8-gpu-mi35x suite on a linux-mi35x-gpu-8 runner.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # The suite's failing exit status is kept in TEST_EXIT_CODE so the summary
    # can be published first; the step then exits with that status.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU Grok1-INT4 (MI30x + MI35x) | |
| # ============================================================================== | |
nightly-8-gpu-grok1-int4-rocm720:
  # Runs the Grok1-INT4 accuracy suite and then its performance suite on a
  # linux-mi325-8gpu-sglang runner, both with RCCL_MSCCL_ENABLE=0.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Each test step truncates the shared summary file, runs its suite, publishes
    # the summary, and exits with the suite's status.
    - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # continue-on-error: a failure of this step does not fail the job.
    - name: Performance Test ROCm 7.2 (8-GPU Grok1-INT4)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-mi35x-grok1-int4-rocm720:
  # Runs the MI35x Grok1-INT4 accuracy suite and then its performance suite on a
  # linux-mi35x-gpu-8 runner, both with RCCL_MSCCL_ENABLE=0.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Each test step truncates the shared summary file, runs its suite, publishes
    # the summary, and exits with the suite's status.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # continue-on-error: a failure of this step does not fail the job.
    - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU Grok2 (MI30x + MI35x) | |
| # ============================================================================== | |
nightly-8-gpu-grok2-rocm720:
  # Runs the Grok2 accuracy suite and then its performance suite on a
  # linux-mi325-8gpu-sglang runner, both with RCCL_MSCCL_ENABLE=0.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Each test step truncates the shared summary file, runs its suite, publishes
    # the summary, and exits with the suite's status.
    - name: Accuracy Test ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # continue-on-error: a failure of this step does not fail the job.
    - name: Performance Test ROCm 7.2 (8-GPU Grok2)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-mi35x-grok2-rocm720:
  # Runs the MI35x Grok2 accuracy suite and then its performance suite on a
  # linux-mi35x-gpu-8 runner, both with RCCL_MSCCL_ENABLE=0.
  # Gate: upstream repository (or a pull_request event) AND this job is selected
  # (no filter, 'all', or its name is in the comma-separated filter).
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Caller-supplied ref when present, otherwise the triggering commit.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Each test step truncates the shared summary file, runs its suite, publishes
    # the summary, and exits with the suite's status.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # continue-on-error: a failure of this step does not fail the job.
    - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok2)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU DeepSeek-V3.x (MI30x) | |
| # ============================================================================== | |
| # Job on runner linux-mi325-8gpu-sglang: 8-GPU DeepSeek-V3.1 accuracy suite, then a non-blocking performance suite. | |
| nightly-8-gpu-deepseek-v31-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.1) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_AITER=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.1) | |
| timeout-minutes: 300 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # NOTE(review): this step passes SGLANG_USE_ROCM700A=1 while the accuracy step passes SGLANG_USE_AITER=1 - confirm the ROCm 7.0-named flag is intended in this ROCm 7.2 job. | |
| # The per-file timeout (18000 s) equals the step timeout (300 min). | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e SGLANG_USE_ROCM700A=1 \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # Job on runner linux-mi325-8gpu-sglang: 8-GPU DeepSeek-V3.2 (Basic) accuracy suite, then a non-blocking performance suite. | |
| nightly-8-gpu-deepseek-v32-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 150 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Same capture-then-publish pattern as the accuracy step. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # Job on runner linux-mi325-8gpu-sglang: 8-GPU DeepSeek-V3.2 MTP accuracy suite, then a non-blocking performance suite. | |
| nightly-8-gpu-deepseek-v32-mtp-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 180 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Same capture-then-publish pattern as the accuracy step. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # Job on runner linux-mi325-8gpu-sglang: single 8-GPU DeepSeek-V3 KV FP8 suite (step name: Basic + MTP). | |
| nightly-8-gpu-deepseek-v3-kv-fp8-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: DeepSeek-V3 KV FP8 Test ROCm 7.2 (8-GPU Basic + MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # ============================================================================== | |
| # 8-GPU DeepSeek-V3.2 (MI35x) | |
| # ============================================================================== | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-V3.2 accuracy suite. | |
| nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-V3.2 TP+MTP accuracy suite. | |
| nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 TP+MTP) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-V3.2 Basic performance suite. Unlike perf steps in combined jobs, this step has no continue-on-error, so a failure fails the job. | |
| nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic) | |
| timeout-minutes: 150 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-V3.2 MTP performance suite. This step has no continue-on-error, so a failure fails the job. | |
| nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # ============================================================================== | |
| # 8-GPU DeepSeek-R1 (MI35x only) | |
| # ============================================================================== | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-R1-MXFP4 accuracy suite, then a non-blocking performance test file. | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4) | |
| timeout-minutes: 300 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Runs the perf test file directly instead of going through run_suite.py; same capture-then-publish pattern as the accuracy step. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): DeepSeek-R1-MXFP4 TP4 accuracy suite. | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-tp4-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-tp4-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (DeepSeek-R1-MXFP4 TP4) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-tp4 --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-R1-MXFP4 KV FP8 accuracy suite, then a non-blocking performance test file. | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8) | |
| timeout-minutes: 300 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Runs the perf test file directly instead of going through run_suite.py; same capture-then-publish pattern as the accuracy step. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion accuracy suite, then a non-blocking performance test file. | |
| nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion) | |
| timeout-minutes: 300 | |
| # Non-blocking: a failure in this step does not fail the job. | |
| continue-on-error: true | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Runs the perf test file directly instead of going through run_suite.py; same capture-then-publish pattern as the accuracy step. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$? | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): 8-GPU DeepSeek-R1 HiCache accuracy suite. | |
| nightly-8-gpu-mi35x-deepseek-r1-hicache-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-hicache-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1 HiCache) | |
| timeout-minutes: 180 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-hicache --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # ============================================================================== | |
| # 8-GPU DeepSeek-V4 (MI35x only) | |
| # ============================================================================== | |
| # MI35x job (runner linux-mi35x-gpu-8): combined accuracy + performance suite for 8-GPU DeepSeek-V4-Flash (step name: FP8 + FP4). | |
| nightly-8-gpu-mi35x-deepseek-v4-flash-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-v4-flash-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| # --skip-test-time-deps: GSM8K + bench_one_batch_server don't need lmms-eval / human-eval. | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy + Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V4-Flash FP8 + FP4) | |
| timeout-minutes: 300 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v4-flash --nightly --timeout-per-file 7200 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # MI35x job (runner linux-mi35x-gpu-8): combined accuracy + performance suite for 8-GPU DeepSeek-V4-Pro (step name: FP8 + FP4). | |
| nightly-8-gpu-mi35x-deepseek-v4-pro-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-v4-pro-rocm720,')) | |
| runs-on: linux-mi35x-gpu-8 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: | | |
| # --skip-test-time-deps: GSM8K + bench_one_batch_server don't need lmms-eval / human-eval. | |
| bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| # Install tabulate for run_suite.py (missing in MI35x container) | |
| bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate | |
| - name: Accuracy + Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V4-Pro FP8 + FP4) | |
| timeout-minutes: 480 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v4-pro --nightly --timeout-per-file 14400 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
| # ============================================================================== | |
| # 8-GPU Kimi-K2.6 (MI30x + MI35x) | |
| # ============================================================================== | |
| # Job on runner linux-mi325-8gpu-sglang: 8-GPU Kimi-K2.6 accuracy suite. | |
| nightly-8-gpu-kimi-k26-rocm720: | |
| # Runs only in sgl-project/sglang or on pull_request events, and only when the job filter (inputs.job_filter, else inputs.job_select) is empty, 'all', or names this job. | |
| if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k26-rocm720,')) | |
| runs-on: linux-mi325-8gpu-sglang | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the requested ref when one is supplied; otherwise the triggering commit. | |
| ref: ${{ inputs.ref || github.sha }} | |
| - name: Ensure VRAM is clear | |
| run: bash scripts/ci/amd/ensure_vram_clear.sh rocm | |
| - name: Setup docker (ROCm 7.2) | |
| run: | | |
| # Make sure github_summary.md exists, then start the CI container for rocm720. | |
| touch github_summary.md | |
| bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720 | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Install dependencies | |
| run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps | |
| - name: Accuracy Test ROCm 7.2 (8-GPU Kimi-K2.6) | |
| timeout-minutes: 120 | |
| run: | | |
| > github_summary.md # Clear summary file | |
| # Capture the suite's exit status rather than stopping here, so the summary is still published below. | |
| # --continue-on-error is passed on scheduled runs or when the continue_on_error input is true. | |
| bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ | |
| -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ | |
| python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k26 --nightly --timeout-per-file 3600 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? | |
| # Append the contents of github_summary.md to the job summary; the target is quoted so a path containing spaces cannot break the redirect. | |
| echo "$(<github_summary.md )" >> "$GITHUB_STEP_SUMMARY" || true | |
| # Exit with the captured status (0 if the suite succeeded). | |
| exit "${TEST_EXIT_CODE:-0}" | |
# MI35x variant: Kimi-K2.6 accuracy suite on the 8-GPU MI35x runner.
nightly-8-gpu-mi35x-kimi-k26-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k26-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Kimi-K2.6)
      timeout-minutes: 180
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        # --continue-on-error is added on scheduled runs or when the
        # continue_on_error input is set.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k26 --nightly --timeout-per-file 7200 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU Qwen3-235B (MI30x + MI35x MXFP4) | |
| # ============================================================================== | |
# MI30x variant: Qwen3-235B accuracy + performance suite on the 8-GPU MI325 runner.
nightly-8-gpu-qwen3-235b-rocm720:
  runs-on: linux-mi325-8gpu-sglang
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    - name: Accuracy Test + Performance Test ROCm 7.2 (8-GPU Qwen3)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        # --continue-on-error is added on scheduled runs or when the
        # continue_on_error input is set.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x variant: Qwen3-235B-MXFP4 accuracy + performance suite on the 8-GPU MI35x runner.
nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test + Performance Test MI35x ROCm 7.2 (8-GPU Qwen3-235B-MXFP4)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        # --continue-on-error is added on scheduled runs or when the
        # continue_on_error input is set.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU Qwen 3.5 (MI30x + MI35x) | |
| # ============================================================================== | |
# MI30x variant: Qwen 3.5 accuracy suite followed by an FP8 performance suite
# on the 8-GPU MI325 runner.
nightly-8-gpu-qwen35-rocm720:
  runs-on: linux-mi325-8gpu-sglang
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
        # Extra Python packages installed on top of the base dependencies.
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

    - name: Accuracy Test ROCm 7.2 (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test ROCm 7.2 (8-GPU Qwen 3.5 FP8)
      # A failing perf step is tolerated and does not fail the job.
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-qwen35-fp8 --nightly --timeout-per-file 5400 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x variant: Qwen 3.5 accuracy suite followed by an FP8 performance suite
# on the 8-GPU MI35x runner.
nightly-8-gpu-mi35x-qwen35-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
        # Extra Python packages installed on top of the base dependencies.
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x ROCm 7.2 (8-GPU Qwen 3.5 FP8)
      # A failing perf step is tolerated and does not fail the job.
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-qwen35-fp8 --nightly --timeout-per-file 5400 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU GLM-5.1 (MI30x + MI35x) | |
| # ============================================================================== | |
# MI30x variant: GLM-5.1 accuracy suite followed by a performance suite on the
# 8-GPU MI325 runner.
nightly-8-gpu-glm51-rocm720:
  runs-on: linux-mi325-8gpu-sglang
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm51-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    - name: Accuracy Test ROCm 7.2 (8-GPU GLM-5.1 DSA)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm51 --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test ROCm 7.2 (8-GPU GLM-5.1)
      # A failing perf step is tolerated and does not fail the job.
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm51 --nightly --timeout-per-file 5400 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x variant: GLM-5.1 accuracy suite followed by a performance suite on the
# 8-GPU MI35x runner.
nightly-8-gpu-mi35x-glm51-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm51-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5.1 DSA)
      timeout-minutes: 180
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm51 --nightly --timeout-per-file 7200 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x ROCm 7.2 (8-GPU GLM-5.1)
      # A failing perf step is tolerated and does not fail the job.
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # NOTE(review): unlike the MI30x GLM-5.1 perf step, this command does
        # not pass -e SGLANG_USE_AITER=1 -- confirm that is intentional.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-glm51 --nightly --timeout-per-file 5400 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU GLM-5-MXFP4 (MI35x only) | |
| # ============================================================================== | |
# MI35x-only job: GLM-5-MXFP4 accuracy suite followed by a standalone
# performance test script on the 8-GPU MI35x runner.
nightly-8-gpu-mi35x-glm5-mxfp4-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-mxfp4-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5-MXFP4)
      timeout-minutes: 180
      run: |
        # Truncate the summary file before this step writes to it.
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5-mxfp4 --nightly --timeout-per-file 7200 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test MI35x ROCm 7.2 (8-GPU GLM-5-MXFP4)
      # A failing perf step is tolerated and does not fail the job.
      continue-on-error: true
      timeout-minutes: 300
      run: |
        # Truncate the summary file before this step writes to it.
        > github_summary.md
        # The perf test file is invoked directly here, not via run_suite.py.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_glm5_mxfp4_perf_mi35x.py \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 4-GPU MiniMax-M2.5 (MI35x) | |
| # ============================================================================== | |
# MI35x job: MiniMax-M2.5 accuracy suite (TP4 suite name) on the MI35x runner.
nightly-4-gpu-mi35x-minimax-m25-rocm720:
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu-mi35x-minimax-m25-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py, as the other MI35x jobs in this
        # workflow do (they note it is missing in the MI35x container).
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    - name: Accuracy Test MI35x ROCm 7.2 (4-GPU MiniMax-M2.5)
      timeout-minutes: 120
      run: |
        # Truncate the summary file before this step writes to it.
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        # --continue-on-error is added on scheduled runs or when the
        # continue_on_error input is set.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e SGLANG_USE_AITER_UNIFIED_ATTN=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-4-gpu-mi35x-minimax-m25-tp4 --nightly --timeout-per-file 5400 ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # 8-GPU MiniMax-M2.7 (MI30x only) | |
| # ============================================================================== | |
# MI30x-only job: MiniMax-M2.7 accuracy suite followed by a performance suite
# on the 8-GPU MI325 runner.
nightly-8-gpu-minimax-m27-rocm720:
  runs-on: linux-mi325-8gpu-sglang
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m27-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    - name: Accuracy Test ROCm 7.2 (8-GPU MiniMax-M2.7)
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        # Capture the suite's exit status instead of aborting, so the summary
        # is still published before the step exits with that status.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m27 --nightly --timeout-per-file 3600 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Performance Test ROCm 7.2 (8-GPU MiniMax-M2.7)
      # Perf test failure doesn't fail the job if accuracy passed
      continue-on-error: true
      timeout-minutes: 120
      run: |
        # Clear summary file
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m27 --nightly --timeout-per-file 5400 \
          ${{ (github.event_name == 'schedule' || inputs.continue_on_error) && '--continue-on-error' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
| # ============================================================================== | |
| # Diffusion (MI30x) | |
| # ============================================================================== | |
# Diffusion job: runs the Z-Image-Turbo pytest file on the 1-GPU MI325 runner
# and uploads whatever lands in diffusion-artifacts/.
nightly-1-gpu-zimage-turbo-rocm720:
  runs-on: linux-mi325-1gpu-sglang
  # Runs only in sgl-project/sglang or on pull_request events, and only when no
  # job filter is set, the filter is 'all', or the filter lists this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Falls back to the triggering commit when no explicit ref is given.
        ref: ${{ inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: |
        bash scripts/ci/amd/ensure_vram_clear.sh rocm

    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so the test step can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh

    - name: Z-Image-Turbo Diffusion Test ROCm 7.2 (1-GPU)
      timeout-minutes: 45
      run: |
        # When the continue_on_error input is set, '|| true' is spliced into the
        # command line, so a pytest failure is swallowed and the step exits 0.
        # NOTE(review): unlike the run_suite.py-based jobs, scheduled runs do not
        # enable this behaviour here -- confirm that is intended.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
          pytest test/registered/amd/test_zimage_turbo.py -v -s \
          ${{ inputs.continue_on_error && '|| true' || '' }} \
          || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    - name: Upload generated images
      # always() uploads artifacts even when the test step above failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: zimage-turbo-outputs-rocm720
        path: diffusion-artifacts/
        retention-days: 30
        # An empty artifact directory is not treated as an error.
        if-no-files-found: ignore
# Aggregation gate: exits non-zero when any job listed in `needs` reports a
# 'failure' or 'cancelled' result.
check-all-jobs:
  runs-on: ubuntu-latest
  # always() lets this gate run even when jobs in `needs` did not succeed.
  if: >-
    always() &&
    (github.repository == 'sgl-project/sglang' ||
    github.event_name == 'pull_request' ||
    github.event_name == 'workflow_dispatch')
  needs:
    # 1-GPU Unit Tests (MI30x + MI35x)
    - nightly-test-1-gpu-unit-rocm720
    - nightly-test-1-gpu-mi35x-rocm720
    # 2-GPU and 4-GPU Tests (MI30x + MI35x)
    - nightly-accuracy-2-gpu-rocm720
    - nightly-accuracy-2-gpu-vlm-rocm720
    - nightly-perf-2-gpu-text-rocm720
    - nightly-perf-2-gpu-vlm-rocm720
    - nightly-4-gpu-rocm720
    # 2-GPU GLM-5.1-MXFP4 (MI35x only)
    - nightly-2-gpu-mi35x-glm51-mxfp4-rocm720
    # 2-GPU DeepSeek-R1-MXFP4 TP2 (MI35x only)
    - nightly-2-gpu-mi35x-deepseek-r1-mxfp4-tp2-rocm720
    # 8-GPU GPT-OSS (MI30x mixes Grok1-FP8; MI35x mixes Qwen3-Coder-Next)
    - nightly-accuracy-8-gpu-rocm720
    - nightly-accuracy-8-gpu-mi35x-rocm720
    # 8-GPU Grok1-INT4 (MI30x + MI35x)
    - nightly-8-gpu-grok1-int4-rocm720
    - nightly-8-gpu-mi35x-grok1-int4-rocm720
    # 8-GPU Grok2 (MI30x + MI35x)
    - nightly-8-gpu-grok2-rocm720
    - nightly-8-gpu-mi35x-grok2-rocm720
    # 8-GPU DeepSeek-V3.x (MI30x)
    - nightly-8-gpu-deepseek-v31-rocm720
    - nightly-8-gpu-deepseek-v32-rocm720
    - nightly-8-gpu-deepseek-v32-mtp-rocm720
    - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
    # 8-GPU DeepSeek-V3.2 (MI35x)
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    # 8-GPU DeepSeek-R1 (MI35x only)
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-tp4-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-hicache-rocm720
    # 8-GPU DeepSeek-V4 (MI35x only)
    - nightly-8-gpu-mi35x-deepseek-v4-flash-rocm720
    - nightly-8-gpu-mi35x-deepseek-v4-pro-rocm720
    # 8-GPU Kimi-K2.6 (MI30x + MI35x)
    - nightly-8-gpu-kimi-k26-rocm720
    - nightly-8-gpu-mi35x-kimi-k26-rocm720
    # 8-GPU Qwen3-235B (MI30x + MI35x MXFP4)
    - nightly-8-gpu-qwen3-235b-rocm720
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
    # 8-GPU Qwen 3.5 (MI30x + MI35x)
    - nightly-8-gpu-qwen35-rocm720
    - nightly-8-gpu-mi35x-qwen35-rocm720
    # 8-GPU GLM-5.1 (MI30x + MI35x)
    - nightly-8-gpu-glm51-rocm720
    - nightly-8-gpu-mi35x-glm51-rocm720
    # 8-GPU GLM-5-MXFP4 (MI35x only)
    - nightly-8-gpu-mi35x-glm5-mxfp4-rocm720
    # 4-GPU MiniMax-M2.5 (MI35x)
    - nightly-4-gpu-mi35x-minimax-m25-rocm720
    # 8-GPU MiniMax-M2.7 (MI30x only)
    - nightly-8-gpu-minimax-m27-rocm720
    # Diffusion (MI30x)
    - nightly-1-gpu-zimage-turbo-rocm720
  steps:
    - name: Check if any job failed
      run: |
        # Both expressions are rendered to the literal text "true" or "false"
        # before the script runs.
        any_failed="${{ contains(needs.*.result, 'failure') }}"
        any_cancelled="${{ contains(needs.*.result, 'cancelled') }}"

        # Failures are reported first; cancellations only if nothing failed.
        if [[ "$any_failed" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs failed"
          exit 1
        fi
        if [[ "$any_cancelled" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs were cancelled"
          exit 1
        fi
        echo "All ROCm 7.2 nightly test jobs passed"