# Merge pull request #1699 from ericniebler/fix-cuda-stream-scheduler-p… #2001
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# GPU continuous-integration workflow.
name: CI (GPU)

# Triggered by pushes to `main`, to `member-function-customization`, and to
# branches named `pull-request/` followed by one or more digits.
on:
  push:
    branches:
      - main
      - "member-function-customization"
      - "pull-request/[0-9]+"

# Runs are grouped by workflow name, triggering event and ref name; starting a
# new run in a group cancels the one already in progress.
concurrency:
  group: "${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }}"
  cancel-in-progress: true
jobs:
  # Configures, builds and tests stdexec with CUDA enabled inside a
  # devcontainer image on a GPU runner, then runs the maxwell examples.
  build-gpu:
    # Skipped everywhere except the upstream repository.
    if: github.repository == 'NVIDIA/stdexec'
    name: GPU (${{ matrix.name }}, CUDA ${{ matrix.cuda }}, ${{ matrix.build }})
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        # How each field is consumed below:
        #   name, cuda, build -> job display name (build is also CMAKE_BUILD_TYPE)
        #   cxx               -> both CMAKE_CXX_COMPILER and CMAKE_CUDA_COMPILER
        #   tag               -> container image tag suffix
        #   gpu, driver, arch -> runner label (arch also selects the sccache-dist URL)
        #   sm                -> CMAKE_CUDA_ARCHITECTURES
        # NOTE(review): the "l4" entries build for sm "75" -- confirm that is
        # the intended architecture for that runner.
        include:
          - { name: "clang 21", cuda: "12.0", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.0", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" }
          - { name: "clang 21", cuda: "12.9", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.9", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" }
          - { name: "nvc++ 25.7", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.7", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" }
          - { name: "nvc++ 25.7", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.7", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" }
          - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" }
          - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" }
    runs-on: linux-${{ matrix.arch }}-gpu-${{ matrix.gpu }}-${{ matrix.driver }}-1
    container:
      # The container is started as root; `run` steps switch to the `coder`
      # user through the custom shell in `defaults` below.
      options: -u root
      image: rapidsai/devcontainers:26.02-cpp-${{ matrix.tag }}
      env:
        NVIDIA_VISIBLE_DEVICES: "${{ env.NVIDIA_VISIBLE_DEVICES }}"
    permissions:
      id-token: write # This is required for configure-aws-credentials
      contents: read # This is required for actions/checkout
    defaults:
      run:
        shell: su coder {0}
        working-directory: /home/coder
    steps:
      - name: Run nvidia-smi to make sure GPU is working
        run: nvidia-smi

      - name: Checkout stdexec
        uses: actions/checkout@v4
        with:
          path: stdexec
          persist-credentials: false

      # Exports the two name fragments used by the log-upload step at the end.
      - name: Setup environment
        run: |
          echo "ARTIFACT_PREFIX=${{runner.os}}-cuda${{matrix.cuda}}-${{matrix.tag}}-${{matrix.arch}}" >> "${GITHUB_ENV}"
          echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}"

      - if: github.repository_owner == 'NVIDIA'
        name: Get AWS credentials for sccache bucket
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-east-2
          role-duration-seconds: 28800 # 8 hours
          role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA

      - name: Build and test GPU schedulers
        env:
          NVCC_APPEND_FLAGS: "-t=100"
          SCCACHE_BUCKET: "rapids-sccache-devs"
          SCCACHE_DIST_REQUEST_TIMEOUT: "7140"
          SCCACHE_DIST_URL: "https://${{ matrix.arch }}.linux.sccache.rapids.nvidia.com"
          SCCACHE_IDLE_TIMEOUT: "0"
          SCCACHE_REGION: "us-east-2"
          SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev"
          SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor"
          SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true"
          SCCACHE_SERVER_LOG: "sccache=debug"
          SCCACHE_SERVER_PORT: "4225"
        run: |
          set -e;
          # When NVHPC_ROOT is set, look for an HPC-X init script under it and
          # load HPC-X if one is found.
          if test -n "${NVHPC_ROOT:+x}"; then
            HPCX_INIT="$(find -L "$NVHPC_ROOT"/comm_libs/ -path '*/latest/hpcx-init.sh')";
            if test -n "${HPCX_INIT:+x}"; then
              . "$HPCX_INIT";
              hpcx_load;
            fi
          fi
          set -x;
          devcontainer-utils-install-sccache --repo rapidsai/sccache --version rapids;
          # The auth options are fed on stdin ("-") rather than as arguments.
          devcontainer-utils-init-sccache-dist \
            --enable-sccache-dist - <<< " \
              --auth-type 'token' \
              --auth-token '${{ secrets.STDEXEC_BUILD_CLUSTER_SECRET }}' \
          ";
          # Copy source folder into ~/stdexec
          cp -r "${GITHUB_WORKSPACE}"/stdexec ~/;
          chown -R coder:coder ~/stdexec;
          cd ~/stdexec;
          # Configure
          cmake -S . -B build -GNinja \
            -DSTDEXEC_ENABLE_CUDA=ON \
            -DSTDEXEC_ENABLE_IO_URING=OFF \
            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
            -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \
            -DCMAKE_CUDA_COMPILER=${{ matrix.cxx }} \
            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.sm }} \
          ;
          # Compile
          cmake --build build -v -j 512;
          # Print sccache stats
          sccache -s;
          # Tests
          SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \
          ctest --test-dir build --verbose --output-on-failure --timeout 60;
          # Examples
          ./build/examples/nvexec/maxwell_cpu_st --iterations=1000 --N=512 --run-cpp --run-inline-scheduler;
          ./build/examples/nvexec/maxwell_cpu_mt --iterations=1000 --N=512 --run-std --run-stdpar --run-thread-pool-scheduler;
          ./build/examples/nvexec/maxwell_gpu_s --iterations=1000 --N=512 --run-cuda --run-stdpar --run-stream-scheduler;

      # Runs after a failed build too, so the logs are available for debugging.
      - if: ${{ !cancelled() }}
        name: Upload sccache logs
        uses: actions/upload-artifact@v4
        with:
          name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}}
          path: /tmp/sccache*.log
          compression-level: 9

  # Single aggregate status for the whole GPU matrix.
  ci-gpu:
    # Without a status-check function in `if`, this job is skipped whenever
    # build-gpu fails, and a skipped job is reported as successful to required
    # status checks. `!cancelled()` makes it run after a failed build so it can
    # fail explicitly; the repository guard keeps it skipped in forks, where
    # build-gpu never runs.
    if: ${{ !cancelled() && github.repository == 'NVIDIA/stdexec' }}
    runs-on: ubuntu-latest
    name: CI (GPU)
    needs:
      - build-gpu
    steps:
      # For a matrix job, `result` is only 'success' when every entry passed.
      - name: Fail unless every GPU build succeeded
        if: ${{ needs.build-gpu.result != 'success' }}
        run: |
          echo "CI (GPU) failure: build-gpu result was '${{ needs.build-gpu.result }}'"
          exit 1
      - run: echo "CI (GPU) success"