@@ -74,48 +74,48 @@ jobs:
         with:
           swap-size-gb: 10

-      - name: Install CUDA ${{ inputs.cuda-version }}
-        if: ${{ inputs.cuda-version != 'cpu' }}
-        uses: Jimver/cuda-toolkit@v0.2.26
-        id: cuda-toolkit
-        with:
-          cuda: ${{ inputs.cuda-version }}
-          linux-local-args: '["--toolkit"]'
-          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
-          # method: ${{ (inputs.cuda-version == '11.8.0' || inputs.cuda-version == '12.1.0') && 'network' || 'local' }}
-          method: "network"
-          sub-packages: '["nvcc"]'
-
-      - name: Install PyTorch ${{ inputs.torch-version }}+cu${{ inputs.cuda-version }}
-        run: |
-          pip install --upgrade pip
-          # With python 3.13 and torch 2.5.1, unless we update typing-extensions, we get error
-          # AttributeError: attribute '__default__' of 'typing.ParamSpec' objects is not writable
-          pip install typing-extensions==4.12.2
-          # We want to figure out the CUDA version to download pytorch
-          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
-          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
-          # This code is ugly, maybe there's a better way to do this.
-          export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
-            minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
-            maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
-            print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
-          )
-          if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
-            # pip install --no-cache-dir --pre torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
-            # Can't use --no-deps because we need cudnn etc.
-            # Hard-coding this version of pytorch-triton for torch 2.6.0.dev20241001
-            pip install jinja2
-            pip install https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
-            pip install --no-cache-dir --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ inputs.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
-          else
-            pip install --no-cache-dir torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
-          fi
-          nvcc --version
-          python --version
-          python -c "import torch; print('PyTorch:', torch.__version__)"
-          python -c "import torch; print('CUDA:', torch.version.cuda)"
-          python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"
+      # - name: Install CUDA ${{ inputs.cuda-version }}
+      #   if: ${{ inputs.cuda-version != 'cpu' }}
+      #   uses: Jimver/cuda-toolkit@v0.2.26
+      #   id: cuda-toolkit
+      #   with:
+      #     cuda: ${{ inputs.cuda-version }}
+      #     linux-local-args: '["--toolkit"]'
+      #     # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+      #     # method: ${{ (inputs.cuda-version == '11.8.0' || inputs.cuda-version == '12.1.0') && 'network' || 'local' }}
+      #     method: "network"
+      #     sub-packages: '["nvcc"]'
+
+      # - name: Install PyTorch ${{ inputs.torch-version }}+cu${{ inputs.cuda-version }}
+      #   run: |
+      #     pip install --upgrade pip
+      #     # With python 3.13 and torch 2.5.1, unless we update typing-extensions, we get error
+      #     # AttributeError: attribute '__default__' of 'typing.ParamSpec' objects is not writable
+      #     pip install typing-extensions==4.12.2
+      #     # We want to figure out the CUDA version to download pytorch
+      #     # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
+      #     # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
+      #     # This code is ugly, maybe there's a better way to do this.
+      #     export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
+      #       minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
+      #       maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
+      #       print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
+      #     )
+      #     if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
+      #       # pip install --no-cache-dir --pre torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+      #       # Can't use --no-deps because we need cudnn etc.
+      #       # Hard-coding this version of pytorch-triton for torch 2.6.0.dev20241001
+      #       pip install jinja2
+      #       pip install https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
+      #       pip install --no-cache-dir --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ inputs.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
+      #     else
+      #       pip install --no-cache-dir torch==${{ inputs.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+      #     fi
+      #     nvcc --version
+      #     python --version
+      #     python -c "import torch; print('PyTorch:', torch.__version__)"
+      #     python -c "import torch; print('CUDA:', torch.version.cuda)"
+      #     python -c "from torch.utils import cpp_extension; print(cpp_extension.CUDA_HOME)"

       - name: Restore build cache
         uses: actions/cache/restore@v4
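
A note on the Install PyTorch step removed above: the inline python -c picks which CUDA wheel index to use by clamping the matrix CUDA version to the range of CUDA builds each torch minor release actually publishes (see the release compatibility matrix linked in the step's comments). Below is a minimal standalone sketch of that selection logic in plain bash; the lookup tables are copied from the step, and the two MATRIX_* variables are assumed to be exported the way the job matrix sets them (e.g. MATRIX_TORCH_VERSION=2.6, MATRIX_CUDA_VERSION=126):

    #!/usr/bin/env bash
    # Sketch only: reproduces the TORCH_CUDA_VERSION selection outside the workflow.
    # Assumed inputs (normally set by the job matrix):
    #   MATRIX_TORCH_VERSION  e.g. "2.6"
    #   MATRIX_CUDA_VERSION   e.g. "126"  (CUDA 12.6 with the dot dropped)
    set -euo pipefail

    declare -A minv=( [2.4]=118 [2.5]=118 [2.6]=118 [2.7]=118 [2.8]=126 )
    declare -A maxv=( [2.4]=124 [2.5]=124 [2.6]=126 [2.7]=128 [2.8]=129 )

    # System CUDA older than 12.0 -> oldest wheel this torch release still ships;
    # otherwise -> newest wheel it ships.
    if (( MATRIX_CUDA_VERSION < 120 )); then
      TORCH_CUDA_VERSION=${minv[$MATRIX_TORCH_VERSION]}
    else
      TORCH_CUDA_VERSION=${maxv[$MATRIX_TORCH_VERSION]}
    fi
    echo "wheel index: https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}"
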
@@ -136,82 +136,82 @@ jobs:

           ls -al ./

-      - name: Build wheel
-        id: build_wheel
-        run: |
-          cat > build.sh <<'EOF'
-          #!/usr/bin/env bash
-          set -euxo pipefail
-
-          echo "Running build.sh"
-          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
-          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
-          # However this still fails so I'm using a newer version of setuptools
-          pip install setuptools==75.8.0
-          pip install ninja packaging wheel
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          # Limit MAX_JOBS otherwise the github runner goes OOM
-          # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
-
-          export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
-          export NVCC_THREADS=2
-          export FLASH_ATTENTION_FORCE_BUILD="TRUE"
-          export FLASH_ATTENTION_FORCE_CXX11_ABI=${{ inputs.cxx11-abi }}
-
-          python setup.py bdist_wheel --dist-dir=dist
-
-          tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11-abi }}
-          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-          ls dist/*whl | xargs -I {} mv {} dist/${wheel_name}
-          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-          EOF
-
-          chmod +x build.sh
-
-          timeout 20m ./build.sh || EXIT_CODE=$?
-
-          # Store exit code in GitHub env for later steps
-          echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
-
-          # Do not fail the job if timeout killed the build
-          exit 0
-
-      - name: Log build logs
-        if: always()
-        run: |
-          ls -al ./
-
-          tar -cvf build-cache.tar -C build . --atime-preserve=replace
-
-      - name: Save build cache
-        if: always()
-        uses: actions/cache/save@v4
-        with:
-          key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11-abi }}-${{ github.run_number }}-${{ github.run_attempt }}
-          path: build-cache.tar
-
-      - name: Log Built Wheels
-        run: |
-          ls dist
-
-      - name: Get Release with tag
-        id: get_current_release
-        if: steps.build_wheel.outputs.build_exit_code == 0
-        uses: joutvhu/get-release@v1
-        with:
-          tag_name: ${{ inputs.release-version }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Upload Release Asset
-        id: upload_release_asset
-        if: inputs.upload-to-release && steps.build_wheel.outputs.build_exit_code == 0
-        uses: actions/upload-release-asset@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
-          asset_path: ./dist/${{env.wheel_name}}
-          asset_name: ${{env.wheel_name}}
-          asset_content_type: application/*
+      # - name: Build wheel
+      #   id: build_wheel
+      #   run: |
+      #     cat > build.sh <<'EOF'
+      #     #!/usr/bin/env bash
+      #     set -euxo pipefail
+
+      #     echo "Running build.sh"
+      #     # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
+      #     # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
+      #     # However this still fails so I'm using a newer version of setuptools
+      #     pip install setuptools==75.8.0
+      #     pip install ninja packaging wheel
+      #     export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
+      #     export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+      #     # Limit MAX_JOBS otherwise the github runner goes OOM
+      #     # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
+
+      #     export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
+      #     export NVCC_THREADS=2
+      #     export FLASH_ATTENTION_FORCE_BUILD="TRUE"
+      #     export FLASH_ATTENTION_FORCE_CXX11_ABI=${{ inputs.cxx11-abi }}
+
+      #     python setup.py bdist_wheel --dist-dir=dist
+
+      #     tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11-abi }}
+      #     wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
+      #     ls dist/*whl | xargs -I {} mv {} dist/${wheel_name}
+      #     echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+      #     EOF
+
+      #     chmod +x build.sh
+
+      #     timeout 20m ./build.sh || EXIT_CODE=$?
+
+      #     # Store exit code in GitHub env for later steps
+      #     echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
+
+      #     # Do not fail the job if timeout killed the build
+      #     exit 0
+
+      # - name: Log build logs
+      #   if: always()
+      #   run: |
+      #     ls -al ./
+
+      #     tar -cvf build-cache.tar -C build . --atime-preserve=replace
+
+      # - name: Save build cache
+      #   if: always()
+      #   uses: actions/cache/save@v4
+      #   with:
+      #     key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11-abi }}-${{ github.run_number }}-${{ github.run_attempt }}
+      #     path: build-cache.tar
+
+      # - name: Log Built Wheels
+      #   run: |
+      #     ls dist
+
+      # - name: Get Release with tag
+      #   id: get_current_release
+      #   if: steps.build_wheel.outputs.build_exit_code == 0
+      #   uses: joutvhu/get-release@v1
+      #   with:
+      #     tag_name: ${{ inputs.release-version }}
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # - name: Upload Release Asset
+      #   id: upload_release_asset
+      #   if: inputs.upload-to-release && steps.build_wheel.outputs.build_exit_code == 0
+      #   uses: actions/upload-release-asset@v1
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      #   with:
+      #     upload_url: ${{ steps.get_current_release.outputs.upload_url }}
+      #     asset_path: ./dist/${{env.wheel_name}}
+      #     asset_name: ${{env.wheel_name}}
+      #     asset_content_type: application/*
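
Two details of the Build wheel step removed above are easy to miss. First, the "timeout 20m ./build.sh || EXIT_CODE=$?" line together with the final "exit 0" deliberately lets a timed-out compile end the step green, so the always()-guarded Save build cache step can persist the partial build tree for the next run attempt; EXIT_CODE is only assigned on failure, so build_exit_code is empty on success, and the later "build_exit_code == 0" guards still pass because GitHub's expression comparison coerces the empty string to a number. Second, the sed call splices a local version label into the wheel filename by rewriting the second "-", the one separating the version from the python tag. A small sketch of that rename follows; the sample filename and tag are illustrative, not taken from a real run:

    #!/usr/bin/env bash
    # Sketch only: the wheel-rename logic from the removed "Build wheel" step.
    set -euo pipefail

    # Stand-ins for cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi...
    # and for a made-up setup.py output filename.
    tmpname=cu12torch2.6cxx11abiTRUE
    orig=flash_attn-2.8.0-cp312-cp312-linux_x86_64.whl

    # "s/-/+$tmpname-/2" rewrites only the 2nd "-", turning the version
    # "2.8.0" into the PEP 440 local version "2.8.0+<tag>".
    renamed=$(echo "$orig" | sed "s/-/+$tmpname-/2")
    echo "$renamed"
    # -> flash_attn-2.8.0+cu12torch2.6cxx11abiTRUE-cp312-cp312-linux_x86_64.whl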