Skip to content

Commit 33dd576

Browse files
MekkCyber authored and Abdennacer-Badaoui committed
[kernels] Add Tests & CI for kernels (huggingface#41765)
* first commit
* add tests
* add kernel config
* add more tests
* add ci
* small fix
* change branch name
* update tests
* nit
* change test name
* revert jobs
* addressing review
* reenable all jobs
* address second review
1 parent 5048536 commit 33dd576

File tree

5 files changed

+491
-4
lines changed

5 files changed

+491
-4
lines changed

.github/workflows/self-scheduled-caller.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,15 @@ jobs:
118118
report_repo_id: hf-internal-testing/transformers_daily_ci
119119
commit_sha: ${{ github.sha }}
120120
secrets: inherit
121+
122+
kernels-ci:
123+
name: Kernels CI
124+
uses: ./.github/workflows/self-scheduled.yml
125+
with:
126+
job: run_kernels_gpu
127+
slack_report_channel: "#transformers-ci-daily-kernels"
128+
docker: huggingface/transformers-all-latest-gpu
129+
ci_event: Daily CI
130+
report_repo_id: hf-internal-testing/transformers_daily_ci
131+
commit_sha: ${{ github.sha }}
132+
secrets: inherit

.github/workflows/self-scheduled.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,70 @@ jobs:
475475
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
476476
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
477477

478+
run_kernels_gpu:
479+
if: ${{ inputs.job == 'run_kernels_gpu' }}
480+
name: Kernel tests
481+
strategy:
482+
fail-fast: false
483+
matrix:
484+
machine_type: [aws-g5-4xlarge-cache]
485+
runs-on:
486+
group: '${{ matrix.machine_type }}'
487+
container:
488+
image: ${{ inputs.docker }}
489+
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
490+
steps:
491+
- name: Update clone
492+
working-directory: /transformers
493+
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
494+
495+
- name: Reinstall transformers in edit mode
496+
working-directory: /transformers
497+
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[testing]
498+
499+
- name: Install kernels
500+
working-directory: /transformers
501+
run: python3 -m pip install -U kernels
502+
503+
- name: NVIDIA-SMI
504+
run: nvidia-smi
505+
506+
- name: Environment
507+
working-directory: /transformers
508+
run: python3 utils/print_env.py
509+
510+
- name: Show installed libraries and their versions
511+
working-directory: /transformers
512+
run: pip freeze
513+
514+
- name: Set `machine_type` for report and artifact names
515+
working-directory: /transformers
516+
shell: bash
517+
run: |
518+
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
519+
machine_type=single-gpu
520+
else
521+
machine_type=${{ matrix.machine_type }}
522+
fi
523+
echo "machine_type=$machine_type" >> $GITHUB_ENV
524+
525+
- name: Run kernel tests on GPU
526+
working-directory: /transformers
527+
run: |
528+
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_kernels_gpu_test_reports tests/kernels/test_kernels.py
529+
530+
- name: Failure short reports
531+
if: ${{ failure() }}
532+
continue-on-error: true
533+
run: cat /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports/failures_short.txt
534+
535+
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_kernels_gpu_test_reports"
536+
if: ${{ always() }}
537+
uses: actions/upload-artifact@v4
538+
with:
539+
name: ${{ env.machine_type }}_run_kernels_gpu_test_reports
540+
path: /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports
541+
478542
run_extract_warnings:
479543
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
480544
if: ${{ always() && inputs.job == 'run_models_gpu' }}
@@ -527,6 +591,7 @@ jobs:
527591
run_examples_gpu,
528592
run_torch_cuda_extensions_gpu,
529593
run_quantization_torch_gpu,
594+
run_kernels_gpu,
530595
run_extract_warnings
531596
]
532597
if: always() && !cancelled()

src/transformers/integrations/hub_kernels.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,13 @@
5151
)
5252
},
5353
"RMSNorm": {
54-
"cuda": LayerRepository(
55-
repo_id="kernels-community/liger_kernels",
56-
layer_name="LigerRMSNorm",
57-
),
54+
"cuda": {
55+
Mode.INFERENCE: LayerRepository(
56+
repo_id="kernels-community/liger_kernels",
57+
layer_name="LigerRMSNorm",
58+
# revision="pure-layer-test",
59+
),
60+
},
5861
"rocm": {
5962
Mode.INFERENCE: LayerRepository(
6063
repo_id="kernels-community/liger_kernels",

0 commit comments

Comments (0)