# Fix batched inference/generation, position_ids creation, falcon alibi, gpt_bigcode multi-query,.. #1245

# Slow ONNX Runtime test suite.
# Triggers: manual dispatch, a nightly schedule, and pull requests targeting
# `main` (the `build` job additionally gates pull requests on their labels).
name: ONNX Runtime Slow / Python - Test

on:
  workflow_dispatch:
  schedule:
    - cron: "0 7 * * *" # every day at 7am UTC
  pull_request:
    branches: [main]
    # `labeled` / `unlabeled` are listed so that adding or removing a label
    # re-triggers the workflow and the job's label condition is re-evaluated.
    types:
      - opened
      - labeled
      - reopened
      - unlabeled
      - synchronize

# One active run per workflow and PR branch; a newer run cancels the older one.
# `github.head_ref` is only set for pull requests, so other events fall back
# to the unique `github.run_id` and never cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  # Quoted: environment variables are always strings.
  TRANSFORMERS_IS_CI: "true"
jobs:
  build:
    # Run unconditionally for push / scheduled / manually dispatched events.
    # For pull requests, run only when the PR carries the `slow` or
    # `onnxruntime-slow` label.
    # NOTE(review): no `push` trigger is declared in this workflow's `on:`
    # section, so the first clause is currently never true — confirm whether
    # it is kept on purpose.
    if: >-
      github.event_name == 'push' ||
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.pull_request.labels.*.name, 'slow') ||
      contains(github.event.pull_request.labels.*.name, 'onnxruntime-slow')
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      matrix:
        # Quoted so the version is never interpreted as a float.
        python-version: ["3.9"]
        transformers-version: [latest]
        runs-on: [ubuntu-22.04, windows-2022]
    runs-on: ${{ matrix.runs-on }}
    steps:
      - name: Free Disk Space (Ubuntu)
        if: matrix.runs-on == 'ubuntu-22.04'
        # Pinned to a release tag instead of the mutable `main` branch so the
        # code executed by this step cannot change without a visible diff.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          swap-storage: false

      # NOTE(review): `macos-15` is not part of the matrix above, so this step
      # is skipped on every current runner — confirm it is kept for a future
      # macOS entry.
      - name: Free Disk Space (macOS)
        if: matrix.runs-on == 'macos-15'
        run: |
          sudo rm -rf /Library/Developer/Xcode/DerivedData/*
          sudo rm -rf ~/Library/Developer/Xcode/Archives/*
          sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode/*
          sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode.SimulatorKit/*

      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        # The extras spec is quoted so that no shell (bash on Linux, PowerShell
        # on Windows) can treat the brackets or comma as syntax.
        run: |
          pip install --upgrade pip
          pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
          pip install ".[tests,onnxruntime]" diffusers

      # Tests carrying the `run_in_series` marker.
      - name: Test with pytest (in series)
        run: |
          pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv
        env:
          RUN_SLOW: "1"

      # All remaining tests.
      # NOTE(review): the command passes no parallelism flag (such as `-n`), so
      # unless that is configured elsewhere this step also runs sequentially
      # despite its name — confirm.
      - name: Test with pytest (in parallel)
        run: |
          pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv
        env:
          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          RUN_SLOW: "1"