# Skip to content
#
# fix: avoid duplicate sharded directory scans #5237
#
# fix: avoid duplicate sharded directory scans
#
# fix: avoid duplicate sharded directory scans #5237
#
# Workflow file for this run

---
# Main Python CI pipeline: lint, type-check, test matrix, packaging, and
# the standalone picklescan package checks.
name: Python CI

# NOTE: `on` is a YAML 1.1 boolean key to generic parsers; GitHub's loader
# handles it correctly — leave unquoted.
on:
  pull_request:
    # Docs-only changes are covered by docs-check.yml, so skip this pipeline.
    paths-ignore:
      - "**.md"
      - "**.txt"
      - "**.rst"
      - "LICENSE*"
      - ".github/workflows/docs-check.yml"
  push:
    branches:
      - main
  workflow_dispatch:

# Least-privilege token: read-only access only.
permissions:
  contents: read
  pull-requests: read

# Cancel superseded runs for PRs only; main-branch runs always complete.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  # First, determine what changed to optimize subsequent jobs
  changes:
    name: Detect Changes
    runs-on: ubuntu-latest
    outputs:
      python: ${{ steps.filter.outputs.python }}
      picklescan: ${{ steps.filter.outputs.picklescan }}
      docker: ${{ steps.filter.outputs.docker }}
      workflows: ${{ steps.filter.outputs.workflows }}
      dependencies: ${{ steps.filter.outputs.dependencies }}
    steps:
      # Pinned to the same commit SHA used by every other checkout in this
      # workflow (mutable tags like @v6 can be repointed; SHA pinning keeps
      # the supply chain reproducible).
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      # TODO(review): pin dorny/paths-filter to a full commit SHA as well,
      # matching the SHA-pinning convention used for every other third-party
      # action in this workflow.
      - uses: dorny/paths-filter@v4
        id: filter
        with:
          filters: |
            python:
              - '**.py'
              - 'pyproject.toml'
              - 'uv.lock'
            picklescan:
              - 'packages/modelaudit-picklescan/**'
            docker:
              - 'Dockerfile*'
              - '.dockerignore'
            workflows:
              - '.github/workflows/**'
            dependencies:
              - 'pyproject.toml'
              - 'uv.lock'
              - 'packages/modelaudit-picklescan/pyproject.toml'
              - 'packages/modelaudit-picklescan/uv.lock'
lint:
name: Lint and Format
needs: changes
# Always run on main branch, otherwise only if Python files changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py3.12-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py3.12-
${{ runner.os }}-uv-
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Lint with Ruff
run: |
uv run ruff check modelaudit/ tests/
- name: Check import organization with Ruff
run: |
uv run ruff check --select I modelaudit/ tests/
- name: Check formatting with Ruff
run: |
uv run ruff format --check modelaudit/ tests/
dependency-audit:
name: Dependency Audit
needs: changes
if: github.event_name == 'pull_request' && (needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.workflows == 'true')
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Audit dependencies for vulnerabilities
run: |
# CVE-2026-4539 currently has no upstream Pygments fix release.
# pip-audit cannot audit unpublished editable path requirements, so
# the standalone Rust-backed pickle package is audited separately.
uv export --no-hashes --no-emit-project \
| sed -e '/^-e \.\/packages\/modelaudit-picklescan$/d' -e '/^modelaudit-picklescan==/d' \
> /tmp/modelaudit-audit-requirements.txt
uvx pip-audit --strict --desc --ignore-vuln CVE-2026-4539 -r /tmp/modelaudit-audit-requirements.txt
uv export --project packages/modelaudit-picklescan --no-hashes --no-emit-project \
> /tmp/modelaudit-picklescan-audit-requirements.txt
if grep -Eq '^[A-Za-z0-9_.-]' /tmp/modelaudit-picklescan-audit-requirements.txt; then
uvx pip-audit --strict --desc -r /tmp/modelaudit-picklescan-audit-requirements.txt
else
echo "Standalone picklescan package has no Python dependencies to audit."
fi
license-check:
name: License Compliance
needs: changes
if: github.ref == 'refs/heads/main' || needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Check dependency licenses
run: |
uv run python -c "
import importlib.metadata as md
import sys
# Approved license keywords (case-insensitive substring matching).
# See docs/maintainers/dependency-policy.md for the full policy.
APPROVED_KEYWORDS = [
'mit', 'bsd', 'apache', 'isc', 'psf', 'python software foundation',
'mpl', 'mozilla public',
'public domain', 'unlicense', 'cc0', '0bsd', 'zlib',
'3-clause bsd',
]
# Classifier fragments that indicate an approved license.
APPROVED_CLASSIFIERS = [
'MIT License',
'BSD License',
'Apache Software License',
'ISC License',
'Mozilla Public License',
'Python Software Foundation License',
'Public Domain',
]
# Classifiers that require explicit package-level approval.
BLOCKED_CLASSIFIERS = [
'GNU General Public License',
'GNU Affero General Public License',
'GNU Lesser General Public License',
]
# NVIDIA CUDA packages are proprietary but are required transitive
# dependencies of PyTorch (an optional extra). They are redistributable
# under NVIDIA's EULA and cannot be avoided when using CUDA-enabled torch.
# See docs/maintainers/dependency-policy.md for the full exception rationale.
NVIDIA_PREFIXES = ('nvidia-', 'cuda-')
# LGPL packages that have received explicit maintainer approval.
# Per policy, LGPL requires case-by-case approval. Each entry here
# must have a corresponding note in THIRD_PARTY_NOTICES.md.
APPROVED_LGPL_PACKAGES = {
'inflate64',
'multivolumefile',
'pybcj',
'py7zr',
'pyppmd',
}
# Packages that ship no license metadata at all but are known-good.
# Verify manually on PyPI before adding here.
KNOWN_GOOD_NO_METADATA = {
'google-crc32c', # Apache-2.0 (https://pypi.org/project/google-crc32c/)
}
def is_approved(name: str, lic: str, classifiers: list[str]) -> bool:
# Known-good packages with broken/missing metadata
if name in KNOWN_GOOD_NO_METADATA:
return True
# NVIDIA CUDA transitive deps from PyTorch optional extra
if any(name.lower().startswith(p) for p in NVIDIA_PREFIXES):
return True
# LGPL packages with explicit maintainer approval
if name in APPROVED_LGPL_PACKAGES:
return True
# Check license field (case-insensitive keyword match)
lic_lower = lic.lower().strip()
if lic_lower and any(kw in lic_lower for kw in APPROVED_KEYWORDS):
return True
# Check classifiers
lic_classifiers = [c for c in classifiers if c.startswith('License')]
# Reject blocked license families before checking approvals
for cls in lic_classifiers:
if any(blocked in cls for blocked in BLOCKED_CLASSIFIERS):
return False
for cls in lic_classifiers:
if any(frag in cls for frag in APPROVED_CLASSIFIERS):
return True
return False
failed = []
for dist in md.distributions():
name = dist.metadata['Name']
if name == 'modelaudit':
continue
# Check both License (legacy) and License-Expression (PEP 639)
lic = dist.metadata.get('License', '') or ''
lic_expr = dist.metadata.get('License-Expression', '') or ''
# Combine both fields for checking
combined_lic = f'{lic} {lic_expr}'.strip()
classifiers = dist.metadata.get_all('Classifier') or []
if not is_approved(name, combined_lic, classifiers):
# Build a readable label for the failure message
lic_display = combined_lic[:80] if combined_lic else '(empty)'
lic_cls = [c.split(' :: ')[-1] for c in classifiers if 'License' in c]
if lic_cls:
lic_display += f' classifiers={lic_cls}'
failed.append(f'{name}: {lic_display}')
if failed:
print('Unapproved licenses found:')
for f in sorted(failed):
print(f' {f}')
sys.exit(1)
else:
print('All dependency licenses are approved.')
"
uv-lock-check:
name: Lock File Consistency
needs: changes
if: github.ref == 'refs/heads/main' || needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Check uv.lock is in sync with pyproject.toml
run: |
uv lock --check
- name: Check standalone picklescan uv.lock is in sync
working-directory: packages/modelaudit-picklescan
run: |
uv lock --check
type-check:
name: Type Check
needs: changes
# Only run if Python files changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py3.12-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py3.12-
${{ runner.os }}-uv-
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Type checking
run: |
uv run mypy modelaudit/ tests/
- name: Check for circular imports
run: |
set -euo pipefail
python scripts/minimal_circular_check.py
# Fast feedback job for immediate results
quick-feedback:
name: Quick Feedback (Python 3.12)
needs: changes
# Always run for fastest feedback
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py3.12-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py3.12-
${{ runner.os }}-uv-
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Run fast tests with fail-fast
run: |
uv run pytest tests -x --maxfail=1 -n auto -m "not slow and not integration and not performance" --tb=short --durations=10
windows-tests:
name: Windows Tests (Python 3.11)
needs: changes
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: windows-latest
timeout-minutes: 30
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
- name: Pin Python version
run: |
uv python pin 3.11
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Sync dependencies
run: |
uv sync --extra all-ci-windows
- name: Run fast tests with fail-fast
run: |
uv run pytest tests -x --maxfail=1 -n auto -m "not slow and not integration and not performance" --tb=short --durations=10
test:
name: Test Python ${{ matrix.python-version }}
needs: changes
# Only run if Python files changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
matrix:
# For PRs, only test min and max Python versions to save time
# For main branch, test all versions
# Note: Python 3.10+ supported (see pyproject.toml)
# Python 3.10 gets NumPy 1.x, Python 3.11+ gets NumPy 2.x
python-version: ${{ github.event_name == 'pull_request' && fromJSON('["3.10", "3.13"]') || fromJSON('["3.10", "3.11", "3.12", "3.13"]') }}
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin ${{ matrix.python-version }}
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py${{ matrix.python-version }}-
${{ runner.os }}-uv-
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Run fast tests with fail-fast (PRs, Python 3.12 with coverage)
if: github.event_name == 'pull_request' && matrix.python-version == '3.12'
run: |
uv run pytest tests -x --maxfail=1 -n auto -m "not slow and not integration and not performance" --cov=modelaudit --cov-report=xml --tb=short --durations=15
- name: Run fast tests with fail-fast (PRs, non-3.12)
if: github.event_name == 'pull_request' && matrix.python-version != '3.12'
run: |
uv run pytest tests -x --maxfail=1 -n auto -m "not slow and not integration and not performance" --tb=short --durations=15
- name: Run slow/integration tests on PR (if labeled)
# Run slow tests on PRs when the 'run-slow-tests' label is added
# This allows developers to validate slow tests before merging
if: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-slow-tests') && matrix.python-version == '3.12'
timeout-minutes: 20
run: |
echo "Running slow/integration tests due to 'run-slow-tests' label"
uv run pytest tests -n auto -m "slow or integration or performance" --tb=short --durations=20
- name: Run fast tests with coverage (main branch only)
if: github.ref == 'refs/heads/main' && matrix.python-version == '3.12'
run: |
uv run pytest tests -n auto -m "not slow and not integration and not performance" --cov=modelaudit --cov-report=xml --tb=short --durations=15
- name: Run fast tests without coverage (main branch, non-3.12)
if: github.ref == 'refs/heads/main' && matrix.python-version != '3.12'
run: |
uv run pytest tests -n auto -m "not slow and not integration and not performance" --tb=short --durations=15
- name: Run slow/integration tests (main branch only)
# Run ALL slow/integration/performance tests on main branch.
# Tests with missing dependencies auto-skip via pytest.importorskip() or framework markers.
# Runs on Python 3.12 only to keep CI time reasonable.
if: github.ref == 'refs/heads/main' && matrix.python-version == '3.12'
timeout-minutes: 20
run: |
# Run all slow, integration, and performance tests
uv run pytest tests -n auto -m "slow or integration or performance" --tb=short --durations=20
- name: Upload coverage to Codecov
if: matrix.python-version == '3.12'
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6
with:
files: ./coverage.xml
fail_ci_if_error: false
verbose: true
test-numpy-compatibility:
name: Test NumPy ${{ matrix.numpy-mode }} - Python ${{ matrix.python-version }}
needs: changes
# Only run on main branch or if dependencies changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.dependencies == 'true'
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
matrix:
# Test both NumPy versions with appropriate Python versions:
# - NumPy 1.x on Python 3.10 (last version supporting NumPy 1.x)
# - NumPy 2.x on Python 3.11+ (requires Python >=3.11)
# Reduced matrix for PRs, full matrix for main branch
include: ${{ github.event_name == 'pull_request' && fromJSON('[{"python-version":"3.10","numpy-mode":"1.x"},{"python-version":"3.11","numpy-mode":"2.x"}]') || fromJSON('[{"python-version":"3.10","numpy-mode":"1.x"},{"python-version":"3.11","numpy-mode":"2.x"},{"python-version":"3.12","numpy-mode":"2.x"},{"python-version":"3.13","numpy-mode":"2.x"}]') }}
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin ${{ matrix.python-version }}
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-numpy${{ matrix.numpy-mode }}-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py${{ matrix.python-version }}-numpy${{ matrix.numpy-mode }}-
${{ runner.os }}-uv-py${{ matrix.python-version }}-
${{ runner.os }}-uv-
- name: Install dependencies
run: |
# Dependencies are automatically version-matched based on Python version
# Python 3.10: NumPy 1.x + TensorFlow 2.13+
# Python 3.11+: NumPy 2.x + TensorFlow 2.17+
uv sync --extra all-ci
- name: Check NumPy version
run: |
uv run python -c "import numpy; print(f'NumPy version: {numpy.__version__}')"
- name: Test scanner diagnostics
run: |
uv run modelaudit doctor --show-failed
- name: Test basic scanning functionality
run: |
# Create a test file to scan
echo "Testing basic scanning with NumPy ${{ matrix.numpy-mode }}"
uv run python -c "import pickle; pickle.dump({'test': 'data'}, open('test_model.pkl', 'wb'))"
uv run modelaudit scan test_model.pkl --format json --output test_results.json
cat test_results.json
- name: Run NumPy compatibility tests
run: |
uv run pytest tests/scanners/test_numpy_scanner.py -v
uv run pytest tests/scanners/test_scanner_registry.py -v -k "numpy"
test-vendored-protos:
name: Test Vendored TensorFlow Protos
needs: changes
# Run on main branch or if Python/dependency files changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.dependencies == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Install WITHOUT TensorFlow
run: |
# Install only base dependencies - no tensorflow extra
uv sync
- name: Verify TensorFlow is NOT installed
run: |
uv run python -c "
try:
import tensorflow
print('ERROR: TensorFlow should not be installed!')
exit(1)
except ImportError:
print('✓ TensorFlow is not installed (expected)')
"
- name: Verify vendored protos load correctly
run: |
uv run python -c "
import modelaudit.protos
assert modelaudit.protos._check_vendored_protos(), 'Protos should be available'
assert modelaudit.protos.is_using_vendored_protos(), 'Should use vendored protos'
print('✓ Vendored protos loaded successfully')
"
- name: Test safe SavedModel scanning
run: |
uv run python -m modelaudit tests/assets/samples/tensorflow/safe_savedmodel/ --format json | \
uv run python -c "import sys,json; d=json.load(sys.stdin); assert len(d['issues'])==0, f'Expected 0 issues, got {len(d[\"issues\"])}'; print('✓ Safe model: 0 issues')"
- name: Test malicious SavedModel scanning
run: |
# Should detect PyFunc operation
uv run python -m modelaudit tests/assets/samples/tensorflow/malicious_pyfunc/ --format json | \
uv run python -c "import sys,json; d=json.load(sys.stdin); critical=[i for i in d['issues'] if i['severity']=='critical']; assert len(critical)>=1, f'Expected critical issues, got {len(critical)}'; print(f'✓ Malicious model: {len(critical)} critical issue(s) detected')"
test-proto-reproducibility:
name: Verify Vendored Proto Reproducibility
needs: changes
# Only run on main or when proto-related files change
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.dependencies == 'true'
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install protoc 33.5
run: |
# Pin protoc to the exact version used to generate vendored protos.
# The vendored files contain "Protobuf Python Version: 6.33.5"
# which corresponds to libprotoc 33.5.
PROTOC_VERSION="33.5"
PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip"
curl -fsSL -o /tmp/protoc.zip \
"https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"
sudo unzip -o /tmp/protoc.zip -d /usr/local bin/protoc 'include/*'
sudo chmod +x /usr/local/bin/protoc
rm /tmp/protoc.zip
protoc --version
- name: Regenerate protos and compare
run: |
set -euo pipefail
# Save checksums of current vendored protos
find modelaudit/protos/tensorflow -name "*.py" -exec md5sum {} \; | sort > /tmp/before.md5
# Regenerate using the same script
bash scripts/compile_tensorflow_protos.sh
# Save checksums of regenerated protos
find modelaudit/protos/tensorflow -name "*.py" -exec md5sum {} \; | sort > /tmp/after.md5
# Compare
if ! diff /tmp/before.md5 /tmp/after.md5; then
echo "ERROR: Vendored protos differ from regenerated output!"
echo "Run 'bash scripts/compile_tensorflow_protos.sh' locally and commit the results."
diff /tmp/before.md5 /tmp/after.md5 || true
exit 1
fi
echo "Vendored protos match regenerated output."
test-extras-smoke:
name: Extras Smoke Test (${{ matrix.extra }})
needs: changes
# Only run on main or when dependencies change
if: github.ref == 'refs/heads/main' || needs.changes.outputs.dependencies == 'true'
runs-on: ubuntu-latest
timeout-minutes: 15
strategy:
fail-fast: false
matrix:
extra:
[
h5,
pytorch,
safetensors,
onnx,
dill,
joblib,
flax,
xgboost,
sevenzip,
]
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Install with ${{ matrix.extra }} extra
run: |
uv sync --extra ${{ matrix.extra }}
- name: Verify import and basic scan
run: |
# Verify modelaudit loads without import errors
uv run python -c "import modelaudit; print(f'modelaudit {modelaudit.__version__} loaded')"
# Run scanner diagnostics
uv run modelaudit doctor --show-failed
# Create a test file and scan it
uv run python -c "import pickle; pickle.dump({'test': True}, open('smoke_test.pkl', 'wb'))"
uv run modelaudit scan smoke_test.pkl --format json
build:
name: Build and Package
needs: changes
# Always build on main, otherwise only if Python/dependency/workflow files changed
if: github.ref == 'refs/heads/main' || needs.changes.outputs.python == 'true' || needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin 3.12
- name: Install Rust toolchain
run: |
rustup toolchain install stable --profile minimal
rustup default stable
- name: Cache Python dependencies
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: |
.venv
~/.cache/pip
~/.cache/uv
key: ${{ runner.os }}-uv-py3.12-${{ hashFiles('**/uv.lock', '**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-py3.12-
${{ runner.os }}-uv-
- name: Sync dependencies
run: |
uv sync --extra all-ci
- name: Build standalone pickle package
run: |
uv build packages/modelaudit-picklescan --out-dir /tmp/modelaudit-picklescan-dist
- name: Build package
run: |
uv build
- name: Validate package metadata
run: |
uvx twine check dist/*
- name: Smoke test wheel install
run: |
set -euo pipefail
uv venv /tmp/modelaudit-wheel-smoke
uv pip install \
--python /tmp/modelaudit-wheel-smoke/bin/python \
--find-links /tmp/modelaudit-picklescan-dist \
dist/modelaudit-*.whl
smoke_dir="$(mktemp -d)"
(
cd "$smoke_dir"
PYTHONPATH= /tmp/modelaudit-wheel-smoke/bin/python -I - <<'PY'
import importlib.util
import modelaudit
import modelaudit_picklescan
assert importlib.util.find_spec("modelaudit_picklescan._rust") is not None
report = modelaudit_picklescan.scan_bytes(b"\x80\x04}q\x00.")
assert report.status.value == "complete", report
print(f"modelaudit {modelaudit.__version__} and modelaudit_picklescan loaded")
PY
)
- name: Upload artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: dist
path: dist/
picklescan-package:
name: Standalone Pickle Package (${{ matrix.python-version }})
needs: changes
if: github.ref == 'refs/heads/main' || needs.changes.outputs.picklescan == 'true' || needs.changes.outputs.workflows == 'true'
runs-on: ubuntu-latest
timeout-minutes: 15
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
defaults:
run:
working-directory: packages/modelaudit-picklescan
steps:
- name: Checkout repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Install uv
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
- name: Pin Python version
run: |
uv python pin ${{ matrix.python-version }}
- name: Install Rust toolchain
run: |
rustup toolchain install 1.83.0 --profile minimal
rustup toolchain install stable --profile minimal --component rustfmt --component clippy
rustup default stable
- name: Cache Cargo dependencies
uses: actions/cache@v5
with:
path: |
~/.cargo/registry
~/.cargo/git
packages/modelaudit-picklescan/target
key: ${{ runner.os }}-cargo-picklescan-${{ hashFiles('packages/modelaudit-picklescan/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-picklescan-
- name: Check standalone package lock is in sync
run: |
uv lock --check
- name: Check Rust scanner formatting
run: |
cargo fmt --manifest-path Cargo.toml -- --check
- name: Check Rust scanner crate
run: |
cargo check --manifest-path Cargo.toml
- name: Check Rust scanner MSRV
run: |
rustup toolchain install 1.83.0 --profile minimal
cargo +1.83.0 check --manifest-path Cargo.toml --locked
- name: Lint Rust scanner crate
run: |
cargo clippy --manifest-path Cargo.toml --all-targets -- -D warnings
- name: Test Rust scanner crate
run: |
cargo test --manifest-path Cargo.toml
- name: Lint standalone package with Ruff
run: |
uv run --with ruff ruff check src tests
- name: Check standalone package import organization with Ruff
run: |
uv run --with ruff ruff check --select I src tests
- name: Check standalone package formatting with Ruff
run: |
uv run --with ruff ruff format --check src tests
- name: Type check standalone package with mypy
run: |
uv run --with mypy mypy src tests
- name: Run standalone package tests
run: |
uv run --with pytest --with pytest-xdist pytest -n auto tests --tb=short
- name: Build standalone package
run: |
uv build --out-dir /tmp/modelaudit-picklescan-dist
- name: Validate standalone package metadata
run: |
uvx twine check /tmp/modelaudit-picklescan-dist/*
- name: Smoke test standalone package wheel install
run: |
set -euo pipefail
uv venv /tmp/modelaudit-picklescan-wheel-smoke
shopt -s nullglob
picklescan_wheels=(/tmp/modelaudit-picklescan-dist/modelaudit_picklescan-*.whl)
if [[ ${#picklescan_wheels[@]} -ne 1 ]]; then
echo "ERROR: Expected exactly 1 modelaudit_picklescan wheel artifact, found ${#picklescan_wheels[@]}"
ls -la /tmp/modelaudit-picklescan-dist/
exit 1
fi
uv pip install --python /tmp/modelaudit-picklescan-wheel-smoke/bin/python "${picklescan_wheels[0]}"
smoke_dir="$(mktemp -d)"
(
cd "$smoke_dir"
PYTHONPATH= /tmp/modelaudit-picklescan-wheel-smoke/bin/python -I - <<'PY'
import importlib.util
import modelaudit_picklescan
assert importlib.util.find_spec("modelaudit") is None
assert importlib.util.find_spec("modelaudit_picklescan._rust") is not None
report = modelaudit_picklescan.scan_bytes(b"\x80\x04}q\x00.")
assert report.status.value == "complete", report
assert report.verdict.value == "clean", report
assert not any(notice.code == "engine_fallback" for notice in report.notices)
print("standalone modelaudit_picklescan wheel loaded without modelaudit")
PY
)
# Summary job to ensure all required checks pass
ci-success:
name: CI Success
needs:
[
changes,
quick-feedback,
lint,
dependency-audit,
license-check,
uv-lock-check,
type-check,
windows-tests,
test,
test-numpy-compatibility,
test-vendored-protos,
test-proto-reproducibility,
test-extras-smoke,
build,
picklescan-package,
]
if: always()
runs-on: ubuntu-latest
steps:
- name: Check if all jobs succeeded
run: |
# A job is considered successful if it either succeeded or was skipped
# (skipped means the job's conditions weren't met, which is fine)
QUICK_FEEDBACK_RESULT="${{ needs.quick-feedback.result }}"
LINT_RESULT="${{ needs.lint.result }}"
DEPENDENCY_AUDIT_RESULT="${{ needs.dependency-audit.result }}"
LICENSE_RESULT="${{ needs.license-check.result }}"
UV_LOCK_RESULT="${{ needs.uv-lock-check.result }}"
TYPE_CHECK_RESULT="${{ needs.type-check.result }}"
WINDOWS_RESULT="${{ needs.windows-tests.result }}"
TEST_RESULT="${{ needs.test.result }}"
NUMPY_RESULT="${{ needs.test-numpy-compatibility.result }}"
PROTOS_RESULT="${{ needs.test-vendored-protos.result }}"
PROTO_REPRO_RESULT="${{ needs.test-proto-reproducibility.result }}"
EXTRAS_RESULT="${{ needs.test-extras-smoke.result }}"
BUILD_RESULT="${{ needs.build.result }}"
PICKLESCAN_RESULT="${{ needs.picklescan-package.result }}"
CHANGES_RESULT="${{ needs.changes.result }}"
# Check if conditional jobs should have run
ON_MAIN_BRANCH="${{ github.ref == 'refs/heads/main' }}"
DEPENDENCIES_CHANGED="${{ needs.changes.outputs.dependencies == 'true' }}"
PICKLESCAN_CHANGED="${{ needs.changes.outputs.picklescan == 'true' }}"
PYTHON_CHANGED="${{ needs.changes.outputs.python == 'true' }}"
WORKFLOWS_CHANGED="${{ needs.changes.outputs.workflows == 'true' }}"
echo "Job results:"
echo " quick-feedback: $QUICK_FEEDBACK_RESULT"
echo " lint: $LINT_RESULT"
echo " dependency-audit: $DEPENDENCY_AUDIT_RESULT"
echo " license-check: $LICENSE_RESULT"
echo " uv-lock-check: $UV_LOCK_RESULT"
echo " type-check: $TYPE_CHECK_RESULT"
echo " windows-tests: $WINDOWS_RESULT"
echo " test: $TEST_RESULT"
echo " test-numpy-compatibility: $NUMPY_RESULT"
echo " test-vendored-protos: $PROTOS_RESULT"
echo " test-proto-reproducibility: $PROTO_REPRO_RESULT"
echo " test-extras-smoke: $EXTRAS_RESULT"
echo " build: $BUILD_RESULT"
echo " picklescan-package: $PICKLESCAN_RESULT"
echo " changes: $CHANGES_RESULT"
# Check if any job failed or was cancelled
FAILED=false
[[ "$CHANGES_RESULT" == "failure" || "$CHANGES_RESULT" == "cancelled" ]] && FAILED=true
[[ "$QUICK_FEEDBACK_RESULT" == "failure" || "$QUICK_FEEDBACK_RESULT" == "cancelled" ]] && FAILED=true
[[ "$LINT_RESULT" == "failure" || "$LINT_RESULT" == "cancelled" ]] && FAILED=true
[[ "$DEPENDENCY_AUDIT_RESULT" == "failure" || "$DEPENDENCY_AUDIT_RESULT" == "cancelled" ]] && FAILED=true
[[ "$TYPE_CHECK_RESULT" == "failure" || "$TYPE_CHECK_RESULT" == "cancelled" ]] && FAILED=true
[[ "$WINDOWS_RESULT" == "failure" || "$WINDOWS_RESULT" == "cancelled" ]] && FAILED=true
[[ "$TEST_RESULT" == "failure" || "$TEST_RESULT" == "cancelled" ]] && FAILED=true
[[ "$BUILD_RESULT" == "failure" || "$BUILD_RESULT" == "cancelled" ]] && FAILED=true
# Conditional jobs: only fail if they should have run
# License/lock checks run on main or when dependencies change
if [[ "$ON_MAIN_BRANCH" == "true" || "$DEPENDENCIES_CHANGED" == "true" || "$WORKFLOWS_CHANGED" == "true" ]]; then
[[ "$LICENSE_RESULT" == "failure" || "$LICENSE_RESULT" == "cancelled" ]] && FAILED=true
[[ "$UV_LOCK_RESULT" == "failure" || "$UV_LOCK_RESULT" == "cancelled" ]] && FAILED=true
[[ "$NUMPY_RESULT" == "failure" || "$NUMPY_RESULT" == "cancelled" ]] && FAILED=true
[[ "$EXTRAS_RESULT" == "failure" || "$EXTRAS_RESULT" == "cancelled" ]] && FAILED=true
fi
# Vendored protos jobs run on main or when python/dependencies change
if [[ "$ON_MAIN_BRANCH" == "true" || "$PYTHON_CHANGED" == "true" || "$DEPENDENCIES_CHANGED" == "true" || "$WORKFLOWS_CHANGED" == "true" ]]; then
[[ "$PROTOS_RESULT" == "failure" || "$PROTOS_RESULT" == "cancelled" ]] && FAILED=true
[[ "$PROTO_REPRO_RESULT" == "failure" || "$PROTO_REPRO_RESULT" == "cancelled" ]] && FAILED=true
fi
if [[ "$ON_MAIN_BRANCH" == "true" || "$PICKLESCAN_CHANGED" == "true" || "$WORKFLOWS_CHANGED" == "true" ]]; then
[[ "$PICKLESCAN_RESULT" == "failure" || "$PICKLESCAN_RESULT" == "cancelled" ]] && FAILED=true
fi
if [[ "$FAILED" == "true" ]]; then
echo "Some CI checks failed!"
exit 1
else
echo "All CI checks passed (or were skipped due to path filters)!"
exit 0
fi