Skip to content

Add MTP drafter pipeline for npu executor to enable speculative decoding. #8602

Add MTP drafter pipeline for npu executor to enable speculative decoding.

Add MTP drafter pipeline for npu executor to enable speculative decoding. #8602

Workflow file for this run

# macOS / iOS Simulator CI workflow: triggers and concurrency policy.
name: "CI-Mac"

on:
  push:
    tags:
      - 'v*.*.*'
  pull_request:
    branches:
      - main
  schedule:
    # Run at 2am PST (10am UTC) every day to refresh the cache.
    - cron: "0 10 * * *"
  workflow_dispatch:  # Manual trigger
    inputs:
      REFRESH_CACHE:
        description: 'Refresh cache to remove unused files'
        type: boolean
        default: true

# One active run per workflow and PR head branch (or ref); a newer run
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true
jobs:
  presubmit:
    name: "Presubmit-Mac"
    runs-on: macos-latest
    permissions:
      actions: write   # For gh cache delete.
      contents: write  # For gh release upload.
    env:
      MODEL_KEY: gemma-3-1b-it-v1
      MODEL_PATH: ./models/gemma3-1b-it-int4.litertlm
      MODEL_URL: https://huggingface.co/litert-community/Gemma3-1B-IT/resolve/main/gemma3-1b-it-int4.litertlm
      GH_TOKEN: ${{ github.token }}  # For gh release upload.
      # Evaluates to true for scheduled runs, and for manual runs that
      # set the REFRESH_CACHE input.
      REFRESH_CACHE: >-
        ${{ github.event_name == 'schedule' ||
        (github.event_name == 'workflow_dispatch' && inputs.REFRESH_CACHE) }}
    steps:
# Check out the repository with Git LFS enabled.
- name: Checkout code.
  uses: actions/checkout@v4
  with:
    lfs: true
# Compute the cache key hierarchy and publish it as step outputs.
# Keys grow more specific at each level:
#   CACHE_RESTORE_KEY_2     workflow name
#   CACHE_RESTORE_KEY_1     + hash of WORKSPACE and .bazelrc files
#   CACHE_RESTORE_KEY_0     + hash of BUILD* files
#   CACHE_RESTORE_KEY_HEAD  + pull request base SHA
#   CACHE_KEY               + SHA of the commit that triggered the run
- name: Set up cache keys.
  id: cache-keys
  run: |
    CACHE_RESTORE_KEY_2="${GITHUB_WORKFLOW}"
    CACHE_RESTORE_KEY_1="${CACHE_RESTORE_KEY_2}-${{ hashFiles('**/WORKSPACE', '**/.bazelrc') }}"
    CACHE_RESTORE_KEY_0="${CACHE_RESTORE_KEY_1}-${{ hashFiles('**/BUILD*') }}"
    # Outside a pull request the base SHA is empty, so this collapses to
    # "${CACHE_RESTORE_KEY_0}-".
    CACHE_RESTORE_KEY_HEAD="${CACHE_RESTORE_KEY_0}-${{ github.event.pull_request.base.sha }}"
    CACHE_KEY="${CACHE_RESTORE_KEY_0}-${{ github.sha }}"
    # Append all outputs in one redirect.
    {
      echo "CACHE_RESTORE_KEY_2=${CACHE_RESTORE_KEY_2}"
      echo "CACHE_RESTORE_KEY_1=${CACHE_RESTORE_KEY_1}"
      echo "CACHE_RESTORE_KEY_0=${CACHE_RESTORE_KEY_0}"
      echo "CACHE_RESTORE_KEY_HEAD=${CACHE_RESTORE_KEY_HEAD}"
      echo "CACHE_KEY=${CACHE_KEY}"
    } >> "${GITHUB_OUTPUT}"
# Only on cache-refresh runs: expunge Bazel's build outputs before building.
- name: Clean build outputs if cache is being refreshed.
  if: ${{ env.REFRESH_CACHE == 'true' }}
  run: |
    bazel clean --expunge
# Restore the Bazel disk caches, unless this run is refreshing them.
- name: Restore bazel cache if cache is not being refreshed.
  id: bazel-cache
  if: ${{ env.REFRESH_CACHE != 'true' }}
  uses: actions/cache/restore@v4
  with:
    key: ${{ steps.cache-keys.outputs.CACHE_KEY }}
    # Fallback key prefixes, listed from most to least specific.
    restore-keys: |
      ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_HEAD }}
      ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_0 }}-
      ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_1 }}-
      ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_2 }}-
    path: |
      ~/.cache/bazel-macos
      ~/.cache/bazel-ios
# Log what the restore step reported. Step outputs are passed through env
# variables rather than interpolated into the script.
- name: Check cache hit.
  env:
    STEPS_BAZEL_CACHE_OUTPUTS_CACHE_HIT: ${{ steps.bazel-cache.outputs.cache-hit }}
    STEPS_BAZEL_CACHE_OUTPUTS_CACHE_PRIMARY_KEY: ${{ steps.bazel-cache.outputs.cache-primary-key }}
    STEPS_BAZEL_CACHE_OUTPUTS_CACHE_MATCHED_KEY: ${{ steps.bazel-cache.outputs.cache-matched-key }}
  run: |
    echo "Cache Hit: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_HIT}"
    echo "Cache Primary Key: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_PRIMARY_KEY}"
    echo "Cache Matched Key: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_MATCHED_KEY}"
# Download the test model from MODEL_URL to MODEL_PATH (both come from the
# job-level env, so the shell sees them as ordinary variables).
- name: Download Model
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  run: |
    mkdir -p "$(dirname "${MODEL_PATH}")"
    echo "Downloading model from Hugging Face..."
    # Send the Authorization header only when a token is present. The
    # secret is empty where secrets are unavailable (e.g. pull requests
    # from forks), and an empty bearer token would be a malformed header.
    auth_args=()
    if [ -n "${HF_TOKEN}" ]; then
      auth_args=(-H "Authorization: Bearer ${HF_TOKEN}")
    fi
    # -f makes curl exit non-zero on HTTP errors so the step fails
    # instead of saving an error page as the model file.
    curl -L --retry 5 -f \
      "${auth_args[@]}" \
      -o "${MODEL_PATH}" \
      "${MODEL_URL}"
    ls -lh "${MODEL_PATH}"
# Build every target for the host (macOS), using the macOS disk cache.
# The folded scalar below is joined into a single command line.
- name: Run bazel build on MacOS.
  run: >-
    bazel build
    --disk_cache=~/.cache/bazel-macos
    //...
    //runtime/engine:litert_lm_main
# On tag refs only: attach the macOS binary to the release named after the
# tag, replacing any existing asset with the same name (--clobber).
- name: Update litert_lm_main prebuilt for MacOS if new version tag is pushed.
  if: ${{ github.ref_type == 'tag' }}
  run: |
    artifact=litert_lm_main.macos_arm64
    cp bazel-bin/runtime/engine/litert_lm_main "${artifact}"
    gh release upload "${GITHUB_REF_NAME}" "${artifact}" --clobber
# Run all Bazel tests on the host; only failing tests print their output.
# The folded scalar below is joined into a single command line.
- name: Run bazel test on MacOS.
  run: >-
    bazel test
    --disk_cache=~/.cache/bazel-macos
    --test_output=errors
    //...
# Install a pinned pytest into the runner's system Python.
- name: Install pytest
  run: |
    python3 -m pip install --break-system-packages \
      pytest==8.3.4
# Run the Python tool tests against the downloaded model. MODEL_PATH comes
# from the job-level env and is read as a quoted shell variable rather
# than being interpolated into the script text.
- name: Run pytest
  run: |
    pytest tools/test/ --model-path="${MODEL_PATH}" --build-system=bazel
# The `ios_sim_arm64` config causes bazel to build some mac-only targets
# that are not compatible with the iOS simulator, which leads to build
# failures. The --build_tag_filters flag and the negative target patterns
# after `--` filter out those incompatible builds.
# The folded scalar below is joined into a single command line.
- name: Run bazel build for iOS Simulator.
  run: >-
    bazel build
    --disk_cache=~/.cache/bazel-ios
    --config=ios_sim_arm64
    --build_tag_filters=-requires-mac-inputs:hard,-no_mac
    //...
    //runtime/engine:litert_lm_main
    --
    -//python/...
    -//schema/py:*
    -//kotlin/...
# On tag refs only: attach the iOS Simulator binary to the release named
# after the tag, replacing any existing asset with the same name (--clobber).
- name: Update litert_lm_main prebuilt for iOS Simulator if new version tag is pushed.
  if: ${{ github.ref_type == 'tag' }}
  run: |
    artifact=litert_lm_main.ios_sim_arm64
    cp bazel-bin/runtime/engine/litert_lm_main "${artifact}"
    gh release upload "${GITHUB_REF_NAME}" "${artifact}" --clobber
# On cache-refresh runs, delete the existing entry stored under this run's
# cache key.
- name: Remove cache if cache is being refreshed.
  if: ${{ env.REFRESH_CACHE == 'true' }}
  continue-on-error: true  # Ignore errors when cache is not found.
  env:
    STEPS_CACHE_KEYS_OUTPUTS_CACHE_KEY: ${{ steps.cache-keys.outputs.CACHE_KEY }}
  run: |
    gh cache delete "${STEPS_CACHE_KEYS_OUTPUTS_CACHE_KEY}"
# Save both Bazel disk caches when refreshing, or when the restore step did
# not report an exact hit on the primary key.
- name: Save bazel cache if it's new or being refreshed.
  if: ${{ env.REFRESH_CACHE == 'true' || steps.bazel-cache.outputs.cache-hit != 'true' }}
  uses: actions/cache/save@v4
  with:
    key: ${{ steps.cache-keys.outputs.CACHE_KEY }}
    path: |
      ~/.cache/bazel-macos
      ~/.cache/bazel-ios