Add MTP drafter pipeline for npu executor to enable speculative decoding. #8602

Workflow file for this run

.github/workflows/ci-build-mac.yml at 7337136

	name: "CI-Mac"

	# Triggers: version tags, pull requests against main, a nightly schedule,
	# and manual dispatch.
	on:
	  push:
	    tags:
	      # Version tags of the form v<major>.<minor>.<patch>, e.g. v1.2.3.
	      - 'v*.*.*'
	  pull_request:
	    branches:
	      - main
	  schedule:
	    - cron: "0 10 * * *"  # Run at 2am PST (10am UTC) every day to refresh the cache.
	  workflow_dispatch:  # Manual trigger
	    inputs:
	      REFRESH_CACHE:
	        description: 'Refresh cache to remove unused files'
	        type: boolean
	        default: true


	concurrency:
	  # One group per workflow and PR head branch; events without a head ref
	  # (tag pushes, schedule, manual runs) fall back to the full git ref.
	  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
	  # A newer run in the same group cancels the one still in progress.
	  cancel-in-progress: true

	jobs:
	  # Builds and tests on macOS, builds for the iOS simulator, uploads the
	  # prebuilt binaries on tag pushes, and maintains the bazel disk cache.
	  presubmit:
	    name: "Presubmit-Mac"
	    runs-on: macos-latest
	    permissions:
	      actions: write  # For gh cache delete.
	      contents: write  # For gh release upload.
	    env:
	      MODEL_KEY: gemma-3-1b-it-v1
	      MODEL_PATH: ./models/gemma3-1b-it-int4.litertlm
	      MODEL_URL: https://huggingface.co/litert-community/Gemma3-1B-IT/resolve/main/gemma3-1b-it-int4.litertlm

	      GH_TOKEN: ${{ github.token }}  # For gh release upload.
	      # Evaluates to 'true' on scheduled runs and on manual runs that ask
	      # for a refresh; steps below compare it against the string 'true'.
	      REFRESH_CACHE: >-
	        ${{ github.event_name == 'schedule' ||
	        (github.event_name == 'workflow_dispatch' && inputs.REFRESH_CACHE) }}
	    steps:
	      - name: Checkout code.
	        uses: actions/checkout@v4
	        with:
	          lfs: true

	      # Builds a hierarchy of cache keys, from least specific (KEY_2) to
	      # most specific (CACHE_KEY), and exposes them as step outputs.
	      - name: Set up cache keys.
	        id: cache-keys
	        run: |
	          CACHE_RESTORE_KEY_2="${GITHUB_WORKFLOW}"
	          CACHE_RESTORE_KEY_1="$CACHE_RESTORE_KEY_2-${{ hashFiles('**/WORKSPACE', '**/.bazelrc') }}"
	          CACHE_RESTORE_KEY_0="$CACHE_RESTORE_KEY_1-${{ hashFiles('**/BUILD*') }}"
	          # If it's not a pull request, the base sha is empty and this is
	          # the same as "$CACHE_RESTORE_KEY_0-".
	          CACHE_RESTORE_KEY_HEAD="$CACHE_RESTORE_KEY_0-${{ github.event.pull_request.base.sha }}"
	          CACHE_KEY="$CACHE_RESTORE_KEY_0-${{ github.sha }}"
	          echo "CACHE_RESTORE_KEY_2=$CACHE_RESTORE_KEY_2" >> "$GITHUB_OUTPUT"
	          echo "CACHE_RESTORE_KEY_1=$CACHE_RESTORE_KEY_1" >> "$GITHUB_OUTPUT"
	          echo "CACHE_RESTORE_KEY_0=$CACHE_RESTORE_KEY_0" >> "$GITHUB_OUTPUT"
	          echo "CACHE_RESTORE_KEY_HEAD=$CACHE_RESTORE_KEY_HEAD" >> "$GITHUB_OUTPUT"
	          echo "CACHE_KEY=$CACHE_KEY" >> "$GITHUB_OUTPUT"

	      - name: Clean build outputs if cache is being refreshed.
	        if: env.REFRESH_CACHE == 'true'
	        run: bazel clean --expunge

	      # Restore keys are tried in order, most specific first.
	      - name: Restore bazel cache if cache is not being refreshed.
	        id: bazel-cache
	        if: env.REFRESH_CACHE != 'true'
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            ~/.cache/bazel-macos
	            ~/.cache/bazel-ios
	          key: ${{ steps.cache-keys.outputs.CACHE_KEY }}
	          restore-keys: |
	            ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_HEAD }}
	            ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_0 }}-
	            ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_1 }}-
	            ${{ steps.cache-keys.outputs.CACHE_RESTORE_KEY_2 }}-

	      # Step outputs are passed through env rather than inlined in the script.
	      - name: Check cache hit.
	        run: |
	          echo "Cache Hit: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_HIT}"
	          echo "Cache Primary Key: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_PRIMARY_KEY}"
	          echo "Cache Matched Key: ${STEPS_BAZEL_CACHE_OUTPUTS_CACHE_MATCHED_KEY}"
	        env:
	          STEPS_BAZEL_CACHE_OUTPUTS_CACHE_HIT: ${{ steps.bazel-cache.outputs.cache-hit }}
	          STEPS_BAZEL_CACHE_OUTPUTS_CACHE_PRIMARY_KEY: ${{ steps.bazel-cache.outputs.cache-primary-key }}
	          STEPS_BAZEL_CACHE_OUTPUTS_CACHE_MATCHED_KEY: ${{ steps.bazel-cache.outputs.cache-matched-key }}

	      # MODEL_PATH and MODEL_URL come from the job-level env and are read
	      # as quoted shell variables.
	      - name: Download Model
	        env:
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        run: |
	          mkdir -p ./models
	          echo "Downloading model from Hugging Face..."
	          curl -L --retry 5 -f \
	            -H "Authorization: Bearer $HF_TOKEN" \
	            -o "${MODEL_PATH}" \
	            "${MODEL_URL}"
	          ls -lh "${MODEL_PATH}"

	      - name: Run bazel build on MacOS.
	        run: |
	          bazel build --disk_cache=~/.cache/bazel-macos \
	            //... \
	            //runtime/engine:litert_lm_main

	      - name: Update litert_lm_main prebuilt for MacOS if new version tag is pushed.
	        if: github.ref_type == 'tag'
	        run: |
	          cp bazel-bin/runtime/engine/litert_lm_main litert_lm_main.macos_arm64
	          gh release upload "${GITHUB_REF_NAME}" litert_lm_main.macos_arm64 --clobber

	      - name: Run bazel test on MacOS.
	        run: |
	          bazel test --disk_cache=~/.cache/bazel-macos --test_output=errors //...

	      - name: Install pytest
	        run: python3 -m pip install --break-system-packages pytest==8.3.4

	      - name: Run pytest
	        run: pytest tools/test/ --model-path="${MODEL_PATH}" --build-system=bazel

	      - name: Run bazel build for iOS Simulator.
	        run: |
	          # The `ios_sim_arm64` config causes bazel to build some mac-only
	          # targets that are not compatible with the iOS simulator, which
	          # leads to build failures. These flags filter out those
	          # incompatible builds.
	          bazel build --disk_cache=~/.cache/bazel-ios --config=ios_sim_arm64 \
	            --build_tag_filters=-requires-mac-inputs:hard,-no_mac \
	            //... \
	            //runtime/engine:litert_lm_main \
	            -- \
	            -//python/... \
	            -//schema/py:* \
	            -//kotlin/...

	      - name: Update litert_lm_main prebuilt for iOS Simulator if new version tag is pushed.
	        if: github.ref_type == 'tag'
	        run: |
	          cp bazel-bin/runtime/engine/litert_lm_main litert_lm_main.ios_sim_arm64
	          gh release upload "${GITHUB_REF_NAME}" litert_lm_main.ios_sim_arm64 --clobber

	      # Deletes any existing entry under CACHE_KEY before the save step runs.
	      - name: Remove cache if cache is being refreshed.
	        if: env.REFRESH_CACHE == 'true'
	        continue-on-error: true  # Ignore errors when cache is not found.
	        run: gh cache delete "${STEPS_CACHE_KEYS_OUTPUTS_CACHE_KEY}"
	        env:
	          STEPS_CACHE_KEYS_OUTPUTS_CACHE_KEY: ${{ steps.cache-keys.outputs.CACHE_KEY }}

	      # Skipped only when the restore step reported an exact hit on CACHE_KEY
	      # and no refresh was requested.
	      - name: Save bazel cache if it's new or being refreshed.
	        if: env.REFRESH_CACHE == 'true' || steps.bazel-cache.outputs.cache-hit != 'true'
	        uses: actions/cache/save@v4
	        with:
	          path: |
	            ~/.cache/bazel-macos
	            ~/.cache/bazel-ios
	          key: ${{ steps.cache-keys.outputs.CACHE_KEY }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add MTP drafter pipeline for npu executor to enable speculative decoding. #8602

Workflow file

Add MTP drafter pipeline for npu executor to enable speculative decoding. #8602

Uh oh!

Workflow file for this run