@@ -374,7 +374,13 @@ jobs:
     secrets: inherit
     strategy:
       matrix:
-        hf_model_repo: [google/gemma-2-2b]
+        hf_model_id: [
+          google/gemma-2-2b,
+          Qwen/Qwen2.5-0.5B,
+          HuggingFaceTB/SmolLM2-135M,
+          meta-llama/Llama-3.2-1B,
+          allenai/OLMo-1B-hf
+        ]
       fail-fast: false
     with:
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -389,66 +395,39 @@ jobs:
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-        echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-        rm -rf cmake-out
-        cmake \
-          -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-          -DEXECUTORCH_BUILD_XNNPACK=ON \
-          -DPYTHON_EXECUTABLE=python \
-          -Bcmake-out .
-        cmake --build cmake-out -j9 --target install --config Release
-
-        echo "Build llama runner"
-        dir="examples/models/llama"
-        cmake \
-          -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-          -DEXECUTORCH_BUILD_XNNPACK=ON \
-          -DPYTHON_EXECUTABLE=python \
-          -Bcmake-out/${dir} \
-          ${dir}
-        cmake --build cmake-out/${dir} -j9 --config Release
         echo "::endgroup::"

-        echo "::group::Set up HuggingFace Dependencies"
-        if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
-          echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
-          exit 1
-        fi
+        echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        git clone https://github.com/huggingface/optimum-executorch
+        cd optimum-executorch
+        # There is no release yet; for CI stability, always test from the same commit on main
+        git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
+        pip install .
         pip install accelerate sentencepiece
         pip list
         echo "::endgroup::"

-        echo "::group::Export to ExecuTorch"
-        TOKENIZER_FILE=tokenizer.model
-        TOKENIZER_BIN_FILE=tokenizer.bin
-        ET_MODEL_NAME=et_model
-        DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
-        if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
-          echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-          python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
-          ls ./tokenizer.bin
-        else
-          echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-          exit 1
-        fi
-
-        python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-        cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        # Pass the matrix variable through as an environment variable
+        export MODEL_ID="${{ matrix.hf_model_id }}"
+        python -c "
+        import os
+        from optimum.executorch import ExecuTorchModelForCausalLM
+        from transformers import AutoTokenizer
+
+        model_id = os.getenv('MODEL_ID')
+        print(f'Loading model: {model_id}')
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt='Simply put, the theory of relativity states that',
+            max_seq_len=64
+        )
+        print(generated_text)
+        "
         echo "::endgroup::"

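For local debugging outside CI, the new step should be reproducible as a standalone script. The sketch below is a minimal, hypothetical reproduction, not part of the workflow: it assumes the pinned optimum-executorch commit is installed (e.g. pip install git+https://github.com/huggingface/optimum-executorch.git@6a7e83f3eee2976fa809335bfb78a45b1ea1cb25, plus accelerate and sentencepiece), and that gated repos such as meta-llama/Llama-3.2-1B are accessible after huggingface-cli login. It uses only the APIs the workflow itself calls:

# Minimal local reproduction of the CI step above (a sketch, not part of the workflow).
# from_pretrained(recipe='xnnpack') exports the model through ExecuTorch's XNNPACK
# backend, and text_generation() runs the exported model.
from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer

# Same model pool as the CI matrix (CI runs one model per matrix entry).
MODEL_IDS = [
    "google/gemma-2-2b",
    "Qwen/Qwen2.5-0.5B",
    "HuggingFaceTB/SmolLM2-135M",
    "meta-llama/Llama-3.2-1B",
    "allenai/OLMo-1B-hf",
]

for model_id in MODEL_IDS:
    print(f"Loading model: {model_id}")
    model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    generated_text = model.text_generation(
        tokenizer=tokenizer,
        prompt="Simply put, the theory of relativity states that",
        max_seq_len=64,
    )
    print(generated_text)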