@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212MODEL_NAME=$1 # stories110M.pt
1313BUILD_TOOL=$2 # buck2 or cmake
1414DTYPE=$3 # fp16 or fp32
15- MODE=${4:-"xnnpack"} # portable or xnnpack
15+ MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
1616if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
1717 echo " Expecting atleast 4 positional arguments"
1818 echo " Usage: [...]"
@@ -37,7 +37,7 @@ if [[ -z "${MODE:-}" ]]; then
3737 exit 1
3838fi
3939
40- if [[ "${MODE}" =~ xnnpack.* ]]; then
40+ if [[ "${MODE}" =~ .*xnnpack.* ]]; then
4141 XNNPACK=ON
4242else
4343 XNNPACK=OFF
4949 CUSTOM=OFF
5050fi
5151
52+ if [[ "${MODE}" =~ .*qe.* ]]; then
53+ QE=ON
54+ else
55+ QE=OFF
56+ fi
57+
5258if [[ -z "${BUCK:-}" ]]; then
5359 BUCK=buck2
5460fi
@@ -69,6 +75,7 @@ cmake_install_executorch_libraries() {
6975 -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
7076 -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
7177 -DEXECUTORCH_BUILD_OPTIMIZED=ON \
78+ -DEXECUTORCH_BUILD_QUANTIZED="$QE" \
7279 -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
7380 -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
7481 -Bcmake-out .
@@ -84,7 +91,7 @@ cmake_build_llama_runner() {
8491 -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
8592 -DEXECUTORCH_BUILD_OPTIMIZED=ON \
8693 -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
87- -DEXECUTORCH_BUILD_OPTIMIZED=ON \
94+ -DEXECUTORCH_BUILD_QUANTIZED="$QE" \
8895 -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
8996 -Bcmake-out/${dir} \
9097 ${dir}
126133# Export model.
127134EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
128135echo "Exporting ${EXPORTED_MODEL_NAME}"
129- EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
130- if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
131- EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
136+ EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
137+ if [[ "${XNNPACK}" == "ON" ]]; then
138+ EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
139+ fi
140+ if [[ "${CUSTOM}" == "ON" ]]; then
141+ EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
142+ fi
143+ if [[ "${QE}" == "ON" ]]; then
144+ EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024" # no inner quotes: EXPORT_ARGS is expanded unquoted, so embedded quotes would reach the exporter as literal characters
132145fi
133146# Add dynamically linked library location
134147$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
0 commit comments