Skip to content

Commit 7ca8eba

Browse files
author
yifan_shen3
committed
make use of coreml quantizer options in llama export
1 parent 4c3d54b commit 7ca8eba

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
get_pt2e_quantization_params,
3939
get_pt2e_quantizers,
4040
get_qnn_quantizer,
41+
get_coreml_quantizer,
4142
)
4243

4344
from executorch.sdk.etrecord import generate_etrecord
@@ -128,6 +129,10 @@ def build_args_parser() -> argparse.ArgumentParser:
128129
"qnn_8a8w",
129130
"qnn_16a16w",
130131
"qnn_16a4w",
132+
"coreml",
133+
"coreml_qc4",
134+
"coreml_xnnpack",
135+
"coreml_xnnpack_qc4",
131136
],
132137
help="Use PT2E quantization. Comma separated options. e.g. xnnpack_dynamic (for per channel 8 bit weight), xnnpack_dynamic_qc4 (for per channel 4 bit weight), embedding.",
133138
)
@@ -416,6 +421,10 @@ def get_quantizer_and_quant_params(args):
416421
args.pt2e_quantize, args.quantization_mode
417422
)
418423
quantizers.append(qnn_quantizer)
424+
if args.coreml and args.pt2e_quantize:
425+
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
426+
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
427+
quantizers.append(coreml_quantizer)
419428
logging.info(f"Applying quantizers: {quantizers}")
420429
return pt2e_quant_params, quantizers, quant_dtype
421430

@@ -469,7 +478,7 @@ def _export_llama(modelname, args) -> LLMEdgeManager: # noqa: C901
469478
modelname = f"mps_{modelname}"
470479

471480
if args.coreml:
472-
partitioners.append(get_coreml_partitioner(args.use_kv_cache))
481+
partitioners.append(get_coreml_partitioner(args.use_kv_cache, args.pt2e_quantize))
473482
modelname = f"coreml_{modelname}"
474483

475484
if args.qnn:

0 commit comments

Comments
 (0)