From c90b7a6a38bc34f2eb46d6d4f0608424fa64767d Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Tue, 28 May 2024 15:01:23 -0700 Subject: [PATCH] Switch the order of the to_dtype function and source transform Quantization runs during the source transform, and some of the quantization infra doesn't support bf16 yet. Move to_dtype one stage earlier so that we can select fp32 as the dtype before the quantization transform runs. Differential Revision: [D57883363](https://our.internmc.facebook.com/intern/diff/D57883363/) [ghstack-poisoned] --- examples/models/llama2/export_llama_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py index 007f40344e6..31c666fdeb1 100644 --- a/examples/models/llama2/export_llama_lib.py +++ b/examples/models/llama2/export_llama_lib.py @@ -366,8 +366,8 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager: ) .set_output_dir(output_dir_path) .set_metadata(args.metadata) - .source_transform(transforms) .to_dtype(dtype_override) + .source_transform(transforms) )