From c90b7a6a38bc34f2eb46d6d4f0608424fa64767d Mon Sep 17 00:00:00 2001
From: Chen Lai <chenlai@fb.com>
Date: Tue, 28 May 2024 15:01:23 -0700
Subject: [PATCH] Switch the order of the to_dtype function and source
 transform

We run quantization during the source transform, and some quantization infra doesn't support bf16 yet. Move to_dtype one stage earlier so that we can select fp32 as the dtype before running the quantization transform.

Differential Revision: [D57883363](https://our.internmc.facebook.com/intern/diff/D57883363/)

[ghstack-poisoned]
---
 examples/models/llama2/export_llama_lib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py
index 007f40344e6..31c666fdeb1 100644
--- a/examples/models/llama2/export_llama_lib.py
+++ b/examples/models/llama2/export_llama_lib.py
@@ -366,8 +366,8 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
         )
         .set_output_dir(output_dir_path)
         .set_metadata(args.metadata)
-        .source_transform(transforms)
         .to_dtype(dtype_override)
+        .source_transform(transforms)
     )