Skip to content

Commit eee3a00

Browse files
gonnet authored and copybara-github committed
Use mmap_utils for more efficient reading/writing of serialized model data.
PiperOrigin-RevId: 884497688
1 parent f56d705 commit eee3a00

File tree

3 files changed

+12
-57
lines changed

3 files changed

+12
-57
lines changed

ai_edge_quantizer/quantizer.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
import os
2626
import io
27+
from ai_edge_litert.tools import mmap_utils
2728
from ai_edge_quantizer import algorithm_manager
2829
from ai_edge_quantizer import calibrator
2930
from ai_edge_quantizer import default_policy
@@ -87,8 +88,7 @@ def save(
8788
pathlib.Path(save_folder) / (model_name + '_recipe.json')
8889
)
8990
recipe = json.dumps(self.recipe)
90-
with open(recipe_save_path, 'w') as output_file_handle:
91-
output_file_handle.write(recipe)
91+
mmap_utils.set_file_contents(recipe_save_path, recipe.encode())
9292

9393
def export_model(self, filepath: Path, overwrite: bool = False) -> None:
9494
"""Exports the quantized model to a .tflite flatbuffer.
@@ -120,8 +120,9 @@ def export_model(self, filepath: Path, overwrite: bool = False) -> None:
120120
' consider change the model name or specify overwrite=True to'
121121
' overwrite the model if needed.'
122122
)
123-
with open(filepath, 'wb') as output_file_handle:
124-
output_file_handle.write(self.quantized_model)
123+
124+
# Try to write the file via an `mmap.mmap` to avoid any buffering.
125+
mmap_utils.set_file_contents(filepath, self.quantized_model)
125126

126127

127128
class Quantizer:
@@ -207,9 +208,8 @@ def load_config_policy(self, filename: Path) -> None:
207208
Args:
208209
filename: Config policy filename.
209210
"""
210-
with open(filename, 'r') as f:
211-
content = f.read()
212-
policy = default_policy.update_default_config_policy(content)
211+
content = bytearray(mmap_utils.get_file_contents(filename)).decode()
212+
policy = default_policy.update_default_config_policy(content)
213213

214214
# Register the policy for MIN_MAX_UNIFORM_QUANT algorithm.
215215
algorithm_manager.register_config_check_policy_func(

ai_edge_quantizer/utils/tfl_flatbuffer_utils.py

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,13 @@
1717

1818
import collections
1919
import logging
20-
import mmap
21-
import os
2220
import pathlib
2321

2422
import immutabledict
2523
import numpy as np
2624

27-
import os
28-
import io
2925
from ai_edge_litert.tools import flatbuffer_utils
26+
from ai_edge_litert.tools import mmap_utils
3027
from ai_edge_quantizer import qtyping
3128

3229

@@ -151,26 +148,7 @@ def get_model_content(tflite_path: Path) -> memoryview:
151148
Returns:
152149
The model bytes.
153150
"""
154-
model_bytes = None
155-
156-
# Try to mmap the file first if it is local.
157-
try:
158-
if (fd := os.open(tflite_path, os.O_RDONLY)) >= 0:
159-
model_bytes = mmap.mmap(fd, 0, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ)
160-
os.close(fd)
161-
except IOError as e:
162-
logging.info(
163-
'Mapping model file "%s" failed with exception: %s.',
164-
tflite_path,
165-
e,
166-
)
167-
168-
# If mapping failed, go at it conventionally.
169-
if model_bytes is None:
170-
with open(tflite_path, "rb") as tflite_file:
171-
model_bytes = tflite_file.read()
172-
173-
return memoryview(model_bytes)
151+
return mmap_utils.get_file_contents(tflite_path)
174152

175153

176154
def get_model_buffer(tflite_path: Path) -> bytearray:
@@ -182,28 +160,7 @@ def get_model_buffer(tflite_path: Path) -> bytearray:
182160
Returns:
183161
model_buffer: the model buffer.
184162
"""
185-
model_bytearray = None
186-
187-
# Try to mmap the file first if it is local.
188-
try:
189-
if (fd := os.open(tflite_path, os.O_RDONLY)) >= 0:
190-
try:
191-
model_mmap = mmap.mmap(
192-
fd, 0, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ
193-
)
194-
model_bytearray = bytearray(model_mmap[:])
195-
except IOError as e:
196-
print(f"Mapping model file {tflite_path} failed with exception: {e}.")
197-
os.close(fd)
198-
except RuntimeError:
199-
pass
200-
201-
# If mapping failed, go at it conventionally.
202-
if model_bytearray is None:
203-
with open(tflite_path, "rb") as tflite_file:
204-
model_bytearray = bytearray(tflite_file.read())
205-
206-
return model_bytearray
163+
return bytearray(mmap_utils.get_file_contents(tflite_path))
207164

208165

209166
def parse_op_tensors(

ai_edge_quantizer/utils/tfl_interpreter_utils.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@
2020
import ml_dtypes
2121
import numpy as np
2222

23+
from ai_edge_litert.tools import mmap_utils
2324
from ai_edge_quantizer import qtyping
2425
from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
2526
from ai_edge_litert import interpreter as tfl # pylint: disable=g-direct-tensorflow-import
26-
import os
27-
import io
2827

2928
DEFAULT_SIGNATURE_KEY = "serving_default"
3029

@@ -52,8 +51,7 @@ def create_tfl_interpreter(
5251
A TFLite interpreter.
5352
"""
5453
if isinstance(tflite_model, str):
55-
with open(tflite_model, "rb") as f:
56-
tflite_model = f.read()
54+
tflite_model = mmap_utils.get_file_contents(tflite_model)
5755

5856
if use_xnnpack:
5957
op_resolver = tfl.OpResolverType.BUILTIN

0 commit comments

Comments (0)