
Support >2G model export | torchlib(feat) #1003


Merged: 7 commits, merged Aug 11, 2023
Changes from 1 commit
67 changes: 51 additions & 16 deletions onnxscript/function_libs/torch_lib/graph_building.py
@@ -3,6 +3,7 @@

import logging
import os
import tempfile
import typing
import warnings
from typing import Any, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union
@@ -342,6 +343,18 @@ def _create_op_call_in_torch_graph(
return node_ouputs


def _estimate_tensor_size(tensor: torch.Tensor) -> int:
"""Estimate the size of a tensor in bytes.

Args:
tensor: The tensor to estimate the size of.

Returns:
The estimated size of the tensor in bytes.
"""
return tensor.numel() * tensor.element_size()
Collaborator Author: Thanks GPT
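
A quick sanity check of this estimate (illustrative only, not part of the diff): a float32 tensor with 2**29 elements comes out to 2**29 * 4 bytes = 2 GiB, exactly what the helper reports.

```python
import torch

# Illustrative sketch: a "meta" tensor carries shape and dtype metadata without
# allocating storage, so the 2 GiB estimate can be checked cheaply.
t = torch.empty(1024, 1024, 512, dtype=torch.float32, device="meta")
assert t.numel() * t.element_size() == 2**31  # 2 GiB, same as _estimate_tensor_size(t)
```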



class TorchScriptGraph:
_LOCAL_FUNCTION_DOMAIN_NAME: Final[str] = "torch_export"
"""The domain name for local functions."""
Expand Down Expand Up @@ -683,12 +696,15 @@ def to_model_proto(
# TODO(BowenBao): All local function domain versions are hardcoded as 1.
unique_custom_domains[function_proto.domain] = 1

(
proto,
_,
_,
_,
) = self._torch_graph._export_onnx( # type: ignore[attr-defined] # pylint: disable=protected-access
initializers_size = sum(
_estimate_tensor_size(tensor) for tensor in self.initializers.values()
)

# Treat models > 1GB as large models so that we have ample room
Contributor: Hmm, maybe increase it to 1.8 GB? I have never seen a model larger than 100 MB without initializers.

Collaborator Author: Done

# for the rest of the proto fields.
large_model = initializers_size > (2**30)

export_kwargs: dict[str, Any] = dict(
initializers=self.initializers if include_initializers else {},
onnx_opset_version=opset_version,
# TODO(justinchuby): Figure out how to get the dynamic axes from the inputs
Expand All @@ -699,15 +715,30 @@ def to_model_proto(
keep_initializers_as_inputs=False,
custom_opsets={},
add_node_names=True,
# TODO(#493): Passing in this instead of reading from env.
# User must put the exported model file in the same folder to launch ORT.
onnx_file_path=os.path.join(
os.getenv("EXTERNAL_ONNX_INITIALIZER_FOLDER", ""), "dummy_model_path.onnx"
),
node_attr_to_name={},
)

onnx_model = onnx.load_from_string(proto)
cache_model_to_disk = include_initializers and large_model

if cache_model_to_disk:
Contributor: Whether or not to store initializers should be controlled by a user flag. Suppose I export a 1 GB model on a remote machine and want to visualize it locally: I really don't want to download its initializers over home internet. If such a flag could be turned on, I would be able to download just the structure of the model and debug faster.

Collaborator Author (@justinchuby, Aug 10, 2023): I think once the user gets the model proto, they can do whatever they want with it (e.g. remove all the data)? A user has full control once they get the dynamo export output as an object.

Furthermore, include_initializers is already an argument.

Contributor: Agreed with @wschin's goal and @justinchuby's explanation. Something to consider for ExportOutput.save or ExportOutputSerializer.
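
As a rough sketch of the point above (an assumed helper, not part of this PR): once a caller holds the exported ModelProto, they can strip the initializer payloads themselves and keep only the graph structure for a lightweight download and visualization.

```python
import onnx

def strip_initializer_data(model: onnx.ModelProto) -> onnx.ModelProto:
    """Assumed helper for illustration: drop tensor payloads, keep graph structure."""
    stripped = onnx.ModelProto()
    stripped.CopyFrom(model)
    for tensor in stripped.graph.initializer:
        # Clear every payload field a TensorProto can carry; name, dims, and
        # data_type stay, so the graph remains viewable.
        for field in ("raw_data", "float_data", "int32_data", "int64_data",
                      "double_data", "uint64_data", "string_data"):
            tensor.ClearField(field)
    return stripped
```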

with tempfile.TemporaryDirectory() as temp_dir:
onnx_file_path = os.path.join(temp_dir, "exported_model.onnx")
export_kwargs["onnx_file_path"] = onnx_file_path
_ = self._torch_graph._export_onnx( # type: ignore[attr-defined] # pylint: disable=protected-access
**export_kwargs
)
onnx_model = onnx.load_model(onnx_file_path, load_external_data=True)
else:
(
proto,
_,
_,
_,
) = self._torch_graph._export_onnx( # type: ignore[attr-defined] # pylint: disable=protected-access
**export_kwargs
)
onnx_model = onnx.load_from_string(proto)

onnx_model.functions.extend(function_proto_dict.values())

# `_export_onnx` only exports opset_imports that is visible to it. It does not
@@ -725,10 +756,14 @@
)

try:
onnx_model = onnx.shape_inference.infer_shapes(
onnx_model, check_type=True, strict_mode=False, data_prop=True
)
onnx.checker.check_model(onnx_model, full_check=True)
if not cache_model_to_disk:
Collaborator Author: In a follow-up PR we will remove these checks altogether; per a discussion with Aaron, we should not check the model here.

# Only check the model if it is in memory.
# Otherwise the checker and shape_inference will fail because
Contributor: For shape inference, can we still load the shapes and element types from the model file (not the initializer files) and then run infer_shapes?

Collaborator Author: We could, but we also don't need to, because PyTorch supplies all the shape info.

Contributor: One drawback: due to onnx/onnx#5487, we don't have much inner-node shape info left now that modules are exported as functions.
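
For reference on the thread above (a sketch, assuming the large model has already been written to disk with external data; file names are illustrative): onnx exposes path-based variants of both checks that sidestep the 2 GB in-memory serialization limit.

```python
from onnx import checker, shape_inference

# Sketch only: both calls take a file path, so no >2GB proto has to be
# serialized in memory. "exported_model.onnx" is an assumed path.
checker.check_model("exported_model.onnx", full_check=True)
shape_inference.infer_shapes_path(
    "exported_model.onnx",
    "exported_model_inferred.onnx",
    check_type=True,
    strict_mode=False,
    data_prop=True,
)
```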

# we cannot serialize the model.
onnx_model = onnx.shape_inference.infer_shapes(
onnx_model, check_type=True, strict_mode=False, data_prop=True
)
onnx.checker.check_model(onnx_model, full_check=True)
except (onnx.checker.ValidationError, onnx.shape_inference.InferenceError) as e:
warnings.warn(f"ONNX model is invalid: {e}", stacklevel=1)
logging.debug(
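
To round out the picture (assumed caller-side usage, not part of this diff): a ModelProto whose initializers push it past the 2 GB protobuf limit has to be saved with external data, which onnx.save_model supports directly.

```python
import onnx

# Assumed caller-side sketch: persist a >2GB ModelProto by moving initializer
# payloads into a side file next to the .onnx file. File names are illustrative.
onnx.save_model(
    onnx_model,                       # ModelProto returned by to_model_proto()
    "exported_model.onnx",
    save_as_external_data=True,       # required once the proto exceeds 2 GB
    all_tensors_to_one_file=True,
    location="exported_model.onnx.data",
    size_threshold=1024,              # keep small tensors inline
)
```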