Commit f8b6ac6

committed
Handled the memory issue
1 parent 30f3b15 commit f8b6ac6

1 file changed: +8 -6 lines changed


py/torch_tensorrt/dynamo/conversion/_conversion.py

Lines changed: 8 additions & 6 deletions
@@ -3,6 +3,7 @@
 import logging
 from typing import Any, List, Optional, Sequence
 
+import tensorrt as trt
 import torch
 from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
 from torch_tensorrt._Device import Device
@@ -17,8 +18,6 @@
 from torch_tensorrt.dynamo.runtime import PythonTorchTensorRTModule, TorchTensorRTModule
 from torch_tensorrt.dynamo.utils import get_torch_inputs
 
-import tensorrt as trt
-
 logger = logging.getLogger(__name__)
 
 
@@ -131,13 +130,13 @@ def convert_module(
     from torch_tensorrt.dynamo._refit import _refit_single_trt_engine_with_gm
     from torch_tensorrt.logging import TRT_LOGGER
 
-    runtime = trt.Runtime(TRT_LOGGER)
-    refit_test_engine = runtime.deserialize_cuda_engine(
-        interpreter_result.serialized_engine
-    )
     weight_name_map: Any = None
     # Do the test refit with cached map if make_refitable is enabled
     if settings.make_refitable:
+        runtime = trt.Runtime(TRT_LOGGER)
+        refit_test_engine = runtime.deserialize_cuda_engine(
+            interpreter_result.serialized_engine
+        )
         weight_name_map = interpreter_result.weight_name_map
         try:
             _refit_single_trt_engine_with_gm(
@@ -150,6 +149,9 @@ def convert_module(
         except AssertionError:
             logger.warning("Fast refit test failed. Removing the weight map caching.")
 
+        del refit_test_engine
+        torch.cuda.empty_cache()
+
     rt_cls = PythonTorchTensorRTModule
 
     if ENABLED_FEATURES.torch_tensorrt_runtime and not settings.use_python_runtime:
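
The change is a scoping and cleanup pattern rather than anything specific to this file: the throwaway engine used for the refit test is now deserialized only inside the make_refitable branch, and the reference is dropped (followed by torch.cuda.empty_cache()) once the test is done, so the extra deserialized copy of the engine does not stay resident for the rest of conversion. A minimal sketch of that pattern, with a hypothetical helper and parameters (run_refit_check, serialized_engine, do_check) standing in for the real convert_module plumbing:

import tensorrt as trt
import torch


def run_refit_check(serialized_engine: bytes, do_check: bool) -> None:
    # Hypothetical helper; mirrors the commit's memory pattern, not the project's API.
    if do_check:
        # Deserialize the test engine only when the check is actually requested.
        runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
        test_engine = runtime.deserialize_cuda_engine(serialized_engine)
        try:
            pass  # run the refit test against test_engine here
        finally:
            # Drop the temporary engine and release cached CUDA memory so the
            # extra copy does not linger after the check.
            del test_engine
            torch.cuda.empty_cache()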
