Commit df121b9

Fixed batchnorm converter change
1 parent df95f00 commit df121b9

File tree

5 files changed (+25, -42 lines)

examples/dynamo/mutable_torchtrt_module_example.py
2 additions & 2 deletions

@@ -34,7 +34,7 @@
     "make_refitable": True,
 }

-model = models.resnet18(pretrained=False).eval().to("cuda")
+model = models.resnet18(pretrained=True).eval().to("cuda")
 mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
 # You can use the mutable module just like the original pytorch module. The compilation happens while you first call the mutable module.
 mutable_module(*inputs)
@@ -45,7 +45,7 @@

 # %%
 # Making changes to mutable module can trigger refit or re-compilation. For example, loading a different state_dict and setting new weight values will trigger refit, and adding a module to the model will trigger re-compilation.
-model2 = models.resnet18(pretrained=True).eval().to("cuda")
+model2 = models.resnet18(pretrained=False).eval().to("cuda")
 mutable_module.load_state_dict(model2.state_dict())
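The example comment above explains that loading a different state_dict triggers a refit, while structural changes force a re-compilation. A minimal sketch of that flow, assuming the imports used in the example file; the settings, inputs, and input shape shown here are stand-ins for the values defined earlier in that example:

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models

# Sketch only: settings and inputs mirror the values defined earlier in the example.
settings = {"make_refitable": True}
inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

model = models.resnet18(pretrained=True).eval().to("cuda")
mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
mutable_module(*inputs)  # first call triggers compilation

# Same architecture, new weights: load_state_dict triggers a refit, not a rebuild.
model2 = models.resnet18(pretrained=False).eval().to("cuda")
mutable_module.load_state_dict(model2.state_dict())
mutable_module(*inputs)  # runs with the refitted weights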
examples/dynamo/refit_engine_example.py
2 additions & 2 deletions

@@ -39,7 +39,7 @@
 # Compile the module for the first time and save it.
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-model = models.resnet18(pretrained=False).eval().to("cuda")
+model = models.resnet18(pretrained=True).eval().to("cuda")
 exp_program = torch.export.export(model, tuple(inputs))
 enabled_precisions = {torch.float}
 debug = False
@@ -68,7 +68,7 @@
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 # Create and compile the updated model
-model2 = models.resnet18(pretrained=True).eval().to("cuda")
+model2 = models.resnet18(pretrained=False).eval().to("cuda")
 exp_program2 = torch.export.export(model2, tuple(inputs))
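For context, this example compiles a refittable engine from the pretrained network and then refits it with the freshly initialized one. A condensed sketch under the same assumptions as the example file; the compile options and refit_module_weights argument names follow that example as written and may differ between releases, so treat them as assumptions:

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models
from torch_tensorrt.dynamo import refit_module_weights

inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

# Compile once from the pretrained weights (as this hunk now does).
model = models.resnet18(pretrained=True).eval().to("cuda")
exp_program = torch.export.export(model, tuple(inputs))
trt_gm = torch_trt.dynamo.compile(
    exp_program, inputs=inputs, enabled_precisions={torch.float}, make_refitable=True
)

# Export the randomly initialized network and refit the existing engine.
model2 = models.resnet18(pretrained=False).eval().to("cuda")
exp_program2 = torch.export.export(model2, tuple(inputs))
new_trt_gm = refit_module_weights(compiled_module=trt_gm, new_weight_module=exp_program2)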
py/torch_tensorrt/dynamo/_refit.py
2 additions & 19 deletions

@@ -115,25 +115,8 @@ def construct_refit_mapping_from_weight_name_map(
     for engine_weight_name, (sd_weight_name, np_weight_type) in weight_name_map.items():
         trt_dtype = dtype.try_from(np_weight_type).to(trt.DataType)
         torch_dtype = dtype.try_from(np_weight_type).to(torch.dtype)
-        if engine_weight_name.split(" ")[-1] in ["SCALE", "SHIFT"]:
-            # Batch Norm Layer
-            params = {
-                "weight": 1.0,
-                "bias": 0.0,
-                "running_mean": 0.0,
-                "running_var": 1.0,
-            }
-            for w in sd_weight_name:
-                if w in state_dict:
-                    params[w.split(".")[-1]] = state_dict[w]
-            scale = params["weight"] / torch.sqrt(params["running_var"] + 1e-5)
-            shift = params["bias"] - params["running_mean"] * scale
-            # Set scale to scale or shift to shift
-            engine_weight_map[engine_weight_name] = eval(
-                engine_weight_name.split(" ")[-1].lower()
-            )

-        elif sd_weight_name not in state_dict:
+        if sd_weight_name not in state_dict:
             # If weights is not in sd, we can leave it unchanged
             continue
         else:
@@ -180,7 +163,7 @@ def _refit_single_trt_engine_with_gm(

     # Debug Use
     # correct = construct_refit_mapping(new_gm, input_list, settings)
-    # {k: np.allclose(correct[k][0], mapping[k][0].cpu().numpy(), 1e-2, 1e-2) for k in mapping if k in correct}
+    # comparison = {k: (np.allclose(correct[k][0], mapping[k][0].cpu().numpy(), 1e-2, 1e-2), correct[k][0], mapping[k][0]) for k in mapping if k in correct}

     for layer_name in weight_list:
         if layer_name not in mapping:
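The deleted branch folded batch-norm statistics into the SCALE/SHIFT pair that a TensorRT scale layer consumes, presumably because the batch norm converter now registers weights that can be refit directly. A standalone sketch that documents only the formula the removed code computed (eps of 1e-5 as in the original):

import torch

def fold_batchnorm(weight, bias, running_mean, running_var, eps=1e-5):
    # A TensorRT scale layer applies y = x * scale + shift; batch norm in
    # inference mode reduces to exactly that affine transform.
    scale = weight / torch.sqrt(running_var + eps)
    shift = bias - running_mean * scale
    return scale, shift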
py/torch_tensorrt/dynamo/conversion/_conversion.py
1 addition & 1 deletion

@@ -137,7 +137,6 @@ def convert_module(
         refit_test_engine = runtime.deserialize_cuda_engine(
             interpreter_result.serialized_engine
         )
-        weight_name_map = interpreter_result.weight_name_map
         try:
             _refit_single_trt_engine_with_gm(
                 new_gm=module,
@@ -146,6 +145,7 @@ def convert_module(
                 settings=settings,
                 weight_name_map=interpreter_result.weight_name_map,
             )
+            weight_name_map = interpreter_result.weight_name_map
         except AssertionError:
             logger.warning("Fast refit test failed. Removing the weight map caching.")
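Moving the assignment below the call means weight_name_map is cached only after the fast-refit check passes, so a failed check no longer leaves a stale map behind. A minimal sketch of that ordering, with a hypothetical helper standing in for the real refit test:

import logging

logger = logging.getLogger(__name__)

def cache_weight_map_if_refittable(candidate_map, fast_refit_test):
    # Hypothetical helper: fast_refit_test raises AssertionError on failure,
    # mirroring _refit_single_trt_engine_with_gm in the hunk above.
    weight_name_map = None
    try:
        fast_refit_test(weight_name_map=candidate_map)
        weight_name_map = candidate_map  # cache only after the test passes
    except AssertionError:
        logger.warning("Fast refit test failed. Removing the weight map caching.")
    return weight_name_map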
tests/py/dynamo/models/test_model_refit.py
18 additions & 18 deletions

@@ -35,8 +35,8 @@
 @pytest.mark.unit
 def test_mapping():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     trt_input = [
         torchtrt.Input(i.shape, dtype=torch.float, format=torch.contiguous_format)
@@ -91,8 +91,8 @@ def test_mapping():
 @pytest.mark.unit
 def test_refit_one_engine_with_weightmap():

-    model = models.resnet152(pretrained=False).eval().to("cuda")
-    model2 = models.resnet152(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -140,8 +140,8 @@ def test_refit_one_engine_with_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_no_map_with_weightmap():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -191,8 +191,8 @@ def test_refit_one_engine_no_map_with_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_with_wrong_weightmap():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -301,8 +301,8 @@ def test_refit_one_engine_bert_with_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_inline_runtime__with_weightmap():
     trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -347,8 +347,8 @@ def test_refit_one_engine_inline_runtime__with_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_python_runtime_with_weightmap():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -467,8 +467,8 @@ def forward(self, x):
 @pytest.mark.unit
 def test_refit_one_engine_without_weightmap():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -571,8 +571,8 @@ def test_refit_one_engine_bert_without_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_inline_runtime_without_weightmap():
     trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -617,8 +617,8 @@ def test_refit_one_engine_inline_runtime_without_weightmap():
 @pytest.mark.unit
 def test_refit_one_engine_python_runtime_without_weightmap():

-    model = models.resnet18(pretrained=False).eval().to("cuda")
-    model2 = models.resnet18(pretrained=True).eval().to("cuda")
+    model = models.resnet18(pretrained=True).eval().to("cuda")
+    model2 = models.resnet18(pretrained=False).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
