[fx2trt] Engineholder feature improvement, test fixes #1143

Merged 2 commits on Jun 23, 2022
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -47,7 +47,7 @@ jobs:
command: |
pip3 install nvidia-pyindex
pip3 install nvidia-tensorrt==8.2.4.2
-pip3 install --pre torch==1.13.0.dev20220618 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113
+pip3 install --pre torch==1.13.0.dev20220621 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113
pip3 install pytest parameterized expecttest
# install torch_tensorrt
mv WORKSPACE.ci WORKSPACE
10 changes: 6 additions & 4 deletions py/torch_tensorrt/fx/converters/acc_ops_converters.py
@@ -92,10 +92,12 @@ def acc_ops_conv1d(
kernel=weight,
bias=bias,
)
-padding = kwargs["padding"]
-padding = padding + (0,)
-stride = extend_attr_to_tuple(kwargs["stride"], 1)
-dilation = extend_attr_to_tuple(kwargs["dilation"], 1)
+# expand params to 2d for computation
+padding = list(kwargs["padding"])
+padding.append(0)
+stride = extend_attr_to_tuple(kwargs["stride"], 2)
+dilation = extend_attr_to_tuple(kwargs["dilation"], 2)

set_layer_name(layer, target, name)
layer.stride_nd = stride
layer.padding_nd = padding
6 changes: 3 additions & 3 deletions py/torch_tensorrt/fx/converters/convolution.py
@@ -32,13 +32,13 @@ def common_conv(network, mod, dimension, input_val, layer_name, is_quantized):
unsqueeze_layer.name = f"{layer_name}_unsqueeze"
input_val = unsqueeze_layer.get_output(0)

-padding = padding + (0,)
kernel = np.expand_dims(kernel, -1)
kernel_size = kernel.shape[2:]
if bias is not None:
bias = bias[None]
# bias = np.expand_dims(bias, -1)

+stride = (stride[0], 1)
+padding = (padding[0], 0)
+dilation = (dilation[0], 1)
layer = network.add_convolution_nd(
input=input_val,
num_output_maps=mod.out_channels,
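Both conv1d changes above widen the 1d parameters to 2d because the input itself is unsqueezed to 2d before add_convolution_nd is called. A minimal plain-Python sketch of that expansion (the helper name is illustrative, not part of the library):

def expand_conv1d_params(stride, padding, dilation):
    # Mirror the converter logic above: the extra spatial dim gets unit
    # stride/dilation and zero padding.
    return (stride[0], 1), (padding[0], 0), (dilation[0], 1)

print(expand_conv1d_params((2,), (1,), (1,)))  # ((2, 1), (1, 0), (1, 1))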
7 changes: 5 additions & 2 deletions py/torch_tensorrt/fx/input_tensor_spec.py
@@ -66,7 +66,10 @@ def from_tensors(cls, tensors: Sequence[torch.Tensor]) -> List["InputTensorSpec"]:

@classmethod
def from_tensors_with_dynamic_batch_size(
-cls, tensors: Sequence[torch.Tensor], batch_size_range: Tuple[int, int, int]
+cls,
+tensors: Sequence[torch.Tensor],
+batch_size_range: Tuple[int, int, int],
+opt_profile_replica: int = 1,
) -> List["InputTensorSpec"]:
"""
Produce a list of InputTenosrSpec named tuples which would contain
@@ -93,7 +96,7 @@ def from_tensors_with_dynamic_batch_size(
), f"The {i}th tensor (shape: {tensor.shape}) doesn't have the correct batch size: {batch_size}."
shape = list(tensor.shape)
shape[0] = -1
-shape_ranges: List[ShapeRange] = [tuple(tuple([bs] + shape[1:]) for bs in batch_size_range)] # type: ignore[list-item]
+shape_ranges: List[ShapeRange] = [tuple(tuple([bs] + shape[1:]) for bs in batch_size_range)] * opt_profile_replica # type: ignore[list-item]
input_specs.append(
cls(tuple(shape), tensor.dtype, tensor.device, shape_ranges)
)
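For context, a hedged usage sketch of the new argument (the module path follows this file's location; the shapes and batch sizes are made up):

import torch
from torch_tensorrt.fx.input_tensor_spec import InputTensorSpec

# One input with a dynamic batch dimension; opt_profile_replica=2 asks for two
# identical copies of the (min, opt, max) shape range, one per optimization profile.
specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(
    [torch.randn(8, 3, 224, 224)],
    batch_size_range=(1, 8, 32),
    opt_profile_replica=2,
)
# specs[0].shape_ranges now holds the same shape-range tuple twice.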
1 change: 1 addition & 0 deletions py/torch_tensorrt/fx/lower.py
@@ -100,6 +100,7 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
self.lower_setting.max_batch_size,
self.lower_setting.max_batch_size,
),
+self.lower_setting.opt_profile_replica,
)
if self.lower_setting.explicit_batch_dimension
else InputTensorSpec.from_tensors(input)
3 changes: 3 additions & 0 deletions py/torch_tensorrt/fx/lower_setting.py
@@ -69,6 +69,8 @@ class LowerSetting(LowerSettingBasic):
how presets are applied. Refer to
`caffe2.torch.fb.model_transform.fx2trt.presets.ESUHMLowererPreset` on how
to add a preset.
+opt_profile_replica (int): the number of optimization profiles to set for the TensorRT engine. This field is
+only used in explicit batch dim mode with dynamic shape.
"""

input_specs: List[InputTensorSpec] = dc.field(default_factory=list)
@@ -86,3 +88,4 @@ class LowerSetting(LowerSettingBasic):
save_timing_cache: bool = False
cuda_graph_batch_size: int = -1
preset_lowerer: str = ""
+opt_profile_replica: int = 1
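A hedged sketch of how the new setting would be used (the field names max_batch_size and explicit_batch_dimension are taken from the surrounding diff; the other defaults are assumed):

from torch_tensorrt.fx.lower_setting import LowerSetting

# Request two replicas of the dynamic-batch optimization profile. Per the
# docstring above, this only matters with explicit batch dim + dynamic shape.
setting = LowerSetting(
    explicit_batch_dimension=True,
    max_batch_size=32,
    opt_profile_replica=2,
)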
4 changes: 1 addition & 3 deletions py/torch_tensorrt/fx/passes/lower_basic_pass.py
@@ -31,9 +31,7 @@ def skip_folding_quant_dequant(node: torch.fx.Node):
return True
return False

-const_split_mod = split_const_subgraphs(
-traced_mod, skip_folding_quant_dequant, device_for_folded_attrs="cuda"
-)
+const_split_mod = split_const_subgraphs(traced_mod, skip_folding_quant_dequant)
const_split_mod.run_folding()
return const_split_mod

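The call above now relies on split_const_subgraphs' default folding device instead of forcing "cuda". A rough standalone sketch of the same const-folding pattern (the toy module is illustrative):

import torch
from torch.fx.experimental.const_fold import split_const_subgraphs


class AddConst(torch.nn.Module):
    def forward(self, x):
        return x + (torch.ones(3) * 2)  # constant subexpression to fold


traced = torch.fx.symbolic_trace(AddConst())
folded = split_const_subgraphs(traced)  # no device override; the default is used
folded.run_folding()                    # materialize folded constants as attributes
print(folded(torch.randn(3)))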
@@ -1,8 +1,8 @@
import torch
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec


class TestAdaptiveAvgPoolConverter(AccTestCase):
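These test diffs only repoint the shared helpers from torch.testing._internal.common_fx2trt to torch_tensorrt.fx.tools.common_fx2trt. A minimal hedged sketch of a converter test against the relocated AccTestCase (the relu module and op choice are illustrative):

import torch
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from torch.testing._internal.common_utils import run_tests
from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase


class TestReluConverter(AccTestCase):
    def test_relu(self):
        class Relu(torch.nn.Module):
            def forward(self, x):
                return torch.relu(x)

        # run_test traces the module, lowers it to TensorRT and compares outputs.
        self.run_test(Relu(), [torch.randn(1, 3, 8, 8)], expected_ops={acc_ops.relu})


if __name__ == "__main__":
    run_tests()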
2 changes: 1 addition & 1 deletion py/torch_tensorrt/fx/test/converters/acc_op/test_any.py
@@ -2,8 +2,8 @@
import torch.nn as nn
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase


class TestAnyConverters(AccTestCase):
@@ -2,8 +2,8 @@
import torch.nn as nn
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase


class TestConverter(AccTestCase):
@@ -1,8 +1,8 @@
import torch
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import param, parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase


class TestAvgPoolConverter(AccTestCase):
@@ -1,7 +1,7 @@
import torch
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec


class TestBatchNormConverter(AccTestCase):
@@ -5,8 +5,8 @@

import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec

NEED_TEST_BOTH_CONSTANTS_CASE = True

2 changes: 1 addition & 1 deletion py/torch_tensorrt/fx/test/converters/acc_op/test_cat.py
@@ -1,8 +1,8 @@
import torch
import torch.nn as nn
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec


class TestCatConverter(AccTestCase):
2 changes: 1 addition & 1 deletion py/torch_tensorrt/fx/test/converters/acc_op/test_chunk.py
@@ -2,8 +2,8 @@
import torch.nn as nn
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec


class TestChunkConverter(AccTestCase):
2 changes: 1 addition & 1 deletion py/torch_tensorrt/fx/test/converters/acc_op/test_clamp.py
@@ -1,8 +1,8 @@
import torch
import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
from parameterized import param, parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
from torch.testing._internal.common_utils import run_tests
+from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase


class TestClampConverter(AccTestCase):