Adds support for serialization and deserialization of compiled TorchScript modules #74

Merged: 17 commits, May 31, 2020

Commits (all by narendasan):
3381073  refactor(//core/execution): Embed engines in TorchScript modules, now
4f349a1  feat(//cpp/trtorchc): Adding a new CLI application for TRTorch which
670cf21  refactor(//cpp/trtorchexec): Demonstrate serialization in trtorchexec
eb636ec  chore(//third_party/args): Check in argparser library
736e914  feat(//py): register trtorch with torch op library to support
bf651dd  fix(aten::batchnorm|aten::view): Fix converter implementation for
863e0ce  refactor(//core/conversion/tensorcontainer): Set a better namespace for
ff81ebc  refactor(//core/conversion/conversionctx): Document if inputs are
31fd53d  refactor(//core/conversion/converters): add TRTORCH_UNUSED to all
aac8da4  refactor(//tests/modules/hub): Code style fixes
f3370c4  test: Test serialization to make sure it works
d647447  feat(//:libtrtorch): Ship trtorchc with the tarball
e9cef84  refactor(//core/execution): Remove redundant functions, stub out call
1709128  docs: Regenerated docs covering serialization
8b5465d  docs: Update README with serialization instructions
05bf696  docs: Clarification that the library must still be present to run
b763332  refactor(//tests/modules/test_serialization): make the code more clear
12 changes: 12 additions & 0 deletions BUILD
@@ -8,6 +8,8 @@ pkg_tar(
"//core/conversion:include",
"//core/conversion/conversionctx:include",
"//core/conversion/converters:include",
"//core/conversion/var:include",
"//core/conversion/tensorcontainer:include",
"//core/conversion/evaluators:include",
"//core/execution:include",
"//core/lowering:include",
@@ -35,6 +37,15 @@
)


pkg_tar(
name = "bin",
package_dir = "bin/",
srcs = [
"//cpp/trtorchc:trtorchc",
],
mode = "0755",
)



pkg_tar(
@@ -46,6 +57,7 @@
],
deps = [
":lib",
":bin",
":include",
":include_core",
],
3 changes: 3 additions & 0 deletions README.md
@@ -23,6 +23,8 @@ compile_settings.op_precision = torch::kFloat;
auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
// Run like normal
auto results = trt_mod.forward({in_tensor});
// Save module for later
trt_mod.save("trt_torchscript_module.ts");
...
```
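
Since the compiled module is saved as ordinary TorchScript, it can presumably be reloaded with the standard TorchScript API; per the docs commits in this PR, the only extra requirement is that the TRTorch library is still present at runtime so the `trt::execute_engine` op is registered. A minimal load-and-run sketch (the input shape is an assumption for illustration):

```c++
#include "torch/script.h" // torch::jit::load and tensor factories

int main() {
  // No TRTorch-specific API is needed to deserialize, but libtrtorch must
  // be linked (or otherwise loaded) so the trt::execute_engine op exists.
  torch::jit::script::Module trt_mod = torch::jit::load("trt_torchscript_module.ts");

  auto in_tensor = torch::randn({1, 3, 224, 224}, torch::kCUDA); // assumed shape
  auto results = trt_mod.forward({in_tensor});
  return 0;
}
```

The Python side should mirror this with `torch.jit.load("trt_torchscript_module.ts")`, again with `import trtorch` first so the runtime op is available.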

@@ -46,6 +48,7 @@ trt_ts_module = trtorch.compile(torch_script_module, compile_settings)

input_data = input_data.half()
result = trt_ts_module(input_data)
torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
```

> Notes on running in lower precisions:
68 changes: 54 additions & 14 deletions core/compiler.cpp
@@ -6,7 +6,9 @@
#include "NvInfer.h"

#include "ATen/core/function_schema.h"
#include "ATen/core/jit_type.h"

#include "torch/custom_class.h"
#include "torch/csrc/jit/frontend/function_schema_parser.h"
#include "torch/csrc/jit/ir/ir.h"
#include "torch/csrc/jit/passes/pass_manager.h"
@@ -40,32 +42,70 @@ c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::str


void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit::Graph>& g, std::string& serialized_engine) {
execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
auto num_io = execution::GetEngineIO(uid);

auto self = g->addInput("self.1");
auto engine = execution::TRTEngine(mod._ivalue()->name(), serialized_engine);
// Get the required metadata out of the engine
auto num_io = engine.num_io;
auto name = engine.name;

// Add the engine as an attribute of the module; this lets the engine be serialized and deserialized
auto engine_ptr = c10::make_intrusive<execution::TRTEngine>(engine);
mod.register_attribute(
name,
c10::getCustomClassType<c10::intrusive_ptr<execution::TRTEngine>>(),
c10::IValue(std::move(engine_ptr)),
false
);

// Add the module as an input into the graph
auto self = g->addInput("self_1");
self->setType(mod.type());

auto id_val = g->insertConstant(uid);
// Start by retrieving the engine from the module attribute list
auto engine_node = g->createGetAttr(self, name);
g->block()->appendNode(engine_node);

// Add inputs to the graph corresponding to the number of input tensors expected by the engine
// Also store those inputs in a vector so that they can be coalesced into a single list at runtime
std::vector<torch::jit::Value*> engine_inputs;
engine_inputs.push_back(id_val);

for (uint64_t i = 0; i < num_io.first; i++) {
auto in_val = g->addInput("");
auto in_val = g->addInput(std::string("input_") + std::to_string(i));
in_val->setType(c10::TensorType::get());
engine_inputs.push_back(in_val);
}

auto engine_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs), num_io.second);
g->block()->appendNode(engine_node);

if (engine_node->outputs().size() > 1) {
auto return_tuple_node = g->createTuple(engine_node->outputs());
// Create a node that will merge all of the input tensors into a single list argument to the trt::execute_engine op
// Creates: prim::ListConstruct(<input tensors>)
auto input_list_node = g->createList(c10::TensorType::get(), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs));
g->block()->appendNode(input_list_node);

// Make a list of inputs to the actual trt::execute_engine op
// Note: the list goes before the engine so the engine, which carries all the metadata
// needed for execution, can be popped off first
std::vector<torch::jit::Value*> execute_node_inputs;
execute_node_inputs.push_back(input_list_node->outputs()[0]);
execute_node_inputs.push_back(engine_node->outputs()[0]);

// Create the actual execution node trt::execute_engine using the assembled inputs
auto execute_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(execute_node_inputs), 1);
g->block()->appendNode(execute_node);
execute_node->outputs()[0]->setType(c10::ListType::ofTensors());

// Create a node to unpack the list into separate tensors. If there is only one tensor it is returned directly,
// otherwise the tensors are returned as a tuple.
// Creates: prim::ListUnpack(<engine output>)
auto unpack_node = g->createListUnpack(execute_node->outputs()[0], num_io.second);
g->block()->appendNode(unpack_node);

// If there are multiple output tensors from TensorRT we wrap them in a tuple to return
if (unpack_node->outputs().size() > 1) {
// Creates prim::TupleConstruct(<output tensors>) using outputs of the unpack node
auto return_tuple_node = g->createTuple(unpack_node->outputs());
g->block()->appendNode(return_tuple_node);
// Set the output as the produced tuple
g->registerOutput(return_tuple_node->outputs()[0]);
} else {
g->registerOutput(engine_node->outputs()[0]);
// Set the output as the sole output tensor
g->registerOutput(unpack_node->outputs()[0]);
}

LOG_DEBUG(*g << "(AddEngineToGraph)\n");
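
Embedding the engine as a module attribute only survives `torch.jit.save`/`torch::jit::load` because `TRTEngine` is a TorchBind custom class that knows how to pickle itself. The actual registration lives in `core/execution`, which this diff does not show; the sketch below is a guess at the mechanism, with the namespace, class layout, and pickled state all assumed rather than taken from the PR:

```c++
#include <string>
#include <vector>
#include "torch/custom_class.h"

// Hypothetical stand-in for core/execution's TRTEngine.
struct TRTEngine : torch::CustomClassHolder {
  std::string name;
  std::string serialized_engine;
  TRTEngine(std::string n, std::string e)
      : name(std::move(n)), serialized_engine(std::move(e)) {}
};

// def_pickle supplies the __getstate__/__setstate__ pair TorchScript uses
// to write the attribute into the saved module and rebuild it on load.
static auto trt_engine_class =
    torch::class_<TRTEngine>("tensorrt", "Engine")
        .def(torch::init<std::string, std::string>())
        .def_pickle(
            [](const c10::intrusive_ptr<TRTEngine>& self)
                -> std::vector<std::string> {
              return {self->name, self->serialized_engine};
            },
            [](std::vector<std::string> state) -> c10::intrusive_ptr<TRTEngine> {
              return c10::make_intrusive<TRTEngine>(state[0], state[1]);
            });
```

On load, `__setstate__` rebuilds the engine from the serialized blob, which is consistent with the docs commit noting that the library must still be present to run the module.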
3 changes: 2 additions & 1 deletion core/conversion/InterfaceTypes.cpp
@@ -34,7 +34,7 @@ InputRange::InputRange(std::vector<int64_t> d) {
min = util::toDims(d);
max = util::toDims(d);
input_shape = util::toDims(d);

input_is_dynamic = false;
}


@@ -67,6 +67,7 @@ InputRange::InputRange(std::vector<int64_t> min_shape, std::vector<int64_t> opt_
dim.insert(max_shape[i]);
if (dim.size() != 1) {
dyn_shape.push_back(-1);
input_is_dynamic = true;
} else {
dyn_shape.push_back(opt_shape[i]);
}
4 changes: 4 additions & 0 deletions core/conversion/conversion.cpp
@@ -155,6 +155,10 @@ void AddInputs(ConversionCtx* ctx,
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, dims.opt);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMAX, dims.max);

if (dims.input_is_dynamic) {
ctx->input_is_dynamic = true;
}

ctx->value_tensor_map[in] = trt_in;
}

1 change: 1 addition & 0 deletions core/conversion/conversion.h
@@ -15,6 +15,7 @@ struct InputRange {
nvinfer1::Dims max;
nvinfer1::Dims opt;
nvinfer1::Dims input_shape;
bool input_is_dynamic = false;
// Should we restrict to unsigned?
InputRange(std::vector<int64_t> d);
InputRange(std::vector<int64_t> min_shape,
1 change: 1 addition & 0 deletions core/conversion/conversionctx/ConversionCtx.h
@@ -42,6 +42,7 @@ struct ConversionCtx {

~ConversionCtx();

bool input_is_dynamic = false;
nvinfer1::IBuilder* builder;
nvinfer1::INetworkDefinition* net;
nvinfer1::IBuilderConfig* cfg;
20 changes: 16 additions & 4 deletions core/conversion/converters/impl/batch_norm.cpp
@@ -19,12 +19,24 @@ auto batch_norm_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
auto orig_shape = input->getDimensions();
auto shape = util::toVec(orig_shape);
auto options = torch::TensorOptions().dtype(torch::kFloat32);
auto gamma = args[1].unwrapToTensor(at::full({shape}, 1, {options}));
auto beta = args[2].unwrapToTensor(at::full({shape}, 1, {options}));
auto mean = args[3].unwrapToTensor(at::full({shape}, 0, {options}));
auto var = args[4].unwrapToTensor(at::full({shape}, 0, {options}));

torch::Tensor gamma, beta, mean, var;

if (ctx->input_is_dynamic) {
gamma = args[1].unwrapToTensor();
beta = args[2].unwrapToTensor();
mean = args[3].unwrapToTensor();
var = args[4].unwrapToTensor();
} else {
gamma = args[1].unwrapToTensor(at::full({shape}, 1, {options}));
beta = args[2].unwrapToTensor(at::full({shape}, 1, {options}));
mean = args[3].unwrapToTensor(at::full({shape}, 0, {options}));
var = args[4].unwrapToTensor(at::full({shape}, 0, {options}));
}

auto eps = args[7].unwrapToDouble(1e-5f);


LOG_DEBUG("momentum disregarded");
LOG_DEBUG("training disregarded");
LOG_DEBUG("cudnn disregarded");
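
The dynamic/static split exists because, with dynamic input ranges, the conversion-time shape contains -1 wildcards (see the InterfaceTypes.cpp change above), and a default tensor cannot be allocated with a negative extent; fallback weights can only be synthesized when the shape is fully known. A small standalone illustration (shapes invented for the example):

```c++
#include <iostream>
#include "torch/torch.h"

int main() {
  auto options = torch::TensorOptions().dtype(torch::kFloat32);

  // Static case: every extent is known at conversion time, so defaulted
  // gamma/beta/mean/var tensors can be materialized with at::full.
  std::vector<int64_t> static_shape = {1, 16, 32, 32};
  auto gamma = at::full(static_shape, 1, options);
  std::cout << gamma.sizes() << std::endl; // [1, 16, 32, 32]

  // Dynamic case: wildcard dims are -1 at conversion time, and at::full
  // throws on a negative extent, so the converter instead unwraps the
  // real tensors with no synthesized fallback.
  std::vector<int64_t> dynamic_shape = {-1, 16, -1, -1};
  // at::full(dynamic_shape, 1, options); // would throw
  return 0;
}
```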
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/concat.cpp
@@ -8,7 +8,7 @@ namespace conversion {
namespace converters {
namespace impl {
namespace {
auto cat_registrations = RegisterNodeConversionPatterns()
auto cat_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::cat(Tensor[] tensors, int dim=0) -> Tensor",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
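
`TRTORCH_UNUSED` marks these file-local registration objects, which exist only for their constructor's side effect, so the compiler does not warn that they are never read. The real definition lives in TRTorch's headers and is not part of this diff; a plausible sketch:

```c++
// Hypothetical definition: tell GCC/Clang the static registration
// object is intentionally unused; expand to nothing elsewhere.
#if defined(__GNUC__) || defined(__clang__)
#define TRTORCH_UNUSED __attribute__((__unused__))
#else
#define TRTORCH_UNUSED
#endif
```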
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/constant.cpp
@@ -7,7 +7,7 @@ namespace conversion {
namespace converters {
namespace impl {
namespace {
auto constant_registrations = RegisterNodeConversionPatterns()
auto constant_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"trt::const(Tensor self) -> Tensor",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/conv_deconv.cpp
@@ -9,7 +9,7 @@ namespace conversion {
namespace converters {
namespace impl {
namespace {
auto conv_registrations = RegisterNodeConversionPatterns()
auto conv_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
R"SIG(aten::_convolution(Tensor input, Tensor weight,
Tensor? bias, int[] stride, int[] padding,
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/element_wise.cpp
@@ -68,7 +68,7 @@ nvinfer1::ILayer* add_elementwise(ConversionCtx* ctx, nvinfer1::ElementWiseOpera

}

auto element_wise_registrations = RegisterNodeConversionPatterns()
auto element_wise_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::add.Tensor(Tensor self, Tensor other, Scalar alpha=1) -> Tensor",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/linear.cpp
@@ -8,7 +8,7 @@ namespace converters {
namespace impl {
namespace {

auto linear_registrations = RegisterNodeConversionPatterns()
auto linear_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::linear(Tensor input, Tensor weight, Tensor? bias = None) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/matrix_multiply.cpp
@@ -8,7 +8,7 @@ namespace converters {
namespace impl {
namespace {

auto mm_registrations = RegisterNodeConversionPatterns()
auto mm_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::matmul(Tensor self, Tensor other) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/pooling.cpp
@@ -8,7 +8,7 @@ namespace converters {
namespace impl {
namespace {

auto pooling_registrations = RegisterNodeConversionPatterns()
auto pooling_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], int[2] dilation=[1, 1], bool ceil_mode=False) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/reduce.cpp
@@ -11,7 +11,7 @@ namespace {



auto reduce_registrations = RegisterNodeConversionPatterns()
auto reduce_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::mean(Tensor self, *, ScalarType? dtype=None) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/shape.cpp
@@ -9,7 +9,7 @@ namespace converters {
namespace impl {
namespace {

static auto shape_registrations = RegisterNodeConversionPatterns()
static auto shape_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
// To use in static input size cases (explicit batch)
"aten::size.int(Tensor self, int dim) -> (Tensor)",
6 changes: 2 additions & 4 deletions core/conversion/converters/impl/shuffle.cpp
@@ -9,7 +9,7 @@ namespace converters {
namespace impl {
namespace {

static auto shuffle_registrations = RegisterNodeConversionPatterns()
static auto shuffle_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -50,12 +50,10 @@ static auto shuffle_registrations = RegisterNodeConversionPatterns()
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
auto in = args[0].ITensor();
auto in_shape = util::toVec(in->getDimensions());
auto ex_tensor = torch::rand(in_shape);
auto new_shape = ex_tensor.view(args[1].unwrapToIntList().vec()).sizes();

auto shuffle = ctx->net->addShuffle(*in);
TRTORCH_CHECK(shuffle, "Unable to create shuffle layer from node: " << *n);
shuffle->setReshapeDimensions(util::toDims(new_shape));
shuffle->setReshapeDimensions(util::toDims(args[1].unwrapToIntList().vec()));
shuffle->setName(util::node_info(n).c_str());

auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle->getOutput(0));
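
The old code built a random CPU tensor just so `aten::view` could resolve the requested sizes; the rewrite passes them straight through, relying on TensorRT's `IShuffleLayer` resolving a single -1 wildcard from the input volume the same way `view` does. A sketch of the dims conversion this leans on (roughly what `util::toDims` presumably does here, with an invented name):

```c++
#include <vector>
#include "NvInfer.h"

// Map aten::view sizes onto TensorRT reshape dims. A -1 entry passes
// through unchanged; IShuffleLayer::setReshapeDimensions infers that
// extent from the remaining volume, matching view's semantics.
nvinfer1::Dims toReshapeDims(const std::vector<int64_t>& sizes) {
  nvinfer1::Dims d;
  d.nbDims = static_cast<int>(sizes.size());
  for (size_t i = 0; i < sizes.size(); i++) {
    d.d[i] = static_cast<int>(sizes[i]);
  }
  return d;
}

// e.g. an input of dims (2, 3, 4) reshaped with sizes (6, -1) comes out
// as (6, 4), with no example tensor ever allocated.
```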
2 changes: 1 addition & 1 deletion core/conversion/converters/impl/softmax.cpp
@@ -7,7 +7,7 @@ namespace converters {
namespace impl {
namespace {

static auto softmax_registrations = RegisterNodeConversionPatterns()
static auto softmax_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
.pattern({
"aten::softmax.int(Tensor self, int dim, int? dtype=None) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
2 changes: 1 addition & 1 deletion core/conversion/tensorcontainer/TensorContainer.cpp
@@ -6,7 +6,7 @@ namespace conversion {
namespace {

static auto tensor_container =
torch::class_<TensorContainer>("_eval_ivalue_types", "TensorContainer")
torch::class_<TensorContainer>("_trtorch_eval_ivalue_types", "TensorContainer")
.def(torch::init<>());
} // namespace
} // conversion
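
The rename matters because `torch::class_<T>("ns", "Name")` exposes the type to TorchScript under the qualified name `__torch__.torch.classes.ns.Name` in a process-wide registry, so a generic namespace like `_eval_ivalue_types` could collide with another extension registering the same pair. A minimal sketch of the registration pattern (the holder's contents are elided in the diff and assumed empty here):

```c++
#include "torch/custom_class.h"

// Stand-in for the real TensorContainer (contents assumed).
struct TensorContainer : torch::CustomClassHolder {};

static auto container =
    torch::class_<TensorContainer>("_trtorch_eval_ivalue_types", "TensorContainer")
        .def(torch::init<>());
```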
2 changes: 1 addition & 1 deletion core/conversion/var/BUILD
@@ -30,7 +30,7 @@ load("@rules_pkg//:pkg.bzl", "pkg_tar")

pkg_tar(
name = "include",
package_dir = "core/conversion/arg/",
package_dir = "core/conversion/var/",
srcs = [
"Var.h",
"Var_inl.h"
1 change: 0 additions & 1 deletion core/execution/BUILD
@@ -14,7 +14,6 @@ cc_library(
],
srcs = [
"TRTEngine.cpp",
"TRTEngineManager.cpp",
"register_trt_op.cpp",
],
deps = [