From 443455370eeda8dd4b7033f216ca2f894d0e7c95 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 13 Jan 2022 15:11:56 +0800 Subject: [PATCH 01/22] feat: [collection] make torch_tensorrt::core::ir::Input and torch_tensorrt::Input compatible with IValue. Support simple case of tuple input model. Add unit test. Signed-off-by: inocsin --- core/compiler.cpp | 17 ++-- core/compiler.h | 10 ++- core/conversion/conversion.cpp | 1 + core/ir/StaticParams.cpp | 5 +- core/ir/ir.cpp | 50 +++++++++-- core/ir/ir.h | 24 +++++- cpp/include/torch_tensorrt/torch_tensorrt.h | 28 ++++++- cpp/src/compile_spec.cpp | 66 +++++++++++++++ cpp/src/torch_tensorrt.cpp | 4 + tests/cpp/BUILD | 20 ++++- tests/cpp/test_collection.cpp | 91 +++++++++++++++++++++ tests/py/test_collection.py | 55 +++++++++++++ 12 files changed, 354 insertions(+), 17 deletions(-) create mode 100644 tests/cpp/test_collection.cpp create mode 100644 tests/py/test_collection.py diff --git a/core/compiler.cpp b/core/compiler.cpp index b684b808f5..45ecd3c993 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -254,6 +254,7 @@ GraphAndMapping ConstructFallbackGraph( // update the input ranges for each segments convert_cfg.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params); + // TODO mapping Inputs Ivalue to flatten one here auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, static_params); auto temp_g = std::make_shared(); auto device_spec = convert_cfg.engine_settings.device; @@ -307,11 +308,17 @@ void MapInputsAndDetermineDTypes( ir::TypeMap& first_use_type_map) { // Associate input specs with inputs cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params)); - - for (auto& in : g->inputs()) { - if (static_params.find(in) == static_params.end()) { + auto tensor_inputs = ir::get_tensor_inputs(g, static_params); + LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", tensor_inputs size " << tensor_inputs.size()); + // for (auto& in : g->inputs()) { + // if (static_params.find(in) == static_params.end()) { + for (auto in : tensor_inputs) { ir::Input& spec = cfg.convert_info.inputs.find(in)->second; - auto est_type_opt = first_use_type_map.find(in)->second; + c10::optional est_type_opt = {}; + auto est_it = first_use_type_map.find(in); + if (est_it != first_use_type_map.end()) { + est_type_opt = first_use_type_map.find(in)->second; + } if (est_type_opt && !spec.dtype_is_user_defined) { // If we can calculate the type from the graph and the type was not defined by the user then use the calculated // type @@ -354,7 +361,7 @@ void MapInputsAndDetermineDTypes( // The user defined the type so no changes are necessary } } - } + // } } uint64_t GetRecommendedWorkspaceSize(const runtime::CudaDevice& device) { diff --git a/core/compiler.h b/core/compiler.h index c1bb85aa3b..1743f566b9 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -8,12 +8,20 @@ #include "core/partitioning/partitioning.h" #include "core/runtime/runtime.h" #include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/ir/ir.h" namespace torch_tensorrt { namespace core { struct CompileSpec { - CompileSpec(std::vector inputs) : inputs(inputs) {} + CompileSpec(std::vector inputs) : inputs(inputs) { + graph_inputs = ir::GraphInputs(inputs); + } + CompileSpec(torch::jit::IValue& input_signature) { + graph_inputs = ir::GraphInputs(input_signature); + inputs = graph_inputs.flattened_inputs; + } + ir::GraphInputs graph_inputs; std::vector inputs; 
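+  // Intended relationship between the two members: graph_inputs carries the
+  // full, possibly nested spec, while `inputs` keeps the flattened view the
+  // rest of the pipeline consumes; e.g. a signature shaped like ((a, b), c)
+  // flattens to {a, b, c}.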
conversion::ConversionInfo convert_info; lowering::LowerInfo lower_info; diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index 8da79b13a3..56e484e898 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -184,6 +184,7 @@ void AddInputs( ctx->input_is_dynamic = true; } + // mapping torch Value to tensorrt iTensor ctx->value_tensor_map[in] = trt_in; ctx->num_inputs += 1; } diff --git a/core/ir/StaticParams.cpp b/core/ir/StaticParams.cpp index ac16c72d9f..0fe03e4aff 100644 --- a/core/ir/StaticParams.cpp +++ b/core/ir/StaticParams.cpp @@ -11,7 +11,10 @@ StaticParams get_static_params(c10::ArrayRef inputs, std::ve StaticParams static_params; auto param_it = params.begin(); for (auto in : inputs) { - if (in->type() != c10::TensorType::get() && param_it != params.end()) { + // handle TensorType, TupleType and ListType + if (in->type() != c10::TensorType::get() && + !in->type()->isSubtypeOf(c10::TupleType::create()) && + !in->type()->isSubtypeOf(c10::ListType::ofTensors()) && param_it != params.end()) { static_params[in] = *param_it; ++param_it; } diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index 1c1813ea5f..ce97fa9dbe 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -32,7 +32,9 @@ std::vector get_tensor_inputs( StaticParams& static_params) { std::vector input_tensors; auto inputs = g->inputs(); + LOG_DEBUG("Inputs size " << inputs.size()); for (auto in : inputs) { + LOG_DEBUG("input debug name: " << in->debugName()); // Disregarding inputs that are not tensors or are static // // Ex. @@ -40,6 +42,27 @@ std::vector get_tensor_inputs( // input.1:Tensor -> used if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); + } else if (in->type()->cast() && static_params.find(in) == static_params.end()) { + // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) { + at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(in); + LOG_DEBUG("Tuple size " << unpack_tuple.size()); + for (auto item: unpack_tuple) { + input_tensors.push_back(in); + } + } else if (in->type()->isSubtypeOf(c10::ListType::ofTensors()) && static_params.find(in) == static_params.end()) { + + LOG_DEBUG("List use size " << in->uses().size()); + // for (auto use : in->uses()) { + // LOG_DEBUG(use.user->outputs()[0]->debugName()); + // } + // TODO: set the correct list number according to the Input IValue + int n = 2; + auto unpack_node = g->createListUnpack(in, n); + g->block()->appendNode(unpack_node); + for (auto item: unpack_node->outputs()) { + input_tensors.push_back(item); + } + LOG_DEBUG("Unpack List of size " << n); } } return input_tensors; @@ -52,14 +75,17 @@ c10::optional get_value_first_calc_dtype_opt(torch::jit::Block* auto b_ins = b->inputs(); std::unordered_set b_in_set(b_ins.begin(), b_ins.end()); - TORCHTRT_ASSERT( - in->type() == c10::TensorType::get(), "Input is not a tensor, cannot check for dtype based on calculation"); + // TORCHTRT_ASSERT( + // in->type() == c10::TensorType::get(), "Input is not a tensor, cannot check for dtype based on calculation"); auto consumers = in->uses(); auto search_list = std::vector(consumers.begin(), consumers.end()); - - for (auto iter = search_list.begin(); iter != search_list.end(); ++iter) { - auto n = iter->user; + LOG_DEBUG("Users number for " << in->debugName() << ": " << consumers.size()); + while(search_list.size() > 0) { + // after insertion, original iterator will be invalid + 
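+    // Worklist traversal: take the use at the front, erase it from the head
+    // of the vector, and inspect its node; freshly appended uses are still
+    // visited, which avoids the iterator invalidation noted above.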
+    auto& u = search_list.front();
+    search_list.erase(search_list.begin());
+    auto n = u.user;
     LOG_GRAPH("Node we are looking at: " << util::node_info(n));
     auto ins = n->inputs();
     auto outs = n->outputs();
@@ -142,16 +168,28 @@ c10::optional<at::ScalarType> get_value_first_calc_dtype_opt(torch::jit::Block*
 
 TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b) {
   TypeMap types;
-
   for (auto i : b->inputs()) {
     if (i->type() == c10::TensorType::get()) {
       torch::jit::Value* in = i;
       types.insert({in, get_value_first_calc_dtype_opt(b, i)});
+    } else if (i->type()->cast<c10::TupleType>()) {
+      // make sure we get the same ptr every time
+      at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(i);
+      LOG_DEBUG("Tuple size " << unpack_tuple.size());
+      for (auto item : unpack_tuple) {
+        torch::jit::Value* in = item;
+        types.insert({in, get_value_first_calc_dtype_opt(b, i)});
+      }
+    } else if (i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
+      LOG_INFO("Unsupported type of c10::ListType::ofTensors()");
+    }
   }
   return types;
 }
 
+static auto core_input_container =
+    torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
+
 } // namespace ir
 } // namespace core
 } // namespace torch_tensorrt
diff --git a/core/ir/ir.h b/core/ir/ir.h
index 2d9acccc69..056f257f22 100644
--- a/core/ir/ir.h
+++ b/core/ir/ir.h
@@ -11,9 +11,10 @@ namespace torch_tensorrt {
 namespace core {
 namespace ir {
 
-struct Input {
+struct Input : torch::CustomClassHolder {
   // Input(std::vector<int64_t> shape);
   // Input(std::vector<int64_t> min_shape, std::vector<int64_t> opt_shape, std::vector<int64_t> max_shape);
+  Input() {};
   Input(
       std::vector<int64_t> shape,
       nvinfer1::DataType dtype = nvinfer1::DataType::kFLOAT,
@@ -36,13 +37,34 @@ struct Input {
   nvinfer1::Dims opt;
   nvinfer1::DataType dtype;
   nvinfer1::TensorFormat format;
+  int id;
 };
 
+// Add to spec
+struct GraphInputs {
+  GraphInputs() {}
+  GraphInputs(torch::jit::IValue inputs) {
+    input_signature = inputs;
+    // TODO flatten IValue
+  }
+  GraphInputs(std::vector<Input> inputs) {
+    flattened_inputs = inputs;
+    // TODO construct the IValue
+  }
+  torch::jit::IValue input_signature; // nested Input, full input spec
+  std::vector<Input> flattened_inputs; // flattened Input
+};
+
+typedef std::pair GraphIO; // Graph input output mapping
+
 using StaticParams = std::map<torch::jit::Value*, torch::jit::IValue>;
 StaticParams get_static_params(c10::ArrayRef<torch::jit::Value*> inputs, std::vector<torch::jit::IValue> params);
 
 using InputSpecMap = std::unordered_map<const torch::jit::Value*, Input>;
 
+std::vector<const torch::jit::Value*> get_tensor_inputs(
+    std::shared_ptr<torch::jit::Graph>& g,
+    StaticParams& static_params);
 InputSpecMap associate_specs_with_inputs(
     std::shared_ptr<torch::jit::Graph>& g,
     std::vector<Input> specs,
diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h
index ace05d33f5..63dc96e654 100644
--- a/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/cpp/include/torch_tensorrt/torch_tensorrt.h
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include "torch/custom_class.h" // Just include the .h?
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -363,7 +364,7 @@ class TORCHTRT_API TensorFormat {
 * signifying a static input shape or a set of three input shapes representing
 * the min, optimal and max input shapes allowed for the engine.
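 * For example (illustrative): Input({1, 3, 224, 224}) pins a static shape,
 * while Input({1, 3, 224, 224}, {4, 3, 224, 224}, {8, 3, 224, 224}) defines a
 * min/opt/max range.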
 */
-struct TORCHTRT_API Input {
+struct TORCHTRT_API Input : torch::CustomClassHolder {
   /// Minimum acceptable input size into the engine
   std::vector<int64_t> min_shape;
   /// Optimal input size into the engine (size optimized for given kernels accept any size in min max range)
@@ -378,6 +379,7 @@ struct TORCHTRT_API Input {
   /// Expected tensor format for the input
   TensorFormat format;
 
+  Input() {}
   /**
    * @brief Construct a new Input spec object for static input size from
    * vector, optional arguments allow the user to configure expected input shape
@@ -512,6 +514,16 @@ struct TORCHTRT_API Input {
   bool input_is_dynamic;
 };
 
+/**
+ * @brief A struct to hold complex inputs
+ *
+ * This struct can hold either a complex (nested) input spec or a flattened one.
+ */
+struct TORCHTRT_API GraphInputs {
+  torch::jit::IValue input_signature; // nested Input, full input spec
+  std::vector<Input> flattened_inputs; // flattened Input
+};
+
 /**
 * @brief Get the build information for the library including the dependency
 * versions
@@ -581,6 +593,15 @@ struct TORCHTRT_API CompileSpec {
   */
  CompileSpec(std::vector<Input> inputs) : inputs(std::move(inputs)) {}
 
+  /**
+   * @brief Construct a new CompileSpec object from an IValue.
+   * The IValue stores a complex (nested) Input spec
+   *
+   * @param input_signature
+   */
+  CompileSpec(torch::jit::IValue input_signature) {
+    graph_inputs.input_signature = input_signature;
+  }
  // Defaults should reflect TensorRT defaults for BuilderConfig
 
  /**
@@ -591,6 +612,11 @@ struct TORCHTRT_API CompileSpec {
   */
  std::vector<Input> inputs;
 
+  /**
+   * @brief Specifications for inputs to the engine; can store an IValue holding a complex (nested) Input spec
+   * or a flattened Input list
+   */
+  GraphInputs graph_inputs;
  /**
   * @brief The set of precisions TensorRT is allowed to use for kernels during compilation
   *
diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp
index 3058b23ce0..74de9e4801 100644
--- a/cpp/src/compile_spec.cpp
+++ b/cpp/src/compile_spec.cpp
@@ -20,16 +20,82 @@ CompileSpec::CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes) {
   for (auto in : fixed_sizes) {
     inputs.push_back(Input(in));
   }
+  graph_inputs.flattened_inputs = inputs;
 }
 
 CompileSpec::CompileSpec(std::vector<c10::ArrayRef<int64_t>> fixed_sizes) {
   for (auto in : fixed_sizes) {
     inputs.push_back(Input(in));
   }
+  graph_inputs.flattened_inputs = inputs;
+}
+
+void flatten_dfs(std::vector<torchtrt::core::ir::Input>& flattened_inputs, torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
+  if (input_ivalue.isTuple()) {
+    auto input_tuple = input_ivalue.toTuple();
+    std::vector<torch::jit::IValue> converted_elements;
+    for (auto item : input_tuple->elements()) {
+      torch::jit::IValue converted_item;
+      flatten_dfs(flattened_inputs, item, converted_item);
+      converted_elements.push_back(converted_item);
+      auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+      converted_ivalue = torch::jit::IValue(tuple_ptr);
+    }
+  } else if (input_ivalue.isList()) {
+    auto input_list = input_ivalue.toList().vec();
+    c10::TypePtr type = input_list[0].type();
+    auto converted_elements = c10::impl::GenericList(type);
+    // std::vector<torch::jit::IValue> converted_elements;
+    for (auto item : input_list) {
+      torch::jit::IValue converted_item;
+      flatten_dfs(flattened_inputs, item, converted_item);
+      converted_elements.push_back(converted_item);
+    }
+    converted_ivalue = torch::jit::IValue(converted_elements);
+  } else if (input_ivalue.isCustomClass()) {
+    torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
+    flattened_inputs.push_back(cur_input);
+    converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+  }
+}
+
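+// Illustrative walk-through (hypothetical spec): given an input_signature of
+//   (Input(a), [Input(b), Input(c)])
+// flatten_dfs appends a, b, c to flattened_inputs in visit order and rebuilds
+// the same tuple/list nesting in converted_ivalue with core ir::Input leaves.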
+torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs external_graph_input) { + torch_tensorrt::core::ir::GraphInputs internal_graph_input; + + // flattened version + if (external_graph_input.flattened_inputs.size() > 0) { + // std::vector input_shape_list; + auto empty_ivalue = torch::jit::IValue(c10::make_intrusive(torchtrt::core::ir::Input())); + c10::TypePtr type = empty_ivalue.type(); + auto input_shape_list = c10::impl::GenericList(type); + std::vector internal_input = to_vec_internal_inputs(external_graph_input.flattened_inputs); + for (auto input_shape: internal_input) { + auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + input_shape_list.push_back(input_shape_ivalue); + } + + torch::jit::IValue input_signature(input_shape_list); + internal_graph_input.flattened_inputs = internal_input; + internal_graph_input.input_signature = input_signature; + + } + // nested version + else { + std::vector flattened_inputs; + torch::jit::IValue input_signature; + flatten_dfs(flattened_inputs, external_graph_input.input_signature, input_signature); + internal_graph_input.flattened_inputs = flattened_inputs; + internal_graph_input.input_signature = input_signature; + printf("in nested version branch\n"); + + } + return internal_graph_input; } torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) { torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.inputs)); + internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs); + internal.inputs = internal.graph_inputs.flattened_inputs; for (auto p : external.enabled_precisions) { internal.convert_info.engine_settings.enabled_precisions.insert(toTRTDataType(p)); diff --git a/cpp/src/torch_tensorrt.cpp b/cpp/src/torch_tensorrt.cpp index 42b44833de..9d6f271332 100644 --- a/cpp/src/torch_tensorrt.cpp +++ b/cpp/src/torch_tensorrt.cpp @@ -30,6 +30,7 @@ torch::jit::script::Module compile(const torch::jit::script::Module& module, Com LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here + printf("in torch_tensorrt::ts::compile\n"); return torch_tensorrt::core::CompileGraph(module, to_internal_compile_spec(info)); } @@ -52,4 +53,7 @@ void set_device(const int gpu_id) { // Want to export a much simpler (non CUDA header dependent) API torch_tensorrt::core::set_device(gpu_id); } + +static auto tensorrt_input_container = + torch::class_("_torch_tensorrt", "Input").def(torch::init<>()); } // namespace torch_tensorrt diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD index 3d69afba95..2d545dc8f1 100644 --- a/tests/cpp/BUILD +++ b/tests/cpp/BUILD @@ -18,7 +18,8 @@ test_suite( ":test_multiple_registered_engines", ":test_serialization", ":test_module_fallback", - ":test_example_tensors" + ":test_example_tensors", + ":test_collection" ], ) @@ -32,7 +33,8 @@ test_suite( ":test_multiple_registered_engines", ":test_serialization", ":test_module_fallback", - ":test_example_tensors" + ":test_example_tensors", + ":test_collection" ], ) @@ -122,6 +124,20 @@ cc_test( }) ) +cc_test( + name = "test_collection", + srcs = ["test_collection.cpp"], + data = [ + "//tests/modules:jit_models", + ], + deps = [ + "//tests/util", + "@googletest//:gtest_main", + ] + select({ + ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], + "//conditions:default": ["@libtorch//:libtorch"], + }) +) cc_test( name = "test_compiled_modules", srcs = ["test_compiled_modules.cpp"], diff --git 
a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp
new file mode 100644
index 0000000000..a48e642a1c
--- /dev/null
+++ b/tests/cpp/test_collection.cpp
@@ -0,0 +1,91 @@
+#include <string>
+#include <iostream>
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/script.h"
+#include "torch_tensorrt/torch_tensorrt.h"
+
+
+TEST(CppAPITests, TestCollection) {
+
+
+  std::string path =
+      // "/opt/trtorch/tuple2model.ts";
+      // "/opt/trtorch/tuple2_list2_v3.ts";
+      // "/opt/trtorch/tuple2_tuple2_v3.ts";
+      "/opt/trtorch/tuple2_v3.ts";
+  // "/opt/trtorch/list2_list2_v3.ts";
+  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
+  std::vector<torch::Tensor> inputs;
+  inputs.push_back(in0);
+
+  torch::jit::Module mod;
+  try {
+    // Deserialize the ScriptModule from a file using torch::jit::load().
+    mod = torch::jit::load(path);
+  } catch (const c10::Error& e) {
+    std::cerr << "error loading the model\n";
+  }
+  mod.eval();
+  mod.to(torch::kCUDA);
+
+
+  std::vector<torch::jit::IValue> inputs_;
+
+  for (auto in : inputs) {
+    inputs_.push_back(torch::jit::IValue(in.clone()));
+  }
+
+
+  std::vector<torch::jit::IValue> complex_inputs, complex_inputs_list;
+  std::vector<torch::jit::IValue> tuple;
+  std::tuple<torch::Tensor, torch::Tensor> input_tuple(in0, in0);
+  // auto input_list = c10::impl::GenericList(c10::TensorType::get());
+  // input_list.push_back(inputs_[0]);
+  // input_list.push_back(inputs_[0]);
+
+  // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list);
+
+  complex_inputs.push_back(input_tuple);
+  complex_inputs_list.push_back(in0);
+  complex_inputs_list.push_back(in0);
+
+
+
+  auto out = mod.forward(complex_inputs);
+  LOG_DEBUG("Finish TorchScript forward");
+
+
+  auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown);
+
+  auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
+
+
+  c10::TypePtr elementType = input_shape_ivalue.type();
+  auto list = c10::impl::GenericList(elementType);
+  list.push_back(input_shape_ivalue);
+  list.push_back(input_shape_ivalue);
+
+  std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
+
+
+  torch::jit::IValue complex_input_shape(input_shape_tuple);
+  // torch::jit::IValue complex_input_shape(list);
+
+  auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape);
+  compile_settings.require_full_compilation = false;
+  // compile_settings.torch_executed_modules.push_back("model1");
+  // compile_settings.torch_executed_ops.push_back("aten::sub");
+
+
+  // // FP16 execution
+  // compile_settings.enabled_precisions = {torch::kHalf};
+  // // Compile module
+  auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
+  LOG_DEBUG("Finish compile");
+  // auto trt_out = trt_mod.forward(complex_inputs);
+  auto trt_out = trt_mod.forward(complex_inputs_list);
+
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
+}
\ No newline at end of file
diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py
new file mode 100644
index 0000000000..610bf9fe9c
--- /dev/null
+++ b/tests/py/test_collection.py
@@ -0,0 +1,55 @@
+import torch
+import copy
+import torch.nn as nn
+import torch.nn.functional as F
+from typing import Tuple, List, Dict
+
+class Model1(nn.Module):
+    def __init__(self):
+        super(Model1, self).__init__()
+
+    def forward(self, z: Tuple[torch.Tensor, torch.Tensor]):
+        r = z[1] + z[0]
+        return r, z[1]
+
+
+class TestModel1(nn.Module):
+    def __init__(self):
+        super(TestModel1, self).__init__()
+        self.model1 = Model1()
+
+    def forward(self, z: Tuple[torch.Tensor, torch.Tensor]):
+        r2, r1 = self.model1((z[0], z[1]))
+        # unsupported ops
+        i = r2.size(1)
+        j = r2.size(2)
+#         r3 = torch.tensor(i) * torch.tensor(j)
+        r3 = r2[0,0,0,0]
+        k = int(r3) - 5
+
+#         if k > 0:
+        r = r1 - k
+        result = (r, r1)
+#         else:
+#             r = r1 - k
+#             result = (r1, r)
+        return result
+
+class TestModel(nn.Module):
+    def __init__(self):
+        super(TestModel, self).__init__()
+
+    def forward(self, z: Tuple[torch.Tensor, torch.Tensor]):
+        r = z[0] + z[1]
+        return r
+
+test_model = TestModel()
+
+ts = torch.jit.script(test_model)
+print(ts.graph)
+
+ts.to("cuda").eval()
+input_data = torch.randn((16, 3, 32, 32))
+input_data = input_data.float().to("cuda")
+result = ts((input_data, input_data))
+torch.jit.save(ts, "./tuple2_v3.ts")
\ No newline at end of file

From 2fc1363dd0de1a264de97607d4a5209f79261b96 Mon Sep 17 00:00:00 2001
From: inocsin
Date: Thu, 17 Feb 2022 22:46:41 +0800
Subject: [PATCH 02/22] feat: [collection] try to defer determining the data
 type of tuple/list elements. Use a two-level vector to store ir::Input

Signed-off-by: inocsin
---
 core/compiler.cpp             | 107 +++++++++++++++++-------------
 core/compiler.h               |   2 +-
 core/conversion/conversion.h  |   1 +
 core/ir/ir.cpp                | 120 +++++++++++++++++++++++++++-----
 core/ir/ir.h                  |  13 +++-
 core/lowering/lowering.cpp    |   2 +-
 tests/cpp/test_collection.cpp |   4 +-
 7 files changed, 180 insertions(+), 69 deletions(-)

diff --git a/core/compiler.cpp b/core/compiler.cpp
index 45ecd3c993..5b811ca20b 100644
--- a/core/compiler.cpp
+++ b/core/compiler.cpp
@@ -305,60 +305,71 @@ void MapInputsAndDetermineDTypes(
     CompileSpec& cfg,
     std::shared_ptr<torch::jit::Graph>& g,
     ir::StaticParams& static_params,
-    ir::TypeMap& first_use_type_map) {
+    ir::CollectionTypeMap& first_use_type_map) {
+  // ir::TypeMap& first_use_type_map) {
   // Associate input specs with inputs
-  cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params));
-  auto tensor_inputs = ir::get_tensor_inputs(g, static_params);
-  LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", tensor_inputs size " << tensor_inputs.size());
+  // cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params));
+  cfg.convert_info.collection_inputs = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
+
+  auto collection_inputs = ir::get_collection_inputs(g, static_params);
+  LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", collection_inputs size " << collection_inputs.size());
   // for (auto& in : g->inputs()) {
   //   if (static_params.find(in) == static_params.end()) {
-  for (auto in : tensor_inputs) {
-    ir::Input& spec = cfg.convert_info.inputs.find(in)->second;
-    c10::optional<at::ScalarType> est_type_opt = {};
+  for (auto in : collection_inputs) {
+    std::vector<ir::Input>& spec = cfg.convert_info.collection_inputs.find(in)->second;
+    // ir::Input& spec = cfg.convert_info.inputs.find(in)->second;
+    // c10::optional<at::ScalarType> est_type_opt = {};
+    std::vector<c10::optional<at::ScalarType>> est_type_opt;
+
+    auto est_it = first_use_type_map.find(in);
     if (est_it != first_use_type_map.end()) {
       est_type_opt = first_use_type_map.find(in)->second;
    }
-    if (est_type_opt && !spec.dtype_is_user_defined) {
-      // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
-      // type
-      LOG_INFO(
-          "Since input type is not explicitly defined, infering using first tensor calculation\n Found input "
-          << in->debugName() << " has type " << est_type_opt.value()
-          << ". If this is incorrect explicitly set dtype for input and file a bug");
-      spec.dtype = util::ScalarTypeToTRTDataType(est_type_opt.value());
-    } else if (!est_type_opt && !spec.dtype_is_user_defined) {
-      // If we cannot calculate the type and the user did not define the type, then default to FP32
-      LOG_WARNING(
-          "Cannot infer input type from calcuations in graph for input "
-          << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
-      spec.dtype = nvinfer1::DataType::kFLOAT;
-    } else if (spec.dtype_is_user_defined && cfg.partition_info.enabled) {
-      if (!est_type_opt) {
-        LOG_INFO("Cannot infer input tensor dtype in graph. Using user provided input dtype settings");
-        first_use_type_map[in] = {util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype)};
-      } else {
-        if (util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype) != est_type_opt.value()) {
-          std::stringstream ss;
-          ss << "For input " << in->debugName() << ", found user specified input dtype as ";
-          ss << cfg.convert_info.inputs.find(in)->second.dtype;
-          ss << ", however when inspecting the graph, the input type expected was inferred to be ";
-          ss << est_type_opt.value() << std::endl;
-          ss << "The compiler is going to use the user setting " << cfg.convert_info.inputs.find(in)->second.dtype;
-          ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
-          ss << "compatibility with PyTorch's data type convention is required.\n";
-          ss << "If you do indeed see errors at runtime either:\n";
-          ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
-          ss << "- Disable partial compilation by setting require_full_compilation to True";
-          auto warn_str = ss.str();
-          LOG_WARNING(warn_str);
+    // traverse elements in est_type_opt and spec
+    for (int i = 0; i < est_type_opt.size(); i++) {
+      if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+        // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
+        // type
+        LOG_INFO(
+            "Since input type is not explicitly defined, inferring using first tensor calculation\n Found input "
+            << in->debugName() << " has type " << est_type_opt[i].value()
+            << ". If this is incorrect explicitly set dtype for input and file a bug");
+        spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
+      } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+        // If we cannot calculate the type and the user did not define the type, then default to FP32
+        LOG_WARNING(
+            "Cannot infer input type from calculations in graph for input "
+            << in->debugName() << ". Assuming it is Float32.
If not, specify input type explicity"); + spec[i].dtype = nvinfer1::DataType::kFLOAT; + } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) { + if (!est_type_opt[i]) { + LOG_INFO("Cannot infer input tensor dtype in graph, unable to verify user input dtype settings"); + } else { + // if (util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype) != est_type_opt.value()) { + if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) { + std::stringstream ss; + ss << "For input " << in->debugName() << ", found user specified input dtype as "; + ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + // ss << cfg.convert_info.inputs.find(in)->second.dtype; + ss << ", however when inspecting the graph, the input type expected was inferred to be "; + ss << est_type_opt[i].value() << std::endl; + // ss << "The compiler is going to use the user setting " << cfg.convert_info.inputs.find(in)->second.dtype; + ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n"; + ss << "compatibility with PyTorch's data type convention is required.\n"; + ss << "If you do indeed see errors at runtime either:\n"; + ss << "- Remove the dtype spec for " << in->debugName() << std::endl; + ss << "- Disable partial compilation by setting require_full_compilation to True"; + auto warn_str = ss.str(); + LOG_WARNING(warn_str); + // Overwrite type map with user settings + // first_use_type_map[in] = {util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype)}; + first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; + } } - // Overwrite type map with user settings - // We use this map for partitiioning since we need c10::ScalarTypes not nvinfer::DataTypes - first_use_type_map[in] = {util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype)}; + } else { + // The user defined the type so no changes are necessary } - } else { - // The user defined the type so no changes are necessary } } // } @@ -383,7 +394,8 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std:: auto params = graph_and_parameters.second; auto static_params = ir::get_static_params(g->inputs(), params); // Infer the type of an input from the weights of the calculation - auto first_use_types = ir::get_block_first_calc_dtypes_opt(g->block()); + // auto first_use_types = ir::get_block_first_calc_dtypes_opt(g->block()); + auto first_use_types = ir::get_block_first_calc_dtypes_opt_collection(g->block()); // GPU default WS size : 1 GB // Set WS = 256 Mb for Jetson nano/TX1 like platforms whose compute capability is 5.X. 
@@ -423,7 +435,8 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) auto params = graph_and_parameters.second; auto static_params = ir::get_static_params(g->inputs(), params); // Infer the type of an input from the weights of the calculation - auto first_use_types = ir::get_block_first_calc_dtypes_opt(g->block()); + // auto first_use_types = ir::get_block_first_calc_dtypes_opt(g->block()); + auto first_use_types = ir::get_block_first_calc_dtypes_opt_collection(g->block()); MapInputsAndDetermineDTypes(cfg, g, static_params, first_use_types); auto isBlockConvertible = conversion::VerifyConverterSupportForBlock(g->block(), true); diff --git a/core/compiler.h b/core/compiler.h index 1743f566b9..16cb17148d 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -22,7 +22,7 @@ struct CompileSpec { inputs = graph_inputs.flattened_inputs; } ir::GraphInputs graph_inputs; - std::vector inputs; + std::vector inputs; // can be replaced by graph_inputs conversion::ConversionInfo convert_info; lowering::LowerInfo lower_info; partitioning::PartitionInfo partition_info; diff --git a/core/conversion/conversion.h b/core/conversion/conversion.h index 58c06b42a3..ba194716e8 100644 --- a/core/conversion/conversion.h +++ b/core/conversion/conversion.h @@ -13,6 +13,7 @@ namespace conversion { struct ConversionInfo { ir::InputSpecMap inputs; + ir::CollectionInputSpecMap collection_inputs; BuilderSettings engine_settings; }; diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index ce97fa9dbe..93d3a16f2d 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -13,6 +13,14 @@ InputSpecMap associate_specs_with_inputs( return pair_input_vals_with_specs(tensor_inputs, specs); } +CollectionInputSpecMap associate_specs_with_collection_inputs( + std::shared_ptr& g, + ir::GraphInputs graph_inputs, + StaticParams& static_params) { + auto tensor_inputs = get_collection_inputs(g, static_params); + return pair_input_vals_with_specs(tensor_inputs, graph_inputs.collection_inputs); +} + InputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector specs) { TORCHTRT_CHECK( vals.size() == specs.size(), @@ -27,6 +35,20 @@ InputSpecMap pair_input_vals_with_specs(std::vector va return a; } +CollectionInputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector>& specs) { + TORCHTRT_CHECK( + vals.size() == specs.size(), + "Expected dimension specifications for all input tensors" + << ", but found " << vals.size() << " input tensors and " << specs.size() << " dimension specs"); + + CollectionInputSpecMap a; + for (size_t i = 0; i < vals.size(); i++) { + LOG_DEBUG("Paring " << i << ": " << vals[i]->debugName() << " : " << specs[i]); + a.insert({vals[i], specs[i]}); + } + return a; +} + std::vector get_tensor_inputs( std::shared_ptr& g, StaticParams& static_params) { @@ -42,27 +64,59 @@ std::vector get_tensor_inputs( // input.1:Tensor -> used if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); - } else if (in->type()->cast() && static_params.find(in) == static_params.end()) { + } + // else if (in->type()->cast() && static_params.find(in) == static_params.end()) { + // // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) { + // at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(in); + // LOG_DEBUG("Tuple size " << unpack_tuple.size()); + // for (auto item: unpack_tuple) { + // input_tensors.push_back(in); + // } + // } else if 
(in->type()->isSubtypeOf(c10::ListType::ofTensors()) && static_params.find(in) == static_params.end()) { + + // LOG_DEBUG("List use size " << in->uses().size()); + // // for (auto use : in->uses()) { + // // LOG_DEBUG(use.user->outputs()[0]->debugName()); + // // } + // // TODO: set the correct list number according to the Input IValue + // int n = 2; + // auto unpack_node = g->createListUnpack(in, n); + // g->block()->appendNode(unpack_node); + // for (auto item: unpack_node->outputs()) { + // input_tensors.push_back(item); + // } + // LOG_DEBUG("Unpack List of size " << n); + // } + } + return input_tensors; +} + +std::vector get_collection_inputs( + std::shared_ptr& g, + StaticParams& static_params) { + std::vector input_tensors; + auto inputs = g->inputs(); + LOG_DEBUG("Inputs size " << inputs.size()); + for (auto in : inputs) { + LOG_DEBUG("input debug name: " << in->debugName()); + // Disregarding inputs that are not tensors or are static + // + // Ex. + // self.1:__torch__.alexnet -> ignored + // input.1:Tensor -> used + if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { + input_tensors.push_back(in); + } else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) { // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) { + input_tensors.push_back(in); // push original tuple at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(in); LOG_DEBUG("Tuple size " << unpack_tuple.size()); - for (auto item: unpack_tuple) { - input_tensors.push_back(in); - } - } else if (in->type()->isSubtypeOf(c10::ListType::ofTensors()) && static_params.find(in) == static_params.end()) { - - LOG_DEBUG("List use size " << in->uses().size()); - // for (auto use : in->uses()) { - // LOG_DEBUG(use.user->outputs()[0]->debugName()); + // for (auto item: unpack_tuple) { + // input_tensors.push_back(in); // } - // TODO: set the correct list number according to the Input IValue - int n = 2; - auto unpack_node = g->createListUnpack(in, n); - g->block()->appendNode(unpack_node); - for (auto item: unpack_node->outputs()) { - input_tensors.push_back(item); - } - LOG_DEBUG("Unpack List of size " << n); + } else if (in->type()->kind() == torch::jit::TypeKind::ListType && static_params.find(in) == static_params.end()) { + LOG_DEBUG("List use size " << in->uses().size()); + input_tensors.push_back(in); // push original list } } return input_tensors; @@ -187,6 +241,38 @@ TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b) { return types; } +CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* b) { + CollectionTypeMap types; + for (auto i : b->inputs()) { + if (i->type() == c10::TensorType::get()) { + torch::jit::Value* in = i; + types.insert({in, {get_value_first_calc_dtype_opt(b, i)}}); + } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) { + LOG_DEBUG("get_block_first_calc_dtypes_opt TupleType"); + + + // TODO: how to evaluate the data type of tuple element + // make sure very time get the same ptr + at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(i); + LOG_DEBUG("get_block_first_calc_dtypes_opt: tuple size " << unpack_tuple.size()); + std::vector> empty_dytpes(unpack_tuple.size()); + types.insert({i, empty_dytpes}); // insert an empty + // for (auto item: unpack_tuple) { + // torch::jit::Value* in = item; + // types.insert({in, get_value_first_calc_dtype_opt(b, i)}); + // } + + } else 
if(i->type()->kind() == torch::jit::TypeKind::ListType) { + // TODO: how to evaluate the data type of tuple element + LOG_DEBUG("get_block_first_calc_dtypes_opt ListType"); + types.insert({i, {}}); // insert an empty + // LOG_INFO("Unsupported type of c10::ListType::ofTensors()"); + + } + } + return types; +} + static auto core_input_container = torch::class_("_torch_tensorrt_core_ir", "Input").def(torch::init<>()); diff --git a/core/ir/ir.h b/core/ir/ir.h index 056f257f22..54e61a44a4 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -53,6 +53,7 @@ struct GraphInputs { } torch::jit::IValue input_signature; // nested Input, full input spec std::vector flattened_inputs; // flattend Input + std::vector> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e) }; typedef std::pair GraphIO; // Graph input output mapping @@ -61,6 +62,7 @@ using StaticParams = std::map; StaticParams get_static_params(c10::ArrayRef inputs, std::vector params); using InputSpecMap = std::unordered_map; +using CollectionInputSpecMap = std::unordered_map>; std::vector get_tensor_inputs( std::shared_ptr& g, @@ -69,16 +71,25 @@ InputSpecMap associate_specs_with_inputs( std::shared_ptr& g, std::vector specs, StaticParams& static_params); +CollectionInputSpecMap associate_specs_with_collection_inputs( + std::shared_ptr& g, + ir::GraphInputs graph_inputs, + StaticParams& static_params); InputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector specs); +CollectionInputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector>& specs); std::vector get_tensor_inputs( std::shared_ptr& g, StaticParams& static_params); +std::vector get_collection_inputs( + std::shared_ptr& g, + StaticParams& static_params); using TypeMap = std::unordered_map>; +using CollectionTypeMap = std::unordered_map>>; c10::optional get_value_first_calc_dtype_opt(torch::jit::Block* b, torch::jit::Value* in); ir::TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b); - +ir::CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* b); } // namespace ir } // namespace core } // namespace torch_tensorrt diff --git a/core/lowering/lowering.cpp b/core/lowering/lowering.cpp index d3296c347c..0051ad451c 100644 --- a/core/lowering/lowering.cpp +++ b/core/lowering/lowering.cpp @@ -33,7 +33,7 @@ void LowerGraph(std::shared_ptr& g, LowerInfo lower_info) { torch::jit::InlineFunctionalGraphs(g); torch::jit::PeepholeOptimize(g, false); torch::jit::FuseLinear(g); - torch::jit::LowerAllTuples(g); + // torch::jit::LowerAllTuples(g); if (!lower_info.disable_cse) { torch::jit::EliminateCommonSubexpression(g); } diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index a48e642a1c..019dd6c7f1 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -83,8 +83,8 @@ TEST(CppAPITests, TestCollection) { // // Compile module auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); - // auto trt_out = trt_mod.forward(complex_inputs); - auto trt_out = trt_mod.forward(complex_inputs_list); + auto trt_out = trt_mod.forward(complex_inputs); + // auto trt_out = trt_mod.forward(complex_inputs_list); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); From 0072e37e9c2c5e52780383602f022a486f7b03e4 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 10 Mar 2022 17:09:26 +0800 Subject: [PATCH 03/22] feat: [collection] limited support for tuple input Signed-off-by: inocsin --- 
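Notes: nesting is capped at two levels, e.g. ((a, b), [c, d], e); deeper
specs are rejected by flatten_dfs with "3 level of input specs is not
supported". A minimal sketch of the C++ API this enables (illustrative,
mirroring tests/cpp/test_collection.cpp):

    auto in = torch_tensorrt::Input({1, 3, 512, 512});
    auto iv = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(in));
    std::tuple<torch::jit::IValue, torch::jit::IValue> sig(iv, iv);
    auto spec = torch_tensorrt::ts::CompileSpec(torch::jit::IValue(sig));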
core/compiler.cpp | 6 +- core/compiler.h | 7 +- core/ir/StaticParams.cpp | 4 +- core/ir/ir.cpp | 26 +++-- core/ir/ir.h | 22 ++--- core/partitioning/shape_analysis.cpp | 103 +++++++++++++++++--- core/partitioning/shape_analysis.h | 8 +- cpp/include/torch_tensorrt/torch_tensorrt.h | 6 +- cpp/src/compile_spec.cpp | 101 +++++++++++++------ tests/cpp/test_collection.cpp | 6 +- 10 files changed, 204 insertions(+), 85 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 5b811ca20b..a431bcdae3 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -449,8 +449,9 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) if (cfg.partition_info.enabled && !(cfg.lower_info.forced_fallback_modules.size() == 0 && cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) { - auto input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.inputs, first_use_types); - auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), input_ivalues_map, cfg, static_params); + + auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_inputs, first_use_types); + auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params); new_g = graph_and_mapping.first; LOG_INFO("Segmented Graph: " << *new_g); @@ -464,6 +465,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) TORCHTRT_CHECK( conversion::VerifyConverterSupportForBlock(g->block()), "Not all operations in graph are supported by the compiler"); + // TODO find the right auto engine = conversion::ConvertBlockToEngine(g->block(), cfg.convert_info, static_params); AddEngineToGraph(new_mod, new_g, engine, cuda_device); } diff --git a/core/compiler.h b/core/compiler.h index 16cb17148d..71aa8899b2 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -15,11 +15,12 @@ namespace core { struct CompileSpec { CompileSpec(std::vector inputs) : inputs(inputs) { - graph_inputs = ir::GraphInputs(inputs); + // graph_inputs = ir::GraphInputs(inputs); } CompileSpec(torch::jit::IValue& input_signature) { - graph_inputs = ir::GraphInputs(input_signature); - inputs = graph_inputs.flattened_inputs; + // graph_inputs = ir::GraphInputs(input_signature); + // inputs = graph_inputs.flattened_inputs; + graph_inputs.input_signature = input_signature; } ir::GraphInputs graph_inputs; std::vector inputs; // can be replaced by graph_inputs diff --git a/core/ir/StaticParams.cpp b/core/ir/StaticParams.cpp index 0fe03e4aff..0073ad2888 100644 --- a/core/ir/StaticParams.cpp +++ b/core/ir/StaticParams.cpp @@ -13,8 +13,8 @@ StaticParams get_static_params(c10::ArrayRef inputs, std::ve for (auto in : inputs) { // handle TensorType, TupleType and ListType if (in->type() != c10::TensorType::get() && - !in->type()->isSubtypeOf(c10::TupleType::create()) && - !in->type()->isSubtypeOf(c10::ListType::ofTensors()) && param_it != params.end()) { + in->type()->kind() != torch::jit::TypeKind::TupleType && + in->type()->kind() != torch::jit::TypeKind::ListType && param_it != params.end()) { static_params[in] = *param_it; ++param_it; } diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index 93d3a16f2d..a1a49ba3ca 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -18,10 +18,11 @@ CollectionInputSpecMap associate_specs_with_collection_inputs( ir::GraphInputs graph_inputs, StaticParams& static_params) { auto tensor_inputs = get_collection_inputs(g, static_params); - return 
pair_input_vals_with_specs(tensor_inputs, graph_inputs.collection_inputs); + return pair_input_vals_with_specs_collection(tensor_inputs, graph_inputs.collection_inputs); } InputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector specs) { + LOG_DEBUG("pair_input_vals_with_specs"); TORCHTRT_CHECK( vals.size() == specs.size(), "Expected dimension specifications for all input tensors" @@ -35,7 +36,8 @@ InputSpecMap pair_input_vals_with_specs(std::vector va return a; } -CollectionInputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector>& specs) { +CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector vals, std::vector>& specs) { + LOG_DEBUG("pair_input_vals_with_specs collection"); TORCHTRT_CHECK( vals.size() == specs.size(), "Expected dimension specifications for all input tensors" @@ -96,7 +98,7 @@ std::vector get_collection_inputs( StaticParams& static_params) { std::vector input_tensors; auto inputs = g->inputs(); - LOG_DEBUG("Inputs size " << inputs.size()); + LOG_DEBUG("get_collection_inputs, inputs size " << inputs.size()); for (auto in : inputs) { LOG_DEBUG("input debug name: " << in->debugName()); // Disregarding inputs that are not tensors or are static @@ -110,12 +112,9 @@ std::vector get_collection_inputs( // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); // push original tuple at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(in); - LOG_DEBUG("Tuple size " << unpack_tuple.size()); - // for (auto item: unpack_tuple) { - // input_tensors.push_back(in); - // } + LOG_DEBUG("get_collection_inputs, tuple size " << unpack_tuple.size()); } else if (in->type()->kind() == torch::jit::TypeKind::ListType && static_params.find(in) == static_params.end()) { - LOG_DEBUG("List use size " << in->uses().size()); + LOG_DEBUG("get_collection_inputs, list use size " << in->uses().size()); input_tensors.push_back(in); // push original list } } @@ -248,13 +247,11 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* torch::jit::Value* in = i; types.insert({in, {get_value_first_calc_dtype_opt(b, i)}}); } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) { - LOG_DEBUG("get_block_first_calc_dtypes_opt TupleType"); - - - // TODO: how to evaluate the data type of tuple element + LOG_DEBUG("get_block_first_calc_dtypes_opt_collection TupleType"); + // TODO: to evaluate the data type of tuple element // make sure very time get the same ptr at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(i); - LOG_DEBUG("get_block_first_calc_dtypes_opt: tuple size " << unpack_tuple.size()); + LOG_DEBUG("get_block_first_calc_dtypes_opt_collection: tuple size " << unpack_tuple.size()); std::vector> empty_dytpes(unpack_tuple.size()); types.insert({i, empty_dytpes}); // insert an empty // for (auto item: unpack_tuple) { @@ -263,10 +260,9 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* // } } else if(i->type()->kind() == torch::jit::TypeKind::ListType) { - // TODO: how to evaluate the data type of tuple element + // TODO: to decide the size of list and type of list element LOG_DEBUG("get_block_first_calc_dtypes_opt ListType"); types.insert({i, {}}); // insert an empty - // LOG_INFO("Unsupported type of c10::ListType::ofTensors()"); } } diff --git a/core/ir/ir.h b/core/ir/ir.h index 54e61a44a4..06e21fd53b 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -42,17 +42,17 @@ struct Input : 
torch::CustomClassHolder { // Add to spec struct GraphInputs { - GraphInputs() {} - GraphInputs(torch::jit::IValue inputs) { - input_signature = inputs; - // TODO flatten IValue - } - GraphInputs(std::vector inputs) { - flattened_inputs = inputs; - // TODO construct the IValue - } +// GraphInputs() {} +// GraphInputs(torch::jit::IValue inputs) { +// input_signature = inputs; +// // TODO flatten IValue +// } + // GraphInputs(std::vector inputs) { + // flattened_inputs = inputs; + // // TODO construct the IValue + // } torch::jit::IValue input_signature; // nested Input, full input spec - std::vector flattened_inputs; // flattend Input + std::vector flattened_inputs; // flattend Input, can be removed std::vector> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e) }; @@ -76,7 +76,7 @@ CollectionInputSpecMap associate_specs_with_collection_inputs( ir::GraphInputs graph_inputs, StaticParams& static_params); InputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector specs); -CollectionInputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector>& specs); +CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector vals, std::vector>& specs); std::vector get_tensor_inputs( std::shared_ptr& g, StaticParams& static_params); diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 96b1312062..0f849c8871 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -8,27 +8,90 @@ namespace torch_tensorrt { namespace core { namespace partitioning { +at::Tensor generateSingleInput(ir::Input& input, c10::optional& type_opt) { + auto cur_shape = input.input_shape; + std::vector shape; + shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); + // auto type_opt = types[input.first][i]; + auto type = at::kFloat; + if (type_opt) { + type = type_opt.value(); + } else { + LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); + } + auto in = at::randint(5, shape, {at::kCUDA}).to(type); + // ivalue_map[input.first] = in.clone(); + return in; +} + std::unordered_map generateRandomInputs( - std::unordered_map& inputs, - std::unordered_map>& types) { +// std::unordered_map> generateRandomInputs( + // std::unordered_map& inputs, + std::unordered_map>& inputs, + // std::unordered_map>& types) { + std::unordered_map>>& types) { // generate random inputs for running pytorch segments std::unordered_map ivalue_map; - - uint64_t in_i = 0; + // std::unordered_map> ivalue_map; + // TODO + // uint64_t in_i = 0; for (auto& input : inputs) { - auto cur_shape = input.second.input_shape; - std::vector shape; - shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); - auto type_opt = types[input.first]; - auto type = at::kFloat; - if (type_opt) { - type = type_opt.value(); + + // for (int i = 0; i < input.second.size(); i++) { + // auto cur_shape = input.second[i].input_shape; + // std::vector shape; + // shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); + // auto type_opt = types[input.first][i]; + // auto type = at::kFloat; + // if (type_opt) { + // type = type_opt.value(); + // } else { + // LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); + // } + // auto in = at::randint(5, shape, {at::kCUDA}).to(type); + // // ivalue_map[input.first] = in.clone(); + // 
ivalue_map[input.first].push_back(in.clone()); + // // in_i++; + // } + + if (input.first->type()->kind() == torch::jit::TypeKind::ListType) { + // create list + // auto list = c10::impl::GenericList(c10::TensorType::get()); + // list.append(ivalues_maps[input]); + LOG_DEBUG("generateRandomInputs, generate random input of list type"); + // jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); + std::vector list; + c10::TypePtr elementType = c10::TensorType::get(); + auto generic_list = c10::impl::GenericList(elementType); + for (int i = 0; i < input.second.size(); i++) { + auto in = generateSingleInput(input.second[i], types[input.first][i]); + // list.push_back(in.clone()); + generic_list.push_back(in.clone()); + } + // c10::TypePtr elementType = list[0].type(); + + // generic_list.append(list); + ivalue_map[input.first] = generic_list; + // jit_inputs_ivalues.push_back(list); + } else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) { + // create tuple + // auto tuple = torch::jit::Tuple::create(ivalues_maps[input]); + LOG_DEBUG("generateRandomInputs, generate random input of tuple type"); + std::vector list; + for (int i = 0; i < input.second.size(); i++) { + auto in = generateSingleInput(input.second[i], types[input.first][i]); + list.push_back(in.clone()); + } + auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr + + ivalue_map[input.first] = c10::IValue(tuple); + // jit_inputs_ivalues.push_back(tuple); } else { - LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); + LOG_DEBUG("generateRandomInputs, generate random input of tensor type"); + auto in = generateSingleInput(input.second[0], types[input.first][0]); + ivalue_map[input.first] = in.clone(); + } - auto in = at::randint(5, shape, {at::kCUDA}).to(type); - ivalue_map[input.first] = in.clone(); - in_i++; } return ivalue_map; } @@ -36,6 +99,7 @@ std::unordered_map generateRandomI void getSegmentsOutputByRunning( SegmentedBlock& seg_block, std::unordered_map& ivalues_maps, + // std::unordered_map>& ivalues_maps, const PartitionInfo& partition_info) { // create a module to run the graph auto g = seg_block.g(); @@ -79,8 +143,16 @@ void getSegmentsOutputByRunning( } else if (input->type()->isSubtypeOf(torch::jit::BoolType::get())) { jit_inputs_ivalues.push_back(ivalues_maps[input].toBool()); } else if (input->type()->kind() == torch::jit::TypeKind::ListType) { + // create list + // auto list = c10::impl::GenericList(c10::TensorType::get()); + // list.append(ivalues_maps[input]); + LOG_DEBUG("getSegmentsOutputByRunning, handle list type"); jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); + // jit_inputs_ivalues.push_back(list); } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) { + // create tuple + // auto tuple = torch::jit::Tuple::create(ivalues_maps[input]); + LOG_DEBUG("getSegmentsOutputByRunning, handle tuple type"); jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple()); } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) { jit_inputs_ivalues.push_back(ivalues_maps[input].toScalar()); @@ -141,6 +213,7 @@ void getSegmentsOutputByRunning( } input_types.push_back(cur_ivalue.toTensor().scalar_type()); } + // TODO: tuple and list inputs in subgraph } seg_block.register_inshapes(input_shapes); diff --git a/core/partitioning/shape_analysis.h b/core/partitioning/shape_analysis.h index 0626490222..46450eb0f8 100644 --- a/core/partitioning/shape_analysis.h +++ b/core/partitioning/shape_analysis.h @@ -6,9 
+6,13 @@ namespace torch_tensorrt { namespace core { namespace partitioning { +// std::unordered_map generateRandomInputs( +// std::unordered_map& input_ranges, +// std::unordered_map>& input_types); + std::unordered_map generateRandomInputs( - std::unordered_map& input_ranges, - std::unordered_map>& input_types); + std::unordered_map>& input_ranges, + std::unordered_map>>& input_types); void runShapeAnalysis( std::vector& segmented_blocks, diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index 63dc96e654..1ee8dde3c9 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -521,7 +521,7 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{ */ struct TORCHTRT_API GraphInputs { torch::jit::IValue input_signature; // nested Input, full input spec - std::vector flattened_inputs; // flattend Input + // std::vector flattened_inputs; // flattend Input }; /** @@ -599,9 +599,7 @@ struct TORCHTRT_API CompileSpec { * * @param inputs */ - CompileSpec(torch::jit::IValue input_signature) { - graph_inputs.input_signature = input_signature; - } + CompileSpec(torch::jit::IValue input_signature); // Defaults should reflect TensorRT defaults for BuilderConfig /** diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 74de9e4801..beac217677 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -20,82 +20,123 @@ CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { inputs.push_back(Input(in)); } - graph_inputs.flattened_inputs = inputs; + // graph_inputs.flattened_inputs = inputs; } CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { inputs.push_back(Input(in)); } - graph_inputs.flattened_inputs = inputs; + // graph_inputs.flattened_inputs = inputs; } -void flatten_dfs(std::vector& flattened_inputs, torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) { +CompileSpec::CompileSpec(torch::jit::IValue input_signature) { + graph_inputs.input_signature = input_signature; +} + +void flatten_dfs(std::vector& flattened_inputs, std::vector>& collection_inputs, + torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue, int level, int index) { if (input_ivalue.isTuple()) { auto input_tuple = input_ivalue.toTuple(); std::vector converted_elements; + int idx = 0; + if (level == 0) { + collection_inputs.resize(input_tuple->elements().size()); + } for (auto item: input_tuple->elements()) { torch::jit::IValue converted_item; - flatten_dfs(flattened_inputs, item, converted_item); + int cur_idx = level < 1 ? idx: index; + flatten_dfs(flattened_inputs, collection_inputs, item, converted_item, level+1, cur_idx); converted_elements.push_back(converted_item); auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements); converted_ivalue = torch::jit::IValue(tuple_ptr); + idx++; } } else if(input_ivalue.isList()) { auto input_list = input_ivalue.toList().vec(); + if (level == 0) { + collection_inputs.resize(input_list.size()); + } c10::TypePtr type = input_list[0].type(); auto converted_elements = c10::impl::GenericList(type); // std::vector converted_elements; + int idx = 0; for (auto item: input_list) { + int cur_idx = level < 1 ? 
idx: index; torch::jit::IValue converted_item; - flatten_dfs(flattened_inputs, item, converted_item); + flatten_dfs(flattened_inputs, collection_inputs, item, converted_item, level+1, cur_idx); converted_elements.push_back(converted_item); + idx++; } converted_ivalue = torch::jit::IValue(converted_elements); } else if(input_ivalue.isCustomClass()) { torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass())); flattened_inputs.push_back(cur_input); converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(cur_input))); + if (level == 0) { // a single value like A + collection_inputs.resize(1); + collection_inputs[0].push_back(cur_input); + } else if (level == 1) { // like A in [A, A] or [(B, B), A] + collection_inputs[index].push_back(cur_input); + } else if (level == 2) { // like A in [(A, A), C] + collection_inputs[index].push_back(cur_input); + } else {// only support 2 level + LOG_ERROR("3 level of input specs is not supported"); + } } } + torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs external_graph_input) { torch_tensorrt::core::ir::GraphInputs internal_graph_input; - // flattened version - if (external_graph_input.flattened_inputs.size() > 0) { - // std::vector input_shape_list; - auto empty_ivalue = torch::jit::IValue(c10::make_intrusive(torchtrt::core::ir::Input())); - c10::TypePtr type = empty_ivalue.type(); - auto input_shape_list = c10::impl::GenericList(type); - std::vector internal_input = to_vec_internal_inputs(external_graph_input.flattened_inputs); - for (auto input_shape: internal_input) { - auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - input_shape_list.push_back(input_shape_ivalue); - } - - torch::jit::IValue input_signature(input_shape_list); - internal_graph_input.flattened_inputs = internal_input; - internal_graph_input.input_signature = input_signature; + // // flattened version + // if (external_graph_input.flattened_inputs.size() > 0) { + // // std::vector input_shape_list; + // auto empty_ivalue = torch::jit::IValue(c10::make_intrusive(torchtrt::core::ir::Input())); + // c10::TypePtr type = empty_ivalue.type(); + // auto input_shape_list = c10::impl::GenericList(type); + // std::vector internal_input = to_vec_internal_inputs(external_graph_input.flattened_inputs); + // for (auto input_shape: internal_input) { + // auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + // input_shape_list.push_back(input_shape_ivalue); + // } + + // torch::jit::IValue input_signature(input_shape_list); + // internal_graph_input.flattened_inputs = internal_input; + // internal_graph_input.input_signature = input_signature; - } - // nested version - else { + // } + // // nested version + // else { std::vector flattened_inputs; - torch::jit::IValue input_signature; - flatten_dfs(flattened_inputs, external_graph_input.input_signature, input_signature); + std::vector> collection_inputs; + + torch::jit::IValue converted_input_signature; + flatten_dfs(flattened_inputs, collection_inputs, external_graph_input.input_signature, converted_input_signature, 0, 0); internal_graph_input.flattened_inputs = flattened_inputs; - internal_graph_input.input_signature = input_signature; - printf("in nested version branch\n"); + internal_graph_input.input_signature = converted_input_signature; + internal_graph_input.collection_inputs = collection_inputs; + + LOG_DEBUG("compile_spec.cpp, to_internal_graph_inputs, flattened_inputs size " << 
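// --- illustrative sketch (stand-in tensors instead of boxed Input specs) ---
// What the level limit above means in practice: a signature like
// ((A, B), [C, D]) is supported, while a third nesting level is rejected with
// LOG_ERROR. This self-contained example builds such a two-level signature
// with stock IValue APIs; the shapes are placeholders.
#include <torch/script.h>
#include <iostream>
#include <vector>

int main() {
  std::vector<c10::IValue> pair{torch::randn({2, 2}), torch::randn({2, 2})};
  c10::IValue tup = c10::ivalue::Tuple::create(pair);      // (A, B)

  c10::impl::GenericList lst(c10::TensorType::get());
  lst.push_back(torch::randn({2, 2}));                     // [C, D]
  lst.push_back(torch::randn({2, 2}));

  std::vector<c10::IValue> top{tup, c10::IValue(lst)};
  c10::IValue signature = c10::ivalue::Tuple::create(top); // ((A, B), [C, D])

  // Two top-level slots -> collection_inputs would get size 2 here.
  std::cout << signature.toTuple()->elements().size() << std::endl;
  return 0;
}
// --- end sketch ---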
flattened_inputs.size() << ", collection_inputs size "<< collection_inputs.size());
-  }
   return internal_graph_input;
 }

 torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.inputs));
-  internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs);
-  internal.inputs = internal.graph_inputs.flattened_inputs;
+  if (internal.inputs.size() == 0) {
+    LOG_DEBUG("to_internal_compile_spec, Input size == 0, using graph_input");
+    internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs);
+    internal.inputs = internal.graph_inputs.flattened_inputs;
+  } else {
+    LOG_DEBUG("to_internal_compile_spec, Input size != 0, using original Input to construct collection_input");
+    internal.graph_inputs.collection_inputs.resize(internal.inputs.size());
+    for (int i = 0; i < internal.inputs.size(); i++) {
+      internal.graph_inputs.collection_inputs[i].push_back(internal.inputs[i]);
+    }
+  }
+
   for (auto p : external.enabled_precisions) {
     internal.convert_info.engine_settings.enabled_precisions.insert(toTRTDataType(p));

diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp
index 019dd6c7f1..c339b25645 100644
--- a/tests/cpp/test_collection.cpp
+++ b/tests/cpp/test_collection.cpp
@@ -70,10 +70,14 @@ TEST(CppAPITests, TestCollection) {

   torch::jit::IValue complex_input_shape(input_shape_tuple);
+  std::tuple input_tuple2(complex_input_shape);
+  torch::jit::IValue complex_input_shape2(input_tuple2);
   // torch::jit::IValue complex_input_shape(list);

-  auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape);
+  auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
   compile_settings.require_full_compilation = false;
+  compile_settings.min_block_size = 1;
+
   // compile_settings.torch_executed_modules.push_back("model1");
   // compile_settings.torch_executed_ops.push_back("aten::sub");

From b1d66cb73364ff92694145979bcbdc65abafc31d Mon Sep 17 00:00:00 2001
From: inocsin
Date: Thu, 10 Mar 2022 19:21:22 +0800
Subject: [PATCH 04/22] fix: [collection] test normal input, fix bug

Signed-off-by: inocsin
---
 core/conversion/conversion.cpp | 19 +++++--
 tests/cpp/test_collection.cpp  | 56 ++++++++++++++++---
 tests/py/test_collection.py    | 91 +++++++++++++++++++++-------------
 3 files changed, 121 insertions(+), 45 deletions(-)

diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp
index 56e484e898..bafde231a1 100644
--- a/core/conversion/conversion.cpp
+++ b/core/conversion/conversion.cpp
@@ -134,7 +134,11 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
 void AddInputs(
     ConversionCtx* ctx,
     c10::ArrayRef inputs,
-    std::unordered_map& input_specs) {
+    ConversionInfo& conversion_info) {
+  // std::unordered_map& input_specs) {
+  std::unordered_map& input_specs = conversion_info.inputs;
+  std::unordered_map> collection_input_spec = conversion_info.collection_inputs;
+
   std::vector input_tensors;
   for (auto in : inputs) {
     // Disregarding inputs that are not tensors
@@ -162,9 +166,15 @@
   for (auto input : input_tensors) {
     const torch::jit::Value* in = input;
     TORCHTRT_CHECK(
-        input_specs.find(in) != input_specs.end(),
+        input_specs.find(in) != input_specs.end() || collection_input_spec.find(in) != collection_input_spec.end(),
         "Cannot find an input spec associated with input: " << in->debugName());
-    ir::Input& spec = input_specs.find(in)->second;
+    ir::Input spec;
+    if (input_specs.find(in) != input_specs.end()) {
+      spec =
+          input_specs.find(in)->second;
+    } else {
+      spec = collection_input_spec.find(in)->second[0]; // assume input is tensor
+    }
+    // ir::Input& spec = input_specs.find(in)->second;

     std::string name = std::string("input_") + std::to_string(ctx->num_inputs);
     LOG_INFO(
@@ -405,7 +415,8 @@ void ConvertBlockToNetDef(
   auto inputs = b->inputs();

   AddParamsToCtxValueMap(ctx, static_params);
-  AddInputs(ctx, inputs, build_info.inputs);
+  // AddInputs(ctx, inputs, build_info.inputs);
+  AddInputs(ctx, inputs, build_info);

   auto nodes = b->nodes();

diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp
index c339b25645..b9e92cd732 100644
--- a/tests/cpp/test_collection.cpp
+++ b/tests/cpp/test_collection.cpp
@@ -6,15 +6,10 @@

 #include "torch_tensorrt/torch_tensorrt.h"

-TEST(CppAPITests, TestCollection) {
-
+TEST(CppAPITests, TestCollectionTupleInput) {
   std::string path =
-      // "/opt/trtorch/tuple2model.ts";
-      // "/opt/trtorch/tuple2_list2_v3.ts";
-      // "/opt/trtorch/tuple2_tuple2_v3.ts";
-      "/opt/trtorch/tuple2_v3.ts";
-      // "/opt/trtorch/list2_list2_v3.ts";
+      "/root/Torch-TensorRT/tuple_input.ts";
   torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
   std::vector inputs;
   inputs.push_back(in0);

   torch::jit::Module mod;
   try {
     // Deserialize the ScriptModule from a file using torch::jit::load().
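// --- illustrative sketch (stand-in types; real keys are torch::jit::Value*) ---
// The AddInputs hunk above consults the per-tensor spec map first and only
// then falls back to the collection map, taking element [0] on the assumption
// that the entry describes a single tensor. The same lookup order, with
// simplified types:
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

struct Spec { std::string dtype; }; // stand-in for ir::Input

static Spec find_spec(const void* value,
                      const std::unordered_map<const void*, Spec>& input_specs,
                      const std::unordered_map<const void*, std::vector<Spec>>& collection_specs) {
  auto it = input_specs.find(value);
  if (it != input_specs.end()) {
    return it->second;                 // flat spec wins when present
  }
  auto cit = collection_specs.find(value);
  if (cit != collection_specs.end() && !cit->second.empty()) {
    return cit->second[0];             // fallback: first spec of the slot
  }
  throw std::runtime_error("no input spec associated with this value");
}
// --- end sketch ---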
+ mod = torch::jit::load(path); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + } + mod.eval(); + mod.to(torch::kCUDA); + + + std::vector inputs_; + + for (auto in : inputs) { + inputs_.push_back(torch::jit::IValue(in.clone())); + } + + auto out = mod.forward(inputs_); + LOG_DEBUG("Finish torchscirpt forward"); + + std::vector input_range; + input_range.push_back({in0.sizes(), torch::kF32}); + input_range.push_back({in0.sizes(), torch::kF32}); + torch_tensorrt::ts::CompileSpec compile_settings(input_range); + compile_settings.require_full_compilation = true; + compile_settings.min_block_size = 1; + + // // FP16 execution + // compile_settings.enabled_precisions = {torch::kHalf}; + // // Compile module + auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); + LOG_DEBUG("Finish compile"); + auto trt_out = trt_mod.forward(inputs_); + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } \ No newline at end of file diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py index 610bf9fe9c..03aa4aeb41 100644 --- a/tests/py/test_collection.py +++ b/tests/py/test_collection.py @@ -4,52 +4,75 @@ import torch.nn.functional as F from typing import Tuple, List, Dict -class Model1(nn.Module): - def __init__(self): - super(Model1, self).__init__() +# class Model1(nn.Module): +# def __init__(self): +# super(Model1, self).__init__() - def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): - r = z[1] + z[0] - return r, z[1] +# def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): +# r = z[1] + z[0] +# return r, z[1] + + +# class TestModel1(nn.Module): +# def __init__(self): +# super(TestModel, self).__init__() +# self.model1 = Model1() +# def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): +# r2, r1 = self.model1((z[0], z[1])) +# # unsupport ops +# i = r2.size(1) +# j = r2.size(2) +# # r3 = torch.tensor(i) * torch.tensor(j) +# r3 = r2[0,0,0,0] +# k = int(r3) - 5 -class TestModel1(nn.Module): +# # if k > 0: +# r = r1 - k +# result = (r, r1) +# # else: +# # r = r1 - k +# # result = (r1, r) +# return result + +class Normal(nn.Module): def __init__(self): - super(TestModel, self).__init__() - self.model1 = Model1() + super(Normal, self).__init__() - def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): - r2, r1 = self.model1((z[0], z[1])) - # unsupport ops - i = r2.size(1) - j = r2.size(2) -# r3 = torch.tensor(i) * torch.tensor(j) - r3 = r2[0,0,0,0] - k = int(r3) - 5 - -# if k > 0: - r = r1 - k - result = (r, r1) -# else: -# r = r1 - k -# result = (r1, r) - return result - -class TestModel(nn.Module): + def forward(self, x, y): + r = x + y + return r + +class TupleInput(nn.Module): def __init__(self): - super(TestModel, self).__init__() + super(TupleInput, self).__init__() def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): r = z[0] + z[1] return r -test_model = TestModel() +class ListInput(nn.Module): + def __init__(self): + super(ListInput, self).__init__() + + def forward(self, z: List[torch.Tensor]): + r = z[0] + z[1] + return r -ts = torch.jit.script(test_model) -print(ts.graph) -ts.to("cuda").eval() input_data = torch.randn((16, 3, 32, 32)) input_data = input_data.float().to("cuda") -result = ts((input_data, input_data)) -torch.jit.save(ts, "./tuple2_v3.ts") \ No newline at end of file + +normal_model = Normal() +normal_model_ts = torch.jit.script(normal_model) +print(normal_model_ts.graph) +result = normal_model_ts(input_data, input_data) +normal_model_ts.to("cuda").eval() 
+torch.jit.save(normal_model_ts, "./normal_model.ts") + +tuple_input = TupleInput() +tuple_input_ts = torch.jit.script(tuple_input) +print(tuple_input_ts.graph) +result = tuple_input_ts((input_data, input_data)) +tuple_input_ts.to("cuda").eval() +torch.jit.save(tuple_input_ts, "./tuple_input.ts") \ No newline at end of file From d4e54f12b659f47423f130cba5d6e15bad1bca99 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 10 Mar 2022 21:18:02 +0800 Subject: [PATCH 05/22] feat: [collection] support list input type Signed-off-by: inocsin --- core/partitioning/shape_analysis.cpp | 13 +++- tests/cpp/test_collection.cpp | 91 ++++++++++++++++++++++++---- tests/py/test_collection.py | 9 ++- 3 files changed, 98 insertions(+), 15 deletions(-) diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 0f849c8871..a6459ebc6f 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -63,16 +63,23 @@ std::unordered_map generateRandomI std::vector list; c10::TypePtr elementType = c10::TensorType::get(); auto generic_list = c10::impl::GenericList(elementType); + LOG_DEBUG("generateRandomInputs, 0"); for (int i = 0; i < input.second.size(); i++) { - auto in = generateSingleInput(input.second[i], types[input.first][i]); + // types for list is {} + // auto in = generateSingleInput(input.second[i], types[input.first][i]); + // TODO: need to decide the input type of list elements in ir.cpp + c10::optional type_opt = {}; + auto in = generateSingleInput(input.second[i], type_opt); // list.push_back(in.clone()); generic_list.push_back(in.clone()); + LOG_DEBUG("generateRandomInputs, 1"); } // c10::TypePtr elementType = list[0].type(); - + LOG_DEBUG("generateRandomInputs, 2"); // generic_list.append(list); - ivalue_map[input.first] = generic_list; + ivalue_map[input.first] = c10::IValue(generic_list); // jit_inputs_ivalues.push_back(list); + LOG_DEBUG("generateRandomInputs, finish generate random input of list type"); } else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) { // create tuple // auto tuple = torch::jit::Tuple::create(ivalues_maps[input]); diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index b9e92cd732..7d3b54152b 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -33,7 +33,7 @@ TEST(CppAPITests, TestCollectionTupleInput) { std::vector complex_inputs, complex_inputs_list; - std::vector tuple; + // std::vector tuple; std::tuple input_tuple(in0, in0); // auto input_list = c10::impl::GenericList(c10::TensorType::get()); // input_list.push_back(inputs_[0]); @@ -42,8 +42,8 @@ TEST(CppAPITests, TestCollectionTupleInput) { // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); complex_inputs.push_back(input_tuple); - complex_inputs_list.push_back(in0); - complex_inputs_list.push_back(in0); + // complex_inputs_list.push_back(in0); + // complex_inputs_list.push_back(in0); @@ -56,10 +56,10 @@ TEST(CppAPITests, TestCollectionTupleInput) { auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - c10::TypePtr elementType = input_shape_ivalue.type(); - auto list = c10::impl::GenericList(elementType); - list.push_back(input_shape_ivalue); - list.push_back(input_shape_ivalue); + // c10::TypePtr elementType = input_shape_ivalue.type(); + // auto list = c10::impl::GenericList(elementType); + // list.push_back(input_shape_ivalue); + // list.push_back(input_shape_ivalue); std::tuple input_shape_tuple(input_shape_ivalue, 
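// --- illustrative sketch (placeholder shape/dtype; public header assumed) ---
// What the tuple test below is about to do, condensed: box each
// torch_tensorrt::Input (a torch::CustomClassHolder) as an IValue, then group
// the boxes in a std::tuple shaped like the model's forward((x, y)).
#include "torch_tensorrt/torch_tensorrt.h"

static torch::jit::IValue make_tuple_signature(std::vector<int64_t> shape) {
  auto spec = torch_tensorrt::Input(shape, torch_tensorrt::DataType::kHalf);
  auto spec_iv = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(spec));
  std::tuple<torch::jit::IValue, torch::jit::IValue> sig(spec_iv, spec_iv);
  return torch::jit::IValue(sig); // one boxed Input per tuple slot
}
// --- end sketch ---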
input_shape_ivalue); @@ -73,10 +73,6 @@ TEST(CppAPITests, TestCollectionTupleInput) { compile_settings.require_full_compilation = false; compile_settings.min_block_size = 1; - // compile_settings.torch_executed_modules.push_back("model1"); - // compile_settings.torch_executed_ops.push_back("aten::sub"); - - // // FP16 execution // compile_settings.enabled_precisions = {torch::kHalf}; // // Compile module @@ -133,5 +129,78 @@ TEST(CppAPITests, TestCollectionNormalInput) { LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(inputs_); + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); +} + + + +TEST(CppAPITests, TestCollectionListInput) { + + std::string path = + "/root/Torch-TensorRT/list_input.ts"; + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + std::vector inputs; + inputs.push_back(in0); + + torch::jit::Module mod; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + mod = torch::jit::load(path); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + } + mod.eval(); + mod.to(torch::kCUDA); + + + std::vector inputs_; + + for (auto in : inputs) { + inputs_.push_back(torch::jit::IValue(in.clone())); + } + + std::vector complex_inputs; + auto input_list = c10::impl::GenericList(c10::TensorType::get()); + input_list.push_back(inputs_[0]); + input_list.push_back(inputs_[0]); + + torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); + + complex_inputs.push_back(input_list_ivalue); + + + auto out = mod.forward(complex_inputs); + LOG_DEBUG("Finish torchscirpt forward"); + + + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + + auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + + + c10::TypePtr elementType = input_shape_ivalue.type(); + auto list = c10::impl::GenericList(elementType); + list.push_back(input_shape_ivalue); + list.push_back(input_shape_ivalue); + + + torch::jit::IValue complex_input_shape(list); + std::tuple input_tuple2(complex_input_shape); + torch::jit::IValue complex_input_shape2(input_tuple2); + + auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); + compile_settings.require_full_compilation = false; + compile_settings.min_block_size = 1; + compile_settings.torch_executed_ops.push_back("aten::__getitem__"); + + // // FP16 execution + // compile_settings.enabled_precisions = {torch::kHalf}; + // // Compile module + auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); + LOG_DEBUG("Finish compile"); + auto trt_out = trt_mod.forward(complex_inputs); + // auto trt_out = trt_mod.forward(complex_inputs_list); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } \ No newline at end of file diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py index 03aa4aeb41..91eca4c854 100644 --- a/tests/py/test_collection.py +++ b/tests/py/test_collection.py @@ -75,4 +75,11 @@ def forward(self, z: List[torch.Tensor]): print(tuple_input_ts.graph) result = tuple_input_ts((input_data, input_data)) tuple_input_ts.to("cuda").eval() -torch.jit.save(tuple_input_ts, "./tuple_input.ts") \ No newline at end of file +torch.jit.save(tuple_input_ts, "./tuple_input.ts") + +list_input = ListInput() +list_input_ts = torch.jit.script(list_input) +print(list_input_ts.graph) +result = list_input_ts([input_data, input_data]) +list_input_ts.to("cuda").eval() 
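// --- illustrative sketch (placeholder shape/dtype and element count) ---
// Counterpart of the new list-input test: the boxed Input IValues go into a
// GenericList whose element type is taken from the first boxed value, while
// aten::__getitem__ is left to Torch via torch_executed_ops.
#include "torch_tensorrt/torch_tensorrt.h"

static torch::jit::IValue make_list_signature(std::vector<int64_t> shape, size_t n) {
  auto spec = torch_tensorrt::Input(shape, torch_tensorrt::DataType::kHalf);
  auto spec_iv = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(spec));
  c10::impl::GenericList list(spec_iv.type()); // element type = boxed Input class
  for (size_t i = 0; i < n; i++) {
    list.push_back(spec_iv);
  }
  return torch::jit::IValue(list);
}
// --- end sketch ---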
+torch.jit.save(list_input_ts, "./list_input.ts") \ No newline at end of file From a9aa2e74cf041ec35aa022f1af795d832c193176 Mon Sep 17 00:00:00 2001 From: inocsin Date: Wed, 16 Mar 2022 18:08:26 +0800 Subject: [PATCH 06/22] feat: [collection] support user defined input data type Signed-off-by: inocsin --- core/compiler.cpp | 16 ++++++++-- core/ir/ir.cpp | 14 +++++--- core/ir/ir.h | 2 +- core/partitioning/shape_analysis.cpp | 5 +-- cpp/src/compile_spec.cpp | 20 +----------- cpp/src/torch_tensorrt.cpp | 1 - tests/cpp/test_collection.cpp | 48 +++++++++++++--------------- 7 files changed, 51 insertions(+), 55 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index a431bcdae3..1c8ed34762 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -343,10 +343,22 @@ void MapInputsAndDetermineDTypes( spec[i].dtype = nvinfer1::DataType::kFLOAT; } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) { if (!est_type_opt[i]) { - LOG_INFO("Cannot infer input tensor dtype in graph, unable to verify user input dtype settings"); + LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting"); + // TODO set input data type + + std::stringstream ss; + ss << "For input " << in->debugName() << ", found user specified input dtype as "; + ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + // ss << cfg.convert_info.inputs.find(in)->second.dtype; + ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + auto warn_str = ss.str(); + LOG_WARNING(warn_str); + // Overwrite type map with user settings + first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; + } else { // if (util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype) != est_type_opt.value()) { - if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) { + if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) { std::stringstream ss; ss << "For input " << in->debugName() << ", found user specified input dtype as "; ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index a1a49ba3ca..52bd92a17f 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -250,10 +250,13 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* LOG_DEBUG("get_block_first_calc_dtypes_opt_collection TupleType"); // TODO: to evaluate the data type of tuple element // make sure very time get the same ptr + c10::optional tp = get_value_first_calc_dtype_opt(b, i); at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(i); LOG_DEBUG("get_block_first_calc_dtypes_opt_collection: tuple size " << unpack_tuple.size()); - std::vector> empty_dytpes(unpack_tuple.size()); - types.insert({i, empty_dytpes}); // insert an empty + // Assume all tuple has the same datatype + // std::vector> dytpes(unpack_tuple.size(), tp); + std::vector> dytpes(unpack_tuple.size()); + types.insert({i, dytpes}); // insert an empty // for (auto item: unpack_tuple) { // torch::jit::Value* in = item; // types.insert({in, get_value_first_calc_dtype_opt(b, i)}); @@ -261,8 +264,11 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* } else if(i->type()->kind() == torch::jit::TypeKind::ListType) { // TODO: to decide the size of list and type 
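// --- illustrative sketch (simplified stand-in types) ---
// Condensed restatement of the rule the compiler.cpp hunk above implements for
// each spec inside a collection slot: a user-defined dtype always wins and is
// written back into the first-use type map; otherwise the dtype inferred from
// the graph's first calculation is adopted, keeping the float default when
// neither is available.
#include <c10/core/ScalarType.h>
#include <c10/util/Optional.h>
#include <vector>

struct SpecD {
  c10::ScalarType dtype = c10::ScalarType::Float;
  bool dtype_is_user_defined = false;
};

static void resolve_dtypes(std::vector<SpecD>& specs,
                           std::vector<c10::optional<c10::ScalarType>>& first_use) {
  for (size_t i = 0; i < specs.size(); i++) {
    if (specs[i].dtype_is_user_defined) {
      first_use[i] = specs[i].dtype;    // user setting overrides the estimate
    } else if (first_use[i]) {
      specs[i].dtype = *first_use[i];   // fall back to the calculated type
    }                                   // else: keep the float default
  }
}
// --- end sketch ---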
of list element - LOG_DEBUG("get_block_first_calc_dtypes_opt ListType"); - types.insert({i, {}}); // insert an empty + LOG_DEBUG("get_block_first_calc_dtypes_opt ListType: use size " << i->uses().size()); + c10::optional tp = get_value_first_calc_dtype_opt(b, i); + // std::vector> dytpes(i->uses().size()); + std::vector> dytpes(i->uses().size(), tp); + types.insert({i, dytpes}); // insert an empty } } diff --git a/core/ir/ir.h b/core/ir/ir.h index 06e21fd53b..69c70263ed 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -52,7 +52,7 @@ struct GraphInputs { // // TODO construct the IValue // } torch::jit::IValue input_signature; // nested Input, full input spec - std::vector flattened_inputs; // flattend Input, can be removed + std::vector flattened_inputs; // flattend Input std::vector> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e) }; diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index a6459ebc6f..6d69275e3a 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -68,8 +68,9 @@ std::unordered_map generateRandomI // types for list is {} // auto in = generateSingleInput(input.second[i], types[input.first][i]); // TODO: need to decide the input type of list elements in ir.cpp - c10::optional type_opt = {}; - auto in = generateSingleInput(input.second[i], type_opt); + // c10::optional type_opt = {}; + // auto in = generateSingleInput(input.second[i], type_opt); + auto in = generateSingleInput(input.second[i], types[input.first][i]); // list.push_back(in.clone()); generic_list.push_back(in.clone()); LOG_DEBUG("generateRandomInputs, 1"); diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index beac217677..5e015e3a6e 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -90,25 +90,6 @@ void flatten_dfs(std::vector& flattened_inputs, std:: torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs external_graph_input) { torch_tensorrt::core::ir::GraphInputs internal_graph_input; - // // flattened version - // if (external_graph_input.flattened_inputs.size() > 0) { - // // std::vector input_shape_list; - // auto empty_ivalue = torch::jit::IValue(c10::make_intrusive(torchtrt::core::ir::Input())); - // c10::TypePtr type = empty_ivalue.type(); - // auto input_shape_list = c10::impl::GenericList(type); - // std::vector internal_input = to_vec_internal_inputs(external_graph_input.flattened_inputs); - // for (auto input_shape: internal_input) { - // auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - // input_shape_list.push_back(input_shape_ivalue); - // } - - // torch::jit::IValue input_signature(input_shape_list); - // internal_graph_input.flattened_inputs = internal_input; - // internal_graph_input.input_signature = input_signature; - - // } - // // nested version - // else { std::vector flattened_inputs; std::vector> collection_inputs; @@ -134,6 +115,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) { internal.graph_inputs.collection_inputs.resize(internal.inputs.size()); for (int i = 0; i < internal.inputs.size(); i++) { internal.graph_inputs.collection_inputs[i].push_back(internal.inputs[i]); + internal.graph_inputs.flattened_inputs = internal.inputs; } } diff --git a/cpp/src/torch_tensorrt.cpp b/cpp/src/torch_tensorrt.cpp index 9d6f271332..93813190ab 100644 --- a/cpp/src/torch_tensorrt.cpp +++ b/cpp/src/torch_tensorrt.cpp @@ -30,7 +30,6 @@ torch::jit::script::Module 
compile(const torch::jit::script::Module& module, Com LOG_DEBUG(get_build_info()); // Want to export a much simpler (non TRT header dependent) API so doing the // type conversion here - printf("in torch_tensorrt::ts::compile\n"); return torch_tensorrt::core::CompileGraph(module, to_internal_compile_spec(info)); } diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index 7d3b54152b..6ee0a78871 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -10,9 +10,10 @@ TEST(CppAPITests, TestCollectionTupleInput) { std::string path = "/root/Torch-TensorRT/tuple_input.ts"; - torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); - std::vector inputs; - inputs.push_back(in0); + // torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); + // std::vector inputs; + // inputs.push_back(in0); torch::jit::Module mod; try { @@ -23,13 +24,13 @@ TEST(CppAPITests, TestCollectionTupleInput) { } mod.eval(); mod.to(torch::kCUDA); - - std::vector inputs_; - for (auto in : inputs) { - inputs_.push_back(torch::jit::IValue(in.clone())); - } + // std::vector inputs_; + + // for (auto in : inputs) { + // inputs_.push_back(torch::jit::IValue(in.clone())); + // } std::vector complex_inputs, complex_inputs_list; @@ -42,16 +43,12 @@ TEST(CppAPITests, TestCollectionTupleInput) { // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); complex_inputs.push_back(input_tuple); - // complex_inputs_list.push_back(in0); - // complex_inputs_list.push_back(in0); - - auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - - auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); @@ -63,7 +60,6 @@ TEST(CppAPITests, TestCollectionTupleInput) { std::tuple input_shape_tuple(input_shape_ivalue, input_shape_ivalue); - torch::jit::IValue complex_input_shape(input_shape_tuple); std::tuple input_tuple2(complex_input_shape); torch::jit::IValue complex_input_shape2(input_tuple2); @@ -74,13 +70,12 @@ TEST(CppAPITests, TestCollectionTupleInput) { compile_settings.min_block_size = 1; // // FP16 execution - // compile_settings.enabled_precisions = {torch::kHalf}; + compile_settings.enabled_precisions = {torch::kHalf}; // // Compile module auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // auto trt_out = trt_mod.forward(complex_inputs_list); - + // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } @@ -90,7 +85,7 @@ TEST(CppAPITests, TestCollectionNormalInput) { std::string path = "/root/Torch-TensorRT/normal_model.ts"; - torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); inputs.push_back(in0); @@ -116,14 +111,14 @@ TEST(CppAPITests, TestCollectionNormalInput) { LOG_DEBUG("Finish torchscirpt forward"); std::vector input_range; - 
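// --- illustrative sketch (placeholder shape; error handling omitted) ---
// The FP16 configuration these tests switch to, in one helper: half-precision
// input specs (DataType::kHalf) plus torch::kHalf in enabled_precisions, so
// TensorRT may select FP16 kernels.
#include "torch_tensorrt/torch_tensorrt.h"

static torch::jit::Module compile_fp16(torch::jit::Module& mod, std::vector<int64_t> shape) {
  std::vector<torch_tensorrt::Input> specs;
  specs.emplace_back(shape, torch_tensorrt::DataType::kHalf);
  auto settings = torch_tensorrt::ts::CompileSpec(specs);
  settings.enabled_precisions = {torch::kHalf}; // allow FP16 kernel selection
  settings.min_block_size = 1;
  return torch_tensorrt::torchscript::compile(mod, settings);
}
// --- end sketch ---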
input_range.push_back({in0.sizes(), torch::kF32}); - input_range.push_back({in0.sizes(), torch::kF32}); + input_range.push_back({in0.sizes(), torch::kF16}); + input_range.push_back({in0.sizes(), torch::kF16}); torch_tensorrt::ts::CompileSpec compile_settings(input_range); compile_settings.require_full_compilation = true; compile_settings.min_block_size = 1; // // FP16 execution - // compile_settings.enabled_precisions = {torch::kHalf}; + compile_settings.enabled_precisions = {torch::kHalf}; // // Compile module auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); @@ -138,7 +133,7 @@ TEST(CppAPITests, TestCollectionListInput) { std::string path = "/root/Torch-TensorRT/list_input.ts"; - torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -173,7 +168,8 @@ TEST(CppAPITests, TestCollectionListInput) { LOG_DEBUG("Finish torchscirpt forward"); - auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); @@ -194,13 +190,13 @@ TEST(CppAPITests, TestCollectionListInput) { compile_settings.torch_executed_ops.push_back("aten::__getitem__"); // // FP16 execution - // compile_settings.enabled_precisions = {torch::kHalf}; + compile_settings.enabled_precisions = {torch::kHalf}; // // Compile module auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); // auto trt_out = trt_mod.forward(complex_inputs_list); - + // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } \ No newline at end of file From 5830cbe99ea488a1f082f88b14b8d9873003c4a8 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 17 Mar 2022 16:46:30 +0800 Subject: [PATCH 07/22] feat: [collection] support output type of list and tuple Signed-off-by: inocsin --- tests/cpp/test_collection.cpp | 180 +++++++++++++++++++++++++++++++--- tests/py/test_collection.py | 35 ++++++- 2 files changed, 201 insertions(+), 14 deletions(-) diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index 6ee0a78871..0533b7ae77 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -6,6 +6,52 @@ #include "torch_tensorrt/torch_tensorrt.h" +TEST(CppAPITests, TestCollectionNormalInput) { + + std::string path = + "/root/Torch-TensorRT/normal_model.ts"; + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); + std::vector inputs; + inputs.push_back(in0); + inputs.push_back(in0); + + torch::jit::Module mod; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). 
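// --- illustrative sketch (placeholder path) ---
// Every test in this file follows the same harness recipe sketched here:
// deserialize a scripted module, move it to CUDA in eval mode, and run
// forward on cloned tensors to produce the reference output.
#include <torch/script.h>
#include <string>
#include <vector>

static c10::IValue run_reference(const std::string& path, const std::vector<at::Tensor>& tensors) {
  torch::jit::Module mod = torch::jit::load(path); // throws c10::Error on failure
  mod.eval();
  mod.to(torch::kCUDA);
  std::vector<torch::jit::IValue> inputs;
  for (const auto& t : tensors) {
    inputs.emplace_back(t.clone());
  }
  return mod.forward(inputs);
}
// --- end sketch ---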
+ mod = torch::jit::load(path); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + } + mod.eval(); + mod.to(torch::kCUDA); + + + std::vector inputs_; + + for (auto in : inputs) { + inputs_.push_back(torch::jit::IValue(in.clone())); + } + + auto out = mod.forward(inputs_); + LOG_DEBUG("Finish torchscirpt forward"); + + std::vector input_range; + input_range.push_back({in0.sizes(), torch::kF16}); + input_range.push_back({in0.sizes(), torch::kF16}); + torch_tensorrt::ts::CompileSpec compile_settings(input_range); + compile_settings.require_full_compilation = true; + compile_settings.min_block_size = 1; + + // // FP16 execution + compile_settings.enabled_precisions = {torch::kHalf}; + // // Compile module + auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); + LOG_DEBUG("Finish compile"); + auto trt_out = trt_mod.forward(inputs_); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); +} + TEST(CppAPITests, TestCollectionTupleInput) { std::string path = @@ -81,14 +127,13 @@ TEST(CppAPITests, TestCollectionTupleInput) { } -TEST(CppAPITests, TestCollectionNormalInput) { +TEST(CppAPITests, TestCollectionListInput) { std::string path = - "/root/Torch-TensorRT/normal_model.ts"; + "/root/Torch-TensorRT/list_input.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); - inputs.push_back(in0); torch::jit::Module mod; try { @@ -107,32 +152,136 @@ TEST(CppAPITests, TestCollectionNormalInput) { inputs_.push_back(torch::jit::IValue(in.clone())); } - auto out = mod.forward(inputs_); + std::vector complex_inputs; + auto input_list = c10::impl::GenericList(c10::TensorType::get()); + input_list.push_back(inputs_[0]); + input_list.push_back(inputs_[0]); + + torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); + + complex_inputs.push_back(input_list_ivalue); + + + auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - std::vector input_range; - input_range.push_back({in0.sizes(), torch::kF16}); - input_range.push_back({in0.sizes(), torch::kF16}); - torch_tensorrt::ts::CompileSpec compile_settings(input_range); - compile_settings.require_full_compilation = true; + + // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); + + auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + + + c10::TypePtr elementType = input_shape_ivalue.type(); + auto list = c10::impl::GenericList(elementType); + list.push_back(input_shape_ivalue); + list.push_back(input_shape_ivalue); + + + torch::jit::IValue complex_input_shape(list); + std::tuple input_tuple2(complex_input_shape); + torch::jit::IValue complex_input_shape2(input_tuple2); + + auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); + compile_settings.require_full_compilation = false; compile_settings.min_block_size = 1; + compile_settings.torch_executed_ops.push_back("aten::__getitem__"); // // FP16 execution compile_settings.enabled_precisions = {torch::kHalf}; // // Compile module auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); - auto trt_out = trt_mod.forward(inputs_); + auto trt_out = trt_mod.forward(complex_inputs); + // auto trt_out = trt_mod.forward(complex_inputs_list); + // std::cout << out.toTensor() << 
std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } +TEST(CppAPITests, TestCollectionTupleInputOutput) { -TEST(CppAPITests, TestCollectionListInput) { + std::string path = + "/root/Torch-TensorRT/tuple_input_output.ts"; + // torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); + // std::vector inputs; + // inputs.push_back(in0); + + torch::jit::Module mod; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + mod = torch::jit::load(path); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + } + mod.eval(); + mod.to(torch::kCUDA); + + + // std::vector inputs_; + + // for (auto in : inputs) { + // inputs_.push_back(torch::jit::IValue(in.clone())); + // } + + + std::vector complex_inputs, complex_inputs_list; + // std::vector tuple; + std::tuple input_tuple(in0, in0); + // auto input_list = c10::impl::GenericList(c10::TensorType::get()); + // input_list.push_back(inputs_[0]); + // input_list.push_back(inputs_[0]); + + // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); + + complex_inputs.push_back(input_tuple); + + auto out = mod.forward(complex_inputs); + LOG_DEBUG("Finish torchscirpt forward"); + + // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); + + auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + + + // c10::TypePtr elementType = input_shape_ivalue.type(); + // auto list = c10::impl::GenericList(elementType); + // list.push_back(input_shape_ivalue); + // list.push_back(input_shape_ivalue); + + std::tuple input_shape_tuple(input_shape_ivalue, input_shape_ivalue); + + torch::jit::IValue complex_input_shape(input_shape_tuple); + std::tuple input_tuple2(complex_input_shape); + torch::jit::IValue complex_input_shape2(input_tuple2); + // torch::jit::IValue complex_input_shape(list); + + auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); + compile_settings.require_full_compilation = false; + compile_settings.min_block_size = 1; + + // compile_settings.torch_executed_ops.push_back("prim::TupleConstruct"); + + // // FP16 execution + compile_settings.enabled_precisions = {torch::kHalf}; + // // Compile module + auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); + LOG_DEBUG("Finish compile"); + auto trt_out = trt_mod.forward(complex_inputs); + // std::cout << out.toTensor() << std::endl; + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); +} + + +TEST(CppAPITests, TestCollectionListInputOutput) { std::string path = - "/root/Torch-TensorRT/list_input.ts"; + "/root/Torch-TensorRT/list_input_output.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -187,7 +336,10 @@ TEST(CppAPITests, TestCollectionListInput) { auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); compile_settings.require_full_compilation = false; compile_settings.min_block_size = 1; + + // Need to skip the 
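// --- illustrative sketch (torch::allclose stands in for the repo's almostEqual) ---
// The assertions in these output tests unpack tuple results with
// toTuple()->elements() and list results with toList().vec(); the same
// element-by-element comparison as standalone helpers:
#include <torch/script.h>

static bool tuple_outputs_match(const c10::IValue& a, const c10::IValue& b) {
  const auto& ea = a.toTuple()->elements();
  const auto& eb = b.toTuple()->elements();
  if (ea.size() != eb.size()) return false;
  for (size_t i = 0; i < ea.size(); i++) {
    if (!torch::allclose(ea[i].toTensor(), eb[i].toTensor(), 1e-5, 1e-5)) return false;
  }
  return true;
}

static bool list_outputs_match(const c10::IValue& a, const c10::IValue& b) {
  auto va = a.toList().vec();
  auto vb = b.toList().vec();
  if (va.size() != vb.size()) return false;
  for (size_t i = 0; i < va.size(); i++) {
    if (!torch::allclose(va[i].toTensor(), vb[i].toTensor(), 1e-5, 1e-5)) return false;
  }
  return true;
}
// --- end sketch ---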
conversion of __getitem__ and ListConstruct compile_settings.torch_executed_ops.push_back("aten::__getitem__"); + compile_settings.torch_executed_ops.push_back("prim::ListConstruct"); // // FP16 execution compile_settings.enabled_precisions = {torch::kHalf}; @@ -198,5 +350,7 @@ TEST(CppAPITests, TestCollectionListInput) { // auto trt_out = trt_mod.forward(complex_inputs_list); // std::cout << out.toTensor() << std::endl; - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5)); } \ No newline at end of file diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py index 91eca4c854..41c074e6f7 100644 --- a/tests/py/test_collection.py +++ b/tests/py/test_collection.py @@ -59,6 +59,25 @@ def forward(self, z: List[torch.Tensor]): r = z[0] + z[1] return r +class TupleInputOutput(nn.Module): + def __init__(self): + super(TupleInputOutput, self).__init__() + + def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r = (r1, r2) + return r + +class ListInputOutput(nn.Module): + def __init__(self): + super(ListInputOutput, self).__init__() + + def forward(self, z: List[torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r = [r1, r2] + return r input_data = torch.randn((16, 3, 32, 32)) input_data = input_data.float().to("cuda") @@ -82,4 +101,18 @@ def forward(self, z: List[torch.Tensor]): print(list_input_ts.graph) result = list_input_ts([input_data, input_data]) list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "./list_input.ts") \ No newline at end of file +torch.jit.save(list_input_ts, "./list_input.ts") + +tuple_input = TupleInputOutput() +tuple_input_ts = torch.jit.script(tuple_input) +print(tuple_input_ts.graph) +result = tuple_input_ts((input_data, input_data)) +tuple_input_ts.to("cuda").eval() +torch.jit.save(tuple_input_ts, "./tuple_input_output.ts") + +list_input = ListInputOutput() +list_input_ts = torch.jit.script(list_input) +print(list_input_ts.graph) +result = list_input_ts([input_data, input_data]) +list_input_ts.to("cuda").eval() +torch.jit.save(list_input_ts, "./list_input_output.ts") \ No newline at end of file From 6733cfb8d0c1302beb655aec9ce7cfcf7202f756 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 17 Mar 2022 17:33:11 +0800 Subject: [PATCH 08/22] feat: [collection] add unit test for complex collection model Signed-off-by: inocsin --- tests/cpp/test_collection.cpp | 78 +++++++++++++++++++++++++++++++++++ tests/py/test_collection.py | 25 ++++++++++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index 0533b7ae77..d7948b1a6c 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -353,4 +353,82 @@ TEST(CppAPITests, TestCollectionListInputOutput) { ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5)); +} + + +TEST(CppAPITests, TestCollectionComplexModel) { + + std::string path = + "/root/Torch-TensorRT/complex_model.ts"; + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, 
torch::kCUDA).to(torch::kHalf); + std::vector inputs; + inputs.push_back(in0); + + torch::jit::Module mod; + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + mod = torch::jit::load(path); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + } + mod.eval(); + mod.to(torch::kCUDA); + + + std::vector inputs_; + + for (auto in : inputs) { + inputs_.push_back(torch::jit::IValue(in.clone())); + } + + std::vector complex_inputs; + auto input_list = c10::impl::GenericList(c10::TensorType::get()); + input_list.push_back(inputs_[0]); + input_list.push_back(inputs_[0]); + + torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); + + complex_inputs.push_back(input_list_ivalue); + + + auto out = mod.forward(complex_inputs); + LOG_DEBUG("Finish torchscirpt forward"); + + + // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); + auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); + + auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); + + + c10::TypePtr elementType = input_shape_ivalue.type(); + auto list = c10::impl::GenericList(elementType); + list.push_back(input_shape_ivalue); + list.push_back(input_shape_ivalue); + + + torch::jit::IValue complex_input_shape(list); + std::tuple input_tuple2(complex_input_shape); + torch::jit::IValue complex_input_shape2(input_tuple2); + + auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); + compile_settings.require_full_compilation = false; + compile_settings.min_block_size = 1; + + // Need to skip the conversion of __getitem__ and ListConstruct + compile_settings.torch_executed_ops.push_back("aten::__getitem__"); + compile_settings.torch_executed_ops.push_back("prim::ListConstruct"); + + // // FP16 execution + compile_settings.enabled_precisions = {torch::kHalf}; + // // Compile module + auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); + LOG_DEBUG("Finish compile"); + auto trt_out = trt_mod.forward(complex_inputs); + // auto trt_out = trt_mod.forward(complex_inputs_list); + + // std::cout << out.toTuple()->elements()[0].toTensor() << std::endl; + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); } \ No newline at end of file diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py index 41c074e6f7..d23a12b349 100644 --- a/tests/py/test_collection.py +++ b/tests/py/test_collection.py @@ -79,6 +79,22 @@ def forward(self, z: List[torch.Tensor]): r = [r1, r2] return r +class ComplexModel(nn.Module): + def __init__(self): + super(ComplexModel, self).__init__() + self.list_model = ListInputOutput() + self.tuple_model = TupleInputOutput() + + def forward(self, z: List[torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r3 = (r1, r2) + r4 = [r2, r1] + tuple_out = self.tuple_model(r3) + list_out = self.list_model(r4) + r = (tuple_out[1], list_out[0]) + return r + input_data = torch.randn((16, 3, 32, 32)) input_data = input_data.float().to("cuda") @@ -115,4 +131,11 @@ def forward(self, z: List[torch.Tensor]): print(list_input_ts.graph) result = list_input_ts([input_data, input_data]) list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "./list_input_output.ts") \ No 
newline at end of file +torch.jit.save(list_input_ts, "./list_input_output.ts") + +complex_model = ComplexModel() +complex_model_ts = torch.jit.script(complex_model) +print(complex_model_ts.graph) +result = complex_model_ts([input_data, input_data]) +complex_model_ts.to("cuda").eval() +torch.jit.save(complex_model_ts, "./complex_model.ts") \ No newline at end of file From d21b0ab143413b97af6a43233d2f156984bc2878 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 17 Mar 2022 18:44:36 +0800 Subject: [PATCH 09/22] chore: [collection] delete comments Signed-off-by: inocsin --- core/compiler.cpp | 16 ++-- core/ir/ir.cpp | 35 +-------- core/ir/ir.h | 11 --- core/partitioning/shape_analysis.cpp | 24 +----- cpp/include/torch_tensorrt/torch_tensorrt.h | 1 - tests/cpp/test_collection.cpp | 85 ++------------------- tests/py/test_collection.py | 31 -------- 7 files changed, 19 insertions(+), 184 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 1c8ed34762..d16796bd8e 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -307,14 +307,14 @@ void MapInputsAndDetermineDTypes( ir::StaticParams& static_params, ir::CollectionTypeMap& first_use_type_map) { // ir::TypeMap& first_use_type_map) { - // Associate input specs with inputs - // cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params)); - cfg.convert_info.collection_inputs = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params)); - - auto collection_inputs = ir::get_collection_inputs(g, static_params); - LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", collection_inputs size " << collection_inputs.size()); - // for (auto& in : g->inputs()) { - // if (static_params.find(in) == static_params.end()) { + // Associate input specs with inputs + // cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params)); + cfg.convert_info.collection_inputs = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params)); + + auto collection_inputs = ir::get_collection_inputs(g, static_params); + LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", collection_inputs size " << collection_inputs.size()); + // for (auto& in : g->inputs()) { + // if (static_params.find(in) == static_params.end()) { for (auto in : collection_inputs) { std::vector& spec = cfg.convert_info.collection_inputs.find(in)->second; // ir::Input& spec = cfg.convert_info.inputs.find(in)->second; diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index 52bd92a17f..bbc8239097 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -67,28 +67,6 @@ std::vector get_tensor_inputs( if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); } - // else if (in->type()->cast() && static_params.find(in) == static_params.end()) { - // // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) { - // at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(in); - // LOG_DEBUG("Tuple size " << unpack_tuple.size()); - // for (auto item: unpack_tuple) { - // input_tensors.push_back(in); - // } - // } else if (in->type()->isSubtypeOf(c10::ListType::ofTensors()) && static_params.find(in) == static_params.end()) { - - // LOG_DEBUG("List use size " << in->uses().size()); - // // for (auto use : in->uses()) { - // // 
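// --- illustrative sketch (static-parameter filtering elided for brevity) ---
// Restating the filter get_collection_inputs applies after this cleanup: keep
// graph inputs that are tensors, tuples, or lists and that are not static
// parameters. The Graph/Value/type-kind calls are the stock TorchScript APIs.
#include <torch/script.h>
#include <vector>

static std::vector<torch::jit::Value*> collection_inputs(std::shared_ptr<torch::jit::Graph>& g) {
  std::vector<torch::jit::Value*> out;
  for (auto in : g->inputs()) {
    auto kind = in->type()->kind();
    if (in->type()->isSubtypeOf(c10::TensorType::get()) ||
        kind == c10::TypeKind::TupleType ||
        kind == c10::TypeKind::ListType) { // static params would be filtered out too
      out.push_back(in);
    }
  }
  return out;
}
// --- end sketch ---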
LOG_DEBUG(use.user->outputs()[0]->debugName()); - // // } - // // TODO: set the correct list number according to the Input IValue - // int n = 2; - // auto unpack_node = g->createListUnpack(in, n); - // g->block()->appendNode(unpack_node); - // for (auto item: unpack_node->outputs()) { - // input_tensors.push_back(item); - // } - // LOG_DEBUG("Unpack List of size " << n); - // } } return input_tensors; } @@ -101,11 +79,6 @@ std::vector get_collection_inputs( LOG_DEBUG("get_collection_inputs, inputs size " << inputs.size()); for (auto in : inputs) { LOG_DEBUG("input debug name: " << in->debugName()); - // Disregarding inputs that are not tensors or are static - // - // Ex. - // self.1:__torch__.alexnet -> ignored - // input.1:Tensor -> used if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); } else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) { @@ -246,6 +219,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* if (i->type() == c10::TensorType::get()) { torch::jit::Value* in = i; types.insert({in, {get_value_first_calc_dtype_opt(b, i)}}); + } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) { LOG_DEBUG("get_block_first_calc_dtypes_opt_collection TupleType"); // TODO: to evaluate the data type of tuple element @@ -253,14 +227,10 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* c10::optional tp = get_value_first_calc_dtype_opt(b, i); at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(i); LOG_DEBUG("get_block_first_calc_dtypes_opt_collection: tuple size " << unpack_tuple.size()); - // Assume all tuple has the same datatype + // TODO: calculate the tuple element type // std::vector> dytpes(unpack_tuple.size(), tp); std::vector> dytpes(unpack_tuple.size()); types.insert({i, dytpes}); // insert an empty - // for (auto item: unpack_tuple) { - // torch::jit::Value* in = item; - // types.insert({in, get_value_first_calc_dtype_opt(b, i)}); - // } } else if(i->type()->kind() == torch::jit::TypeKind::ListType) { // TODO: to decide the size of list and type of list element @@ -269,7 +239,6 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* // std::vector> dytpes(i->uses().size()); std::vector> dytpes(i->uses().size(), tp); types.insert({i, dytpes}); // insert an empty - } } return types; diff --git a/core/ir/ir.h b/core/ir/ir.h index 69c70263ed..a66aaf7d33 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -12,8 +12,6 @@ namespace core { namespace ir { struct Input : torch::CustomClassHolder { - // Input(std::vector shape); - // Input(std::vector min_shape, std::vector opt_shape, std::vector max_shape); Input() {}; Input( std::vector shape, @@ -42,15 +40,6 @@ struct Input : torch::CustomClassHolder { // Add to spec struct GraphInputs { -// GraphInputs() {} -// GraphInputs(torch::jit::IValue inputs) { -// input_signature = inputs; -// // TODO flatten IValue -// } - // GraphInputs(std::vector inputs) { - // flattened_inputs = inputs; - // // TODO construct the IValue - // } torch::jit::IValue input_signature; // nested Input, full input spec std::vector flattened_inputs; // flattend Input std::vector> collection_inputs; // only support two layer nesting, e.g. 
((a, b), [c, d], e) diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 6d69275e3a..1d330cc3d8 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -56,34 +56,18 @@ std::unordered_map generateRandomI if (input.first->type()->kind() == torch::jit::TypeKind::ListType) { // create list - // auto list = c10::impl::GenericList(c10::TensorType::get()); - // list.append(ivalues_maps[input]); LOG_DEBUG("generateRandomInputs, generate random input of list type"); - // jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); std::vector list; c10::TypePtr elementType = c10::TensorType::get(); auto generic_list = c10::impl::GenericList(elementType); - LOG_DEBUG("generateRandomInputs, 0"); for (int i = 0; i < input.second.size(); i++) { - // types for list is {} - // auto in = generateSingleInput(input.second[i], types[input.first][i]); - // TODO: need to decide the input type of list elements in ir.cpp - // c10::optional type_opt = {}; - // auto in = generateSingleInput(input.second[i], type_opt); auto in = generateSingleInput(input.second[i], types[input.first][i]); - // list.push_back(in.clone()); generic_list.push_back(in.clone()); - LOG_DEBUG("generateRandomInputs, 1"); } - // c10::TypePtr elementType = list[0].type(); - LOG_DEBUG("generateRandomInputs, 2"); - // generic_list.append(list); ivalue_map[input.first] = c10::IValue(generic_list); - // jit_inputs_ivalues.push_back(list); LOG_DEBUG("generateRandomInputs, finish generate random input of list type"); } else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) { // create tuple - // auto tuple = torch::jit::Tuple::create(ivalues_maps[input]); LOG_DEBUG("generateRandomInputs, generate random input of tuple type"); std::vector list; for (int i = 0; i < input.second.size(); i++) { @@ -91,9 +75,7 @@ std::unordered_map generateRandomI list.push_back(in.clone()); } auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr - ivalue_map[input.first] = c10::IValue(tuple); - // jit_inputs_ivalues.push_back(tuple); } else { LOG_DEBUG("generateRandomInputs, generate random input of tensor type"); auto in = generateSingleInput(input.second[0], types[input.first][0]); @@ -152,14 +134,10 @@ void getSegmentsOutputByRunning( jit_inputs_ivalues.push_back(ivalues_maps[input].toBool()); } else if (input->type()->kind() == torch::jit::TypeKind::ListType) { // create list - // auto list = c10::impl::GenericList(c10::TensorType::get()); - // list.append(ivalues_maps[input]); LOG_DEBUG("getSegmentsOutputByRunning, handle list type"); - jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); - // jit_inputs_ivalues.push_back(list); + jit_inputs_ivalues.push_back(ivalues_maps[input].toList());; } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) { // create tuple - // auto tuple = torch::jit::Tuple::create(ivalues_maps[input]); LOG_DEBUG("getSegmentsOutputByRunning, handle tuple type"); jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple()); } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) { diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index 1ee8dde3c9..6da7534987 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -521,7 +521,6 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{ */ struct TORCHTRT_API GraphInputs { torch::jit::IValue input_signature; // nested Input, full input spec - // 
std::vector flattened_inputs; // flattend Input }; /** diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index d7948b1a6c..f647af2c8c 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -8,8 +8,7 @@ TEST(CppAPITests, TestCollectionNormalInput) { - std::string path = - "/root/Torch-TensorRT/normal_model.ts"; + std::string path = "/root/Torch-TensorRT/normal_model.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -54,12 +53,8 @@ TEST(CppAPITests, TestCollectionNormalInput) { TEST(CppAPITests, TestCollectionTupleInput) { - std::string path = - "/root/Torch-TensorRT/tuple_input.ts"; - // torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + std::string path = "/root/Torch-TensorRT/tuple_input.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); - // std::vector inputs; - // inputs.push_back(in0); torch::jit::Module mod; try { @@ -71,45 +66,25 @@ TEST(CppAPITests, TestCollectionTupleInput) { mod.eval(); mod.to(torch::kCUDA); - - // std::vector inputs_; - - // for (auto in : inputs) { - // inputs_.push_back(torch::jit::IValue(in.clone())); - // } - - std::vector complex_inputs, complex_inputs_list; - // std::vector tuple; std::tuple input_tuple(in0, in0); - // auto input_list = c10::impl::GenericList(c10::TensorType::get()); - // input_list.push_back(inputs_[0]); - // input_list.push_back(inputs_[0]); - - // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); complex_inputs.push_back(input_tuple); auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - // c10::TypePtr elementType = input_shape_ivalue.type(); - // auto list = c10::impl::GenericList(elementType); - // list.push_back(input_shape_ivalue); - // list.push_back(input_shape_ivalue); - std::tuple input_shape_tuple(input_shape_ivalue, input_shape_ivalue); torch::jit::IValue complex_input_shape(input_shape_tuple); std::tuple input_tuple2(complex_input_shape); torch::jit::IValue complex_input_shape2(input_tuple2); - // torch::jit::IValue complex_input_shape(list); + auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2); compile_settings.require_full_compilation = false; @@ -121,7 +96,6 @@ TEST(CppAPITests, TestCollectionTupleInput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } @@ -129,8 +103,7 @@ TEST(CppAPITests, TestCollectionTupleInput) { TEST(CppAPITests, TestCollectionListInput) { - std::string path = - "/root/Torch-TensorRT/list_input.ts"; + std::string path = "/root/Torch-TensorRT/list_input.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -165,13 +138,9 @@ TEST(CppAPITests, TestCollectionListInput) { auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - - // auto input_shape = torch_tensorrt::Input(in0.sizes(), 
torch_tensorrt::DataType::kUnknown); auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); - auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - c10::TypePtr elementType = input_shape_ivalue.type(); auto list = c10::impl::GenericList(elementType); list.push_back(input_shape_ivalue); @@ -193,21 +162,16 @@ TEST(CppAPITests, TestCollectionListInput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // auto trt_out = trt_mod.forward(complex_inputs_list); - // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); } TEST(CppAPITests, TestCollectionTupleInputOutput) { - std::string path = - "/root/Torch-TensorRT/tuple_input_output.ts"; - // torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat); + std::string path = "/root/Torch-TensorRT/tuple_input_output.ts"; + torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); - // std::vector inputs; - // inputs.push_back(in0); torch::jit::Module mod; try { @@ -220,38 +184,19 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { mod.to(torch::kCUDA); - // std::vector inputs_; - - // for (auto in : inputs) { - // inputs_.push_back(torch::jit::IValue(in.clone())); - // } - - std::vector complex_inputs, complex_inputs_list; - // std::vector tuple; std::tuple input_tuple(in0, in0); - // auto input_list = c10::impl::GenericList(c10::TensorType::get()); - // input_list.push_back(inputs_[0]); - // input_list.push_back(inputs_[0]); - - // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list); complex_inputs.push_back(input_tuple); auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - // c10::TypePtr elementType = input_shape_ivalue.type(); - // auto list = c10::impl::GenericList(elementType); - // list.push_back(input_shape_ivalue); - // list.push_back(input_shape_ivalue); - std::tuple input_shape_tuple(input_shape_ivalue, input_shape_ivalue); torch::jit::IValue complex_input_shape(input_shape_tuple); @@ -271,7 +216,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); @@ -280,8 +224,7 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { TEST(CppAPITests, TestCollectionListInputOutput) { - std::string path = - "/root/Torch-TensorRT/list_input_output.ts"; + std::string path = "/root/Torch-TensorRT/list_input_output.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -316,8 +259,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) { auto out = 
mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - - // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); @@ -347,9 +288,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // auto trt_out = trt_mod.forward(complex_inputs_list); - - // std::cout << out.toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5)); @@ -358,8 +296,7 @@ TEST(CppAPITests, TestCollectionListInputOutput) { TEST(CppAPITests, TestCollectionComplexModel) { - std::string path = - "/root/Torch-TensorRT/complex_model.ts"; + std::string path = "/root/Torch-TensorRT/complex_model.ts"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -394,13 +331,10 @@ TEST(CppAPITests, TestCollectionComplexModel) { auto out = mod.forward(complex_inputs); LOG_DEBUG("Finish torchscirpt forward"); - - // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown); auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf); auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive(input_shape))); - c10::TypePtr elementType = input_shape_ivalue.type(); auto list = c10::impl::GenericList(elementType); list.push_back(input_shape_ivalue); @@ -425,9 +359,6 @@ TEST(CppAPITests, TestCollectionComplexModel) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - // auto trt_out = trt_mod.forward(complex_inputs_list); - - // std::cout << out.toTuple()->elements()[0].toTensor() << std::endl; ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py index d23a12b349..e71d754505 100644 --- a/tests/py/test_collection.py +++ b/tests/py/test_collection.py @@ -4,37 +4,6 @@ import torch.nn.functional as F from typing import Tuple, List, Dict -# class Model1(nn.Module): -# def __init__(self): -# super(Model1, self).__init__() - -# def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): -# r = z[1] + z[0] -# return r, z[1] - - -# class TestModel1(nn.Module): -# def __init__(self): -# super(TestModel, self).__init__() -# self.model1 = Model1() - -# def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): -# r2, r1 = self.model1((z[0], z[1])) -# # unsupport ops -# i = r2.size(1) -# j = r2.size(2) -# # r3 = torch.tensor(i) * torch.tensor(j) -# r3 = r2[0,0,0,0] -# k = int(r3) - 5 - -# # if k > 0: -# r = r1 - k -# result = (r, r1) -# # else: -# # r = r1 - k -# # result = (r1, r) -# return result - class Normal(nn.Module): def __init__(self): super(Normal, self).__init__() From 
eada66db06727bef0f7fce705a23fb96729defe5 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 31 Mar 2022 19:03:27 +0800 Subject: [PATCH 10/22] chore: [collection] update code and comments Signed-off-by: inocsin --- core/compiler.cpp | 23 ++++--------------- core/ir/ir.cpp | 21 ++++++------------ core/lowering/lowering.cpp | 1 - core/partitioning/shape_analysis.cpp | 33 +++------------------------- cpp/src/compile_spec.cpp | 8 +++---- 5 files changed, 18 insertions(+), 68 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index d16796bd8e..1d97139041 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -306,19 +306,13 @@ void MapInputsAndDetermineDTypes( std::shared_ptr& g, ir::StaticParams& static_params, ir::CollectionTypeMap& first_use_type_map) { - // ir::TypeMap& first_use_type_map) { - // Associate input specs with inputs - // cfg.convert_info.inputs = std::move(ir::associate_specs_with_inputs(g, cfg.inputs, static_params)); cfg.convert_info.collection_inputs = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params)); auto collection_inputs = ir::get_collection_inputs(g, static_params); - LOG_DEBUG("In MapInputsAndDetermineDTypes " << "g->inputs() size " << g->inputs().size() << ", collection_inputs size " << collection_inputs.size()); - // for (auto& in : g->inputs()) { - // if (static_params.find(in) == static_params.end()) { + LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is " << collection_inputs.size()); + for (auto in : collection_inputs) { std::vector& spec = cfg.convert_info.collection_inputs.find(in)->second; - // ir::Input& spec = cfg.convert_info.inputs.find(in)->second; - // c10::optional est_type_opt = {}; std::vector> est_type_opt; auto est_it = first_use_type_map.find(in); @@ -331,9 +325,8 @@ void MapInputsAndDetermineDTypes( // If we can calculate the type from the graph and the type was not defined by the user then use the calculated // type LOG_INFO( - "Since input type is not explicitly defined, infering using first tensor calculation\n Found input " - << in->debugName() << " has type " << est_type_opt[i].value() - << ". If this is incorrect explicitly set dtype for input and file a bug"); + "Since input type is not explicitly defined, inferring using first tensor calculation\n Inferred input " + << in->debugName() << " has type " << est_type_opt[i].value()); spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value()); } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) { // If we cannot calculate the type and the user did not define the type, then default to FP32 @@ -344,12 +337,9 @@ void MapInputsAndDetermineDTypes( } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) { if (!est_type_opt[i]) { LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting"); - // TODO set input data type - std::stringstream ss; ss << "For input " << in->debugName() << ", found user specified input dtype as "; ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; - // ss << cfg.convert_info.inputs.find(in)->second.dtype; ss << ". 
The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; auto warn_str = ss.str(); LOG_WARNING(warn_str); @@ -357,15 +347,12 @@ void MapInputsAndDetermineDTypes( first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; } else { - // if (util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype) != est_type_opt.value()) { if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) { std::stringstream ss; ss << "For input " << in->debugName() << ", found user specified input dtype as "; ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; - // ss << cfg.convert_info.inputs.find(in)->second.dtype; ss << ", however when inspecting the graph, the input type expected was inferred to be "; ss << est_type_opt[i].value() << std::endl; - // ss << "The compiler is going to use the user setting " << cfg.convert_info.inputs.find(in)->second.dtype; ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n"; ss << "compatibility with PyTorch's data type convention is required.\n"; @@ -375,7 +362,6 @@ void MapInputsAndDetermineDTypes( auto warn_str = ss.str(); LOG_WARNING(warn_str); // Overwrite type map with user settings - // first_use_type_map[in] = {util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype)}; first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; } } @@ -447,7 +433,6 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) auto params = graph_and_parameters.second; auto static_params = ir::get_static_params(g->inputs(), params); // Infer the type of an input from the weights of the calculation - // auto first_use_types = ir::get_block_first_calc_dtypes_opt(g->block()); auto first_use_types = ir::get_block_first_calc_dtypes_opt_collection(g->block()); MapInputsAndDetermineDTypes(cfg, g, static_params, first_use_types); diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index bbc8239097..5da2c121f6 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -22,7 +22,6 @@ CollectionInputSpecMap associate_specs_with_collection_inputs( } InputSpecMap pair_input_vals_with_specs(std::vector vals, std::vector specs) { - LOG_DEBUG("pair_input_vals_with_specs"); TORCHTRT_CHECK( vals.size() == specs.size(), "Expected dimension specifications for all input tensors" @@ -37,7 +36,6 @@ InputSpecMap pair_input_vals_with_specs(std::vector va } CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector vals, std::vector>& specs) { - LOG_DEBUG("pair_input_vals_with_specs collection"); TORCHTRT_CHECK( vals.size() == specs.size(), "Expected dimension specifications for all input tensors" @@ -56,9 +54,9 @@ std::vector get_tensor_inputs( StaticParams& static_params) { std::vector input_tensors; auto inputs = g->inputs(); - LOG_DEBUG("Inputs size " << inputs.size()); + LOG_DEBUG("Raw inputs size of get_tensor_inputs: " << inputs.size()); for (auto in : inputs) { - LOG_DEBUG("input debug name: " << in->debugName()); + LOG_DEBUG("Handle input of debug name: " << in->debugName()); // Disregarding inputs that are not tensors or are static // // Ex. 
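To make the collection-input plumbing above concrete, here is a minimal sketch of driving it from the public C++ API: the Input specs are nested the same way the model nests its arguments, and each spec pins its dtype explicitly so the first-use inference in MapInputsAndDetermineDTypes is never consulted. Construction mirrors tests/cpp/test_collection.cpp; the module path, shape, and dtype are illustrative and not part of the patch.

#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"

int main() {
  // Illustrative module whose signature is forward(z: Tuple[Tensor, Tensor])
  auto mod = torch::jit::load("tuple_input.ts");
  mod.to(torch::kCUDA);
  mod.eval();

  // One spec per tensor; an explicit dtype sets dtype_is_user_defined on the spec.
  auto spec = torch_tensorrt::Input(std::vector<int64_t>{1, 3, 512, 512}, torch_tensorrt::DataType::kHalf);
  auto spec_ivalue = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(spec));

  // Nest the specs like the call signature: one tuple argument holding two tensors.
  std::tuple<torch::jit::IValue, torch::jit::IValue> inner(spec_ivalue, spec_ivalue);
  torch::jit::IValue inner_ivalue(inner);
  std::tuple<torch::jit::IValue> top(inner_ivalue);
  torch::jit::IValue signature(top);

  auto settings = torch_tensorrt::ts::CompileSpec(signature);
  settings.enabled_precisions.insert(torch_tensorrt::DataType::kHalf);
  settings.require_full_compilation = false;

  auto trt_mod = torch_tensorrt::torchscript::compile(mod, settings);
  return 0;
}

Under the two-level limit, this signature flattens to two Input specs in a single top-level group; a third level of nesting would hit the nesting-depth LOG_ERROR in flatten_dfs.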
@@ -76,9 +74,9 @@ std::vector get_collection_inputs( StaticParams& static_params) { std::vector input_tensors; auto inputs = g->inputs(); - LOG_DEBUG("get_collection_inputs, inputs size " << inputs.size()); + LOG_DEBUG("Raw inputs size of get_collection_inputs: " << inputs.size()); for (auto in : inputs) { - LOG_DEBUG("input debug name: " << in->debugName()); + LOG_DEBUG("Handle input of debug name: " << in->debugName()); if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) { input_tensors.push_back(in); } else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) { @@ -101,12 +99,9 @@ c10::optional get_value_first_calc_dtype_opt(torch::jit::Block* auto b_ins = b->inputs(); std::unordered_set b_in_set(b_ins.begin(), b_ins.end()); - // TORCHTRT_ASSERT( - // in->type() == c10::TensorType::get(), "Input is not a tensor, cannot check for dtype based on calculation"); - auto consumers = in->uses(); auto search_list = std::vector(consumers.begin(), consumers.end()); - LOG_DEBUG("Users number for " << in->debugName() << ": " << consumers.size()); + while(search_list.size() > 0) { // after insertion, original iterator will be invalid auto& u = search_list.front(); @@ -221,13 +216,11 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block* types.insert({in, {get_value_first_calc_dtype_opt(b, i)}}); } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) { - LOG_DEBUG("get_block_first_calc_dtypes_opt_collection TupleType"); // TODO: to evaluate the data type of tuple element // make sure very time get the same ptr - c10::optional tp = get_value_first_calc_dtype_opt(b, i); + // c10::optional tp = get_value_first_calc_dtype_opt(b, i); at::ArrayRef unpack_tuple = torch::jit::createTupleUnpack(i); - LOG_DEBUG("get_block_first_calc_dtypes_opt_collection: tuple size " << unpack_tuple.size()); - // TODO: calculate the tuple element type + // TODO: calculate the tuple element type, currently we use {} as default datatype // std::vector> dytpes(unpack_tuple.size(), tp); std::vector> dytpes(unpack_tuple.size()); types.insert({i, dytpes}); // insert an empty diff --git a/core/lowering/lowering.cpp b/core/lowering/lowering.cpp index 0051ad451c..8bbae296c3 100644 --- a/core/lowering/lowering.cpp +++ b/core/lowering/lowering.cpp @@ -33,7 +33,6 @@ void LowerGraph(std::shared_ptr& g, LowerInfo lower_info) { torch::jit::InlineFunctionalGraphs(g); torch::jit::PeepholeOptimize(g, false); torch::jit::FuseLinear(g); - // torch::jit::LowerAllTuples(g); if (!lower_info.disable_cse) { torch::jit::EliminateCommonSubexpression(g); } diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 1d330cc3d8..961831cb47 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -25,38 +25,17 @@ at::Tensor generateSingleInput(ir::Input& input, c10::optional& } std::unordered_map generateRandomInputs( -// std::unordered_map> generateRandomInputs( - // std::unordered_map& inputs, std::unordered_map>& inputs, - // std::unordered_map>& types) { std::unordered_map>>& types) { + // generate random inputs for running pytorch segments std::unordered_map ivalue_map; - // std::unordered_map> ivalue_map; - // TODO - // uint64_t in_i = 0; - for (auto& input : inputs) { - // for (int i = 0; i < input.second.size(); i++) { - // auto cur_shape = input.second[i].input_shape; - // std::vector shape; - // shape.insert(shape.begin(), 
std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); - // auto type_opt = types[input.first][i]; - // auto type = at::kFloat; - // if (type_opt) { - // type = type_opt.value(); - // } else { - // LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); - // } - // auto in = at::randint(5, shape, {at::kCUDA}).to(type); - // // ivalue_map[input.first] = in.clone(); - // ivalue_map[input.first].push_back(in.clone()); - // // in_i++; - // } + + for (auto& input : inputs) { if (input.first->type()->kind() == torch::jit::TypeKind::ListType) { // create list - LOG_DEBUG("generateRandomInputs, generate random input of list type"); std::vector list; c10::TypePtr elementType = c10::TensorType::get(); auto generic_list = c10::impl::GenericList(elementType); @@ -65,10 +44,8 @@ std::unordered_map generateRandomI generic_list.push_back(in.clone()); } ivalue_map[input.first] = c10::IValue(generic_list); - LOG_DEBUG("generateRandomInputs, finish generate random input of list type"); } else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) { // create tuple - LOG_DEBUG("generateRandomInputs, generate random input of tuple type"); std::vector list; for (int i = 0; i < input.second.size(); i++) { auto in = generateSingleInput(input.second[i], types[input.first][i]); @@ -77,7 +54,6 @@ std::unordered_map generateRandomI auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr ivalue_map[input.first] = c10::IValue(tuple); } else { - LOG_DEBUG("generateRandomInputs, generate random input of tensor type"); auto in = generateSingleInput(input.second[0], types[input.first][0]); ivalue_map[input.first] = in.clone(); @@ -89,7 +65,6 @@ std::unordered_map generateRandomI void getSegmentsOutputByRunning( SegmentedBlock& seg_block, std::unordered_map& ivalues_maps, - // std::unordered_map>& ivalues_maps, const PartitionInfo& partition_info) { // create a module to run the graph auto g = seg_block.g(); @@ -134,11 +109,9 @@ void getSegmentsOutputByRunning( jit_inputs_ivalues.push_back(ivalues_maps[input].toBool()); } else if (input->type()->kind() == torch::jit::TypeKind::ListType) { // create list - LOG_DEBUG("getSegmentsOutputByRunning, handle list type"); jit_inputs_ivalues.push_back(ivalues_maps[input].toList());; } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) { // create tuple - LOG_DEBUG("getSegmentsOutputByRunning, handle tuple type"); jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple()); } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) { jit_inputs_ivalues.push_back(ivalues_maps[input].toScalar()); diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 5e015e3a6e..ef000506c7 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -81,7 +81,7 @@ void flatten_dfs(std::vector& flattened_inputs, std:: } else if (level == 2) { // like A in [(A, A), C] collection_inputs[index].push_back(cur_input); } else {// only support 2 level - LOG_ERROR("3 level of input specs is not supported"); + LOG_ERROR("Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]"); } } } @@ -99,7 +99,7 @@ torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs exter internal_graph_input.input_signature = converted_input_signature; internal_graph_input.collection_inputs = collection_inputs; - LOG_DEBUG("compile_spec.cpp, to_internal_graph_inputs, flattened_inputs size " << flattened_inputs.size() << ", collection_inputs 
size "<< collection_inputs.size()); + LOG_DEBUG("Convert external_graph_input to internal_graph_inputs, total input input spec number: " << flattened_inputs.size() << ", top level input spec number "<< collection_inputs.size()); return internal_graph_input; } @@ -107,11 +107,11 @@ torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs exter torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) { torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.inputs)); if (internal.inputs.size() == 0) { - LOG_DEBUG("to_internal_compile_spec, Input size == 0, using graph_input"); + LOG_DEBUG("GraphInput.inputs size == 0, using GraphInput.input_signature to get Input spec"); internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs); internal.inputs = internal.graph_inputs.flattened_inputs; } else { - LOG_DEBUG("to_internal_compile_spec, Input size != 0, using original Input to construct collection_input"); + LOG_DEBUG("GraphInput.inputs size != 0, using GraphInput.inputs to get Input spec"); internal.graph_inputs.collection_inputs.resize(internal.inputs.size()); for (int i = 0; i < internal.inputs.size(); i++) { internal.graph_inputs.collection_inputs[i].push_back(internal.inputs[i]); From 633c00f9c122196cd2d4e567e3d4d6fbd64cd7c6 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 31 Mar 2022 19:13:20 +0800 Subject: [PATCH 11/22] chore: [collection] rename ConversionInfo.collection_inputs to ConversionInfo.collection_input_spec_map Signed-off-by: inocsin --- core/compiler.cpp | 20 ++++++++++---------- core/compiler.h | 6 +----- core/conversion/conversion.cpp | 4 +--- core/conversion/conversion.h | 2 +- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 1d97139041..57b4667bce 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -306,13 +306,13 @@ void MapInputsAndDetermineDTypes( std::shared_ptr& g, ir::StaticParams& static_params, ir::CollectionTypeMap& first_use_type_map) { - cfg.convert_info.collection_inputs = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params)); + cfg.convert_info.collection_input_spec_map = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params)); auto collection_inputs = ir::get_collection_inputs(g, static_params); LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size()); for (auto in : collection_inputs) { - std::vector& spec = cfg.convert_info.collection_inputs.find(in)->second; + std::vector& spec = cfg.convert_info.collection_input_spec_map.find(in)->second; std::vector> est_type_opt; auto est_it = first_use_type_map.find(in); @@ -339,21 +339,21 @@ void MapInputsAndDetermineDTypes( LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting"); std::stringstream ss; ss << "For input " << in->debugName() << ", found user specified input dtype as "; - ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; - ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype; + ss << ". 
The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype; auto warn_str = ss.str(); LOG_WARNING(warn_str); // Overwrite type map with user settings - first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; + first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)}; } else { - if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) { + if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) != est_type_opt[i].value()) { std::stringstream ss; ss << "For input " << in->debugName() << ", found user specified input dtype as "; - ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype; ss << ", however when inspecting the graph, the input type expected was inferred to be "; ss << est_type_opt[i].value() << std::endl; - ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype; + ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype; ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n"; ss << "compatibility with PyTorch's data type convention is required.\n"; ss << "If you do indeed see errors at runtime either:\n"; @@ -362,7 +362,7 @@ void MapInputsAndDetermineDTypes( auto warn_str = ss.str(); LOG_WARNING(warn_str); // Overwrite type map with user settings - first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)}; + first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)}; } } } else { @@ -447,7 +447,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) !(cfg.lower_info.forced_fallback_modules.size() == 0 && cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) { - auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_inputs, first_use_types); + auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types); auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params); new_g = graph_and_mapping.first; LOG_INFO("Segmented Graph: " << *new_g); diff --git a/core/compiler.h b/core/compiler.h index 71aa8899b2..750cd59c8e 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -14,12 +14,8 @@ namespace torch_tensorrt { namespace core { struct CompileSpec { - CompileSpec(std::vector inputs) : inputs(inputs) { - // graph_inputs = ir::GraphInputs(inputs); - } + CompileSpec(std::vector inputs) : inputs(inputs) {} CompileSpec(torch::jit::IValue& input_signature) { - // graph_inputs = ir::GraphInputs(input_signature); - // inputs = graph_inputs.flattened_inputs; graph_inputs.input_signature = input_signature; } ir::GraphInputs graph_inputs; diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index bafde231a1..3c9eb0dea4 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -135,9 +135,8 @@ void AddInputs( 
ConversionCtx* ctx, c10::ArrayRef inputs, ConversionInfo& conversion_info) { - // std::unordered_map& input_specs) { std::unordered_map& input_specs = conversion_info.inputs; - std::unordered_map> collection_input_spec = conversion_info.collection_inputs; + std::unordered_map> collection_input_spec = conversion_info.collection_input_spec_map; std::vector input_tensors; for (auto in : inputs) { @@ -415,7 +414,6 @@ void ConvertBlockToNetDef( auto inputs = b->inputs(); AddParamsToCtxValueMap(ctx, static_params); - // AddInputs(ctx, inputs, build_info.inputs); AddInputs(ctx, inputs, build_info); auto nodes = b->nodes(); diff --git a/core/conversion/conversion.h b/core/conversion/conversion.h index ba194716e8..148d99ac13 100644 --- a/core/conversion/conversion.h +++ b/core/conversion/conversion.h @@ -13,7 +13,7 @@ namespace conversion { struct ConversionInfo { ir::InputSpecMap inputs; - ir::CollectionInputSpecMap collection_inputs; + ir::CollectionInputSpecMap collection_input_spec_map; BuilderSettings engine_settings; }; From 89665c8d67a99bbae1c559af83ae9a60eb0517a9 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 31 Mar 2022 19:40:11 +0800 Subject: [PATCH 12/22] refactor: [collection] fuse Input with GraphInputs Signed-off-by: inocsin --- core/compiler.h | 5 ++-- core/ir/ir.h | 2 +- cpp/include/torch_tensorrt/torch_tensorrt.h | 17 ++++--------- cpp/src/compile_spec.cpp | 27 +++++++++++---------- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/core/compiler.h b/core/compiler.h index 750cd59c8e..85bc1d2c08 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -14,12 +14,13 @@ namespace torch_tensorrt { namespace core { struct CompileSpec { - CompileSpec(std::vector inputs) : inputs(inputs) {} + CompileSpec(std::vector inputs) { + graph_inputs.inputs = inputs; + } CompileSpec(torch::jit::IValue& input_signature) { graph_inputs.input_signature = input_signature; } ir::GraphInputs graph_inputs; - std::vector inputs; // can be replaced by graph_inputs conversion::ConversionInfo convert_info; lowering::LowerInfo lower_info; partitioning::PartitionInfo partition_info; diff --git a/core/ir/ir.h b/core/ir/ir.h index a66aaf7d33..c138ad693b 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -41,7 +41,7 @@ struct Input : torch::CustomClassHolder { // Add to spec struct GraphInputs { torch::jit::IValue input_signature; // nested Input, full input spec - std::vector flattened_inputs; // flattend Input + std::vector inputs; // flattened Input std::vector> collection_inputs; // only support two layer nesting, e.g.
((a, b), [c, d], e) }; diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index 6da7534987..e19b9f1408 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -517,10 +517,11 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{ /** * @brief A struct to hold complex inputs * - * This struct can either hold a conplex inputs of shape or a flattened one, + * This struct can either hold a complex inputs of shape or a flattened one, */ struct TORCHTRT_API GraphInputs { - torch::jit::IValue input_signature; // nested Input, full input spec + torch::jit::IValue input_signature; // nested Input, full input spec + std::vector inputs; // flatten input spec }; /** @@ -590,25 +591,17 @@ struct TORCHTRT_API CompileSpec { * * @param inputs */ - CompileSpec(std::vector inputs) : inputs(std::move(inputs)) {} + CompileSpec(std::vector inputs); /** * @brief Construct a new Extra Info object from IValue. * The IValue store a complex Input * - * @param inputs + * @param input_signature */ CompileSpec(torch::jit::IValue input_signature); // Defaults should reflect TensorRT defaults for BuilderConfig - /** - * @brief Specifications for inputs to the engine, can either be a single size or a range defined by min, opt and max - * sizes Users can also specify expected input type as well as tensor memory format - * - * Order in vector should match call order for the function - */ - std::vector inputs; - /** * @brief Specifications for inputs to the engine, can store a IValue which has stored complex Input * or a flatened Input diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index ef000506c7..366476b227 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -18,22 +18,26 @@ torchtrt::core::runtime::CudaDevice to_internal_cuda_device(Device device); namespace torchscript { CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { - inputs.push_back(Input(in)); + graph_inputs.inputs.push_back(Input(in)); } - // graph_inputs.flattened_inputs = inputs; } CompileSpec::CompileSpec(std::vector> fixed_sizes) { for (auto in : fixed_sizes) { - inputs.push_back(Input(in)); + graph_inputs.inputs.push_back(Input(in)); } - // graph_inputs.flattened_inputs = inputs; +} + +CompileSpec::CompileSpec(std::vector inputs) { + graph_inputs.inputs = std::move(inputs); } CompileSpec::CompileSpec(torch::jit::IValue input_signature) { graph_inputs.input_signature = input_signature; } + + void flatten_dfs(std::vector& flattened_inputs, std::vector>& collection_inputs, torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue, int level, int index) { if (input_ivalue.isTuple()) { @@ -59,7 +63,6 @@ void flatten_dfs(std::vector& flattened_inputs, std:: } c10::TypePtr type = input_list[0].type(); auto converted_elements = c10::impl::GenericList(type); - // std::vector converted_elements; int idx = 0; for (auto item: input_list) { int cur_idx = level < 1 ? 
idx: index; @@ -95,7 +98,7 @@ torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs exter torch::jit::IValue converted_input_signature; flatten_dfs(flattened_inputs, collection_inputs, external_graph_input.input_signature, converted_input_signature, 0, 0); - internal_graph_input.flattened_inputs = flattened_inputs; + internal_graph_input.inputs = flattened_inputs; internal_graph_input.input_signature = converted_input_signature; internal_graph_input.collection_inputs = collection_inputs; @@ -105,17 +108,15 @@ torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs exter } torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) { - torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.inputs)); - if (internal.inputs.size() == 0) { + torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.graph_inputs.inputs)); + if (internal.graph_inputs.inputs.size() == 0) { LOG_DEBUG("GraphInput.inputs size == 0, using GraphInput.input_signature to get Input spec"); internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs); - internal.inputs = internal.graph_inputs.flattened_inputs; } else { LOG_DEBUG("GraphInput.inputs size != 0, using GraphInput.inputs to get Input spec"); - internal.graph_inputs.collection_inputs.resize(internal.inputs.size()); - for (int i = 0; i < internal.inputs.size(); i++) { - internal.graph_inputs.collection_inputs[i].push_back(internal.inputs[i]); - internal.graph_inputs.flattened_inputs = internal.inputs; + internal.graph_inputs.collection_inputs.resize(internal.graph_inputs.inputs.size()); + for (int i = 0; i < internal.graph_inputs.inputs.size(); i++) { + internal.graph_inputs.collection_inputs[i].push_back(internal.graph_inputs.inputs[i]); } } From 205452e95d97f35141670cb982888ebfa2273d63 Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 31 Mar 2022 19:59:22 +0800 Subject: [PATCH 13/22] feat: [collection] move collection test model to hub.py Signed-off-by: inocsin --- tests/modules/hub.py | 95 +++++++++++++++++++++++++++++++ tests/py/test_collection.py | 110 ------------------------------------ 2 files changed, 95 insertions(+), 110 deletions(-) delete mode 100644 tests/py/test_collection.py diff --git a/tests/modules/hub.py b/tests/modules/hub.py index 7b707f5785..f03658321c 100644 --- a/tests/modules/hub.py +++ b/tests/modules/hub.py @@ -3,7 +3,11 @@ import torch.nn.functional as F import torchvision.models as models import timm +<<<<<<< HEAD from transformers import BertModel, BertTokenizer, BertConfig +======= +from typing import Tuple, List, Dict +>>>>>>> feat: [collection] move collection test model to hub.py torch.hub._validate_not_a_forked_repo = lambda a, b, c: True @@ -217,3 +221,94 @@ def forward(self, x): traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors]) torch.jit.save(traced_model, "bert_base_uncased_traced.jit.pt") + +# Collection input/output models +class Normal(nn.Module): + def __init__(self): + super(Normal, self).__init__() + + def forward(self, x, y): + r = x + y + return r + +class TupleInput(nn.Module): + def __init__(self): + super(TupleInput, self).__init__() + + def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): + r = z[0] + z[1] + return r + +class ListInput(nn.Module): + def __init__(self): + super(ListInput, self).__init__() + + def forward(self, z: List[torch.Tensor]): + r = z[0] + z[1] + return r + +class TupleInputOutput(nn.Module): + def __init__(self): + super(TupleInputOutput, self).__init__() + + def 
forward(self, z: Tuple[torch.Tensor, torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r = (r1, r2) + return r + +class ListInputOutput(nn.Module): + def __init__(self): + super(ListInputOutput, self).__init__() + + def forward(self, z: List[torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r = [r1, r2] + return r + +class ComplexModel(nn.Module): + def __init__(self): + super(ComplexModel, self).__init__() + self.list_model = ListInputOutput() + self.tuple_model = TupleInputOutput() + + def forward(self, z: List[torch.Tensor]): + r1 = z[0] + z[1] + r2 = z[0] - z[1] + r3 = (r1, r2) + r4 = [r2, r1] + tuple_out = self.tuple_model(r3) + list_out = self.list_model(r4) + r = (tuple_out[1], list_out[0]) + return r + +normal_model = Normal() +normal_model_ts = torch.jit.script(normal_model) +normal_model_ts.to("cuda").eval() +torch.jit.save(normal_model_ts, "normal_model.ts") + +tuple_input = TupleInput() +tuple_input_ts = torch.jit.script(tuple_input) +tuple_input_ts.to("cuda").eval() +torch.jit.save(tuple_input_ts, "tuple_input.ts") + +list_input = ListInput() +list_input_ts = torch.jit.script(list_input) +list_input_ts.to("cuda").eval() +torch.jit.save(list_input_ts, "list_input.ts") + +tuple_input = TupleInputOutput() +tuple_input_ts = torch.jit.script(tuple_input) +tuple_input_ts.to("cuda").eval() +torch.jit.save(tuple_input_ts, "tuple_input_output.ts") + +list_input = ListInputOutput() +list_input_ts = torch.jit.script(list_input) +list_input_ts.to("cuda").eval() +torch.jit.save(list_input_ts, "list_input_output.ts") + +complex_model = ComplexModel() +complex_model_ts = torch.jit.script(complex_model) +complex_model_ts.to("cuda").eval() +torch.jit.save(complex_model_ts, "complex_model.ts") diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py deleted file mode 100644 index e71d754505..0000000000 --- a/tests/py/test_collection.py +++ /dev/null @@ -1,110 +0,0 @@ -import torch -import copy -import torch.nn as nn -import torch.nn.functional as F -from typing import Tuple, List, Dict - -class Normal(nn.Module): - def __init__(self): - super(Normal, self).__init__() - - def forward(self, x, y): - r = x + y - return r - -class TupleInput(nn.Module): - def __init__(self): - super(TupleInput, self).__init__() - - def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): - r = z[0] + z[1] - return r - -class ListInput(nn.Module): - def __init__(self): - super(ListInput, self).__init__() - - def forward(self, z: List[torch.Tensor]): - r = z[0] + z[1] - return r - -class TupleInputOutput(nn.Module): - def __init__(self): - super(TupleInputOutput, self).__init__() - - def forward(self, z: Tuple[torch.Tensor, torch.Tensor]): - r1 = z[0] + z[1] - r2 = z[0] - z[1] - r = (r1, r2) - return r - -class ListInputOutput(nn.Module): - def __init__(self): - super(ListInputOutput, self).__init__() - - def forward(self, z: List[torch.Tensor]): - r1 = z[0] + z[1] - r2 = z[0] - z[1] - r = [r1, r2] - return r - -class ComplexModel(nn.Module): - def __init__(self): - super(ComplexModel, self).__init__() - self.list_model = ListInputOutput() - self.tuple_model = TupleInputOutput() - - def forward(self, z: List[torch.Tensor]): - r1 = z[0] + z[1] - r2 = z[0] - z[1] - r3 = (r1, r2) - r4 = [r2, r1] - tuple_out = self.tuple_model(r3) - list_out = self.list_model(r4) - r = (tuple_out[1], list_out[0]) - return r - -input_data = torch.randn((16, 3, 32, 32)) -input_data = input_data.float().to("cuda") - -normal_model = Normal() -normal_model_ts = torch.jit.script(normal_model) 
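# A sketch of the Python-side counterpart (not part of this patch): compiling
# the tuple-input model generated above once the Python API exposes the
# input_signature field added on the C++ side, which lands later in this
# series. The kwarg name `input_signature` is assumed from the C++
# GraphInputs field; the saved-model path is illustrative.
import torch
import torch_tensorrt

mod = torch.jit.load("tuple_input.ts").eval().to("cuda")
spec = torch_tensorrt.Input(shape=[16, 3, 32, 32], dtype=torch.float)
trt_mod = torch_tensorrt.ts.compile(mod, input_signature=((spec, spec),))
x = torch.randn(16, 3, 32, 32).to("cuda")
print(trt_mod((x, x)))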
-print(normal_model_ts.graph) -result = normal_model_ts(input_data, input_data) -normal_model_ts.to("cuda").eval() -torch.jit.save(normal_model_ts, "./normal_model.ts") - -tuple_input = TupleInput() -tuple_input_ts = torch.jit.script(tuple_input) -print(tuple_input_ts.graph) -result = tuple_input_ts((input_data, input_data)) -tuple_input_ts.to("cuda").eval() -torch.jit.save(tuple_input_ts, "./tuple_input.ts") - -list_input = ListInput() -list_input_ts = torch.jit.script(list_input) -print(list_input_ts.graph) -result = list_input_ts([input_data, input_data]) -list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "./list_input.ts") - -tuple_input = TupleInputOutput() -tuple_input_ts = torch.jit.script(tuple_input) -print(tuple_input_ts.graph) -result = tuple_input_ts((input_data, input_data)) -tuple_input_ts.to("cuda").eval() -torch.jit.save(tuple_input_ts, "./tuple_input_output.ts") - -list_input = ListInputOutput() -list_input_ts = torch.jit.script(list_input) -print(list_input_ts.graph) -result = list_input_ts([input_data, input_data]) -list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "./list_input_output.ts") - -complex_model = ComplexModel() -complex_model_ts = torch.jit.script(complex_model) -print(complex_model_ts.graph) -result = complex_model_ts([input_data, input_data]) -complex_model_ts.to("cuda").eval() -torch.jit.save(complex_model_ts, "./complex_model.ts") \ No newline at end of file From a4d4131d78be021589dc825d9f107a119eb1346e Mon Sep 17 00:00:00 2001 From: inocsin Date: Thu, 31 Mar 2022 20:14:45 +0800 Subject: [PATCH 14/22] test: [collection] update model path in test_collection.cpp Signed-off-by: inocsin --- tests/cpp/test_collection.cpp | 12 ++++++------ tests/modules/hub.py | 15 ++++++--------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp index f647af2c8c..73bcabcf13 100644 --- a/tests/cpp/test_collection.cpp +++ b/tests/cpp/test_collection.cpp @@ -8,7 +8,7 @@ TEST(CppAPITests, TestCollectionNormalInput) { - std::string path = "/root/Torch-TensorRT/normal_model.ts"; + std::string path = "tests/modules/normal_model.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -53,7 +53,7 @@ TEST(CppAPITests, TestCollectionNormalInput) { TEST(CppAPITests, TestCollectionTupleInput) { - std::string path = "/root/Torch-TensorRT/tuple_input.ts"; + std::string path = "tests/modules/tuple_input.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); torch::jit::Module mod; @@ -103,7 +103,7 @@ TEST(CppAPITests, TestCollectionTupleInput) { TEST(CppAPITests, TestCollectionListInput) { - std::string path = "/root/Torch-TensorRT/list_input.ts"; + std::string path = "tests/modules/list_input.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -169,7 +169,7 @@ TEST(CppAPITests, TestCollectionListInput) { TEST(CppAPITests, TestCollectionTupleInputOutput) { - std::string path = "/root/Torch-TensorRT/tuple_input_output.ts"; + std::string path = "tests/modules/tuple_input_output.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); @@ -224,7 +224,7 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { TEST(CppAPITests, TestCollectionListInputOutput) { - std::string path = "/root/Torch-TensorRT/list_input_output.ts"; + std::string path = 
"tests/modules/list_input_output.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); @@ -296,7 +296,7 @@ TEST(CppAPITests, TestCollectionListInputOutput) { TEST(CppAPITests, TestCollectionComplexModel) { - std::string path = "/root/Torch-TensorRT/complex_model.ts"; + std::string path = "tests/modules/complex_model.jit.pt"; torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf); std::vector inputs; inputs.push_back(in0); diff --git a/tests/modules/hub.py b/tests/modules/hub.py index f03658321c..a2adc3ab4b 100644 --- a/tests/modules/hub.py +++ b/tests/modules/hub.py @@ -3,11 +3,8 @@ import torch.nn.functional as F import torchvision.models as models import timm -<<<<<<< HEAD from transformers import BertModel, BertTokenizer, BertConfig -======= from typing import Tuple, List, Dict ->>>>>>> feat: [collection] move collection test model to hub.py torch.hub._validate_not_a_forked_repo = lambda a, b, c: True @@ -286,29 +283,29 @@ def forward(self, z: List[torch.Tensor]): normal_model = Normal() normal_model_ts = torch.jit.script(normal_model) normal_model_ts.to("cuda").eval() -torch.jit.save(normal_model_ts, "normal_model.ts") +torch.jit.save(normal_model_ts, "normal_model.jit.pt") tuple_input = TupleInput() tuple_input_ts = torch.jit.script(tuple_input) tuple_input_ts.to("cuda").eval() -torch.jit.save(tuple_input_ts, "tuple_input.ts") +torch.jit.save(tuple_input_ts, "tuple_input.jit.pt") list_input = ListInput() list_input_ts = torch.jit.script(list_input) list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "list_input.ts") +torch.jit.save(list_input_ts, "list_input.jit.pt") tuple_input = TupleInputOutput() tuple_input_ts = torch.jit.script(tuple_input) tuple_input_ts.to("cuda").eval() -torch.jit.save(tuple_input_ts, "tuple_input_output.ts") +torch.jit.save(tuple_input_ts, "tuple_input_output.jit.pt") list_input = ListInputOutput() list_input_ts = torch.jit.script(list_input) list_input_ts.to("cuda").eval() -torch.jit.save(list_input_ts, "list_input_output.ts") +torch.jit.save(list_input_ts, "list_input_output.jit.pt") complex_model = ComplexModel() complex_model_ts = torch.jit.script(complex_model) complex_model_ts.to("cuda").eval() -torch.jit.save(complex_model_ts, "complex_model.ts") +torch.jit.save(complex_model_ts, "complex_model.jit.pt") From 2d585e59d3a00974f32bd9f6d0c174a5be02903b Mon Sep 17 00:00:00 2001 From: inocsin Date: Tue, 5 Apr 2022 12:08:24 +0800 Subject: [PATCH 15/22] fix: [collection] solve confict in ir.cpp Signed-off-by: inocsin --- core/ir/ir.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index 5da2c121f6..061327c6bc 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -102,11 +102,8 @@ c10::optional get_value_first_calc_dtype_opt(torch::jit::Block* auto consumers = in->uses(); auto search_list = std::vector(consumers.begin(), consumers.end()); - while(search_list.size() > 0) { - // after insertion, original iterator will be invalid - auto& u = search_list.front(); - search_list.erase(search_list.begin()); - auto n = u.user; + for (auto iter = search_list.begin(); iter != search_list.end(); ++iter) { + auto n = iter->user; LOG_GRAPH("Node we are looking at: " << util::node_info(n)); auto ins = n->inputs(); auto outs = n->outputs(); From 5f368105e5abf093ab7af811a2589e4f32d87d63 Mon Sep 17 00:00:00 2001 From: inocsin Date: Wed, 6 Apr 2022 21:07:45 +0800 Subject: [PATCH 16/22] feat: [collection] 
update python api, refactor code Signed-off-by: inocsin --- core/compiler.h | 8 +- core/ir/BUILD | 3 +- core/ir/GraphInputs.cpp | 75 +++++++++++++++++++ core/ir/ir.h | 2 + cpp/src/compile_spec.cpp | 67 ++++------------- .../csrc/register_tensorrt_classes.cpp | 7 ++ py/torch_tensorrt/csrc/tensorrt_classes.cpp | 53 +++++++++++-- py/torch_tensorrt/csrc/tensorrt_classes.h | 8 ++ py/torch_tensorrt/csrc/torch_tensorrt_py.cpp | 7 ++ py/torch_tensorrt/ts/_compile_spec.py | 39 ++++++++-- tests/py/test_collection.py | 60 +++++++++++++++ 11 files changed, 255 insertions(+), 74 deletions(-) create mode 100644 core/ir/GraphInputs.cpp create mode 100644 tests/py/test_collection.py diff --git a/core/compiler.h b/core/compiler.h index 85bc1d2c08..c8dc85020b 100644 --- a/core/compiler.h +++ b/core/compiler.h @@ -14,12 +14,8 @@ namespace torch_tensorrt { namespace core { struct CompileSpec { - CompileSpec(std::vector inputs) { - graph_inputs.inputs = inputs; - } - CompileSpec(torch::jit::IValue& input_signature) { - graph_inputs.input_signature = input_signature; - } + CompileSpec(std::vector inputs) : graph_inputs(inputs) {} + CompileSpec(torch::jit::IValue& input_signature) : graph_inputs(input_signature) {} ir::GraphInputs graph_inputs; conversion::ConversionInfo convert_info; lowering::LowerInfo lower_info; diff --git a/core/ir/BUILD b/core/ir/BUILD index a613aaf489..2e9ef7e6a8 100644 --- a/core/ir/BUILD +++ b/core/ir/BUILD @@ -15,7 +15,8 @@ cc_library( srcs = [ "ir.cpp", "Input.cpp", - "StaticParams.cpp" + "StaticParams.cpp", + "GraphInputs.cpp" ], deps = [ "@tensorrt//:nvinfer", diff --git a/core/ir/GraphInputs.cpp b/core/ir/GraphInputs.cpp new file mode 100644 index 0000000000..645624f2f1 --- /dev/null +++ b/core/ir/GraphInputs.cpp @@ -0,0 +1,75 @@ +#include "core/ir/ir.h" +#include "core/util/prelude.h" + +namespace torch_tensorrt { +namespace core { +namespace ir { + +void flatten_dfs(std::vector& flattened_inputs, std::vector>& collection_inputs, + torch::jit::IValue input_ivalue, int level, int index) { + if (input_ivalue.isTuple()) { + auto input_tuple = input_ivalue.toTuple(); + int idx = 0; + if (level == 0) { + collection_inputs.resize(input_tuple->elements().size()); + } + for (auto item: input_tuple->elements()) { + torch::jit::IValue converted_item; + int cur_idx = level < 1 ? idx: index; + flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx); + idx++; + } + } else if(input_ivalue.isList()) { + auto input_list = input_ivalue.toList().vec(); + if (level == 0) { + collection_inputs.resize(input_list.size()); + } + c10::TypePtr type = input_list[0].type(); + auto converted_elements = c10::impl::GenericList(type); + int idx = 0; + for (auto item: input_list) { + int cur_idx = level < 1 ? 
idx: index; + flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx); + idx++; + } + } else if(input_ivalue.isCustomClass()) { + torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass()); + flattened_inputs.push_back(cur_input); + if (level == 0) { // a single value like A + collection_inputs.resize(1); + collection_inputs[0].push_back(cur_input); + } else if (level == 1) { // like A in [A, A] or [(B, B), A] + collection_inputs[index].push_back(cur_input); + } else if (level == 2) { // like A in [(A, A), C] + collection_inputs[index].push_back(cur_input); + } else {// only support 2 level + LOG_ERROR("Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]"); + } + } +} + + +GraphInputs::GraphInputs(std::vector inputs_) { + LOG_DEBUG("Construct GraphInput with ir::Input"); + inputs = inputs_; + collection_inputs.resize(inputs_.size()); + for (int i = 0; i < inputs_.size(); i++) { + collection_inputs[i].push_back(inputs_[i]); + } +} + +GraphInputs::GraphInputs(torch::jit::IValue& input_signature_) { + LOG_DEBUG("Construct GraphInput with IValue"); + + std::vector flattened_inputs; + std::vector> collection_inputs_; + + flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0); + inputs = flattened_inputs; + input_signature = input_signature_; + collection_inputs = collection_inputs_; +} + +} // namespace ir +} // namespace core +} // namespace torch_tensorrt \ No newline at end of file diff --git a/core/ir/ir.h b/core/ir/ir.h index c138ad693b..966c747176 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -40,6 +40,8 @@ struct Input : torch::CustomClassHolder { // Add to spec struct GraphInputs { + GraphInputs(std::vector inputs); + GraphInputs(torch::jit::IValue& input_signature); torch::jit::IValue input_signature; // nested Input, full input spec std::vector inputs; // flattend Input std::vector> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e) diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 366476b227..9447def7e0 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -38,88 +38,47 @@ CompileSpec::CompileSpec(torch::jit::IValue input_signature) { -void flatten_dfs(std::vector& flattened_inputs, std::vector>& collection_inputs, - torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue, int level, int index) { +void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) { if (input_ivalue.isTuple()) { auto input_tuple = input_ivalue.toTuple(); std::vector converted_elements; - int idx = 0; - if (level == 0) { - collection_inputs.resize(input_tuple->elements().size()); - } for (auto item: input_tuple->elements()) { torch::jit::IValue converted_item; - int cur_idx = level < 1 ? idx: index; - flatten_dfs(flattened_inputs, collection_inputs, item, converted_item, level+1, cur_idx); + to_internal_input_signature(item, converted_item); converted_elements.push_back(converted_item); auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements); converted_ivalue = torch::jit::IValue(tuple_ptr); - idx++; } } else if(input_ivalue.isList()) { auto input_list = input_ivalue.toList().vec(); - if (level == 0) { - collection_inputs.resize(input_list.size()); - } c10::TypePtr type = input_list[0].type(); auto converted_elements = c10::impl::GenericList(type); - int idx = 0; for (auto item: input_list) { - int cur_idx = level < 1 ? 
       torch::jit::IValue converted_item;
-      flatten_dfs(flattened_inputs, collection_inputs, item, converted_item, level+1, cur_idx);
+      to_internal_input_signature(item, converted_item);
       converted_elements.push_back(converted_item);
-      idx++;
     }
     converted_ivalue = torch::jit::IValue(converted_elements);
   } else if(input_ivalue.isCustomClass()) {
     torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
-    flattened_inputs.push_back(cur_input);
     converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
-    if (level == 0) { // a single value like A
-      collection_inputs.resize(1);
-      collection_inputs[0].push_back(cur_input);
-    } else if (level == 1) { // like A in [A, A] or [(B, B), A]
-      collection_inputs[index].push_back(cur_input);
-    } else if (level == 2) { // like A in [(A, A), C]
-      collection_inputs[index].push_back(cur_input);
-    } else { // only 2 levels of nesting are supported
-      LOG_ERROR("Input nesting depth exceeds the currently supported depth; use 1 level: [A, B], or 2 levels: [A, (B, C)]");
-    }
   }
 }

-torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs external_graph_input) {
-  torch_tensorrt::core::ir::GraphInputs internal_graph_input;
-
-  std::vector<torchtrt::core::ir::Input> flattened_inputs;
-  std::vector<std::vector<torchtrt::core::ir::Input>> collection_inputs;
-
+torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
+  if (external.graph_inputs.inputs.size() > 0) {
+    torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.graph_inputs.inputs));
+    return internal;
+  } else {
     torch::jit::IValue converted_input_signature;
-    flatten_dfs(flattened_inputs, collection_inputs, external_graph_input.input_signature, converted_input_signature, 0, 0);
-    internal_graph_input.inputs = flattened_inputs;
-    internal_graph_input.input_signature = converted_input_signature;
-    internal_graph_input.collection_inputs = collection_inputs;
-
-    LOG_DEBUG("Convert external_graph_input to internal_graph_inputs, total input spec number: " << flattened_inputs.size() << ", top level input spec number " << collection_inputs.size());
-
-    return internal_graph_input;
+    to_internal_input_signature(external.graph_inputs.input_signature, converted_input_signature);
+    torchtrt::core::CompileSpec internal(converted_input_signature);
+    return internal;
+  }
 }

 torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
-  torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.graph_inputs.inputs));
-  if (internal.graph_inputs.inputs.size() == 0) {
-    LOG_DEBUG("GraphInput.inputs size == 0, using GraphInput.input_signature to get Input spec");
-    internal.graph_inputs = to_internal_graph_inputs(external.graph_inputs);
-  } else {
-    LOG_DEBUG("GraphInput.inputs size != 0, using GraphInput.inputs to get Input spec");
-    internal.graph_inputs.collection_inputs.resize(internal.graph_inputs.inputs.size());
-    for (int i = 0; i < internal.graph_inputs.inputs.size(); i++) {
-      internal.graph_inputs.collection_inputs[i].push_back(internal.graph_inputs.inputs[i]);
-    }
-  }
-
+  torchtrt::core::CompileSpec internal = init_compile_spec(external);
   for (auto p : external.enabled_precisions) {
     internal.convert_info.engine_settings.enabled_precisions.insert(toTRTDataType(p));

diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
index 53b9fc2cdb..0a9f357c47 100644
--- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
@@ -23,6 +23,13 @@ void RegisterTRTCompileSpec() {
   ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistration, torch_tensorrt::pyapi::Input, input_is_dynamic);
   ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistration, torch_tensorrt::pyapi::Input, explicit_set_dtype);

+  static auto TORCHTRT_UNUSED TRTGraphInpuTSRegistration =
+      torch::class_<torch_tensorrt::pyapi::GraphInputs>("tensorrt", "_GraphInputs")
+          .def(torch::init<>())
+          .def("__str__", &torch_tensorrt::pyapi::GraphInputs::to_str);
+
+  ADD_FIELD_GET_SET_REGISTRATION(TRTGraphInpuTSRegistration, torch_tensorrt::pyapi::GraphInputs, input_signature);
+
   static auto TORCHTRT_UNUSED TRTDeviceTSRegistration =
       torch::class_<torch_tensorrt::pyapi::Device>("tensorrt", "_Device")
           .def(torch::init<>())

diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp
index a89fe692bd..9d2761ba95 100644
--- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp
@@ -104,6 +104,11 @@ std::string Input::to_str() {
   return ss.str();
 }

+std::string GraphInputs::to_str() {
+  std::stringstream ss;
+  return ss.str();
+}
+
 std::string to_str(DeviceType value) {
   switch (value) {
     case DeviceType::kDLA:
@@ -184,13 +189,51 @@ std::string TorchFallback::to_str() {
   return ss.str();
 }

-core::CompileSpec CompileSpec::toInternalCompileSpec() {
-  std::vector<core::ir::Input> internal_inputs;
-  for (auto i : inputs) {
-    internal_inputs.push_back(i.toInternalInput());
+void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
+  if (input_ivalue.isTuple()) {
+    auto input_tuple = input_ivalue.toTuple();
+    std::vector<torch::jit::IValue> converted_elements;
+    for (auto item: input_tuple->elements()) {
+      torch::jit::IValue converted_item;
+      to_internal_input_signature(item, converted_item);
+      converted_elements.push_back(converted_item);
+      auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+      converted_ivalue = torch::jit::IValue(tuple_ptr);
+    }
+  } else if(input_ivalue.isList()) {
+    auto input_list = input_ivalue.toList().vec();
+    c10::TypePtr type = input_list[0].type();
+    auto converted_elements = c10::impl::GenericList(type);
+    for (auto item: input_list) {
+      torch::jit::IValue converted_item;
+      to_internal_input_signature(item, converted_item);
+      converted_elements.push_back(converted_item);
+    }
+    converted_ivalue = torch::jit::IValue(converted_elements);
+  } else if(input_ivalue.isCustomClass()) {
+    core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
+    converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
+  }
+}
+
+core::CompileSpec init_compile_spec(CompileSpec external) {
+  if (external.graph_inputs.inputs.size() > 0) {
+    std::vector<core::ir::Input> internal_inputs;
+    for (auto i : external.graph_inputs.inputs) {
+      internal_inputs.push_back(i.toInternalInput());
+    }
+    core::CompileSpec internal(internal_inputs);
+    return internal;
+  } else {
+    torch::jit::IValue converted_input_signature;
+    to_internal_input_signature(external.graph_inputs.input_signature, converted_input_signature);
+    core::CompileSpec internal(converted_input_signature);
+    return internal;
   }
+}

-  auto info = core::CompileSpec(internal_inputs);
+core::CompileSpec CompileSpec::toInternalCompileSpec() {
+  core::CompileSpec info = init_compile_spec(*this);

   for (auto p : enabled_precisions) {
     info.convert_info.engine_settings.enabled_precisions.insert(toTRTDataType(p));

diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h
index 0c80641005..7231efa0fa 100644
--- a/py/torch_tensorrt/csrc/tensorrt_classes.h
+++ b/py/torch_tensorrt/csrc/tensorrt_classes.h
@@ -57,6 +57,13 @@ struct Input : torch::CustomClassHolder {
   std::string to_str();
 };

+struct GraphInputs : torch::CustomClassHolder {
+  torch::jit::IValue input_signature; // nested Input, full input spec
+  std::vector<Input> inputs; // flattened input spec
+  ADD_FIELD_GET_SET(input_signature, torch::jit::IValue);
+  std::string to_str();
+};
+
 enum DeviceType : int8_t {
   kGPU,
   kDLA,
@@ -156,6 +163,7 @@ struct CompileSpec : torch::CustomClassHolder {
   ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);

   std::vector<Input> inputs;
+  GraphInputs graph_inputs;
   nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr;
   std::set<DataType> enabled_precisions = {};
   bool sparse_weights = false;

diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
index 6e5f333f78..8e89441f56 100644
--- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
@@ -178,6 +178,12 @@ PYBIND11_MODULE(_C, m) {
       .def_readwrite("dtype", &Input::dtype)
       .def_readwrite("format", &Input::format);

+  py::class_<GraphInputs>(m, "GraphInputs")
+      .def(py::init<>())
+      .def("__str__", &torch_tensorrt::pyapi::GraphInputs::to_str)
+      .def_readwrite("input_signature", &GraphInputs::input_signature)
+      .def_readwrite("inputs", &GraphInputs::inputs);
+
   py::enum_<DataType>(m, "dtype", "Enum to specify operating precision for engine execution")
       .value("float", DataType::kFloat, "32 bit floating point number")
       .value("float32", DataType::kFloat, "32 bit floating point number")
@@ -292,6 +298,7 @@ PYBIND11_MODULE(_C, m) {
       .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify)
      .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator")
       .def_readwrite("inputs", &CompileSpec::inputs)
+      .def_readwrite("graph_inputs", &CompileSpec::graph_inputs)
       .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions)
       .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator)
       .def_readwrite("refit", &CompileSpec::refit)

diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py
index e406096677..5c046a7d1d 100644
--- a/py/torch_tensorrt/ts/_compile_spec.py
+++ b/py/torch_tensorrt/ts/_compile_spec.py
@@ -5,7 +5,7 @@
 from torch_tensorrt import _enums
 from torch_tensorrt._Input import Input
 from torch_tensorrt._Device import Device
-
+from typing import Tuple, List, Dict
 import warnings
@@ -156,6 +156,24 @@ def _parse_torch_fallback(fallback_info: Dict[str, Any]) -> _ts_C.TorchFallback:
     return info

+def _parse_collection_input(input_signature: Any) -> _C.GraphInputs.input_signature:
+    if isinstance(input_signature, tuple):
+        input_list = []
+        for item in input_signature:
+            input = _parse_collection_input(item)
+            input_list.append(input)
+        return tuple(input_list)
+    elif isinstance(input_signature, list):
+        input_list = []
+        for item in input_signature:
+            input = _parse_collection_input(item)
+            input_list.append(input)
+        return input_list
+    elif isinstance(input_signature, Input) or isinstance(input_signature, torch.Tensor):
+        input = Input._from_tensor(input_signature) if isinstance(input_signature, torch.Tensor) else input_signature
+        return input._to_internal()
+    else:
+        raise KeyError("Invalid Input spec")

 def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
     info = _ts_C.CompileSpec()
@@ -165,14 +183,19 @@
     )

     if "inputs" in compile_spec:
-        if not all([isinstance(i, torch.Tensor) or isinstance(i, Input) for i in compile_spec["inputs"]]):
-            raise KeyError("Input specs should be either torch_tensorrt.Input or torch.Tensor, found types: {}".format(
-                [type(i) for i in compile_spec["inputs"]]))
-
-        inputs = [Input._from_tensor(i) if isinstance(i, torch.Tensor) else i for i in compile_spec["inputs"]]
-        info.inputs = [i._to_internal() for i in inputs]
+        # if not all([isinstance(i, torch.Tensor) or isinstance(i, Input) for i in compile_spec["inputs"]]):
+        #     raise KeyError("Input specs should be either torch_tensorrt.Input or torch.Tensor, found types: {}".format(
+        #         [type(i) for i in compile_spec["inputs"]]))
+
+        if isinstance(compile_spec["inputs"], list) and all([isinstance(i, torch.Tensor) or isinstance(i, Input) for i in compile_spec["inputs"]]):
+            inputs = [Input._from_tensor(i) if isinstance(i, torch.Tensor) else i for i in compile_spec["inputs"]]
+            # from python Input to torch_tensorrt::pyapi::Input
+            # info.inputs = [i._to_internal() for i in inputs]
+            info.graph_inputs.inputs = [i._to_internal() for i in inputs]
+        else:
+            info.graph_inputs.input_signature = _parse_collection_input(compile_spec["inputs"])

-    assert (len(info.inputs) > 0), "Require at least one input definition to compile model"
+    assert (len(info.graph_inputs.inputs) > 0), "Require at least one input definition to compile model"

     if "enabled_precisions" in compile_spec:
         info.enabled_precisions = _parse_enabled_precisions(compile_spec["enabled_precisions"])

diff --git a/tests/py/test_collection.py b/tests/py/test_collection.py
new file mode 100644
index 0000000000..23e15c99b3
--- /dev/null
+++ b/tests/py/test_collection.py
@@ -0,0 +1,60 @@
+import torch
+import torch.nn as nn
+import torch_tensorrt as torchtrt
+from typing import Tuple, List, Dict
+
+class Normal(nn.Module):
+    def __init__(self):
+        super(Normal, self).__init__()
+
+    def forward(self, x, y):
+        r = x + y
+        return r
+
+class TupleInputOutput(nn.Module):
+    def __init__(self):
+        super(TupleInputOutput, self).__init__()
+
+    def forward(self, z: Tuple[torch.Tensor, torch.Tensor]):
+        r1 = z[0] + z[1]
+        r2 = z[0] - z[1]
+        r = (r1, r2)
+        return r
+
+input = torch.randn((1, 3, 224, 224)).to("cuda")
+normal_model = Normal()
+scripted_model = torch.jit.script(normal_model)
+
+compile_spec = {
+    "inputs": [torchtrt.Input(input.shape, dtype=torch.float, format=torch.contiguous_format),
+               torchtrt.Input(input.shape, dtype=torch.float, format=torch.contiguous_format)],
+    "device": {
+        "device_type": torchtrt.DeviceType.GPU,
+        "gpu_id": 0,
+    },
+    "enabled_precisions": {torch.float}
+}
+
+trt_mod = torchtrt.ts.compile(scripted_model, **compile_spec)
+same = (trt_mod(input, input) - scripted_model(input, input)).abs().max()
+print(same.cpu())
+
+# input = torch.randn((1, 3, 224, 224)).to("cuda")
+# tuple_model = TupleInputOutput()
+# scripted_model = torch.jit.script(tuple_model)
+
+# compile_spec = {
+#     "inputs": (torchtrt.Input(input.shape, dtype=torch.float, format=torch.contiguous_format),
+#                torchtrt.Input(input.shape, dtype=torch.float, format=torch.contiguous_format)),
+#     "device": {
+#         "device_type": torchtrt.DeviceType.GPU,
+#         "gpu_id": 0,
+#     },
+#     "enabled_precisions": {torch.float}
+# }
+
+# trt_mod = torchtrt.ts.compile(scripted_model, **compile_spec)
+# same = (trt_mod((input, input))[0] - scripted_model((input, input))[0]).abs().max()
+# print(same.cpu())
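The same collection path can also be exercised end to end from C++. A minimal sketch, assuming a scripted module mod that takes a Tuple[Tensor, Tensor] like TupleInputOutput above; the shapes are illustrative and this is not the shipped cpp test:

    auto in = torch_tensorrt::Input(std::vector<int64_t>{1, 3, 224, 224}, torch::kFloat);
    auto iv = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(in));
    torch::jit::IValue signature = c10::ivalue::Tuple::create({iv, iv});

    auto spec = torch_tensorrt::ts::CompileSpec(signature);
    // tuple indexing still runs in Torch at this point in the series
    spec.torch_executed_ops.push_back("aten::__getitem__");
    auto trt_mod = torch_tensorrt::ts::compile(mod, spec);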
From d9d86656c94d7633e2e5d2f27c4740b6fad9827d Mon Sep 17 00:00:00 2001
From: inocsin
Date: Fri, 8 Apr 2022 18:26:41 +0800
Subject: [PATCH 17/22] fix: [collection] remove the aten::__getitem__ and prim::ListConstruct evaluators

Signed-off-by: inocsin
---
 core/conversion/evaluators/aten.cpp | 15 -------
 core/conversion/evaluators/prim.cpp | 62 -----------------------------
 2 files changed, 77 deletions(-)

diff --git a/core/conversion/evaluators/aten.cpp b/core/conversion/evaluators/aten.cpp
index 30cdeaa46a..fde9e71e66 100644
--- a/core/conversion/evaluators/aten.cpp
+++ b/core/conversion/evaluators/aten.cpp
@@ -264,21 +264,6 @@ auto aten_registrations TORCHTRT_UNUSED =
         },
         EvalOptions().validSchemas(
             {"aten::size(Tensor self) -> (int[])", "aten::size.int(Tensor self, int dim) -> (int)"})})
-        .evaluator({c10::Symbol::fromQualString("aten::__getitem__"),
-                    [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
-                      auto list = args.at(n->input(0)).IValue()->to<c10::List<c10::IValue>>();
-                      auto idx = args.at(n->input(1)).unwrapToInt();
-
-                      const int64_t list_size = list.size();
-                      const int64_t normalized_idx = normalizeIndex(idx, list_size);
-                      TORCHTRT_CHECK(
-                          normalized_idx >= 0 || normalized_idx < list_size,
-                          "List index out of range (aten::__getitem__)");
-                      return list.get(normalized_idx);
-                    },
-                    EvalOptions().validSchemas({
-                        "aten::__getitem__.t(t[](a) list, int idx) -> (t(*))",
-                    })})
        .evaluator({c10::Symbol::fromQualString("aten::append"),
                    [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
                      auto list = args.at(n->input(0)).IValue()->to<c10::List<c10::IValue>>();

diff --git a/core/conversion/evaluators/prim.cpp b/core/conversion/evaluators/prim.cpp
index 7d5373a5f9..56e980189f 100755
--- a/core/conversion/evaluators/prim.cpp
+++ b/core/conversion/evaluators/prim.cpp
@@ -40,68 +40,6 @@ auto prim_registrations =
         auto outputVec = outputs->toList().vec();
         return std::move(c10::ivalue::Tuple::create(outputVec));
       }})
-      .evaluator({torch::jit::prim::ListConstruct,
-                  [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
-                    const auto num_inputs = n->inputs().size();
-                    if (constTypesOnly(args)) {
-                      c10::ListTypePtr lt = n->output()->type()->expect<c10::ListType>();
-                      if (torch::jit::IntType::get() == lt->getElementType()) {
-                        c10::List<int64_t> list;
-                        list.reserve(num_inputs);
-                        for (auto in : n->inputs()) {
-                          list.emplace_back(std::move(args.at(in).unwrapToInt()));
-                        }
-                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                      } else if (torch::jit::FloatType::get() == lt->getElementType()) {
-                        c10::List<double> list;
-                        list.reserve(num_inputs);
-                        for (auto in : n->inputs()) {
-                          list.emplace_back(std::move(args.at(in).unwrapToDouble()));
-                        }
-                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                      } else if (lt->getElementType() == torch::jit::BoolType::get()) {
-                        c10::List<bool> list;
-                        list.reserve(num_inputs);
-                        for (auto in : n->inputs()) {
-                          list.emplace_back(std::move(args.at(in).unwrapToBool()));
-                        }
-                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                      } else if (lt->getElementType()->isSubtypeOf(torch::jit::TensorType::get())) {
-                        c10::List<at::Tensor> list;
-                        list.reserve(num_inputs);
-                        for (auto in : n->inputs()) {
-                          if (args.at(in).isIValue()) {
-                            list.emplace_back(std::move(args.at(in).unwrapToTensor()));
-                          }
-                        }
-                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                      } else {
-                        c10::TypePtr elementType = lt->getElementType();
-                        auto list = c10::impl::GenericList(elementType);
-                        list.reserve(num_inputs);
-                        for (auto in : n->inputs()) {
-                          list.emplace_back(std::move(*(args.at(in).IValue())));
-                        }
-                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                      }
-                    } else {
-                      c10::ListTypePtr lt = n->output()->type()->expect<c10::ListType>();
-                      c10::TypePtr elementType = lt->getElementType();
-                      auto list = c10::impl::GenericList(elementType);
-                      list.reserve(num_inputs);
-                      for (auto in : n->inputs()) {
-                        if (args.at(in).isITensor()) {
-                          auto tensor_holder = TensorContainer();
-                          tensor_holder.hold_tensor(args.at(in).ITensor());
-                          auto ival = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(tensor_holder)));
-                          list.emplace_back(std::move(ival));
-                        } else {
-                          list.emplace_back(std::move(args.at(in).unwrapToTensor()));
-                        }
-                      }
-                      return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
-                    }
-                  }})
    .evaluator({c10::Symbol::fromQualString("prim::dtype"),
                [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
                  auto input = args.at(n->input(0));
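Two details of the removed aten::__getitem__ evaluator are worth noting while it is gone: it mirrors Python's negative-index semantics via normalizeIndex, and its bounds check joins the two conditions with ||, which can never fail; && is evidently what was meant. A self-contained sketch of the intended semantics (the helper body is assumed, not quoted from the tree):

    #include <cassert>
    #include <cstdint>

    // Python-style wraparound: list[-1] is the last element
    int64_t normalizeIndex(int64_t idx, int64_t list_size) {
      return idx < 0 ? idx + list_size : idx;
    }

    int main() {
      assert(normalizeIndex(-1, 4) == 3);
      assert(normalizeIndex(2, 4) == 2);
      // a usable index must satisfy both bounds:
      // normalized_idx >= 0 && normalized_idx < list_size
      return 0;
    }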
From 991f0232d046e50420cabf6e8cafa1c862b56061 Mon Sep 17 00:00:00 2001
From: inocsin
Date: Tue, 12 Apr 2022 11:51:53 +0800
Subject: [PATCH 18/22] [collection] rebase to master, update some APIs

Signed-off-by: inocsin
---
 core/conversion/evaluators/prim.cpp           | 62 +++++++++++++++++++
 core/partitioning/shape_analysis.h            |  3 -
 .../test_resolve_nontensor_inputs.cpp         | 16 ++---
 .../core/partitioning/test_shape_analysis.cpp | 16 ++---
 4 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/core/conversion/evaluators/prim.cpp b/core/conversion/evaluators/prim.cpp
index 56e980189f..7d5373a5f9 100755
--- a/core/conversion/evaluators/prim.cpp
+++ b/core/conversion/evaluators/prim.cpp
@@ -40,6 +40,68 @@ auto prim_registrations =
         auto outputVec = outputs->toList().vec();
         return std::move(c10::ivalue::Tuple::create(outputVec));
       }})
+      .evaluator({torch::jit::prim::ListConstruct,
+                  [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
+                    const auto num_inputs = n->inputs().size();
+                    if (constTypesOnly(args)) {
+                      c10::ListTypePtr lt = n->output()->type()->expect<c10::ListType>();
+                      if (torch::jit::IntType::get() == lt->getElementType()) {
+                        c10::List<int64_t> list;
+                        list.reserve(num_inputs);
+                        for (auto in : n->inputs()) {
+                          list.emplace_back(std::move(args.at(in).unwrapToInt()));
+                        }
+                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                      } else if (torch::jit::FloatType::get() == lt->getElementType()) {
+                        c10::List<double> list;
+                        list.reserve(num_inputs);
+                        for (auto in : n->inputs()) {
+                          list.emplace_back(std::move(args.at(in).unwrapToDouble()));
+                        }
+                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                      } else if (lt->getElementType() == torch::jit::BoolType::get()) {
+                        c10::List<bool> list;
+                        list.reserve(num_inputs);
+                        for (auto in : n->inputs()) {
+                          list.emplace_back(std::move(args.at(in).unwrapToBool()));
+                        }
+                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                      } else if (lt->getElementType()->isSubtypeOf(torch::jit::TensorType::get())) {
+                        c10::List<at::Tensor> list;
+                        list.reserve(num_inputs);
+                        for (auto in : n->inputs()) {
+                          if (args.at(in).isIValue()) {
+                            list.emplace_back(std::move(args.at(in).unwrapToTensor()));
+                          }
+                        }
+                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                      } else {
+                        c10::TypePtr elementType = lt->getElementType();
+                        auto list = c10::impl::GenericList(elementType);
+                        list.reserve(num_inputs);
+                        for (auto in : n->inputs()) {
+                          list.emplace_back(std::move(*(args.at(in).IValue())));
+                        }
+                        return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                      }
+                    } else {
+                      c10::ListTypePtr lt = n->output()->type()->expect<c10::ListType>();
+                      c10::TypePtr elementType = lt->getElementType();
+                      auto list = c10::impl::GenericList(elementType);
+                      list.reserve(num_inputs);
+                      for (auto in : n->inputs()) {
+                        if (args.at(in).isITensor()) {
+                          auto tensor_holder = TensorContainer();
+                          tensor_holder.hold_tensor(args.at(in).ITensor());
+                          auto ival = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(tensor_holder)));
+                          list.emplace_back(std::move(ival));
+                        } else {
+                          list.emplace_back(std::move(args.at(in).unwrapToTensor()));
+                        }
+                      }
+                      return c10::optional<torch::jit::IValue>(std::move(torch::jit::IValue(list)));
+                    }
+                  }})
    .evaluator({c10::Symbol::fromQualString("prim::dtype"),
                [](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
                  auto input = args.at(n->input(0));

diff --git a/core/partitioning/shape_analysis.h b/core/partitioning/shape_analysis.h
index 46450eb0f8..2654699a1d 100644
--- a/core/partitioning/shape_analysis.h
+++ b/core/partitioning/shape_analysis.h
@@ -6,9 +6,6 @@ namespace torch_tensorrt {
 namespace core {
 namespace partitioning {

-// std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
-//     std::unordered_map<const torch::jit::Value*, ir::Input>& input_ranges,
-//     std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>>& input_types);
 std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
     std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& input_ranges,

diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
index a83d2330e4..7daaedab8c 100644
--- a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
+++ b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
@@ -116,11 +116,11 @@ TEST(Partitioning, ResolveNonTensorInputsCorrectly) {
   inputs.push_back(torch_tensorrt::core::ir::Input({16, 3, 3, 3}));
   inputs.push_back(torch_tensorrt::core::ir::Input({16}));

-  std::unordered_map<const torch::jit::Value*, torch_tensorrt::core::ir::Input> inputs_map;
-  std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>> input_types;
+  std::unordered_map<const torch::jit::Value*, std::vector<torch_tensorrt::core::ir::Input>> inputs_map;
+  std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types;
   for (size_t i = 0; i < g->inputs().size(); ++i) {
-    inputs_map.insert({g->inputs()[i], inputs[i]});
-    input_types.insert({g->inputs()[i], {at::kFloat}});
+    inputs_map.insert({g->inputs()[i], {inputs[i]}});
+    input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
   auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types);
   std::vector<torch_tensorrt::core::partitioning::SegmentedBlock> segmented_blocks =
@@ -174,11 +174,11 @@ TEST(Partitioning, ResolveTensorListInputsInTrtCorrectly) {
   inputs.push_back(torch_tensorrt::core::ir::Input({16, 6, 3, 3}));
   inputs.push_back(torch_tensorrt::core::ir::Input({16}));

-  std::unordered_map<const torch::jit::Value*, torch_tensorrt::core::ir::Input> inputs_map;
-  std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>> input_types;
+  std::unordered_map<const torch::jit::Value*, std::vector<torch_tensorrt::core::ir::Input>> inputs_map;
+  std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types;
   for (size_t i = 0; i < g->inputs().size(); ++i) {
-    inputs_map.insert({g->inputs()[i], inputs[i]});
-    input_types.insert({g->inputs()[i], {at::kFloat}});
+    inputs_map.insert({g->inputs()[i], {inputs[i]}});
+    input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
   auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types);
   std::vector<torch_tensorrt::core::partitioning::SegmentedBlock> segmented_blocks =

diff --git a/tests/core/partitioning/test_shape_analysis.cpp b/tests/core/partitioning/test_shape_analysis.cpp
index 8effa821ae..d05f10c163 100644
--- a/tests/core/partitioning/test_shape_analysis.cpp
+++ b/tests/core/partitioning/test_shape_analysis.cpp
@@ -59,11 +59,11 @@ TEST(Partitioning, InferSequentialModelSegmentedBlockShapeCorrectly) {
   inputs.push_back(torch_tensorrt::core::ir::Input({8, 16, 3, 3}));
   inputs.push_back(torch_tensorrt::core::ir::Input({8}));

-  std::unordered_map<const torch::jit::Value*, torch_tensorrt::core::ir::Input> inputs_map;
-  std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>> input_types;
+  std::unordered_map<const torch::jit::Value*, std::vector<torch_tensorrt::core::ir::Input>> inputs_map;
+  std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types;
   for (size_t i = 0; i < g->inputs().size(); ++i) {
-    inputs_map.insert({g->inputs()[i], inputs[i]});
-    input_types.insert({g->inputs()[i], {at::kFloat}});
+    inputs_map.insert({g->inputs()[i], {inputs[i]}});
+    input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
   auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types);
   std::vector<torch_tensorrt::core::partitioning::SegmentedBlock> segmented_blocks =
@@ -109,11 +109,11 @@ TEST(Partitioning, InferBranchModelSegmentedBlockShapeCorrectly) {
   inputs.push_back(torch_tensorrt::core::ir::Input({16, 32, 3, 3}));
   inputs.push_back(torch_tensorrt::core::ir::Input({16}));

-  std::unordered_map<const torch::jit::Value*, torch_tensorrt::core::ir::Input> inputs_map;
-  std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>> input_types;
+  std::unordered_map<const torch::jit::Value*, std::vector<torch_tensorrt::core::ir::Input>> inputs_map;
+  std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types;
   for (size_t i = 0; i < g->inputs().size(); ++i) {
-    inputs_map.insert({g->inputs()[i], inputs[i]});
-    input_types.insert({g->inputs()[i], {at::kFloat}});
+    inputs_map.insert({g->inputs()[i], {inputs[i]}});
+    input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
   auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types);
   std::vector<torch_tensorrt::core::partitioning::SegmentedBlock> segmented_blocks =

From 016c991faf7632add3a4929b16d197c826bfc694 Mon Sep 17 00:00:00 2001
From: inocsin
Date: Thu, 14 Apr 2022 20:14:52 +0800
Subject: [PATCH 19/22] feat: [collection] handle prim::ListConstruct without manually falling it back

Signed-off-by: inocsin
---
 core/partitioning/partitioning.cpp | 24 +++++++++++++++++-------
 tests/cpp/test_collection.cpp      |  2 --
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp
index d171ae15c0..93ee4ab2a6 100644
--- a/core/partitioning/partitioning.cpp
+++ b/core/partitioning/partitioning.cpp
@@ -419,6 +419,15 @@ bool checkLoopEvaluatable(torch::jit::Node* n) {
   return compile_to_trt;
 }

+bool is_collection(torch::jit::Node* n) {
+  for (auto out: n->outputs()) {
+    if (out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool should_run_in_trt(torch::jit::Node* n, const std::unordered_set<std::string>& torch_ops) {
   // If the op is not supported by the conversion phase it should run in PyTorch
   if (!conversion::OpSupported(n)) {
@@ -459,18 +468,19 @@ PartitionedGraph segment_graph(torch::jit::Block* block, const PartitionInfo& partition_info) {
       partition_info.forced_fallback_operators.begin(), partition_info.forced_fallback_operators.end());

   auto nodes = block->nodes();
+  auto reverse_nodes = nodes.reverse(); // merge from the output side to the input side
   PartitionedGraph segmented_blocks;

   // segment the nodes
   std::vector<torch::jit::Node*> in_prog_trt_blk_nodes, in_prog_pyt_blk_nodes;
-  for (const auto n : nodes) {
+  for (const auto n : reverse_nodes) {
     // Skip constant nodes as they are resources for both kinds of modules
     if (n->kind() == torch::jit::prim::Constant) {
       continue;
     }
-
-    if (should_run_in_trt(n, forced_fallback_ops)) {
-      in_prog_trt_blk_nodes.push_back(n);
+    // the outputs of a TRT subgraph shouldn't be collections
+    if (should_run_in_trt(n, forced_fallback_ops) && !(in_prog_trt_blk_nodes.size() == 0 && is_collection(n))) {
+      in_prog_trt_blk_nodes.insert(in_prog_trt_blk_nodes.begin(), n);

       // If there is an active PyTorch block and we have passed the threshold for a valid TRT
       // block then segment and reset the active PyTorch block
@@ -505,14 +515,14 @@ PartitionedGraph segment_graph(torch::jit::Block* block, const PartitionInfo& partition_info) {
         finalize_block(segmented_blocks, SegmentedBlock::kTorch, in_prog_pyt_blk_nodes);
       }
       if (checkLoopEvaluatable(n)) {
-        in_prog_trt_blk_nodes.push_back(n);
+        in_prog_trt_blk_nodes.insert(in_prog_trt_blk_nodes.begin(), n);
       } else {
         auto loop_node = std::vector<torch::jit::Node*>{n};
         finalize_block(segmented_blocks, SegmentedBlock::kTorch, loop_node);
       }
       continue;
     }
-    in_prog_pyt_blk_nodes.push_back(n);
+    in_prog_pyt_blk_nodes.insert(in_prog_pyt_blk_nodes.begin(), n);
   }
 }

@@ -527,7 +537,7 @@ PartitionedGraph segment_graph(torch::jit::Block* block, const PartitionInfo& partition_info) {
         in_prog_pyt_blk_nodes.end(), in_prog_trt_blk_nodes.begin(), in_prog_trt_blk_nodes.end());
     finalize_block(segmented_blocks, SegmentedBlock::kTorch, in_prog_pyt_blk_nodes);
   }
-
+  std::reverse(segmented_blocks.begin(), segmented_blocks.end());
   return segmented_blocks;
 }
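The effect of the reversed walk is easiest to see on a toy sequence. A self-contained illustration, not Torch-TensorRT code: the guard above keeps a convertible, collection-producing node from becoming the tail (in graph order) of a TRT block, because that node's outputs would otherwise have to be engine outputs:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // graph order: conv -> relu -> prim::ListConstruct (graph output)
      std::vector<std::string> nodes = {"conv", "relu", "prim::ListConstruct"};
      std::vector<std::string> trt_blk, torch_blk;
      for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) {
        bool collection = (*it == "prim::ListConstruct");
        // mirrors: should_run_in_trt(n, ...) && !(trt block empty && is_collection(n))
        if (!collection || !trt_blk.empty()) {
          trt_blk.insert(trt_blk.begin(), *it);
        } else {
          torch_blk.insert(torch_blk.begin(), *it);
        }
      }
      // prints: TRT: conv relu | Torch: prim::ListConstruct
      std::cout << "TRT:";
      for (auto& n : trt_blk) std::cout << " " << n;
      std::cout << " | Torch:";
      for (auto& n : torch_blk) std::cout << " " << n;
      std::cout << "\n";
      return 0;
    }

Once a TRT block already has a member, a collection node may still merge into its interior, since interior values never cross the engine boundary.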
diff --git a/tests/cpp/test_collection.cpp b/tests/cpp/test_collection.cpp
index 73bcabcf13..9308d951f4 100644
--- a/tests/cpp/test_collection.cpp
+++ b/tests/cpp/test_collection.cpp
@@ -280,7 +280,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {

   // Need to skip the conversion of __getitem__ and ListConstruct
   compile_settings.torch_executed_ops.push_back("aten::__getitem__");
-  compile_settings.torch_executed_ops.push_back("prim::ListConstruct");

   // // FP16 execution
   compile_settings.enabled_precisions = {torch::kHalf};
@@ -351,7 +350,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {

   // Need to skip the conversion of __getitem__ and ListConstruct
   compile_settings.torch_executed_ops.push_back("aten::__getitem__");
-  compile_settings.torch_executed_ops.push_back("prim::ListConstruct");

   // // FP16 execution
   compile_settings.enabled_precisions = {torch::kHalf};

From 2e7cd5899d1b20946b419e218f00d4824ab2737b Mon Sep 17 00:00:00 2001
From: inocsin
Date: Thu, 14 Apr 2022 21:57:21 +0800
Subject: [PATCH 20/22] chore: [collection] update test_resolve_nontensor_inputs.cpp

Signed-off-by: inocsin
---
 tests/core/partitioning/test_resolve_nontensor_inputs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
index 7daaedab8c..e70d5d2b5d 100644
--- a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
+++ b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
@@ -255,5 +255,5 @@ TEST(Partitioning, ConvertForTensorListInputsInFallbackCorrectly) {
   torch::jit::script::Module new_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto fallback_g = new_mod.get_method("forward").graph();
   int count = count_trt_engines(fallback_g);
-  ASSERT_TRUE(count == 2);
+  ASSERT_TRUE(count == 1);
 }
From b35cdd06e4910a978e35b7f7225a0b05331ff489 Mon Sep 17 00:00:00 2001
From: inocsin
Date: Thu, 14 Apr 2022 22:36:03 +0800
Subject: [PATCH 21/22] fix: [collection] handle the case where only the output is a collection and all the nodes can be converted

Signed-off-by: inocsin
---
 core/compiler.cpp              | 6 ++++--
 core/conversion/conversion.cpp | 9 +++++++++
 core/conversion/conversion.h   | 2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/core/compiler.cpp b/core/compiler.cpp
index 57b4667bce..72243835dd 100644
--- a/core/compiler.cpp
+++ b/core/compiler.cpp
@@ -437,6 +437,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg) {
   MapInputsAndDetermineDTypes(cfg, g, static_params, first_use_types);
   auto isBlockConvertible = conversion::VerifyConverterSupportForBlock(g->block(), true);
+  auto outputIsCollection = conversion::OutputIsCollection(g->block());
   if (cfg.partition_info.enabled &&
       (cfg.lower_info.forced_fallback_modules.size() == 0 &&
        cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) {
@@ -444,8 +445,9 @@
   }

   if (cfg.partition_info.enabled &&
-      !(cfg.lower_info.forced_fallback_modules.size() == 0 &&
-        cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) {
+      (!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
+         cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)
+      || outputIsCollection)) {
     auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
     auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params);

diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp
index 3c9eb0dea4..bddd8fd835 100644
--- a/core/conversion/conversion.cpp
+++ b/core/conversion/conversion.cpp
@@ -555,6 +555,15 @@ std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
   return convertable_ops;
 }

+bool OutputIsCollection(const torch::jit::Block* b) {
+  for (auto out: b->outputs()) {
+    if (out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool VerifyConverterSupportForBlock(const torch::jit::Block* b, bool suppress_errors) {
   auto unsupported_ops = GetUnsupportedOpsInBlock(b);
   if (unsupported_ops.size() != 0) {

diff --git a/core/conversion/conversion.h b/core/conversion/conversion.h
index 148d99ac13..a578c4288e 100644
--- a/core/conversion/conversion.h
+++ b/core/conversion/conversion.h
@@ -26,6 +26,8 @@ std::string ConvertBlockToEngine(

 bool OpSupported(const torch::jit::Node* n);

+bool OutputIsCollection(const torch::jit::Block* b);
+
 bool VerifyConverterSupportForBlock(const torch::jit::Block* b, bool suppress_errors = false);

 c10::optional<torch::jit::IValue> EvaluateNode(

From fa6c10e7fd72ec1989e6ed6b17c352c4cb0e4655 Mon Sep 17 00:00:00 2001
From: inocsin
Date: Tue, 19 Apr 2022 10:02:54 +0800
Subject: [PATCH 22/22] fix: [collection] update tests/cpp/test_example_tensors.cpp

Signed-off-by: inocsin
---
 tests/cpp/test_example_tensors.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/cpp/test_example_tensors.cpp b/tests/cpp/test_example_tensors.cpp
index 6561cd16a0..7e16f47f70 100644
--- a/tests/cpp/test_example_tensors.cpp
+++ b/tests/cpp/test_example_tensors.cpp
@@ -8,8 +8,8 @@ TEST_P(CppAPITests, InputsFromTensors) {
     jit_inputs_ivalues.push_back(in.clone());
     trt_inputs_ivalues.push_back(in.clone());
   }
-
-  auto spec = torch_tensorrt::ts::CompileSpec({trt_inputs_ivalues[0].toTensor()});
+  std::vector<torch::Tensor> inputs = {trt_inputs_ivalues[0].toTensor()};
+  auto spec = torch_tensorrt::ts::CompileSpec(inputs);

   auto trt_mod = torch_tensorrt::ts::compile(mod, spec);
   torch::jit::IValue trt_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(trt_mod, trt_inputs_ivalues);
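The output-collection case from patch 21 is easy to reproduce in isolation. A sketch with IR parsed ad hoc, not one of the shipped tests: every node below has a converter or evaluator, yet the block output is a TupleType, so OutputIsCollection() returns true and CompileGraph takes the fallback-graph path even with no forced-fallback ops or modules:

    #include "core/conversion/conversion.h"
    #include "torch/csrc/jit/ir/irparser.h"

    auto g = std::make_shared<torch::jit::Graph>();
    torch::jit::parseIR(R"IR(
      graph(%x : Tensor, %y : Tensor):
        %1 : int = prim::Constant[value=1]()
        %a : Tensor = aten::add(%x, %y, %1)
        %b : Tensor = aten::sub(%x, %y, %1)
        %r : (Tensor, Tensor) = prim::TupleConstruct(%a, %b)
        return (%r))IR",
      g.get());
    // true: the block returns a tuple, not a plain tensor
    bool needs_fallback = torch_tensorrt::core::conversion::OutputIsCollection(g->block());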