Skip to content

Commit dcf27f0

Browse files
chilo-mskleiti
authored andcommitted
[TensorRT EP] Add unit test for user provided cuda stream (microsoft#17974)
Add a unit test for testing user provided CUDA stream
1 parent 9aa60b1 commit dcf27f0

1 file changed

Lines changed: 126 additions & 9 deletions

File tree

onnxruntime/test/shared_lib/test_inference.cc

Lines changed: 126 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,132 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
28322832
#endif
28332833

28342834
#ifdef USE_TENSORRT
2835+
TEST(CApiTest, TestExternalCUDAStreamWithIOBinding) {
2836+
const auto& api = Ort::GetApi();
2837+
Ort::SessionOptions session_options;
2838+
2839+
OrtTensorRTProviderOptionsV2* trt_options;
2840+
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
2841+
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
2842+
rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
2843+
2844+
// updating provider option with user provided compute stream
2845+
cudaStream_t compute_stream = nullptr;
2846+
void* user_compute_stream = nullptr;
2847+
cudaStreamCreate(&compute_stream);
2848+
ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
2849+
ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
2850+
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
2851+
2852+
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
2853+
static_cast<OrtSessionOptions*>(session_options),
2854+
rel_trt_options.get()) == nullptr);
2855+
2856+
Ort::Session session(*ort_env, MODEL_URI, session_options);
2857+
Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
2858+
2859+
const std::array<int64_t, 2> x_shape = {3, 2};
2860+
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
2861+
2862+
/*
2863+
* Use cudaMallocHost() (pinned memory allocation) to create input/output tensors
2864+
*/
2865+
float* input_data;
2866+
cudaMallocHost(&input_data, 3 * 2 * sizeof(float));
2867+
ASSERT_NE(input_data, nullptr);
2868+
cudaMemcpy(input_data, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
2869+
2870+
std::cout << "pinned memory allocation" << std::endl;
2871+
std::cout << "input tesnor:" << std::endl;
2872+
for (int i = 0; i < 6; i++) {
2873+
std::cout << input_data[i] << std::endl;
2874+
}
2875+
2876+
// Create an OrtValue tensor backed by data on CUDA memory
2877+
Ort::Value bound_x = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data), x_values.size(),
2878+
x_shape.data(), x_shape.size());
2879+
2880+
const std::array<int64_t, 2> expected_y_shape = {3, 2};
2881+
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
2882+
2883+
float* output_data;
2884+
cudaMallocHost(&output_data, 3 * 2 * sizeof(float));
2885+
ASSERT_NE(output_data, nullptr);
2886+
2887+
// Create an OrtValue tensor backed by data on CUDA memory
2888+
Ort::Value bound_y = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data),
2889+
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
2890+
2891+
// Create IoBinding for inputs and outputs.
2892+
Ort::IoBinding binding(session);
2893+
binding.BindInput("X", bound_x);
2894+
binding.BindOutput("Y", bound_y);
2895+
2896+
/*
2897+
* Use cudaMalloc() (pageable memory allocation first and then implicit pinned memory allocation) to create input/output tensors
2898+
*/
2899+
float* input_data_2;
2900+
cudaMalloc(&input_data_2, 3 * 2 * sizeof(float));
2901+
ASSERT_NE(input_data_2, nullptr);
2902+
cudaMemcpy(input_data_2, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
2903+
2904+
// Create an OrtValue tensor backed by data on CUDA memory
2905+
Ort::Value bound_x_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data_2), x_values.size(),
2906+
x_shape.data(), x_shape.size());
2907+
2908+
float* output_data_2;
2909+
cudaMalloc(&output_data_2, 3 * 2 * sizeof(float));
2910+
ASSERT_NE(output_data_2, nullptr);
2911+
2912+
// Create an OrtValue tensor backed by data on CUDA memory
2913+
Ort::Value bound_y_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data_2),
2914+
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
2915+
2916+
// Create IoBinding for inputs and outputs.
2917+
Ort::IoBinding binding_2(session);
2918+
binding_2.BindInput("X", bound_x_2);
2919+
binding_2.BindOutput("Y", bound_y_2);
2920+
2921+
// Run with first iobindings
2922+
session.Run(Ort::RunOptions(), binding);
2923+
2924+
// Check the values against the bound raw memory (needs copying from device to host first)
2925+
std::array<float, 3 * 2> y_values;
2926+
cudaMemcpy(y_values.data(), output_data, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
2927+
2928+
std::cout << "pinned memory allocation" << std::endl;
2929+
std::cout << "output: " << std::endl;
2930+
for (auto y : y_values) {
2931+
std::cout << y << std::endl;
2932+
}
2933+
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
2934+
2935+
// Run with second iobindings
2936+
session.Run(Ort::RunOptions(), binding_2);
2937+
2938+
// Check the values against the bound raw memory (needs copying from device to host first)
2939+
cudaMemcpy(y_values.data(), output_data_2, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
2940+
2941+
std::cout << "pageable memory allocation" << std::endl;
2942+
std::cout << "output: " << std::endl;
2943+
for (auto y : y_values) {
2944+
std::cout << y << std::endl;
2945+
}
2946+
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
2947+
2948+
// Clean up
2949+
binding.ClearBoundInputs();
2950+
binding.ClearBoundOutputs();
2951+
binding_2.ClearBoundInputs();
2952+
binding_2.ClearBoundOutputs();
2953+
2954+
cudaFreeHost(input_data);
2955+
cudaFreeHost(output_data);
2956+
cudaFree(input_data_2);
2957+
cudaFree(output_data_2);
2958+
cudaStreamDestroy(compute_stream);
2959+
}
2960+
28352961
class CApiTensorRTTest : public testing::Test, public ::testing::WithParamInterface<std::string> {};
28362962

28372963
// This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
@@ -2849,15 +2975,6 @@ TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
28492975
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
28502976
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
28512977

2852-
// Only test updating provider option with user provided compute stream
2853-
cudaStream_t compute_stream = nullptr;
2854-
void* user_compute_stream = nullptr;
2855-
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
2856-
ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
2857-
ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
2858-
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
2859-
cudaStreamDestroy(compute_stream);
2860-
28612978
const char* engine_cache_path = "./trt_engine_folder";
28622979

28632980
std::vector<const char*> keys{"device_id", "has_user_compute_stream", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable",

0 commit comments

Comments
 (0)