Skip to content

Commit f145011

Browse files
committed
Revert "[TensorRT EP] Add unit test for user provided cuda stream (#17974)"
This reverts commit 555b2af.
1 parent 538e97c commit f145011

1 file changed

Lines changed: 9 additions & 126 deletions

File tree

onnxruntime/test/shared_lib/test_inference.cc

Lines changed: 9 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,132 +2832,6 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
28322832
#endif
28332833

28342834
#ifdef USE_TENSORRT
2835-
TEST(CApiTest, TestExternalCUDAStreamWithIOBinding) {
2836-
const auto& api = Ort::GetApi();
2837-
Ort::SessionOptions session_options;
2838-
2839-
OrtTensorRTProviderOptionsV2* trt_options;
2840-
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
2841-
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
2842-
rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
2843-
2844-
// updating provider option with user provided compute stream
2845-
cudaStream_t compute_stream = nullptr;
2846-
void* user_compute_stream = nullptr;
2847-
cudaStreamCreate(&compute_stream);
2848-
ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
2849-
ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
2850-
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
2851-
2852-
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
2853-
static_cast<OrtSessionOptions*>(session_options),
2854-
rel_trt_options.get()) == nullptr);
2855-
2856-
Ort::Session session(*ort_env, MODEL_URI, session_options);
2857-
Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
2858-
2859-
const std::array<int64_t, 2> x_shape = {3, 2};
2860-
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
2861-
2862-
/*
2863-
* Use cudaMallocHost() (pinned memory allocation) to create input/output tensors
2864-
*/
2865-
float* input_data;
2866-
cudaMallocHost(&input_data, 3 * 2 * sizeof(float));
2867-
ASSERT_NE(input_data, nullptr);
2868-
cudaMemcpy(input_data, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
2869-
2870-
std::cout << "pinned memory allocation" << std::endl;
2871-
std::cout << "input tesnor:" << std::endl;
2872-
for (int i = 0; i < 6; i++) {
2873-
std::cout << input_data[i] << std::endl;
2874-
}
2875-
2876-
// Create an OrtValue tensor backed by data on CUDA memory
2877-
Ort::Value bound_x = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data), x_values.size(),
2878-
x_shape.data(), x_shape.size());
2879-
2880-
const std::array<int64_t, 2> expected_y_shape = {3, 2};
2881-
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
2882-
2883-
float* output_data;
2884-
cudaMallocHost(&output_data, 3 * 2 * sizeof(float));
2885-
ASSERT_NE(output_data, nullptr);
2886-
2887-
// Create an OrtValue tensor backed by data on CUDA memory
2888-
Ort::Value bound_y = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data),
2889-
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
2890-
2891-
// Create IoBinding for inputs and outputs.
2892-
Ort::IoBinding binding(session);
2893-
binding.BindInput("X", bound_x);
2894-
binding.BindOutput("Y", bound_y);
2895-
2896-
/*
2897-
* Use cudaMalloc() (pageable memory allocation first and then implicit pinned memory allocation) to create input/output tensors
2898-
*/
2899-
float* input_data_2;
2900-
cudaMalloc(&input_data_2, 3 * 2 * sizeof(float));
2901-
ASSERT_NE(input_data_2, nullptr);
2902-
cudaMemcpy(input_data_2, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
2903-
2904-
// Create an OrtValue tensor backed by data on CUDA memory
2905-
Ort::Value bound_x_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data_2), x_values.size(),
2906-
x_shape.data(), x_shape.size());
2907-
2908-
float* output_data_2;
2909-
cudaMalloc(&output_data_2, 3 * 2 * sizeof(float));
2910-
ASSERT_NE(output_data_2, nullptr);
2911-
2912-
// Create an OrtValue tensor backed by data on CUDA memory
2913-
Ort::Value bound_y_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data_2),
2914-
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
2915-
2916-
// Create IoBinding for inputs and outputs.
2917-
Ort::IoBinding binding_2(session);
2918-
binding_2.BindInput("X", bound_x_2);
2919-
binding_2.BindOutput("Y", bound_y_2);
2920-
2921-
// Run with first iobindings
2922-
session.Run(Ort::RunOptions(), binding);
2923-
2924-
// Check the values against the bound raw memory (needs copying from device to host first)
2925-
std::array<float, 3 * 2> y_values;
2926-
cudaMemcpy(y_values.data(), output_data, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
2927-
2928-
std::cout << "pinned memory allocation" << std::endl;
2929-
std::cout << "output: " << std::endl;
2930-
for (auto y : y_values) {
2931-
std::cout << y << std::endl;
2932-
}
2933-
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
2934-
2935-
// Run with second iobindings
2936-
session.Run(Ort::RunOptions(), binding_2);
2937-
2938-
// Check the values against the bound raw memory (needs copying from device to host first)
2939-
cudaMemcpy(y_values.data(), output_data_2, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
2940-
2941-
std::cout << "pageable memory allocation" << std::endl;
2942-
std::cout << "output: " << std::endl;
2943-
for (auto y : y_values) {
2944-
std::cout << y << std::endl;
2945-
}
2946-
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
2947-
2948-
// Clean up
2949-
binding.ClearBoundInputs();
2950-
binding.ClearBoundOutputs();
2951-
binding_2.ClearBoundInputs();
2952-
binding_2.ClearBoundOutputs();
2953-
2954-
cudaFreeHost(input_data);
2955-
cudaFreeHost(output_data);
2956-
cudaFree(input_data_2);
2957-
cudaFree(output_data_2);
2958-
cudaStreamDestroy(compute_stream);
2959-
}
2960-
29612835
class CApiTensorRTTest : public testing::Test, public ::testing::WithParamInterface<std::string> {};
29622836

29632837
// This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
@@ -2975,6 +2849,15 @@ TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
29752849
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
29762850
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
29772851

2852+
// Only test updating provider option with user provided compute stream
2853+
cudaStream_t compute_stream = nullptr;
2854+
void* user_compute_stream = nullptr;
2855+
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
2856+
ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
2857+
ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
2858+
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
2859+
cudaStreamDestroy(compute_stream);
2860+
29782861
const char* engine_cache_path = "./trt_engine_folder";
29792862

29802863
std::vector<const char*> keys{"device_id", "has_user_compute_stream", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable",

0 commit comments

Comments
 (0)