@@ -2832,132 +2832,6 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
28322832#endif
28332833
28342834#ifdef USE_TENSORRT
2835- TEST (CApiTest, TestExternalCUDAStreamWithIOBinding) {
2836- const auto & api = Ort::GetApi ();
2837- Ort::SessionOptions session_options;
2838-
2839- OrtTensorRTProviderOptionsV2* trt_options;
2840- ASSERT_TRUE (api.CreateTensorRTProviderOptions (&trt_options) == nullptr );
2841- std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype (api.ReleaseTensorRTProviderOptions )>
2842- rel_trt_options (trt_options, api.ReleaseTensorRTProviderOptions );
2843-
2844- // updating provider option with user provided compute stream
2845- cudaStream_t compute_stream = nullptr ;
2846- void * user_compute_stream = nullptr ;
2847- cudaStreamCreate (&compute_stream);
2848- ASSERT_TRUE (api.UpdateTensorRTProviderOptionsWithValue (rel_trt_options.get (), " user_compute_stream" , compute_stream) == nullptr );
2849- ASSERT_TRUE (api.GetTensorRTProviderOptionsByName (rel_trt_options.get (), " user_compute_stream" , &user_compute_stream) == nullptr );
2850- ASSERT_TRUE (user_compute_stream == (void *)compute_stream);
2851-
2852- ASSERT_TRUE (api.SessionOptionsAppendExecutionProvider_TensorRT_V2 (
2853- static_cast <OrtSessionOptions*>(session_options),
2854- rel_trt_options.get ()) == nullptr );
2855-
2856- Ort::Session session (*ort_env, MODEL_URI, session_options);
2857- Ort::MemoryInfo info_cuda (" Cuda" , OrtAllocatorType::OrtArenaAllocator, 0 , OrtMemTypeDefault);
2858-
2859- const std::array<int64_t , 2 > x_shape = {3 , 2 };
2860- std::array<float , 3 * 2 > x_values = {1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f , 6 .0f };
2861-
2862- /*
2863- * Use cudaMallocHost() (pinned memory allocation) to create input/output tensors
2864- */
2865- float * input_data;
2866- cudaMallocHost (&input_data, 3 * 2 * sizeof (float ));
2867- ASSERT_NE (input_data, nullptr );
2868- cudaMemcpy (input_data, x_values.data (), sizeof (float ) * x_values.size (), cudaMemcpyHostToDevice);
2869-
2870- std::cout << " pinned memory allocation" << std::endl;
2871- std::cout << " input tesnor:" << std::endl;
2872- for (int i = 0 ; i < 6 ; i++) {
2873- std::cout << input_data[i] << std::endl;
2874- }
2875-
2876- // Create an OrtValue tensor backed by data on CUDA memory
2877- Ort::Value bound_x = Ort::Value::CreateTensor (info_cuda, reinterpret_cast <float *>(input_data), x_values.size (),
2878- x_shape.data (), x_shape.size ());
2879-
2880- const std::array<int64_t , 2 > expected_y_shape = {3 , 2 };
2881- std::array<float , 3 * 2 > expected_y = {1 .0f , 4 .0f , 9 .0f , 16 .0f , 25 .0f , 36 .0f };
2882-
2883- float * output_data;
2884- cudaMallocHost (&output_data, 3 * 2 * sizeof (float ));
2885- ASSERT_NE (output_data, nullptr );
2886-
2887- // Create an OrtValue tensor backed by data on CUDA memory
2888- Ort::Value bound_y = Ort::Value::CreateTensor (info_cuda, reinterpret_cast <float *>(output_data),
2889- expected_y.size (), expected_y_shape.data (), expected_y_shape.size ());
2890-
2891- // Create IoBinding for inputs and outputs.
2892- Ort::IoBinding binding (session);
2893- binding.BindInput (" X" , bound_x);
2894- binding.BindOutput (" Y" , bound_y);
2895-
2896- /*
2897- * Use cudaMalloc() (pageable memory allocation first and then implicit pinned memory allocation) to create input/output tensors
2898- */
2899- float * input_data_2;
2900- cudaMalloc (&input_data_2, 3 * 2 * sizeof (float ));
2901- ASSERT_NE (input_data_2, nullptr );
2902- cudaMemcpy (input_data_2, x_values.data (), sizeof (float ) * x_values.size (), cudaMemcpyHostToDevice);
2903-
2904- // Create an OrtValue tensor backed by data on CUDA memory
2905- Ort::Value bound_x_2 = Ort::Value::CreateTensor (info_cuda, reinterpret_cast <float *>(input_data_2), x_values.size (),
2906- x_shape.data (), x_shape.size ());
2907-
2908- float * output_data_2;
2909- cudaMalloc (&output_data_2, 3 * 2 * sizeof (float ));
2910- ASSERT_NE (output_data_2, nullptr );
2911-
2912- // Create an OrtValue tensor backed by data on CUDA memory
2913- Ort::Value bound_y_2 = Ort::Value::CreateTensor (info_cuda, reinterpret_cast <float *>(output_data_2),
2914- expected_y.size (), expected_y_shape.data (), expected_y_shape.size ());
2915-
2916- // Create IoBinding for inputs and outputs.
2917- Ort::IoBinding binding_2 (session);
2918- binding_2.BindInput (" X" , bound_x_2);
2919- binding_2.BindOutput (" Y" , bound_y_2);
2920-
2921- // Run with first iobindings
2922- session.Run (Ort::RunOptions (), binding);
2923-
2924- // Check the values against the bound raw memory (needs copying from device to host first)
2925- std::array<float , 3 * 2 > y_values;
2926- cudaMemcpy (y_values.data (), output_data, sizeof (float ) * y_values.size (), cudaMemcpyDeviceToHost);
2927-
2928- std::cout << " pinned memory allocation" << std::endl;
2929- std::cout << " output: " << std::endl;
2930- for (auto y : y_values) {
2931- std::cout << y << std::endl;
2932- }
2933- ASSERT_THAT (y_values, ::testing::ContainerEq (expected_y));
2934-
2935- // Run with second iobindings
2936- session.Run (Ort::RunOptions (), binding_2);
2937-
2938- // Check the values against the bound raw memory (needs copying from device to host first)
2939- cudaMemcpy (y_values.data (), output_data_2, sizeof (float ) * y_values.size (), cudaMemcpyDeviceToHost);
2940-
2941- std::cout << " pageable memory allocation" << std::endl;
2942- std::cout << " output: " << std::endl;
2943- for (auto y : y_values) {
2944- std::cout << y << std::endl;
2945- }
2946- ASSERT_THAT (y_values, ::testing::ContainerEq (expected_y));
2947-
2948- // Clean up
2949- binding.ClearBoundInputs ();
2950- binding.ClearBoundOutputs ();
2951- binding_2.ClearBoundInputs ();
2952- binding_2.ClearBoundOutputs ();
2953-
2954- cudaFreeHost (input_data);
2955- cudaFreeHost (output_data);
2956- cudaFree (input_data_2);
2957- cudaFree (output_data_2);
2958- cudaStreamDestroy (compute_stream);
2959- }
2960-
29612835class CApiTensorRTTest : public testing ::Test, public ::testing::WithParamInterface<std::string> {};
29622836
29632837// This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
@@ -2975,6 +2849,15 @@ TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
29752849 ASSERT_TRUE (api.CreateTensorRTProviderOptions (&trt_options) == nullptr );
29762850 std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype (api.ReleaseTensorRTProviderOptions )> rel_trt_options (trt_options, api.ReleaseTensorRTProviderOptions );
29772851
2852+ // Only test updating provider option with user provided compute stream
2853+ cudaStream_t compute_stream = nullptr ;
2854+ void * user_compute_stream = nullptr ;
2855+ cudaStreamCreateWithFlags (&compute_stream, cudaStreamNonBlocking);
2856+ ASSERT_TRUE (api.UpdateTensorRTProviderOptionsWithValue (rel_trt_options.get (), " user_compute_stream" , compute_stream) == nullptr );
2857+ ASSERT_TRUE (api.GetTensorRTProviderOptionsByName (rel_trt_options.get (), " user_compute_stream" , &user_compute_stream) == nullptr );
2858+ ASSERT_TRUE (user_compute_stream == (void *)compute_stream);
2859+ cudaStreamDestroy (compute_stream);
2860+
29782861 const char * engine_cache_path = " ./trt_engine_folder" ;
29792862
29802863 std::vector<const char *> keys{" device_id" , " has_user_compute_stream" , " trt_fp16_enable" , " trt_int8_enable" , " trt_engine_cache_enable" ,
0 commit comments