Skip to content

cuda::HoughSegmentDetector: macro to ignore stream should have been removed in #2801 #3015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
deadprogram opened this issue Aug 11, 2021 · 3 comments

Comments

@deadprogram
Copy link
Contributor

We ran into a problem when trying to use multiple cuda::HoughSegmentDetector instances, each with its own non-default async cuda::Stream. Looking into the code, it seems like the macro CV_UNUSED(stream); should have been removed in PR #2801 but was perhaps missed?

Not sure if this is actually to do with our error, listed here:

what():  OpenCV(4.5.3) /opencv_contrib-4.5.3/modules/cudaimgproc/src/cuda/hough_segments.cu:234: error: (-217:Gpu API call) an illegal memory access was encountered in function 'houghLinesProbabilistic_gpu'

However it certainly seems like something that should be addressed, right? Thanks.

@alalek
Copy link
Member

alalek commented Aug 11, 2021

CV_UNUSED(stream) is "no op" statement to avoid compiler warnings.
It can't cause any runtime errors.


Please provide a complete minimal reproducer (including input data).
Provide the system specification that causes this error.

@mitul93
Copy link
Contributor

mitul93 commented Aug 17, 2021

Hi @alalek

Here are the details

OpenCV version : 4.5.3
opencv-contrib version : 4.5.3
OS : Ubuntu 18.04.1
CUDA version : 11.2
GPU : Quadro RTX 4000

Here is the problematic test case hough_test_case.zip.gz

The crash is random in nature. You may need to run it two or three times to see the crash as reported in the logs below.

The logs

root@df29f8235b3a:/src/testdata# ./hough_crash_reproduce 
Test case for hough segmentation cuda crash...
Reading test image...
Test image reading successful.
Thread, 4, index, 0,Found #lines, 4096
Thread, 3, index, 0,Found #lines, 4096
Thread, 1, index, 0,Found #lines, 4096
Thread, 0, index, 0,Found #lines, 4096
...
Thread, 4, index, 21,Found #lines, 4096
terminate called after throwing an instance of 'cv::Exception'
  what():  OpenCV(4.5.3) /opencv_contrib-4.5.3/modules/cudaimgproc/src/cuda/hough_segments.cu:234: error: (-217:Gpu API call) an illegal memory access was encountered in function 'houghLinesProbabilistic_gpu'
terminate called recursively

terminate called recursively
Aborted (core dumped)

It seems that hough_segments.cu uses texture memory and it is not thread safe. This is similar to issue #6742
I have applied the following patch to opencv_contrib/modules/cudaimgproc/src/cuda/hough_segments.cu, inspired by #6742. It stopped the crash, and it also reports the same number of lines.

Here's the patch

--- /opencv_contrib-4.5.3/modules/cudaimgproc/src/cuda/hough_segments.cu	2021-08-18 20:15:33.624196369 +0200
+++ /opencv_contrib-4.5.3/modules/cudaimgproc/src/cuda/hough_segments_new.cu	2021-08-18 21:42:47.234149824 +0200
@@ -49,14 +49,13 @@
 {
     namespace hough_segments
     {
-        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
 
         __global__ void houghLinesProbabilistic(const PtrStepSzi accum,
                                                 int4* out, const int maxSize,
                                                 const float rho, const float theta,
                                                 const int lineGap, const int lineLength,
                                                 const int rows, const int cols,
-                                                int* counterPtr)
+                                                int* counterPtr, cudaTextureObject_t maskTex)
         {
             const int r = blockIdx.x * blockDim.x + threadIdx.x;
             const int n = blockIdx.y * blockDim.y + threadIdx.y;
@@ -157,7 +156,7 @@
 
                 for (;;)
                 {
-                    if (tex2D(tex_mask, p1.x, p1.y))
+                    if (tex2D<uchar>(maskTex, p1.x, p1.y))
                     {
                         gap = 0;
 
@@ -213,21 +212,19 @@
             }
         }
 
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength, int* counterPtr, cudaStream_t stream)
+        int houghLinesProbabilistic_gpu(const cudaTextureObject_t &maskTex, const int &rows, const int &cols, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength, int* counterPtr, cudaStream_t stream)
         {
             cudaSafeCall( cudaMemsetAsync(counterPtr, 0, sizeof(int), stream) );
 
             const dim3 block(32, 8);
             const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
 
-            bindTexture(&tex_mask, mask);
-
             houghLinesProbabilistic<<<grid, block, 0, stream>>>(accum,
                                                      out, maxSize,
                                                      rho, theta,
                                                      lineGap, lineLength,
-                                                     mask.rows, mask.cols,
-                                                     counterPtr);
+                                                     rows, cols,
+                                                     counterPtr, maskTex);
             cudaSafeCall( cudaGetLastError() );
 
             int totalCount;
--- /opencv_contrib-4.5.3/modules/cudaimgproc/src/hough_segments.cpp	2021-08-18 19:34:21.780074389 +0200
+++ /opencv_contrib-4.5.3/modules/cudaimgproc/src/hough_segments.cpp	2021-08-18 19:42:26.199200180 +0200
@@ -65,7 +65,7 @@
 
     namespace hough_segments
     {
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength, int* counterPtr, cudaStream_t stream);
+        int houghLinesProbabilistic_gpu(const cudaTextureObject_t &maskTex, const int &rows, const int &cols, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength, int* counterPtr, cudaStream_t stream);
     }
 }}}
 
@@ -178,7 +178,19 @@
 
         ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
 
-        int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_, counterPtr_, cudaStream);
+        cudaTextureDesc texDesc;
+        memset(&texDesc, 0, sizeof(texDesc));
+        texDesc.readMode = cudaReadModeElementType;
+        texDesc.filterMode = cudaFilterModePoint;
+        texDesc.addressMode[0] = cudaAddressModeClamp;
+        texDesc.addressMode[1] = cudaAddressModeClamp;
+        texDesc.addressMode[2] = cudaAddressModeClamp;
+
+        cudaTextureObject_t maskTex_;
+        PtrStepSzb mask = src;
+        cv::cuda::device::createTextureObjectPitch2D<unsigned char>(&maskTex_, mask, texDesc);
+
+        int linesCount = houghLinesProbabilistic_gpu(maskTex_, mask.rows, mask.cols, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_, counterPtr_, cudaStream);
 
         if (linesCount == 0)
         {

Could you please verify that:

  1. The problem is not caused by how we use cv::cuda calls in multi-threaded code
  2. The patch is a good solution for the problem

Let me know if you have any questions or need any further information.

Thanks
MV

@deadprogram
Copy link
Contributor Author

Closing, as we will open a new issue with a correct description of the problem and then a PR with our solution.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants