4848#include  " opencv2/core/cuda/functional.hpp" 
4949#include  " opencv2/core/cuda/utility.hpp" 
5050#include  " opencv2/core/cuda.hpp" 
51+ #include  < opencv2/cudev/ptr2d/texture.hpp> 
5152
5253using  namespace  cv ::cuda; 
5354using  namespace  cv ::cuda::device; 
@@ -90,56 +91,17 @@ namespace cv { namespace cuda { namespace device
9091
9192namespace  canny 
9293{
93-     struct  SrcTex 
94-     {
95-         virtual  ~SrcTex () {}
96- 
97-         __host__  SrcTex (int  _xoff, int  _yoff) : xoff(_xoff), yoff(_yoff) {}
98- 
99-         __device__  __forceinline__  virtual  int  operator  ()(int  y, int  x) const  = 0;
100- 
101-         int  xoff;
102-         int  yoff;
103-     };
104- 
105-     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src (false , cudaFilterModePoint, cudaAddressModeClamp);
106-     struct  SrcTexRef  : SrcTex
107-     {
108-         __host__  SrcTexRef (int  _xoff, int  _yoff) : SrcTex(_xoff, _yoff) {}
109- 
110-         __device__  __forceinline__  int  operator  ()(int  y, int  x) const  override 
111-         {
112-             return  tex2D (tex_src, x + xoff, y + yoff);
113-         }
114-     };
115- 
116-     struct  SrcTexObj  : SrcTex
117-     {
118-         __host__  SrcTexObj (int  _xoff, int  _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { }
119- 
120-         __device__  __forceinline__  int  operator  ()(int  y, int  x) const  override 
121-         {
122-             return  tex2D <uchar>(tex_src_object, x + xoff, y + yoff);
123-         }
124- 
125-         cudaTextureObject_t tex_src_object;
126-     };
127- 
128-     template  <
129-         class  T ,
130-         class  Norm ,
131-         typename  = typename  std::enable_if<std::is_base_of<SrcTex, T>::value>::type
132-     >
133-     __global__  void  calcMagnitudeKernel (const  T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const  Norm norm)
94+     template  <class  Norm >
95+     __global__  void  calcMagnitudeKernel (cv::cudev::TextureOffPtr<uchar> texSrc, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const  Norm norm)
13496    {
13597        const  int  x = blockIdx .x  * blockDim .x  + threadIdx .x ;
13698        const  int  y = blockIdx .y  * blockDim .y  + threadIdx .y ;
13799
138100        if  (y >= mag.rows  || x >= mag.cols )
139101            return ;
140102
141-         int  dxVal = (src (y - 1 , x + 1 ) + 2  * src (y, x + 1 ) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2  * src (y, x - 1 ) + src (y + 1 , x - 1 ));
142-         int  dyVal = (src (y + 1 , x - 1 ) + 2  * src (y + 1 , x) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2  * src (y - 1 , x) + src (y - 1 , x + 1 ));
103+         int  dxVal = (texSrc (y - 1 , x + 1 ) + 2  * texSrc (y, x + 1 ) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2  * texSrc (y, x - 1 ) + texSrc (y + 1 , x - 1 ));
104+         int  dyVal = (texSrc (y + 1 , x - 1 ) + 2  * texSrc (y + 1 , x) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2  * texSrc (y - 1 , x) + texSrc (y - 1 , x + 1 ));
143105
144106        dx (y, x) = dxVal;
145107        dy (y, x) = dyVal;
@@ -151,63 +113,20 @@ namespace canny
151113    {
152114        const  dim3  block (16 , 16 );
153115        const  dim3  grid (divUp (mag.cols , block.x ), divUp (mag.rows , block.y ));
154- 
155-         bool  cc30 = deviceSupports (FEATURE_SET_COMPUTE_30);
156- 
157-         if  (cc30)
116+         cv::cudev::TextureOff<uchar> texSrc (srcWhole, yoff, xoff);
117+         if  (L2Grad)
158118        {
159-             cudaTextureDesc texDesc;
160-             memset (&texDesc, 0 , sizeof (texDesc));
161-             texDesc.addressMode [0 ] = cudaAddressModeClamp;
162-             texDesc.addressMode [1 ] = cudaAddressModeClamp;
163-             texDesc.addressMode [2 ] = cudaAddressModeClamp;
164- 
165-             cudaTextureObject_t tex = 0 ;
166-             createTextureObjectPitch2D (&tex, srcWhole, texDesc);
167- 
168-             SrcTexObj src (xoff, yoff, tex);
169- 
170-             if  (L2Grad)
171-             {
172-                 L2 norm;
173-                 calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
174-             }
175-             else 
176-             {
177-                 L1 norm;
178-                 calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
179-             }
180- 
181-             cudaSafeCall ( cudaGetLastError () );
182- 
183-             if  (stream == NULL )
184-                 cudaSafeCall ( cudaDeviceSynchronize () );
185-             else 
186-                 cudaSafeCall ( cudaStreamSynchronize (stream) );
187- 
188-             cudaSafeCall ( cudaDestroyTextureObject (tex) );
119+             L2 norm;
120+             calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
189121        }
190122        else 
191123        {
192-             bindTexture (&tex_src, srcWhole);
193-             SrcTexRef src (xoff, yoff);
194- 
195-             if  (L2Grad)
196-             {
197-                 L2 norm;
198-                 calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
199-             }
200-             else 
201-             {
202-                 L1 norm;
203-                 calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
204-             }
205- 
206-             cudaSafeCall ( cudaGetLastError () );
207- 
208-             if  (stream == NULL )
209-                 cudaSafeCall ( cudaDeviceSynchronize () );
124+             L1 norm;
125+             calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
210126        }
127+ 
128+         if  (stream == NULL )
129+             cudaSafeCall (cudaDeviceSynchronize ());
211130    }
212131
213132    void  calcMagnitude (PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool  L2Grad, cudaStream_t stream)
@@ -229,8 +148,7 @@ namespace canny
229148
230149namespace  canny 
231150{
232-     texture<float , cudaTextureType2D, cudaReadModeElementType> tex_mag (false , cudaFilterModePoint, cudaAddressModeClamp);
233-     __global__  void  calcMapKernel (const  PtrStepSzi dx, const  PtrStepi dy, PtrStepi map, const  float  low_thresh, const  float  high_thresh)
151+     __global__  void  calcMapKernel (cv::cudev::TexturePtr<float > texMag, const  PtrStepSzi dx, const  PtrStepi dy, PtrStepi map, const  float  low_thresh, const  float  high_thresh)
234152    {
235153        const  int  CANNY_SHIFT = 15 ;
236154        const  int  TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
@@ -245,7 +163,7 @@ namespace canny
245163        int  dyVal = dy (y, x);
246164
247165        const  int  s = (dxVal ^ dyVal) < 0  ? -1  : 1 ;
248-         const  float  m = tex2D (tex_mag , x, y );
166+         const  float  m = texMag (y , x);
249167
250168        dxVal = ::abs (dxVal);
251169        dyVal = ::abs (dyVal);
@@ -264,69 +182,17 @@ namespace canny
264182
265183            if  (dyVal < tg22x)
266184            {
267-                 if  (m > tex2D (tex_mag , x - 1 , y ) && m >= tex2D (tex_mag , x + 1 , y ))
185+                 if  (m > texMag (y , x - 1 ) && m >= texMag (y , x + 1 ))
268186                    edge_type = 1  + (int )(m > high_thresh);
269187            }
270188            else  if (dyVal > tg67x)
271189            {
272-                 if  (m > tex2D (tex_mag, x,  y - 1 ) && m >= tex2D (tex_mag, x,  y + 1 ))
190+                 if  (m > texMag ( y - 1 , x ) && m >= texMag ( y + 1 , x ))
273191                    edge_type = 1  + (int )(m > high_thresh);
274192            }
275193            else 
276194            {
277-                 if  (m > tex2D (tex_mag, x - s, y - 1 ) && m >= tex2D (tex_mag, x + s, y + 1 ))
278-                     edge_type = 1  + (int )(m > high_thresh);
279-             }
280-         }
281- 
282-         map (y, x) = edge_type;
283-     }
284- 
285-     __global__  void  calcMapKernel (const  PtrStepSzi dx, const  PtrStepi dy, PtrStepi map, const  float  low_thresh, const  float  high_thresh, cudaTextureObject_t tex_mag)
286-     {
287-         const  int  CANNY_SHIFT = 15 ;
288-         const  int  TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
289- 
290-         const  int  x = blockIdx .x  * blockDim .x  + threadIdx .x ;
291-         const  int  y = blockIdx .y  * blockDim .y  + threadIdx .y ;
292- 
293-         if  (x == 0  || x >= dx.cols  - 1  || y == 0  || y >= dx.rows  - 1 )
294-             return ;
295- 
296-         int  dxVal = dx (y, x);
297-         int  dyVal = dy (y, x);
298- 
299-         const  int  s = (dxVal ^ dyVal) < 0  ? -1  : 1 ;
300-         const  float  m = tex2D <float >(tex_mag, x, y);
301- 
302-         dxVal = ::abs (dxVal);
303-         dyVal = ::abs (dyVal);
304- 
305-         //  0 - the pixel can not belong to an edge
306-         //  1 - the pixel might belong to an edge
307-         //  2 - the pixel does belong to an edge
308-         int  edge_type = 0 ;
309- 
310-         if  (m > low_thresh)
311-         {
312-             const  int  tg22x = dxVal * TG22;
313-             const  int  tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
314- 
315-             dyVal <<= CANNY_SHIFT;
316- 
317-             if  (dyVal < tg22x)
318-             {
319-                 if  (m > tex2D <float >(tex_mag, x - 1 , y) && m >= tex2D <float >(tex_mag, x + 1 , y))
320-                     edge_type = 1  + (int )(m > high_thresh);
321-             }
322-             else  if (dyVal > tg67x)
323-             {
324-                 if  (m > tex2D <float >(tex_mag, x, y - 1 ) && m >= tex2D <float >(tex_mag, x, y + 1 ))
325-                     edge_type = 1  + (int )(m > high_thresh);
326-             }
327-             else 
328-             {
329-                 if  (m > tex2D <float >(tex_mag, x - s, y - 1 ) && m >= tex2D <float >(tex_mag, x + s, y + 1 ))
195+                 if  (m > texMag (y - 1 , x - s) && m >= texMag (y + 1 , x + s))
330196                    edge_type = 1  + (int )(m > high_thresh);
331197            }
332198        }
@@ -338,47 +204,10 @@ namespace canny
338204    {
339205        const  dim3  block (16 , 16 );
340206        const  dim3  grid (divUp (dx.cols , block.x ), divUp (dx.rows , block.y ));
341- 
342-         if  (deviceSupports (FEATURE_SET_COMPUTE_30))
343-         {
344-             //  Use the texture object
345-             cudaResourceDesc resDesc;
346-             memset (&resDesc, 0 , sizeof (resDesc));
347-             resDesc.resType  = cudaResourceTypePitch2D;
348-             resDesc.res .pitch2D .devPtr  = mag.ptr ();
349-             resDesc.res .pitch2D .height  = mag.rows ;
350-             resDesc.res .pitch2D .width  = mag.cols ;
351-             resDesc.res .pitch2D .pitchInBytes  = mag.step ;
352-             resDesc.res .pitch2D .desc  = cudaCreateChannelDesc<float >();
353- 
354-             cudaTextureDesc texDesc;
355-             memset (&texDesc, 0 , sizeof (texDesc));
356-             texDesc.addressMode [0 ] = cudaAddressModeClamp;
357-             texDesc.addressMode [1 ] = cudaAddressModeClamp;
358-             texDesc.addressMode [2 ] = cudaAddressModeClamp;
359- 
360-             cudaTextureObject_t tex=0 ;
361-             cudaCreateTextureObject (&tex, &resDesc, &texDesc, NULL );
362-             calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh, tex);
363-             cudaSafeCall ( cudaGetLastError () );
364- 
365-             if  (stream == NULL )
366-                 cudaSafeCall ( cudaDeviceSynchronize () );
367-             else 
368-                 cudaSafeCall ( cudaStreamSynchronize (stream) );
369- 
370-             cudaSafeCall ( cudaDestroyTextureObject (tex) );
371-         }
372-         else 
373-         {
374-             //  Use the texture reference
375-             bindTexture (&tex_mag, mag);
376-             calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh);
377-             cudaSafeCall ( cudaGetLastError () );
378- 
379-             if  (stream == NULL )
380-                 cudaSafeCall ( cudaDeviceSynchronize () );
381-         }
207+         cv::cudev::Texture<float > texMag (mag);
208+         calcMapKernel<<<grid, block, 0 , stream>>> (texMag, dx, dy, map, low_thresh, high_thresh);
209+         if  (stream == NULL )
210+             cudaSafeCall ( cudaDeviceSynchronize () );
382211    }
383212}
384213
0 commit comments