48
48
#include " opencv2/core/cuda/functional.hpp"
49
49
#include " opencv2/core/cuda/utility.hpp"
50
50
#include " opencv2/core/cuda.hpp"
51
+ #include < opencv2/cudev/ptr2d/texture.hpp>
51
52
52
53
using namespace cv ::cuda;
53
54
using namespace cv ::cuda::device;
@@ -90,56 +91,17 @@ namespace cv { namespace cuda { namespace device
90
91
91
92
namespace canny
92
93
{
93
- struct SrcTex
94
- {
95
- virtual ~SrcTex () {}
96
-
97
- __host__ SrcTex (int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
98
-
99
- __device__ __forceinline__ virtual int operator ()(int y, int x) const = 0;
100
-
101
- int xoff;
102
- int yoff;
103
- };
104
-
105
- texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src (false , cudaFilterModePoint, cudaAddressModeClamp);
106
- struct SrcTexRef : SrcTex
107
- {
108
- __host__ SrcTexRef (int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {}
109
-
110
- __device__ __forceinline__ int operator ()(int y, int x) const override
111
- {
112
- return tex2D (tex_src, x + xoff, y + yoff);
113
- }
114
- };
115
-
116
- struct SrcTexObj : SrcTex
117
- {
118
- __host__ SrcTexObj (int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { }
119
-
120
- __device__ __forceinline__ int operator ()(int y, int x) const override
121
- {
122
- return tex2D <uchar>(tex_src_object, x + xoff, y + yoff);
123
- }
124
-
125
- cudaTextureObject_t tex_src_object;
126
- };
127
-
128
- template <
129
- class T ,
130
- class Norm ,
131
- typename = typename std::enable_if<std::is_base_of<SrcTex, T>::value>::type
132
- >
133
- __global__ void calcMagnitudeKernel (const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
94
+ template <class Norm >
95
+ __global__ void calcMagnitudeKernel (cv::cudev::TextureOffPtr<uchar> texSrc, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
134
96
{
135
97
const int x = blockIdx .x * blockDim .x + threadIdx .x ;
136
98
const int y = blockIdx .y * blockDim .y + threadIdx .y ;
137
99
138
100
if (y >= mag.rows || x >= mag.cols )
139
101
return ;
140
102
141
- int dxVal = (src (y - 1 , x + 1 ) + 2 * src (y, x + 1 ) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2 * src (y, x - 1 ) + src (y + 1 , x - 1 ));
142
- int dyVal = (src (y + 1 , x - 1 ) + 2 * src (y + 1 , x) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2 * src (y - 1 , x) + src (y - 1 , x + 1 ));
103
+ int dxVal = (texSrc (y - 1 , x + 1 ) + 2 * texSrc (y, x + 1 ) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2 * texSrc (y, x - 1 ) + texSrc (y + 1 , x - 1 ));
104
+ int dyVal = (texSrc (y + 1 , x - 1 ) + 2 * texSrc (y + 1 , x) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2 * texSrc (y - 1 , x) + texSrc (y - 1 , x + 1 ));
143
105
144
106
dx (y, x) = dxVal;
145
107
dy (y, x) = dyVal;
@@ -151,63 +113,20 @@ namespace canny
151
113
{
152
114
const dim3 block (16 , 16 );
153
115
const dim3 grid (divUp (mag.cols , block.x ), divUp (mag.rows , block.y ));
154
-
155
- bool cc30 = deviceSupports (FEATURE_SET_COMPUTE_30);
156
-
157
- if (cc30)
116
+ cv::cudev::TextureOff<uchar> texSrc (srcWhole, yoff, xoff);
117
+ if (L2Grad)
158
118
{
159
- cudaTextureDesc texDesc;
160
- memset (&texDesc, 0 , sizeof (texDesc));
161
- texDesc.addressMode [0 ] = cudaAddressModeClamp;
162
- texDesc.addressMode [1 ] = cudaAddressModeClamp;
163
- texDesc.addressMode [2 ] = cudaAddressModeClamp;
164
-
165
- cudaTextureObject_t tex = 0 ;
166
- createTextureObjectPitch2D (&tex, srcWhole, texDesc);
167
-
168
- SrcTexObj src (xoff, yoff, tex);
169
-
170
- if (L2Grad)
171
- {
172
- L2 norm;
173
- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
174
- }
175
- else
176
- {
177
- L1 norm;
178
- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
179
- }
180
-
181
- cudaSafeCall ( cudaGetLastError () );
182
-
183
- if (stream == NULL )
184
- cudaSafeCall ( cudaDeviceSynchronize () );
185
- else
186
- cudaSafeCall ( cudaStreamSynchronize (stream) );
187
-
188
- cudaSafeCall ( cudaDestroyTextureObject (tex) );
119
+ L2 norm;
120
+ calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
189
121
}
190
122
else
191
123
{
192
- bindTexture (&tex_src, srcWhole);
193
- SrcTexRef src (xoff, yoff);
194
-
195
- if (L2Grad)
196
- {
197
- L2 norm;
198
- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
199
- }
200
- else
201
- {
202
- L1 norm;
203
- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
204
- }
205
-
206
- cudaSafeCall ( cudaGetLastError () );
207
-
208
- if (stream == NULL )
209
- cudaSafeCall ( cudaDeviceSynchronize () );
124
+ L1 norm;
125
+ calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
210
126
}
127
+
128
+ if (stream == NULL )
129
+ cudaSafeCall (cudaDeviceSynchronize ());
211
130
}
212
131
213
132
void calcMagnitude (PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
@@ -229,8 +148,7 @@ namespace canny
229
148
230
149
namespace canny
231
150
{
232
- texture<float , cudaTextureType2D, cudaReadModeElementType> tex_mag (false , cudaFilterModePoint, cudaAddressModeClamp);
233
- __global__ void calcMapKernel (const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
151
+ __global__ void calcMapKernel (cv::cudev::TexturePtr<float > texMag, const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
234
152
{
235
153
const int CANNY_SHIFT = 15 ;
236
154
const int TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
@@ -245,7 +163,7 @@ namespace canny
245
163
int dyVal = dy (y, x);
246
164
247
165
const int s = (dxVal ^ dyVal) < 0 ? -1 : 1 ;
248
- const float m = tex2D (tex_mag , x, y );
166
+ const float m = texMag (y , x);
249
167
250
168
dxVal = ::abs (dxVal);
251
169
dyVal = ::abs (dyVal);
@@ -264,69 +182,17 @@ namespace canny
264
182
265
183
if (dyVal < tg22x)
266
184
{
267
- if (m > tex2D (tex_mag , x - 1 , y ) && m >= tex2D (tex_mag , x + 1 , y ))
185
+ if (m > texMag (y , x - 1 ) && m >= texMag (y , x + 1 ))
268
186
edge_type = 1 + (int )(m > high_thresh);
269
187
}
270
188
else if (dyVal > tg67x)
271
189
{
272
- if (m > tex2D (tex_mag, x, y - 1 ) && m >= tex2D (tex_mag, x, y + 1 ))
190
+ if (m > texMag ( y - 1 , x ) && m >= texMag ( y + 1 , x ))
273
191
edge_type = 1 + (int )(m > high_thresh);
274
192
}
275
193
else
276
194
{
277
- if (m > tex2D (tex_mag, x - s, y - 1 ) && m >= tex2D (tex_mag, x + s, y + 1 ))
278
- edge_type = 1 + (int )(m > high_thresh);
279
- }
280
- }
281
-
282
- map (y, x) = edge_type;
283
- }
284
-
285
- __global__ void calcMapKernel (const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh, cudaTextureObject_t tex_mag)
286
- {
287
- const int CANNY_SHIFT = 15 ;
288
- const int TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
289
-
290
- const int x = blockIdx .x * blockDim .x + threadIdx .x ;
291
- const int y = blockIdx .y * blockDim .y + threadIdx .y ;
292
-
293
- if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1 )
294
- return ;
295
-
296
- int dxVal = dx (y, x);
297
- int dyVal = dy (y, x);
298
-
299
- const int s = (dxVal ^ dyVal) < 0 ? -1 : 1 ;
300
- const float m = tex2D <float >(tex_mag, x, y);
301
-
302
- dxVal = ::abs (dxVal);
303
- dyVal = ::abs (dyVal);
304
-
305
- // 0 - the pixel can not belong to an edge
306
- // 1 - the pixel might belong to an edge
307
- // 2 - the pixel does belong to an edge
308
- int edge_type = 0 ;
309
-
310
- if (m > low_thresh)
311
- {
312
- const int tg22x = dxVal * TG22;
313
- const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
314
-
315
- dyVal <<= CANNY_SHIFT;
316
-
317
- if (dyVal < tg22x)
318
- {
319
- if (m > tex2D <float >(tex_mag, x - 1 , y) && m >= tex2D <float >(tex_mag, x + 1 , y))
320
- edge_type = 1 + (int )(m > high_thresh);
321
- }
322
- else if (dyVal > tg67x)
323
- {
324
- if (m > tex2D <float >(tex_mag, x, y - 1 ) && m >= tex2D <float >(tex_mag, x, y + 1 ))
325
- edge_type = 1 + (int )(m > high_thresh);
326
- }
327
- else
328
- {
329
- if (m > tex2D <float >(tex_mag, x - s, y - 1 ) && m >= tex2D <float >(tex_mag, x + s, y + 1 ))
195
+ if (m > texMag (y - 1 , x - s) && m >= texMag (y + 1 , x + s))
330
196
edge_type = 1 + (int )(m > high_thresh);
331
197
}
332
198
}
@@ -338,47 +204,10 @@ namespace canny
338
204
{
339
205
const dim3 block (16 , 16 );
340
206
const dim3 grid (divUp (dx.cols , block.x ), divUp (dx.rows , block.y ));
341
-
342
- if (deviceSupports (FEATURE_SET_COMPUTE_30))
343
- {
344
- // Use the texture object
345
- cudaResourceDesc resDesc;
346
- memset (&resDesc, 0 , sizeof (resDesc));
347
- resDesc.resType = cudaResourceTypePitch2D;
348
- resDesc.res .pitch2D .devPtr = mag.ptr ();
349
- resDesc.res .pitch2D .height = mag.rows ;
350
- resDesc.res .pitch2D .width = mag.cols ;
351
- resDesc.res .pitch2D .pitchInBytes = mag.step ;
352
- resDesc.res .pitch2D .desc = cudaCreateChannelDesc<float >();
353
-
354
- cudaTextureDesc texDesc;
355
- memset (&texDesc, 0 , sizeof (texDesc));
356
- texDesc.addressMode [0 ] = cudaAddressModeClamp;
357
- texDesc.addressMode [1 ] = cudaAddressModeClamp;
358
- texDesc.addressMode [2 ] = cudaAddressModeClamp;
359
-
360
- cudaTextureObject_t tex=0 ;
361
- cudaCreateTextureObject (&tex, &resDesc, &texDesc, NULL );
362
- calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh, tex);
363
- cudaSafeCall ( cudaGetLastError () );
364
-
365
- if (stream == NULL )
366
- cudaSafeCall ( cudaDeviceSynchronize () );
367
- else
368
- cudaSafeCall ( cudaStreamSynchronize (stream) );
369
-
370
- cudaSafeCall ( cudaDestroyTextureObject (tex) );
371
- }
372
- else
373
- {
374
- // Use the texture reference
375
- bindTexture (&tex_mag, mag);
376
- calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh);
377
- cudaSafeCall ( cudaGetLastError () );
378
-
379
- if (stream == NULL )
380
- cudaSafeCall ( cudaDeviceSynchronize () );
381
- }
207
+ cv::cudev::Texture<float > texMag (mag);
208
+ calcMapKernel<<<grid, block, 0 , stream>>> (texMag, dx, dy, map, low_thresh, high_thresh);
209
+ if (stream == NULL )
210
+ cudaSafeCall ( cudaDeviceSynchronize () );
382
211
}
383
212
}
384
213
0 commit comments