Skip to content

Commit 50c6568

Browse files
committed
Replace all instances of texture references with texture objects using the existing updated cv::cudev::Texture class.
Fixes bugs in cv::cuda::demosaicing, cv::cuda::resize and cv::cuda::HoughSegmentDetector.
1 parent 9d84eae commit 50c6568

File tree

28 files changed

+1065
-2192
lines changed

28 files changed

+1065
-2192
lines changed

modules/cudaimgproc/src/cuda/canny.cu

+23-194
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "opencv2/core/cuda/functional.hpp"
4949
#include "opencv2/core/cuda/utility.hpp"
5050
#include "opencv2/core/cuda.hpp"
51+
#include <opencv2/cudev/ptr2d/texture.hpp>
5152

5253
using namespace cv::cuda;
5354
using namespace cv::cuda::device;
@@ -90,56 +91,17 @@ namespace cv { namespace cuda { namespace device
9091

9192
namespace canny
9293
{
93-
struct SrcTex
94-
{
95-
virtual ~SrcTex() {}
96-
97-
__host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
98-
99-
__device__ __forceinline__ virtual int operator ()(int y, int x) const = 0;
100-
101-
int xoff;
102-
int yoff;
103-
};
104-
105-
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
106-
struct SrcTexRef : SrcTex
107-
{
108-
__host__ SrcTexRef(int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {}
109-
110-
__device__ __forceinline__ int operator ()(int y, int x) const override
111-
{
112-
return tex2D(tex_src, x + xoff, y + yoff);
113-
}
114-
};
115-
116-
struct SrcTexObj : SrcTex
117-
{
118-
__host__ SrcTexObj(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { }
119-
120-
__device__ __forceinline__ int operator ()(int y, int x) const override
121-
{
122-
return tex2D<uchar>(tex_src_object, x + xoff, y + yoff);
123-
}
124-
125-
cudaTextureObject_t tex_src_object;
126-
};
127-
128-
template <
129-
class T,
130-
class Norm,
131-
typename = typename std::enable_if<std::is_base_of<SrcTex, T>::value>::type
132-
>
133-
__global__ void calcMagnitudeKernel(const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
94+
template <class Norm>
95+
__global__ void calcMagnitudeKernel(cv::cudev::TextureOffPtr<uchar> texSrc, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
13496
{
13597
const int x = blockIdx.x * blockDim.x + threadIdx.x;
13698
const int y = blockIdx.y * blockDim.y + threadIdx.y;
13799

138100
if (y >= mag.rows || x >= mag.cols)
139101
return;
140102

141-
int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
142-
int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));
103+
int dxVal = (texSrc(y - 1, x + 1) + 2 * texSrc(y, x + 1) + texSrc(y + 1, x + 1)) - (texSrc(y - 1, x - 1) + 2 * texSrc(y, x - 1) + texSrc(y + 1, x - 1));
104+
int dyVal = (texSrc(y + 1, x - 1) + 2 * texSrc(y + 1, x) + texSrc(y + 1, x + 1)) - (texSrc(y - 1, x - 1) + 2 * texSrc(y - 1, x) + texSrc(y - 1, x + 1));
143105

144106
dx(y, x) = dxVal;
145107
dy(y, x) = dyVal;
@@ -151,63 +113,20 @@ namespace canny
151113
{
152114
const dim3 block(16, 16);
153115
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
154-
155-
bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
156-
157-
if (cc30)
116+
cv::cudev::TextureOff<uchar> texSrc(srcWhole, yoff, xoff);
117+
if (L2Grad)
158118
{
159-
cudaTextureDesc texDesc;
160-
memset(&texDesc, 0, sizeof(texDesc));
161-
texDesc.addressMode[0] = cudaAddressModeClamp;
162-
texDesc.addressMode[1] = cudaAddressModeClamp;
163-
texDesc.addressMode[2] = cudaAddressModeClamp;
164-
165-
cudaTextureObject_t tex = 0;
166-
createTextureObjectPitch2D(&tex, srcWhole, texDesc);
167-
168-
SrcTexObj src(xoff, yoff, tex);
169-
170-
if (L2Grad)
171-
{
172-
L2 norm;
173-
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
174-
}
175-
else
176-
{
177-
L1 norm;
178-
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
179-
}
180-
181-
cudaSafeCall( cudaGetLastError() );
182-
183-
if (stream == NULL)
184-
cudaSafeCall( cudaDeviceSynchronize() );
185-
else
186-
cudaSafeCall( cudaStreamSynchronize(stream) );
187-
188-
cudaSafeCall( cudaDestroyTextureObject(tex) );
119+
L2 norm;
120+
calcMagnitudeKernel << <grid, block, 0, stream >> > (texSrc, dx, dy, mag, norm);
189121
}
190122
else
191123
{
192-
bindTexture(&tex_src, srcWhole);
193-
SrcTexRef src(xoff, yoff);
194-
195-
if (L2Grad)
196-
{
197-
L2 norm;
198-
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
199-
}
200-
else
201-
{
202-
L1 norm;
203-
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
204-
}
205-
206-
cudaSafeCall( cudaGetLastError() );
207-
208-
if (stream == NULL)
209-
cudaSafeCall( cudaDeviceSynchronize() );
124+
L1 norm;
125+
calcMagnitudeKernel << <grid, block, 0, stream >> > (texSrc, dx, dy, mag, norm);
210126
}
127+
128+
if (stream == NULL)
129+
cudaSafeCall(cudaDeviceSynchronize());
211130
}
212131

213132
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
@@ -229,8 +148,7 @@ namespace canny
229148

230149
namespace canny
231150
{
232-
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);
233-
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
151+
__global__ void calcMapKernel(cv::cudev::TexturePtr<float> texMag, const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
234152
{
235153
const int CANNY_SHIFT = 15;
236154
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
@@ -245,7 +163,7 @@ namespace canny
245163
int dyVal = dy(y, x);
246164

247165
const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
248-
const float m = tex2D(tex_mag, x, y);
166+
const float m = texMag(y, x);
249167

250168
dxVal = ::abs(dxVal);
251169
dyVal = ::abs(dyVal);
@@ -264,69 +182,17 @@ namespace canny
264182

265183
if (dyVal < tg22x)
266184
{
267-
if (m > tex2D(tex_mag, x - 1, y) && m >= tex2D(tex_mag, x + 1, y))
185+
if (m > texMag(y, x - 1) && m >= texMag(y, x + 1))
268186
edge_type = 1 + (int)(m > high_thresh);
269187
}
270188
else if(dyVal > tg67x)
271189
{
272-
if (m > tex2D(tex_mag, x, y - 1) && m >= tex2D(tex_mag, x, y + 1))
190+
if (m > texMag(y - 1, x) && m >= texMag(y + 1, x))
273191
edge_type = 1 + (int)(m > high_thresh);
274192
}
275193
else
276194
{
277-
if (m > tex2D(tex_mag, x - s, y - 1) && m >= tex2D(tex_mag, x + s, y + 1))
278-
edge_type = 1 + (int)(m > high_thresh);
279-
}
280-
}
281-
282-
map(y, x) = edge_type;
283-
}
284-
285-
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh, cudaTextureObject_t tex_mag)
286-
{
287-
const int CANNY_SHIFT = 15;
288-
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
289-
290-
const int x = blockIdx.x * blockDim.x + threadIdx.x;
291-
const int y = blockIdx.y * blockDim.y + threadIdx.y;
292-
293-
if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1)
294-
return;
295-
296-
int dxVal = dx(y, x);
297-
int dyVal = dy(y, x);
298-
299-
const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
300-
const float m = tex2D<float>(tex_mag, x, y);
301-
302-
dxVal = ::abs(dxVal);
303-
dyVal = ::abs(dyVal);
304-
305-
// 0 - the pixel can not belong to an edge
306-
// 1 - the pixel might belong to an edge
307-
// 2 - the pixel does belong to an edge
308-
int edge_type = 0;
309-
310-
if (m > low_thresh)
311-
{
312-
const int tg22x = dxVal * TG22;
313-
const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
314-
315-
dyVal <<= CANNY_SHIFT;
316-
317-
if (dyVal < tg22x)
318-
{
319-
if (m > tex2D<float>(tex_mag, x - 1, y) && m >= tex2D<float>(tex_mag, x + 1, y))
320-
edge_type = 1 + (int)(m > high_thresh);
321-
}
322-
else if(dyVal > tg67x)
323-
{
324-
if (m > tex2D<float>(tex_mag, x, y - 1) && m >= tex2D<float>(tex_mag, x, y + 1))
325-
edge_type = 1 + (int)(m > high_thresh);
326-
}
327-
else
328-
{
329-
if (m > tex2D<float>(tex_mag, x - s, y - 1) && m >= tex2D<float>(tex_mag, x + s, y + 1))
195+
if (m > texMag(y - 1, x - s) && m >= texMag(y + 1, x + s))
330196
edge_type = 1 + (int)(m > high_thresh);
331197
}
332198
}
@@ -338,47 +204,10 @@ namespace canny
338204
{
339205
const dim3 block(16, 16);
340206
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
341-
342-
if (deviceSupports(FEATURE_SET_COMPUTE_30))
343-
{
344-
// Use the texture object
345-
cudaResourceDesc resDesc;
346-
memset(&resDesc, 0, sizeof(resDesc));
347-
resDesc.resType = cudaResourceTypePitch2D;
348-
resDesc.res.pitch2D.devPtr = mag.ptr();
349-
resDesc.res.pitch2D.height = mag.rows;
350-
resDesc.res.pitch2D.width = mag.cols;
351-
resDesc.res.pitch2D.pitchInBytes = mag.step;
352-
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<float>();
353-
354-
cudaTextureDesc texDesc;
355-
memset(&texDesc, 0, sizeof(texDesc));
356-
texDesc.addressMode[0] = cudaAddressModeClamp;
357-
texDesc.addressMode[1] = cudaAddressModeClamp;
358-
texDesc.addressMode[2] = cudaAddressModeClamp;
359-
360-
cudaTextureObject_t tex=0;
361-
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
362-
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh, tex);
363-
cudaSafeCall( cudaGetLastError() );
364-
365-
if (stream == NULL)
366-
cudaSafeCall( cudaDeviceSynchronize() );
367-
else
368-
cudaSafeCall( cudaStreamSynchronize(stream) );
369-
370-
cudaSafeCall( cudaDestroyTextureObject(tex) );
371-
}
372-
else
373-
{
374-
// Use the texture reference
375-
bindTexture(&tex_mag, mag);
376-
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
377-
cudaSafeCall( cudaGetLastError() );
378-
379-
if (stream == NULL)
380-
cudaSafeCall( cudaDeviceSynchronize() );
381-
}
207+
cv::cudev::Texture<float> texMag(mag);
208+
calcMapKernel<<<grid, block, 0, stream>>>(texMag, dx, dy, map, low_thresh, high_thresh);
209+
if (stream == NULL)
210+
cudaSafeCall( cudaDeviceSynchronize() );
382211
}
383212
}
384213

0 commit comments

Comments
 (0)