@@ -52,37 +52,33 @@ namespace cv { namespace cuda { namespace device
52
52
{
53
53
namespace gfft
54
54
{
55
- texture<float , cudaTextureType2D, cudaReadModeElementType> eigTex (0 , cudaFilterModePoint, cudaAddressModeClamp);
56
-
57
- __device__ int g_counter = 0 ;
58
-
59
- template <class Mask > __global__ void findCorners (float threshold, const Mask mask, float2 * corners, int max_count, int rows, int cols)
55
+ template <class Mask > __global__ void findCorners (float threshold, const Mask mask, float2 * corners, int max_count, int rows, int cols, cudaTextureObject_t eigTex, int *g_counter)
60
56
{
61
57
const int j = blockIdx .x * blockDim .x + threadIdx .x ;
62
58
const int i = blockIdx .y * blockDim .y + threadIdx .y ;
63
59
64
60
if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask (i, j))
65
61
{
66
- float val = tex2D (eigTex, j, i);
62
+ float val = tex2D < float > (eigTex, j, i);
67
63
68
64
if (val > threshold)
69
65
{
70
66
float maxVal = val;
71
67
72
- maxVal = ::fmax (tex2D (eigTex, j - 1 , i - 1 ), maxVal);
73
- maxVal = ::fmax (tex2D (eigTex, j , i - 1 ), maxVal);
74
- maxVal = ::fmax (tex2D (eigTex, j + 1 , i - 1 ), maxVal);
68
+ maxVal = ::fmax (tex2D < float > (eigTex, j - 1 , i - 1 ), maxVal);
69
+ maxVal = ::fmax (tex2D < float > (eigTex, j , i - 1 ), maxVal);
70
+ maxVal = ::fmax (tex2D < float > (eigTex, j + 1 , i - 1 ), maxVal);
75
71
76
- maxVal = ::fmax (tex2D (eigTex, j - 1 , i), maxVal);
77
- maxVal = ::fmax (tex2D (eigTex, j + 1 , i), maxVal);
72
+ maxVal = ::fmax (tex2D < float > (eigTex, j - 1 , i), maxVal);
73
+ maxVal = ::fmax (tex2D < float > (eigTex, j + 1 , i), maxVal);
78
74
79
- maxVal = ::fmax (tex2D (eigTex, j - 1 , i + 1 ), maxVal);
80
- maxVal = ::fmax (tex2D (eigTex, j , i + 1 ), maxVal);
81
- maxVal = ::fmax (tex2D (eigTex, j + 1 , i + 1 ), maxVal);
75
+ maxVal = ::fmax (tex2D < float > (eigTex, j - 1 , i + 1 ), maxVal);
76
+ maxVal = ::fmax (tex2D < float > (eigTex, j , i + 1 ), maxVal);
77
+ maxVal = ::fmax (tex2D < float > (eigTex, j + 1 , i + 1 ), maxVal);
82
78
83
79
if (val == maxVal)
84
80
{
85
- const int ind = ::atomicAdd (& g_counter, 1 );
81
+ const int ind = ::atomicAdd (g_counter, 1 );
86
82
87
83
if (ind < max_count)
88
84
corners[ind] = make_float2 (j, i);
@@ -91,22 +87,20 @@ namespace cv { namespace cuda { namespace device
91
87
}
92
88
}
93
89
94
- int findCorners_gpu (PtrStepSzf eig , float threshold, PtrStepSzb mask, float2 * corners, int max_count, cudaStream_t stream)
90
+ int findCorners_gpu (const cudaTextureObject_t &eigTex, const int &rows, const int &cols , float threshold, PtrStepSzb mask, float2 * corners, int max_count, cudaStream_t stream)
95
91
{
96
- void * counter_ptr;
97
- cudaSafeCall ( cudaGetSymbolAddress (&counter_ptr, g_counter ) );
92
+ int * counter_ptr;
93
+ cudaSafeCall ( cudaMalloc (&counter_ptr, sizeof ( int ) ) );
98
94
99
95
cudaSafeCall ( cudaMemsetAsync (counter_ptr, 0 , sizeof (int ), stream) );
100
96
101
- bindTexture (&eigTex, eig);
102
-
103
97
dim3 block (16 , 16 );
104
- dim3 grid (divUp (eig. cols , block.x ), divUp (eig. rows , block.y ));
98
+ dim3 grid (divUp (cols, block.x ), divUp (rows, block.y ));
105
99
106
100
if (mask.data )
107
- findCorners<<<grid, block, 0 , stream>>> (threshold, SingleMask (mask), corners, max_count, eig. rows , eig. cols );
101
+ findCorners<<<grid, block, 0 , stream>>> (threshold, SingleMask (mask), corners, max_count, rows, cols, eigTex, counter_ptr );
108
102
else
109
- findCorners<<<grid, block, 0 , stream>>> (threshold, WithOutMask (), corners, max_count, eig. rows , eig. cols );
103
+ findCorners<<<grid, block, 0 , stream>>> (threshold, WithOutMask (), corners, max_count, rows, cols, eigTex, counter_ptr );
110
104
111
105
cudaSafeCall ( cudaGetLastError () );
112
106
@@ -122,25 +116,27 @@ namespace cv { namespace cuda { namespace device
122
116
class EigGreater
123
117
{
124
118
public:
119
+ EigGreater (const cudaTextureObject_t &eigTex_) : eigTex(eigTex_)
120
+ {
121
+ }
125
122
__device__ __forceinline__ bool operator ()(float2 a, float2 b) const
126
123
{
127
- return tex2D (eigTex, a.x , a.y ) > tex2D (eigTex, b.x , b.y );
124
+ return tex2D < float > (eigTex, a.x , a.y ) > tex2D < float > (eigTex, b.x , b.y );
128
125
}
129
- };
130
126
127
+ cudaTextureObject_t eigTex;
128
+ };
131
129
132
- void sortCorners_gpu (PtrStepSzf eig , float2 * corners, int count, cudaStream_t stream)
130
+ void sortCorners_gpu (const cudaTextureObject_t &eigTex , float2 * corners, int count, cudaStream_t stream)
133
131
{
134
- bindTexture (&eigTex, eig);
135
-
136
132
thrust::device_ptr<float2 > ptr (corners);
137
133
#if THRUST_VERSION >= 100802
138
134
if (stream)
139
- thrust::sort (thrust::cuda::par (ThrustAllocator::getAllocator ()).on (stream), ptr, ptr + count, EigGreater ());
135
+ thrust::sort (thrust::cuda::par (ThrustAllocator::getAllocator ()).on (stream), ptr, ptr + count, EigGreater (eigTex ));
140
136
else
141
- thrust::sort (thrust::cuda::par (ThrustAllocator::getAllocator ()), ptr, ptr + count, EigGreater ());
137
+ thrust::sort (thrust::cuda::par (ThrustAllocator::getAllocator ()), ptr, ptr + count, EigGreater (eigTex ));
142
138
#else
143
- thrust::sort (ptr, ptr + count, EigGreater ());
139
+ thrust::sort (ptr, ptr + count, EigGreater (eigTex ));
144
140
#endif
145
141
}
146
142
} // namespace optical_flow
0 commit comments