Skip to content

Commit c0133e5

Browse files
authored
Merge pull request #3386 from r2d3:convolve_cuda
keep cufftPlan2d across ConvolutionImpl::convolve calls
2 parents 9d1c0f5 + 53ab7c3 commit c0133e5

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

modules/cudaarithm/src/arithm.cpp

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,8 @@ namespace
439439
class ConvolutionImpl : public Convolution
440440
{
441441
public:
442-
explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {}
442+
explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_), planR2C(0), planC2R(0) {}
443+
~ConvolutionImpl();
443444

444445
void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null());
445446

@@ -452,6 +453,9 @@ namespace
452453
Size user_block_size;
453454
Size dft_size;
454455

456+
cufftHandle planR2C, planC2R;
457+
Size plan_size;
458+
455459
GpuMat image_spect, templ_spect, result_spect;
456460
GpuMat image_block, templ_block, result_data;
457461
};
@@ -491,6 +495,27 @@ namespace
491495
// Use maximum result matrix block size for the estimated DFT block size
492496
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
493497
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
498+
499+
if (dft_size != plan_size)
500+
{
501+
if (planR2C != 0)
502+
cufftSafeCall( cufftDestroy(planR2C) );
503+
if (planC2R != 0)
504+
cufftSafeCall( cufftDestroy(planC2R) );
505+
506+
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
507+
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
508+
509+
plan_size = dft_size;
510+
}
511+
}
512+
513+
// Destructor: releases the cached cuFFT plans held by this object.
// Each of planR2C / planC2R is destroyed only when its handle is non-zero;
// zero is the value the constructor initialises both handles to, so it is
// treated here as "no plan has been created".
// NOTE(review): cufftSafeCall is defined outside this view. If it reports a
// failed cufftDestroy by throwing, that exception would leave a destructor —
// confirm, and consider a non-throwing cleanup path if so.
ConvolutionImpl::~ConvolutionImpl()
514+
{
515+
if (planR2C != 0)
516+
cufftSafeCall( cufftDestroy(planR2C) );
517+
if (planC2R != 0)
518+
cufftSafeCall( cufftDestroy(planC2R) );
494519
}
495520

496521
Size ConvolutionImpl::estimateBlockSize(Size result_size)
@@ -516,10 +541,6 @@ namespace
516541

517542
cudaStream_t stream = StreamAccessor::getStream(_stream);
518543

519-
cufftHandle planR2C, planC2R;
520-
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
521-
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
522-
523544
cufftSafeCall( cufftSetStream(planR2C, stream) );
524545
cufftSafeCall( cufftSetStream(planC2R, stream) );
525546

@@ -559,9 +580,6 @@ namespace
559580
}
560581
}
561582

562-
cufftSafeCall( cufftDestroy(planR2C) );
563-
cufftSafeCall( cufftDestroy(planC2R) );
564-
565583
syncOutput(result, _result, _stream);
566584
}
567585
}

0 commit comments

Comments
 (0)