Skip to content

Commit 07d31f6

Browse files
committed
ximgproc: optimize add_mul using NEON intrinsics for ARM64
1 parent 1e4d4e0 commit 07d31f6

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

modules/ximgproc/src/edgeaware_filters_common.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ inline bool CPU_SUPPORT_SSE1()
6060
} // end
6161
#endif
6262

63+
#if CV_NEON
64+
namespace
65+
{
66+
67+
// Returns the value reported by cv::checkHardwareSupport(CV_CPU_NEON).
// The query result is stored in a function-local static, so the check is
// evaluated on the first call only and the stored value is returned on every
// later call. add_mul() uses this to decide whether to take its NEON branch.
inline bool CPU_SUPPORT_NEON()
68+
{
69+
static const bool is_supported = cv::checkHardwareSupport(CV_CPU_NEON);
70+
return is_supported;
71+
}
72+
73+
} // end of anonymous namespace
74+
#endif
75+
6376
namespace cv
6477
{
6578
namespace ximgproc
@@ -288,6 +301,20 @@ void add_mul(float *dst, float *src1, float *src2, int w)
288301
_mm_storeu_ps(dst + j, c);
289302
}
290303
}
304+
#elif CV_NEON
305+
if (CPU_SUPPORT_NEON())
306+
{
307+
float32x4_t a, b, c;
308+
for (; j < w - 3; j += 4)
309+
{
310+
a = vld1q_f32(src1 + j);
311+
b = vld1q_f32(src2 + j);
312+
b = vmulq_f32(b, a);
313+
c = vld1q_f32(dst + j);
314+
c = vaddq_f32(c, b);
315+
vst1q_f32(dst + j, c);
316+
}
317+
}
291318
#endif
292319
for (; j < w; j++)
293320
{

0 commit comments

Comments
 (0)