Skip to content

Commit 9d733ef

Browse files
authored
Merge pull request #3620 from opencv-pushbot:gitee/alalek/signal_hotfix
signal: fix memory access
2 parents 64bd2e1 + ee5a2c3 commit 9d733ef

File tree

1 file changed

+44
-76
lines changed

1 file changed

+44
-76
lines changed

modules/signal/src/signal_resample.cpp

Lines changed: 44 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -105,102 +105,69 @@ static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32
105105
}
106106
#endif
107107

108-
static void cubicInterpolate(const Mat1f &src, uint32_t dstlen, Mat1f &dst, uint32_t srclen)
108+
static void cubicInterpolate(const float* src/*[srclen]*/, uint32_t srclen, float* dst/*[dstlen]*/, uint32_t dstlen)
109109
{
110-
Mat1f tmp(Size(srclen + 3U, 1U));
111-
tmp.at<float>(0) = src.at<float>(0);
112-
113-
#if (CV_SIMD || CV_SIMD_SCALABLE)
114-
v_float32 v_reg = vx_setall_f32(src.at<float>(srclen - 1U));
115-
vx_store(tmp.ptr<float>(0) + (srclen - 1U), v_reg);
116-
#else // scalar version
117-
tmp.at<float>(srclen + 1U) = src.at<float>(srclen - 1U);
118-
tmp.at<float>(srclen + 2U) = src.at<float>(srclen - 1U);
119-
#endif
110+
const int srclen_1 = (int)srclen - 1;
120111

121112
uint32_t i = 0U;
122113

114+
const float dstToSrcScale = 1.0f / (float)(dstlen - 1U) * (float)srclen;
123115
#if (CV_SIMD || CV_SIMD_SCALABLE)
124-
uint32_t len_sub_vfloatStep = (uint32_t)std::max((int64_t)srclen - (int64_t)v_float32_width, (int64_t)0);
125-
for (; i < len_sub_vfloatStep; i+= v_float32_width)
126-
{
127-
v_float32 v_copy = vx_load(src.ptr<float>(0) + i);
128-
vx_store(tmp.ptr<float>(0) + (i + 1U), v_copy);
129-
}
130-
#endif
131-
132-
// if the tail exists or scalar version
133-
for (; i < srclen; ++i)
134-
{
135-
tmp.at<float>(i + 1U) = src.at<float>(i);
136-
}
137-
138-
i = 0U;
116+
const v_float32 v_dst2src_scale = vx_setall_f32(dstToSrcScale);
117+
const v_float32 v_half = vx_setall_f32(0.5f);
139118

140-
#if (CV_SIMD || CV_SIMD_SCALABLE)
141119
int ptr_x_int[v_float32_max_width];
142-
uint32_t j;
143-
144-
v_float32 v_dstlen_sub_1 = vx_setall_f32((float)(dstlen - 1U));
145-
v_float32 v_one = vx_setall_f32(1.0f);
146-
v_float32 v_x_start = v_div(v_one, v_dstlen_sub_1);
147-
v_float32 v_u = vx_setall_f32((float)srclen);
148-
v_float32 v_half = vx_setall_f32(0.5f);
149-
150-
len_sub_vfloatStep = (uint32_t)std::max((int64_t)dstlen - (int64_t)v_float32_width, (int64_t)0);
151-
for (; i < v_float32_width; ++i)
120+
for (unsigned j = 0; j < v_float32_width; ++j)
152121
{
153-
ptr_x_int[i] = (int)i;
122+
ptr_x_int[j] = (int)j;
154123
}
124+
const v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int));
155125

156-
float ptr_for_cubicHermite[v_float32_max_width];
157-
v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int));
158-
for (i = 0U; i < len_sub_vfloatStep; i+= v_float32_width)
126+
for (i = 0U; i <= dstlen - v_float32_width; i+= v_float32_width)
159127
{
160128
v_float32 v_reg_i = v_add(vx_setall_f32((float)i), v_sequence);
161129

162-
v_float32 v_x = v_sub(v_mul(v_x_start, v_reg_i, v_u), v_half);
130+
v_float32 v_x = v_sub(v_mul(v_reg_i, v_dst2src_scale), v_half);
163131

164132
v_int32 v_x_int = v_trunc(v_x);
165133
v_float32 v_x_fract = v_sub(v_x, v_cvt_f32(v_floor(v_x)));
166134

167135
vx_store(ptr_x_int, v_x_int);
168136

169-
for(j = 0U; j < v_float32_width; ++j)
170-
ptr_for_cubicHermite[j] = *(tmp.ptr<float>(0) + (ptr_x_int[j] - 1));
171-
v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermite);
172-
173-
for(j = 0U; j < v_float32_width; ++j)
174-
ptr_for_cubicHermite[j] = *(tmp.ptr<float>(0) + (ptr_x_int[j]));
175-
v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermite);
176-
177-
for(j = 0U; j < v_float32_width; ++j)
178-
ptr_for_cubicHermite[j] = *(tmp.ptr<float>(0) + (ptr_x_int[j] + 1));
179-
v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermite);
180-
181-
for(j = 0U; j < v_float32_width; ++j)
182-
ptr_for_cubicHermite[j] = *(tmp.ptr<float>(0) + (ptr_x_int[j] + 2));
183-
v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermite);
137+
float ptr_for_cubicHermiteA[v_float32_max_width];
138+
float ptr_for_cubicHermiteB[v_float32_max_width];
139+
float ptr_for_cubicHermiteC[v_float32_max_width];
140+
float ptr_for_cubicHermiteD[v_float32_max_width];
184141

142+
for (unsigned j = 0U; j < v_float32_width; ++j)
143+
{
144+
int src_offset = ptr_x_int[j];
145+
ptr_for_cubicHermiteA[j] = src[std::min(std::max(0, src_offset - 1), srclen_1)];
146+
ptr_for_cubicHermiteB[j] = src[std::min(std::max(0, src_offset + 0), srclen_1)];
147+
ptr_for_cubicHermiteC[j] = src[std::min(std::max(0, src_offset + 1), srclen_1)];
148+
ptr_for_cubicHermiteD[j] = src[std::min(std::max(0, src_offset + 2), srclen_1)];
149+
}
150+
v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermiteA);
151+
v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermiteB);
152+
v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermiteC);
153+
v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermiteD);
185154

186-
vx_store(dst.ptr<float>(0) + i, simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
155+
vx_store(&dst[i], simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
187156
}
188157
#endif
189158

190159
// if the tail exists or scalar version
191-
float *ptr = tmp.ptr<float>(0) + 1U;
192-
float lenScale = 1.0f / (float)(dstlen - 1U);
193-
float U, X, xfract;
194-
int xint;
195160
for(; i < dstlen; ++i)
196161
{
197-
U = (float)i * lenScale;
198-
X = (U * (float)srclen) - 0.5f;
199-
xfract = X - floor(X);
200-
xint = (int)X;
201-
dst.at<float>(i) = scal_cubicHermite(ptr[xint - 1], ptr[xint], ptr[xint + 1], ptr[xint + 2], xfract);
162+
float X = (float)i * dstToSrcScale - 0.5f;
163+
float xfract = X - floor(X);
164+
int xint = (int)X;
165+
float cubicHermiteA = src[std::min(std::max(0, xint - 1), srclen_1)];
166+
float cubicHermiteB = src[std::min(std::max(0, xint + 0), srclen_1)];
167+
float cubicHermiteC = src[std::min(std::max(0, xint + 1), srclen_1)];
168+
float cubicHermiteD = src[std::min(std::max(0, xint + 2), srclen_1)];
169+
dst[i] = scal_cubicHermite(cubicHermiteA, cubicHermiteB, cubicHermiteC, cubicHermiteD, xfract);
202170
}
203-
204171
}
205172

206173
static void fir_f32(const float *pSrc, float *pDst,
@@ -332,7 +299,7 @@ static void fir_f32(const float *pSrc, float *pDst,
332299
}
333300

334301
void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
335-
const int inFreq, const int outFreq)
302+
const int inFreq, const int outFreq)
336303
{
337304
CV_TRACE_FUNCTION();
338305
CV_Assert(!inputSignal.empty());
@@ -343,16 +310,18 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
343310
inputSignal.copyTo(outputSignal);
344311
return;
345312
}
346-
uint32_t filtLen = 33U;
347-
float beta = 3.395f;
348-
std::vector<float> filt_window(filtLen, 0.f);
349-
init_filter(beta, filtLen, filt_window.data());
350313
float ratio = (float)outFreq / float(inFreq);
351314
Mat1f inMat = inputSignal.getMat();
352-
Mat1f outMat = Mat1f(Size(cvFloor(inMat.cols * ratio), 1));
353-
cubicInterpolate(inMat, outMat.cols, outMat, inMat.cols);
315+
outputSignal.create(Size(cvFloor(inMat.cols * ratio), 1), CV_32FC1);
316+
Mat1f outMat = outputSignal.getMat();
317+
cubicInterpolate(inMat.ptr<float>(0), inMat.cols, outMat.ptr<float>(0), outMat.cols);
354318
if (inFreq < 2 * outFreq)
355319
{
320+
uint32_t filtLen = 33U;
321+
float beta = 3.395f;
322+
std::vector<float> filt_window(filtLen, 0.f);
323+
init_filter(beta, filtLen, filt_window.data());
324+
356325
std::vector<float> dlyl(filtLen * 2 - 1, 0.f);
357326
std::vector<float> ptmp(outMat.cols + 2 * filtLen, 0.);
358327

@@ -367,7 +336,6 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
367336
outMat.at<float>(i - filtLen) = ptmp2[i + cvFloor((float)filtLen / 2.f)];
368337
}
369338
}
370-
outputSignal.assign(std::move(outMat));
371339
}
372340

373341

0 commit comments

Comments
 (0)