@@ -105,102 +105,69 @@ static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32
105
105
}
106
106
#endif
107
107
108
- static void cubicInterpolate (const Mat1f & src, uint32_t dstlen, Mat1f & dst, uint32_t srclen )
108
+ static void cubicInterpolate (const float * src/* [srclen] */ , uint32_t srclen, float * dst/* [dstlen] */ , uint32_t dstlen )
109
109
{
110
- Mat1f tmp (Size (srclen + 3U , 1U ));
111
- tmp.at <float >(0 ) = src.at <float >(0 );
112
-
113
- #if (CV_SIMD || CV_SIMD_SCALABLE)
114
- v_float32 v_reg = vx_setall_f32 (src.at <float >(srclen - 1U ));
115
- vx_store (tmp.ptr <float >(0 ) + (srclen - 1U ), v_reg);
116
- #else // scalar version
117
- tmp.at <float >(srclen + 1U ) = src.at <float >(srclen - 1U );
118
- tmp.at <float >(srclen + 2U ) = src.at <float >(srclen - 1U );
119
- #endif
110
+ const int srclen_1 = (int )srclen - 1 ;
120
111
121
112
uint32_t i = 0U ;
122
113
114
+ const float dstToSrcScale = 1 .0f / (float )(dstlen - 1U ) * (float )srclen;
123
115
#if (CV_SIMD || CV_SIMD_SCALABLE)
124
- uint32_t len_sub_vfloatStep = (uint32_t )std::max ((int64_t )srclen - (int64_t )v_float32_width, (int64_t )0 );
125
- for (; i < len_sub_vfloatStep; i+= v_float32_width)
126
- {
127
- v_float32 v_copy = vx_load (src.ptr <float >(0 ) + i);
128
- vx_store (tmp.ptr <float >(0 ) + (i + 1U ), v_copy);
129
- }
130
- #endif
131
-
132
- // if the tail exists or scalar version
133
- for (; i < srclen; ++i)
134
- {
135
- tmp.at <float >(i + 1U ) = src.at <float >(i);
136
- }
137
-
138
- i = 0U ;
116
+ const v_float32 v_dst2src_scale = vx_setall_f32 (dstToSrcScale);
117
+ const v_float32 v_half = vx_setall_f32 (0 .5f );
139
118
140
- #if (CV_SIMD || CV_SIMD_SCALABLE)
141
119
int ptr_x_int[v_float32_max_width];
142
- uint32_t j;
143
-
144
- v_float32 v_dstlen_sub_1 = vx_setall_f32 ((float )(dstlen - 1U ));
145
- v_float32 v_one = vx_setall_f32 (1 .0f );
146
- v_float32 v_x_start = v_div (v_one, v_dstlen_sub_1);
147
- v_float32 v_u = vx_setall_f32 ((float )srclen);
148
- v_float32 v_half = vx_setall_f32 (0 .5f );
149
-
150
- len_sub_vfloatStep = (uint32_t )std::max ((int64_t )dstlen - (int64_t )v_float32_width, (int64_t )0 );
151
- for (; i < v_float32_width; ++i)
120
+ for (unsigned j = 0 ; j < v_float32_width; ++j)
152
121
{
153
- ptr_x_int[i ] = (int )i ;
122
+ ptr_x_int[j ] = (int )j ;
154
123
}
124
+ const v_float32 v_sequence = v_cvt_f32 (vx_load (ptr_x_int));
155
125
156
- float ptr_for_cubicHermite[v_float32_max_width];
157
- v_float32 v_sequence = v_cvt_f32 (vx_load (ptr_x_int));
158
- for (i = 0U ; i < len_sub_vfloatStep; i+= v_float32_width)
126
+ for (i = 0U ; i <= dstlen - v_float32_width; i+= v_float32_width)
159
127
{
160
128
v_float32 v_reg_i = v_add (vx_setall_f32 ((float )i), v_sequence);
161
129
162
- v_float32 v_x = v_sub (v_mul (v_x_start, v_reg_i, v_u ), v_half);
130
+ v_float32 v_x = v_sub (v_mul (v_reg_i, v_dst2src_scale ), v_half);
163
131
164
132
v_int32 v_x_int = v_trunc (v_x);
165
133
v_float32 v_x_fract = v_sub (v_x, v_cvt_f32 (v_floor (v_x)));
166
134
167
135
vx_store (ptr_x_int, v_x_int);
168
136
169
- for (j = 0U ; j < v_float32_width; ++j)
170
- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] - 1 ));
171
- v_float32 v_x_int_add_A = vx_load (ptr_for_cubicHermite);
172
-
173
- for (j = 0U ; j < v_float32_width; ++j)
174
- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j]));
175
- v_float32 v_x_int_add_B = vx_load (ptr_for_cubicHermite);
176
-
177
- for (j = 0U ; j < v_float32_width; ++j)
178
- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] + 1 ));
179
- v_float32 v_x_int_add_C = vx_load (ptr_for_cubicHermite);
180
-
181
- for (j = 0U ; j < v_float32_width; ++j)
182
- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] + 2 ));
183
- v_float32 v_x_int_add_D = vx_load (ptr_for_cubicHermite);
137
+ float ptr_for_cubicHermiteA[v_float32_max_width];
138
+ float ptr_for_cubicHermiteB[v_float32_max_width];
139
+ float ptr_for_cubicHermiteC[v_float32_max_width];
140
+ float ptr_for_cubicHermiteD[v_float32_max_width];
184
141
142
+ for (unsigned j = 0U ; j < v_float32_width; ++j)
143
+ {
144
+ int src_offset = ptr_x_int[j];
145
+ ptr_for_cubicHermiteA[j] = src[std::min (std::max (0 , src_offset - 1 ), srclen_1)];
146
+ ptr_for_cubicHermiteB[j] = src[std::min (std::max (0 , src_offset + 0 ), srclen_1)];
147
+ ptr_for_cubicHermiteC[j] = src[std::min (std::max (0 , src_offset + 1 ), srclen_1)];
148
+ ptr_for_cubicHermiteD[j] = src[std::min (std::max (0 , src_offset + 2 ), srclen_1)];
149
+ }
150
+ v_float32 v_x_int_add_A = vx_load (ptr_for_cubicHermiteA);
151
+ v_float32 v_x_int_add_B = vx_load (ptr_for_cubicHermiteB);
152
+ v_float32 v_x_int_add_C = vx_load (ptr_for_cubicHermiteC);
153
+ v_float32 v_x_int_add_D = vx_load (ptr_for_cubicHermiteD);
185
154
186
- vx_store (dst. ptr < float >( 0 ) + i , simd_cubicHermite (v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
155
+ vx_store (& dst[i] , simd_cubicHermite (v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
187
156
}
188
157
#endif
189
158
190
159
// if the tail exists or scalar version
191
- float *ptr = tmp.ptr <float >(0 ) + 1U ;
192
- float lenScale = 1 .0f / (float )(dstlen - 1U );
193
- float U, X, xfract;
194
- int xint;
195
160
for (; i < dstlen; ++i)
196
161
{
197
- U = (float )i * lenScale;
198
- X = (U * (float )srclen) - 0 .5f ;
199
- xfract = X - floor (X);
200
- xint = (int )X;
201
- dst.at <float >(i) = scal_cubicHermite (ptr[xint - 1 ], ptr[xint], ptr[xint + 1 ], ptr[xint + 2 ], xfract);
162
+ float X = (float )i * dstToSrcScale - 0 .5f ;
163
+ float xfract = X - floor (X);
164
+ int xint = (int )X;
165
+ float cubicHermiteA = src[std::min (std::max (0 , xint - 1 ), srclen_1)];
166
+ float cubicHermiteB = src[std::min (std::max (0 , xint + 0 ), srclen_1)];
167
+ float cubicHermiteC = src[std::min (std::max (0 , xint + 1 ), srclen_1)];
168
+ float cubicHermiteD = src[std::min (std::max (0 , xint + 2 ), srclen_1)];
169
+ dst[i] = scal_cubicHermite (cubicHermiteA, cubicHermiteB, cubicHermiteC, cubicHermiteD, xfract);
202
170
}
203
-
204
171
}
205
172
206
173
static void fir_f32 (const float *pSrc, float *pDst,
@@ -332,7 +299,7 @@ static void fir_f32(const float *pSrc, float *pDst,
332
299
}
333
300
334
301
void resampleSignal (InputArray inputSignal, OutputArray outputSignal,
335
- const int inFreq, const int outFreq)
302
+ const int inFreq, const int outFreq)
336
303
{
337
304
CV_TRACE_FUNCTION ();
338
305
CV_Assert (!inputSignal.empty ());
@@ -343,16 +310,18 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
343
310
inputSignal.copyTo (outputSignal);
344
311
return ;
345
312
}
346
- uint32_t filtLen = 33U ;
347
- float beta = 3 .395f ;
348
- std::vector<float > filt_window (filtLen, 0 .f );
349
- init_filter (beta, filtLen, filt_window.data ());
350
313
float ratio = (float )outFreq / float (inFreq);
351
314
Mat1f inMat = inputSignal.getMat ();
352
- Mat1f outMat = Mat1f (Size (cvFloor (inMat.cols * ratio), 1 ));
353
- cubicInterpolate (inMat, outMat.cols , outMat, inMat.cols );
315
+ outputSignal.create (Size (cvFloor (inMat.cols * ratio), 1 ), CV_32FC1);
316
+ Mat1f outMat = outputSignal.getMat ();
317
+ cubicInterpolate (inMat.ptr <float >(0 ), inMat.cols , outMat.ptr <float >(0 ), outMat.cols );
354
318
if (inFreq < 2 * outFreq)
355
319
{
320
+ uint32_t filtLen = 33U ;
321
+ float beta = 3 .395f ;
322
+ std::vector<float > filt_window (filtLen, 0 .f );
323
+ init_filter (beta, filtLen, filt_window.data ());
324
+
356
325
std::vector<float > dlyl (filtLen * 2 - 1 , 0 .f );
357
326
std::vector<float > ptmp (outMat.cols + 2 * filtLen, 0 .);
358
327
@@ -367,7 +336,6 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
367
336
outMat.at <float >(i - filtLen) = ptmp2[i + cvFloor ((float )filtLen / 2 .f )];
368
337
}
369
338
}
370
- outputSignal.assign (std::move (outMat));
371
339
}
372
340
373
341
0 commit comments