Skip to content

Commit 050b60b

Browse files
authored
Avoid scalarization in _mm_madd_epi16 (#13454)
1 parent d996699 commit 050b60b

File tree

2 files changed

2 additions & 15 deletions

2 files changed

2 additions & 15 deletions

site/source/docs/porting/simd.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ The following table highlights the availability and expected performance of diff
    * - _mm_loadu_si32
      - ❌ emulated with wasm_i32x4_make
    * - _mm_madd_epi16
-     - ❌ scalarized
+     - ✅ wasm_dot_s_i32x4_i16x8
    * - _mm_maskmoveu_si128
      - ❌ scalarized
    * - _mm_max_epi16

system/include/compat/emmintrin.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -669,20 +669,7 @@ _mm_avg_epu16(__m128i __a, __m128i __b)
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_madd_epi16(__m128i __a, __m128i __b)
 {
-  // TODO: optimize
-  union {
-    signed short x[8];
-    __m128i m;
-  } src, src2;
-  union {
-    signed int x[4];
-    __m128i m;
-  } dst;
-  src.m = __a;
-  src2.m = __b;
-  for(int i = 0; i < 4; ++i)
-    dst.x[i] = src.x[i*2] * src2.x[i*2] + src.x[i*2+1] * src2.x[i*2+1];
-  return dst.m;
+  return (__m128i)__builtin_wasm_dot_s_i32x4_i16x8((__i16x8)__a, (__i16x8)__b);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

0 commit comments

Comments
 (0)