Skip to content

Commit 5a4089c

Browse files
committed
Implemented VEX versions
Modified stdarch-test to accept VEX versions
1 parent fdc73f0 commit 5a4089c

File tree

7 files changed

+1198
-65
lines changed

7 files changed

+1198
-65
lines changed

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,82 +1135,20 @@
11351135
</p></details>
11361136

11371137

1138-
<details><summary>["AVX_IFMA"]</summary><p>
1139-
1140-
* [ ] [`_mm256_madd52hi_avx_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd52hi_avx_epu64)
1141-
* [ ] [`_mm256_madd52lo_avx_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd52lo_avx_epu64)
1142-
* [ ] [`_mm_madd52hi_avx_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd52hi_avx_epu64)
1143-
* [ ] [`_mm_madd52lo_avx_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd52lo_avx_epu64)
1144-
</p></details>
1145-
1146-
11471138
<details><summary>["AVX_NE_CONVERT"]</summary><p>
11481139

1149-
* [ ] [`_mm256_bcstnebf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps)
11501140
* [ ] [`_mm256_bcstnesh_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps)
1151-
* [ ] [`_mm256_cvtneebf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps)
11521141
* [ ] [`_mm256_cvtneeph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps)
1153-
* [ ] [`_mm256_cvtneobf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps)
11541142
* [ ] [`_mm256_cvtneoph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps)
11551143
* [ ] [`_mm256_cvtneps_avx_pbh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh)
1156-
* [ ] [`_mm_bcstnebf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps)
11571144
* [ ] [`_mm_bcstnesh_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps)
1158-
* [ ] [`_mm_cvtneebf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps)
11591145
* [ ] [`_mm_cvtneeph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps)
1160-
* [ ] [`_mm_cvtneobf16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps)
11611146
* [ ] [`_mm_cvtneoph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps)
11621147
* [ ] [`_mm_cvtneps_avx_pbh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh)
11631148
* [ ] [`_mm_cvtneps_pbh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_pbh)
11641149
</p></details>
11651150

11661151

1167-
<details><summary>["AVX_VNNI"]</summary><p>
1168-
1169-
* [ ] [`_mm256_dpbusd_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbusd_avx_epi32)
1170-
* [ ] [`_mm256_dpbusds_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbusds_avx_epi32)
1171-
* [ ] [`_mm256_dpwssd_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwssd_avx_epi32)
1172-
* [ ] [`_mm256_dpwssds_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwssds_avx_epi32)
1173-
* [ ] [`_mm_dpbusd_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbusd_avx_epi32)
1174-
* [ ] [`_mm_dpbusds_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbusds_avx_epi32)
1175-
* [ ] [`_mm_dpwssd_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwssd_avx_epi32)
1176-
* [ ] [`_mm_dpwssds_avx_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwssds_avx_epi32)
1177-
</p></details>
1178-
1179-
1180-
<details><summary>["AVX_VNNI_INT16"]</summary><p>
1181-
1182-
* [ ] [`_mm256_dpwsud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwsud_epi32)
1183-
* [ ] [`_mm256_dpwsuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwsuds_epi32)
1184-
* [ ] [`_mm256_dpwusd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwusd_epi32)
1185-
* [ ] [`_mm256_dpwusds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwusds_epi32)
1186-
* [ ] [`_mm256_dpwuud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwuud_epi32)
1187-
* [ ] [`_mm256_dpwuuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpwuuds_epi32)
1188-
* [ ] [`_mm_dpwsud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwsud_epi32)
1189-
* [ ] [`_mm_dpwsuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwsuds_epi32)
1190-
* [ ] [`_mm_dpwusd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwusd_epi32)
1191-
* [ ] [`_mm_dpwusds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwusds_epi32)
1192-
* [ ] [`_mm_dpwuud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwuud_epi32)
1193-
* [ ] [`_mm_dpwuuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpwuuds_epi32)
1194-
</p></details>
1195-
1196-
1197-
<details><summary>["AVX_VNNI_INT8"]</summary><p>
1198-
1199-
* [ ] [`_mm256_dpbssd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbssd_epi32)
1200-
* [ ] [`_mm256_dpbssds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbssds_epi32)
1201-
* [ ] [`_mm256_dpbsud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbsud_epi32)
1202-
* [ ] [`_mm256_dpbsuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbsuds_epi32)
1203-
* [ ] [`_mm256_dpbuud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbuud_epi32)
1204-
* [ ] [`_mm256_dpbuuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dpbuuds_epi32)
1205-
* [ ] [`_mm_dpbssd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbssd_epi32)
1206-
* [ ] [`_mm_dpbssds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbssds_epi32)
1207-
* [ ] [`_mm_dpbsud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbsud_epi32)
1208-
* [ ] [`_mm_dpbsuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbsuds_epi32)
1209-
* [ ] [`_mm_dpbuud_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbuud_epi32)
1210-
* [ ] [`_mm_dpbuuds_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dpbuuds_epi32)
1211-
</p></details>
1212-
1213-
12141152
<details><summary>["CET_SS"]</summary><p>
12151153

12161154
* [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)

crates/core_arch/src/x86/avx512ifma.rs

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,24 @@ pub unsafe fn _mm512_maskz_madd52lo_epu64(
114114
simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512())
115115
}
116116

117+
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
118+
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
119+
/// unsigned integer from the intermediate result with the
120+
/// corresponding unsigned 64-bit integer in `a`, and store the
121+
/// results in `dst`.
122+
///
123+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
124+
#[inline]
125+
#[target_feature(enable = "avxifma")]
126+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
127+
#[cfg_attr(
128+
all(test, any(target_os = "linux", target_env = "msvc")),
129+
assert_instr(vpmadd52huq)
130+
)]
131+
pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
132+
vpmadd52huq_256(a, b, c)
133+
}
134+
117135
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
118136
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
119137
/// unsigned integer from the intermediate result with the
@@ -169,6 +187,24 @@ pub unsafe fn _mm256_maskz_madd52hi_epu64(
169187
simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256())
170188
}
171189

190+
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
191+
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
192+
/// unsigned integer from the intermediate result with the
193+
/// corresponding unsigned 64-bit integer in `a`, and store the
194+
/// results in `dst`.
195+
///
196+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
197+
#[inline]
198+
#[target_feature(enable = "avxifma")]
199+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
200+
#[cfg_attr(
201+
all(test, any(target_os = "linux", target_env = "msvc")),
202+
assert_instr(vpmadd52luq)
203+
)]
204+
pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
205+
vpmadd52luq_256(a, b, c)
206+
}
207+
172208
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
173209
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
174210
/// unsigned integer from the intermediate result with the
@@ -224,6 +260,24 @@ pub unsafe fn _mm256_maskz_madd52lo_epu64(
224260
simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256())
225261
}
226262

263+
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
264+
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
265+
/// unsigned integer from the intermediate result with the
266+
/// corresponding unsigned 64-bit integer in `a`, and store the
267+
/// results in `dst`.
268+
///
269+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
270+
#[inline]
271+
#[target_feature(enable = "avxifma")]
272+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
273+
#[cfg_attr(
274+
all(test, any(target_os = "linux", target_env = "msvc")),
275+
assert_instr(vpmadd52huq)
276+
)]
277+
pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
278+
vpmadd52huq_128(a, b, c)
279+
}
280+
227281
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
228282
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
229283
/// unsigned integer from the intermediate result with the
@@ -269,6 +323,24 @@ pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: _
269323
simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128())
270324
}
271325

326+
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
327+
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
328+
/// unsigned integer from the intermediate result with the
329+
/// corresponding unsigned 64-bit integer in `a`, and store the
330+
/// results in `dst`.
331+
///
332+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
333+
#[inline]
334+
#[target_feature(enable = "avxifma")]
335+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
336+
#[cfg_attr(
337+
all(test, any(target_os = "linux", target_env = "msvc")),
338+
assert_instr(vpmadd52luq)
339+
)]
340+
pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
341+
vpmadd52luq_128(a, b, c)
342+
}
343+
272344
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
273345
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
274346
/// unsigned integer from the intermediate result with the
@@ -427,6 +499,20 @@ mod tests {
427499
assert_eq_m512i(expected, actual);
428500
}
429501

502+
#[simd_test(enable = "avxifma")]
503+
unsafe fn test_mm256_madd52hi_avx_epu64() {
504+
let a = _mm256_set1_epi64x(10 << 40);
505+
let b = _mm256_set1_epi64x((11 << 40) + 4);
506+
let c = _mm256_set1_epi64x((12 << 40) + 3);
507+
508+
let actual = _mm256_madd52hi_avx_epu64(a, b, c);
509+
510+
// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
511+
let expected = _mm256_set1_epi64x(11030549757952);
512+
513+
assert_eq_m256i(expected, actual);
514+
}
515+
430516
#[simd_test(enable = "avx512ifma,avx512vl")]
431517
unsafe fn test_mm256_madd52hi_epu64() {
432518
let a = _mm256_set1_epi64x(10 << 40);
@@ -471,6 +557,20 @@ mod tests {
471557
assert_eq_m256i(expected, actual);
472558
}
473559

560+
#[simd_test(enable = "avxifma")]
561+
unsafe fn test_mm256_madd52lo_avx_epu64() {
562+
let a = _mm256_set1_epi64x(10 << 40);
563+
let b = _mm256_set1_epi64x((11 << 40) + 4);
564+
let c = _mm256_set1_epi64x((12 << 40) + 3);
565+
566+
let actual = _mm256_madd52lo_avx_epu64(a, b, c);
567+
568+
// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
569+
let expected = _mm256_set1_epi64x(100055558127628);
570+
571+
assert_eq_m256i(expected, actual);
572+
}
573+
474574
#[simd_test(enable = "avx512ifma,avx512vl")]
475575
unsafe fn test_mm256_madd52lo_epu64() {
476576
let a = _mm256_set1_epi64x(10 << 40);
@@ -515,6 +615,20 @@ mod tests {
515615
assert_eq_m256i(expected, actual);
516616
}
517617

618+
#[simd_test(enable = "avxifma")]
619+
unsafe fn test_mm_madd52hi_avx_epu64() {
620+
let a = _mm_set1_epi64x(10 << 40);
621+
let b = _mm_set1_epi64x((11 << 40) + 4);
622+
let c = _mm_set1_epi64x((12 << 40) + 3);
623+
624+
let actual = _mm_madd52hi_avx_epu64(a, b, c);
625+
626+
// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
627+
let expected = _mm_set1_epi64x(11030549757952);
628+
629+
assert_eq_m128i(expected, actual);
630+
}
631+
518632
#[simd_test(enable = "avx512ifma,avx512vl")]
519633
unsafe fn test_mm_madd52hi_epu64() {
520634
let a = _mm_set1_epi64x(10 << 40);
@@ -559,6 +673,20 @@ mod tests {
559673
assert_eq_m128i(expected, actual);
560674
}
561675

676+
#[simd_test(enable = "avxifma")]
677+
unsafe fn test_mm_madd52lo_avx_epu64() {
678+
let a = _mm_set1_epi64x(10 << 40);
679+
let b = _mm_set1_epi64x((11 << 40) + 4);
680+
let c = _mm_set1_epi64x((12 << 40) + 3);
681+
682+
let actual = _mm_madd52lo_avx_epu64(a, b, c);
683+
684+
// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
685+
let expected = _mm_set1_epi64x(100055558127628);
686+
687+
assert_eq_m128i(expected, actual);
688+
}
689+
562690
#[simd_test(enable = "avx512ifma,avx512vl")]
563691
unsafe fn test_mm_madd52lo_epu64() {
564692
let a = _mm_set1_epi64x(10 << 40);

0 commit comments

Comments
 (0)