Skip to content

Commit 5ccd76c

Browse files
TDeckingAmanieu
authored andcommitted
Refactor avx512f: mask operations
1 parent e2148a2 commit 5ccd76c

File tree

2 files changed

+196
-16
lines changed

2 files changed

+196
-16
lines changed

crates/core_arch/missing-x86.md

-10
Original file line numberDiff line numberDiff line change
@@ -149,19 +149,10 @@
149149

150150
<details><summary>["AVX512F"]</summary><p>
151151

152-
* [ ] [`_cvtmask16_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtmask16_u32)
153-
* [ ] [`_cvtu32_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtu32_mask16)
154-
* [ ] [`_kortest_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortest_mask16_u8)
155-
* [ ] [`_kortestc_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestc_mask16_u8)
156-
* [ ] [`_kortestz_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestz_mask16_u8)
157-
* [ ] [`_kshiftli_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftli_mask16)
158-
* [ ] [`_kshiftri_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftri_mask16)
159-
* [ ] [`_load_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask16)
160152
* [ ] [`_mm512_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
161153
* [ ] [`_mm512_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
162154
* [ ] [`_mm512_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
163155
* [ ] [`_mm512_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
164-
* [ ] [`_mm512_kortestz`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kortestz)
165156
* [ ] [`_mm512_mask_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
166157
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
167158
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
@@ -173,7 +164,6 @@
173164
* [ ] [`_mm_mask_store_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
174165
* [ ] [`_mm_maskz_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
175166
* [ ] [`_mm_maskz_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
176-
* [ ] [`_store_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask16)
177167
</p></details>
178168

179169

crates/core_arch/src/x86/avx512f.rs

+196-6
Original file line numberDiff line numberDiff line change
@@ -27274,6 +27274,26 @@ pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
2727427274
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
2727527275
}
2727627276

27277+
/// Convert 16-bit mask a into an integer value, and store the result in dst.
27278+
///
27279+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
27280+
#[inline]
27281+
#[target_feature(enable = "avx512f")]
27282+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27283+
pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 {
27284+
a as u32
27285+
}
27286+
27287+
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
27288+
///
27289+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
27290+
#[inline]
27291+
#[target_feature(enable = "avx512f")]
27292+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27293+
pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 {
27294+
a as __mmask16
27295+
}
27296+
2727727297
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
2727827298
///
2727927299
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
@@ -27404,6 +27424,83 @@ pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
2740427424
_mm512_knot(_mm512_kxor(a, b))
2740527425
}
2740627426

27427+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27428+
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
27429+
///
27430+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
27431+
#[inline]
27432+
#[target_feature(enable = "avx512f")]
27433+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27434+
pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
27435+
let tmp = _kor_mask16(a, b);
27436+
*all_ones = (tmp == 0xffff) as u8;
27437+
(tmp == 0) as u8
27438+
}
27439+
27440+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
27441+
/// store 0 in dst.
27442+
///
27443+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
27444+
#[inline]
27445+
#[target_feature(enable = "avx512f")]
27446+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27447+
pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27448+
(_kor_mask16(a, b) == 0xffff) as u8
27449+
}
27450+
27451+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27452+
/// store 0 in dst.
27453+
///
27454+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
27455+
#[inline]
27456+
#[target_feature(enable = "avx512f")]
27457+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27458+
pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27459+
(_kor_mask16(a, b) == 0) as u8
27460+
}
27461+
27462+
/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
27463+
///
27464+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
27465+
#[inline]
27466+
#[target_feature(enable = "avx512f")]
27467+
#[rustc_legacy_const_generics(1)]
27468+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27469+
pub unsafe fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27470+
a << COUNT
27471+
}
27472+
27473+
/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
27474+
///
27475+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
27476+
#[inline]
27477+
#[target_feature(enable = "avx512f")]
27478+
#[rustc_legacy_const_generics(1)]
27479+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27480+
pub unsafe fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27481+
a >> COUNT
27482+
}
27483+
27484+
/// Load 16-bit mask from memory
27485+
///
27486+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
27487+
#[inline]
27488+
#[target_feature(enable = "avx512f")]
27489+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27490+
pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
27491+
*mem_addr
27492+
}
27493+
27494+
/// Store 16-bit mask to memory
27495+
///
27496+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
27497+
#[inline]
27498+
#[target_feature(enable = "avx512f")]
27499+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27500+
pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
27501+
*mem_addr = a;
27502+
}
27503+
2740727504
/// Copy 16-bit mask a to k.
2740827505
///
2740927506
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
@@ -27455,12 +27552,20 @@ pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
2745527552
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2745627553
#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
2745727554
pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
27458-
let r = a | b;
27459-
if r == 0b11111111_11111111 {
27460-
1
27461-
} else {
27462-
0
27463-
}
27555+
let r = (a | b) == 0b11111111_11111111;
27556+
r as i32
27557+
}
27558+
27559+
/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
27560+
///
27561+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kortestz)
27562+
#[inline]
27563+
#[target_feature(enable = "avx512f")]
27564+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27565+
#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
27566+
pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
27567+
let r = (a | b) == 0;
27568+
r as i32
2746427569
}
2746527570

2746627571
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
@@ -54079,6 +54184,22 @@ mod tests {
5407954184
assert_eq_m128i(r, e);
5408054185
}
5408154186

54187+
#[simd_test(enable = "avx512f")]
54188+
unsafe fn test_cvtmask16_u32() {
54189+
let a: __mmask16 = 0b11001100_00110011;
54190+
let r = _cvtmask16_u32(a);
54191+
let e: u32 = 0b11001100_00110011;
54192+
assert_eq!(r, e);
54193+
}
54194+
54195+
#[simd_test(enable = "avx512f")]
54196+
unsafe fn test_cvtu32_mask16() {
54197+
let a: u32 = 0b11001100_00110011;
54198+
let r = _cvtu32_mask16(a);
54199+
let e: __mmask16 = 0b11001100_00110011;
54200+
assert_eq!(r, e);
54201+
}
54202+
5408254203
#[simd_test(enable = "avx512f")]
5408354204
unsafe fn test_mm512_kand() {
5408454205
let a: u16 = 0b11001100_00110011;
@@ -54185,6 +54306,65 @@ mod tests {
5418554306
assert_eq!(r, e);
5418654307
}
5418754308

54309+
#[simd_test(enable = "avx512dq")]
54310+
unsafe fn test_kortest_mask16_u8() {
54311+
let a: __mmask16 = 0b0110100101101001;
54312+
let b: __mmask16 = 0b1011011010110110;
54313+
let mut all_ones: u8 = 0;
54314+
let r = _kortest_mask16_u8(a, b, &mut all_ones);
54315+
assert_eq!(r, 0);
54316+
assert_eq!(all_ones, 1);
54317+
}
54318+
54319+
#[simd_test(enable = "avx512dq")]
54320+
unsafe fn test_kortestc_mask16_u8() {
54321+
let a: __mmask16 = 0b0110100101101001;
54322+
let b: __mmask16 = 0b1011011010110110;
54323+
let r = _kortestc_mask16_u8(a, b);
54324+
assert_eq!(r, 1);
54325+
}
54326+
54327+
#[simd_test(enable = "avx512dq")]
54328+
unsafe fn test_kortestz_mask16_u8() {
54329+
let a: __mmask16 = 0b0110100101101001;
54330+
let b: __mmask16 = 0b1011011010110110;
54331+
let r = _kortestz_mask16_u8(a, b);
54332+
assert_eq!(r, 0);
54333+
}
54334+
54335+
#[simd_test(enable = "avx512dq")]
54336+
unsafe fn test_kshiftli_mask16() {
54337+
let a: __mmask16 = 0b1001011011000011;
54338+
let r = _kshiftli_mask16::<3>(a);
54339+
let e: __mmask16 = 0b1011011000011000;
54340+
assert_eq!(r, e);
54341+
}
54342+
54343+
#[simd_test(enable = "avx512dq")]
54344+
unsafe fn test_kshiftri_mask16() {
54345+
let a: __mmask16 = 0b0110100100111100;
54346+
let r = _kshiftri_mask16::<3>(a);
54347+
let e: __mmask16 = 0b0000110100100111;
54348+
assert_eq!(r, e);
54349+
}
54350+
54351+
#[simd_test(enable = "avx512f")]
54352+
unsafe fn test_load_mask16() {
54353+
let a: __mmask16 = 0b1001011011000011;
54354+
let r = _load_mask16(&a);
54355+
let e: __mmask16 = 0b1001011011000011;
54356+
assert_eq!(r, e);
54357+
}
54358+
54359+
#[simd_test(enable = "avx512f")]
54360+
unsafe fn test_store_mask16() {
54361+
let a: __mmask16 = 0b0110100100111100;
54362+
let mut r = 0;
54363+
_store_mask16(&mut r, a);
54364+
let e: __mmask16 = 0b0110100100111100;
54365+
assert_eq!(r, e);
54366+
}
54367+
5418854368
#[simd_test(enable = "avx512f")]
5418954369
unsafe fn test_mm512_kmov() {
5419054370
let a: u16 = 0b11001100_00110011;
@@ -54229,6 +54409,16 @@ mod tests {
5422954409
assert_eq!(r, 1);
5423054410
}
5423154411

54412+
#[simd_test(enable = "avx512f")]
54413+
unsafe fn test_mm512_kortestz() {
54414+
let a: u16 = 0b11001100_00110011;
54415+
let b: u16 = 0b00101110_00001011;
54416+
let r = _mm512_kortestz(a, b);
54417+
assert_eq!(r, 0);
54418+
let r = _mm512_kortestz(0, 0);
54419+
assert_eq!(r, 1);
54420+
}
54421+
5423254422
#[simd_test(enable = "avx512f")]
5423354423
unsafe fn test_mm512_test_epi32_mask() {
5423454424
let a = _mm512_set1_epi32(1 << 0);

0 commit comments

Comments
 (0)