Skip to content

Commit 45340c0

Browse files
Daniel SmithAmanieu
Daniel Smith
authored and committed
Add 64 bit AVX512f le and ge comparisons
1 parent 01db337 commit 45340c0

File tree

2 files changed

+156
-0
lines changed

2 files changed

+156
-0
lines changed

crates/core_arch/src/x86/avx512f.rs

+84
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,48 @@ pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i)
136136
_mm512_cmpgt_epu64_mask(a, b) & m
137137
}
138138

139+
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
140+
///
141+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
142+
#[inline]
143+
#[target_feature(enable = "avx512f")]
144+
#[cfg_attr(test, assert_instr(vpcmp))]
145+
pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
146+
_mm512_cmpgt_epu64_mask(b, a)
147+
}
148+
149+
///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
150+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
151+
///
152+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
153+
#[inline]
154+
#[target_feature(enable = "avx512f")]
155+
#[cfg_attr(test, assert_instr(vpcmp))]
156+
pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
157+
_mm512_cmpgt_epu64_mask(b, a) & m
158+
}
159+
160+
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
161+
///
162+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
163+
#[inline]
164+
#[target_feature(enable = "avx512f")]
165+
#[cfg_attr(test, assert_instr(vpcmp))]
166+
pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
167+
_mm512_cmplt_epu64_mask(b, a)
168+
}
169+
170+
///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
171+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
172+
///
173+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
174+
#[inline]
175+
#[target_feature(enable = "avx512f")]
176+
#[cfg_attr(test, assert_instr(vpcmp))]
177+
pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
178+
_mm512_cmplt_epu64_mask(b, a) & m
179+
}
180+
139181
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
140182
///
141183
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
@@ -199,6 +241,48 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i)
199241
_mm512_cmpgt_epi64_mask(a, b) & m
200242
}
201243

244+
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
245+
///
246+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
247+
#[inline]
248+
#[target_feature(enable = "avx512f")]
249+
#[cfg_attr(test, assert_instr(vpcmp))]
250+
pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
251+
_mm512_cmpgt_epi64_mask(b, a)
252+
}
253+
254+
///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
255+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
256+
///
257+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
258+
#[inline]
259+
#[target_feature(enable = "avx512f")]
260+
#[cfg_attr(test, assert_instr(vpcmp))]
261+
pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
262+
_mm512_cmpgt_epi64_mask(b, a) & m
263+
}
264+
265+
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
266+
///
267+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
268+
#[inline]
269+
#[target_feature(enable = "avx512f")]
270+
#[cfg_attr(test, assert_instr(vpcmp))]
271+
pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
272+
_mm512_cmplt_epi64_mask(b, a)
273+
}
274+
275+
///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
276+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
277+
///
278+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
279+
#[inline]
280+
#[target_feature(enable = "avx512f")]
281+
#[cfg_attr(test, assert_instr(vpcmp))]
282+
pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
283+
_mm512_cmplt_epi64_mask(b, a) & m
284+
}
285+
202286
/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
203287
///
204288
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)

crates/core_arch/src/x86_64/avx512f.rs

+72
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,42 @@ mod tests {
8383
assert_eq!(r, 0b01001010);
8484
}
8585

86+
#[simd_test(enable = "avx512f")]
87+
unsafe fn test_mm512_cmple_epu64_mask() {
88+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
89+
let b = _mm512_set1_epi64(-1);
90+
assert_eq!(_mm512_cmple_epu64_mask(a, b), _mm512_cmpgt_epu64_mask(b, a))
91+
}
92+
93+
#[simd_test(enable = "avx512f")]
94+
unsafe fn test_mm512_mask_cmple_epu64_mask() {
95+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
96+
let b = _mm512_set1_epi64(-1);
97+
let mask = 0b01111010;
98+
assert_eq!(
99+
_mm512_mask_cmple_epu64_mask(mask, a, b),
100+
_mm512_mask_cmpgt_epu64_mask(mask, b, a)
101+
);
102+
}
103+
104+
#[simd_test(enable = "avx512f")]
105+
unsafe fn test_mm512_cmpge_epu64_mask() {
106+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
107+
let b = _mm512_set1_epi64(-1);
108+
assert_eq!(_mm512_cmpge_epu64_mask(a, b), _mm512_cmplt_epu64_mask(b, a))
109+
}
110+
111+
#[simd_test(enable = "avx512f")]
112+
unsafe fn test_mm512_mask_cmpge_epu64_mask() {
113+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
114+
let b = _mm512_set1_epi64(-1);
115+
let mask = 0b01111010;
116+
assert_eq!(
117+
_mm512_mask_cmpge_epu64_mask(mask, a, b),
118+
_mm512_mask_cmplt_epu64_mask(mask, b, a)
119+
);
120+
}
121+
86122
#[simd_test(enable = "avx512f")]
87123
unsafe fn test_mm512_cmpeq_epu64_mask() {
88124
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
@@ -134,6 +170,42 @@ mod tests {
134170
assert_eq!(r, 0b00000100);
135171
}
136172

173+
#[simd_test(enable = "avx512f")]
174+
unsafe fn test_mm512_cmple_epi64_mask() {
175+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
176+
let b = _mm512_set1_epi64(-1);
177+
assert_eq!(_mm512_cmple_epi64_mask(a, b), _mm512_cmpgt_epi64_mask(b, a))
178+
}
179+
180+
#[simd_test(enable = "avx512f")]
181+
unsafe fn test_mm512_mask_cmple_epi64_mask() {
182+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
183+
let b = _mm512_set1_epi64(-1);
184+
let mask = 0b01111010;
185+
assert_eq!(
186+
_mm512_mask_cmple_epi64_mask(mask, a, b),
187+
_mm512_mask_cmpgt_epi64_mask(mask, b, a)
188+
);
189+
}
190+
191+
#[simd_test(enable = "avx512f")]
192+
unsafe fn test_mm512_cmpge_epi64_mask() {
193+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
194+
let b = _mm512_set1_epi64(-1);
195+
assert_eq!(_mm512_cmpge_epi64_mask(a, b), _mm512_cmplt_epi64_mask(b, a))
196+
}
197+
198+
#[simd_test(enable = "avx512f")]
199+
unsafe fn test_mm512_mask_cmpge_epi64_mask() {
200+
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
201+
let b = _mm512_set1_epi64(-1);
202+
let mask = 0b01111010;
203+
assert_eq!(
204+
_mm512_mask_cmpge_epi64_mask(mask, a, b),
205+
_mm512_mask_cmplt_epi64_mask(mask, b, a)
206+
);
207+
}
208+
137209
#[simd_test(enable = "avx512f")]
138210
unsafe fn test_mm512_cmpeq_epi64_mask() {
139211
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);

0 commit comments

Comments
 (0)