Skip to content

Commit 13c14a5

Browse files
authored
Remove known exceptions to Intel's signatures (#317)
We had a few lingering intrinsics that were getting special treatment for having different types than what Intel specified. This commit removes all of these cases and reverts to precisely what upstream Intel specifies (even if it doesn't make the most sense in some cases).
1 parent 1bb8150 commit 13c14a5

File tree

4 files changed

+43
-61
lines changed

4 files changed

+43
-61
lines changed

coresimd/src/x86/i586/sse2.rs

+38-38
Original file line numberDiff line numberDiff line change
@@ -1652,96 +1652,96 @@ pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
16521652
#[inline]
16531653
#[target_feature(enable = "sse2")]
16541654
#[cfg_attr(test, assert_instr(comisd))]
1655-
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> bool {
1656-
comieqsd(a, b) as u8 != 0
1655+
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
1656+
comieqsd(a, b)
16571657
}
16581658

16591659
/// Compare the lower element of `a` and `b` for less-than.
16601660
#[inline]
16611661
#[target_feature(enable = "sse2")]
16621662
#[cfg_attr(test, assert_instr(comisd))]
1663-
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> bool {
1664-
comiltsd(a, b) as u8 != 0
1663+
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
1664+
comiltsd(a, b)
16651665
}
16661666

16671667
/// Compare the lower element of `a` and `b` for less-than-or-equal.
16681668
#[inline]
16691669
#[target_feature(enable = "sse2")]
16701670
#[cfg_attr(test, assert_instr(comisd))]
1671-
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> bool {
1672-
comilesd(a, b) as u8 != 0
1671+
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
1672+
comilesd(a, b)
16731673
}
16741674

16751675
/// Compare the lower element of `a` and `b` for greater-than.
16761676
#[inline]
16771677
#[target_feature(enable = "sse2")]
16781678
#[cfg_attr(test, assert_instr(comisd))]
1679-
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> bool {
1680-
comigtsd(a, b) as u8 != 0
1679+
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
1680+
comigtsd(a, b)
16811681
}
16821682

16831683
/// Compare the lower element of `a` and `b` for greater-than-or-equal.
16841684
#[inline]
16851685
#[target_feature(enable = "sse2")]
16861686
#[cfg_attr(test, assert_instr(comisd))]
1687-
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> bool {
1688-
comigesd(a, b) as u8 != 0
1687+
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
1688+
comigesd(a, b)
16891689
}
16901690

16911691
/// Compare the lower element of `a` and `b` for not-equal.
16921692
#[inline]
16931693
#[target_feature(enable = "sse2")]
16941694
#[cfg_attr(test, assert_instr(comisd))]
1695-
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> bool {
1696-
comineqsd(a, b) as u8 != 0
1695+
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
1696+
comineqsd(a, b)
16971697
}
16981698

16991699
/// Compare the lower element of `a` and `b` for equality.
17001700
#[inline]
17011701
#[target_feature(enable = "sse2")]
17021702
#[cfg_attr(test, assert_instr(ucomisd))]
1703-
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> bool {
1704-
ucomieqsd(a, b) as u8 != 0
1703+
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
1704+
ucomieqsd(a, b)
17051705
}
17061706

17071707
/// Compare the lower element of `a` and `b` for less-than.
17081708
#[inline]
17091709
#[target_feature(enable = "sse2")]
17101710
#[cfg_attr(test, assert_instr(ucomisd))]
1711-
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> bool {
1712-
ucomiltsd(a, b) as u8 != 0
1711+
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
1712+
ucomiltsd(a, b)
17131713
}
17141714

17151715
/// Compare the lower element of `a` and `b` for less-than-or-equal.
17161716
#[inline]
17171717
#[target_feature(enable = "sse2")]
17181718
#[cfg_attr(test, assert_instr(ucomisd))]
1719-
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> bool {
1720-
ucomilesd(a, b) as u8 != 0
1719+
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
1720+
ucomilesd(a, b)
17211721
}
17221722

17231723
/// Compare the lower element of `a` and `b` for greater-than.
17241724
#[inline]
17251725
#[target_feature(enable = "sse2")]
17261726
#[cfg_attr(test, assert_instr(ucomisd))]
1727-
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> bool {
1728-
ucomigtsd(a, b) as u8 != 0
1727+
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
1728+
ucomigtsd(a, b)
17291729
}
17301730

17311731
/// Compare the lower element of `a` and `b` for greater-than-or-equal.
17321732
#[inline]
17331733
#[target_feature(enable = "sse2")]
17341734
#[cfg_attr(test, assert_instr(ucomisd))]
1735-
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> bool {
1736-
ucomigesd(a, b) as u8 != 0
1735+
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
1736+
ucomigesd(a, b)
17371737
}
17381738

17391739
/// Compare the lower element of `a` and `b` for not-equal.
17401740
#[inline]
17411741
#[target_feature(enable = "sse2")]
17421742
#[cfg_attr(test, assert_instr(ucomisd))]
1743-
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> bool {
1744-
ucomineqsd(a, b) as u8 != 0
1743+
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
1744+
ucomineqsd(a, b)
17451745
}
17461746

17471747
/// Convert packed double-precision (64-bit) floating-point elements in "a" to
@@ -3876,79 +3876,79 @@ mod tests {
38763876
#[simd_test = "sse2"]
38773877
unsafe fn test_mm_comieq_sd() {
38783878
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3879-
assert!(_mm_comieq_sd(a, b));
3879+
assert!(_mm_comieq_sd(a, b) != 0);
38803880

38813881
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
3882-
assert!(!_mm_comieq_sd(a, b));
3882+
assert!(_mm_comieq_sd(a, b) == 0);
38833883
}
38843884

38853885
#[simd_test = "sse2"]
38863886
unsafe fn test_mm_comilt_sd() {
38873887
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3888-
assert!(!_mm_comilt_sd(a, b));
3888+
assert!(_mm_comilt_sd(a, b) == 0);
38893889
}
38903890

38913891
#[simd_test = "sse2"]
38923892
unsafe fn test_mm_comile_sd() {
38933893
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3894-
assert!(_mm_comile_sd(a, b));
3894+
assert!(_mm_comile_sd(a, b) != 0);
38953895
}
38963896

38973897
#[simd_test = "sse2"]
38983898
unsafe fn test_mm_comigt_sd() {
38993899
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3900-
assert!(!_mm_comigt_sd(a, b));
3900+
assert!(_mm_comigt_sd(a, b) == 0);
39013901
}
39023902

39033903
#[simd_test = "sse2"]
39043904
unsafe fn test_mm_comige_sd() {
39053905
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3906-
assert!(_mm_comige_sd(a, b));
3906+
assert!(_mm_comige_sd(a, b) != 0);
39073907
}
39083908

39093909
#[simd_test = "sse2"]
39103910
unsafe fn test_mm_comineq_sd() {
39113911
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3912-
assert!(!_mm_comineq_sd(a, b));
3912+
assert!(_mm_comineq_sd(a, b) == 0);
39133913
}
39143914

39153915
#[simd_test = "sse2"]
39163916
unsafe fn test_mm_ucomieq_sd() {
39173917
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3918-
assert!(_mm_ucomieq_sd(a, b));
3918+
assert!(_mm_ucomieq_sd(a, b) != 0);
39193919

39203920
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
3921-
assert!(!_mm_ucomieq_sd(a, b));
3921+
assert!(_mm_ucomieq_sd(a, b) == 0);
39223922
}
39233923

39243924
#[simd_test = "sse2"]
39253925
unsafe fn test_mm_ucomilt_sd() {
39263926
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3927-
assert!(!_mm_ucomilt_sd(a, b));
3927+
assert!(_mm_ucomilt_sd(a, b) == 0);
39283928
}
39293929

39303930
#[simd_test = "sse2"]
39313931
unsafe fn test_mm_ucomile_sd() {
39323932
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3933-
assert!(_mm_ucomile_sd(a, b));
3933+
assert!(_mm_ucomile_sd(a, b) != 0);
39343934
}
39353935

39363936
#[simd_test = "sse2"]
39373937
unsafe fn test_mm_ucomigt_sd() {
39383938
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3939-
assert!(!_mm_ucomigt_sd(a, b));
3939+
assert!(_mm_ucomigt_sd(a, b) == 0);
39403940
}
39413941

39423942
#[simd_test = "sse2"]
39433943
unsafe fn test_mm_ucomige_sd() {
39443944
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3945-
assert!(_mm_ucomige_sd(a, b));
3945+
assert!(_mm_ucomige_sd(a, b) != 0);
39463946
}
39473947

39483948
#[simd_test = "sse2"]
39493949
unsafe fn test_mm_ucomineq_sd() {
39503950
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
3951-
assert!(!_mm_ucomineq_sd(a, b));
3951+
assert!(_mm_ucomineq_sd(a, b) == 0);
39523952
}
39533953

39543954
#[simd_test = "sse2"]

coresimd/src/x86/i586/sse41.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
184184
#[inline]
185185
#[target_feature(enable = "sse4.1")]
186186
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
187-
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i8, imm8: i32) -> __m128i {
188-
mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i))
187+
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
188+
mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8))
189189
}
190190

191191
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a

coresimd/src/x86/i686/sse.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,9 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
312312
#[inline]
313313
#[target_feature(enable = "sse,mmx")]
314314
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
315-
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
315+
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
316316
macro_rules! call {
317-
($imm2:expr) => { pextrw(a, $imm2) as i16 }
317+
($imm2:expr) => { pextrw(a, $imm2) as i32 }
318318
}
319319
constify_imm2!(imm2, call)
320320
}
@@ -324,7 +324,7 @@ pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
324324
#[inline]
325325
#[target_feature(enable = "sse,mmx")]
326326
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
327-
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 {
327+
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
328328
_mm_extract_pi16(a, imm2)
329329
}
330330

stdsimd-verify/tests/x86-intel.rs

-18
Original file line numberDiff line numberDiff line change
@@ -407,30 +407,12 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) -> Result<(), String> {
407407
| (&Type::M256, "__m256")
408408
| (&Type::Ptr(&Type::M256), "__m256*") => {}
409409

410-
// These two intrinsics return a 16-bit element but in Intel's
411-
// intrinsics they're listed as returning an `int`.
412-
(&Type::PrimSigned(16), "int") if intrinsic == "_mm_extract_pi16" => {}
413-
(&Type::PrimSigned(16), "int") if intrinsic == "_m_pextrw" => {}
414-
415-
// This intrinsic takes an `i8` to get inserted into an i8 vector, but
416-
// Intel says the argument is i32...
417-
(&Type::PrimSigned(8), "int") if intrinsic == "_mm_insert_epi8" => {}
418-
419410
// This is a macro (?) in C which seems to mutate its arguments, but
420411
// that means that we're taking pointers to arguments in rust
421412
// as we're not exposing it as a macro.
422413
(&Type::Ptr(&Type::M128), "__m128")
423414
if intrinsic == "_MM_TRANSPOSE4_PS" => {}
424415

425-
// These intrinsics return an `int` in C but they're always either the
426-
// bit 1 or 0 so we switch it to returning `bool` in rust
427-
(&Type::Bool, "int")
428-
if intrinsic.starts_with("_mm_comi")
429-
&& intrinsic.ends_with("_sd") => {}
430-
(&Type::Bool, "int")
431-
if intrinsic.starts_with("_mm_ucomi")
432-
&& intrinsic.ends_with("_sd") => {}
433-
434416
_ => bail!(
435417
"failed to equate: `{}` and {:?} for {}",
436418
intel,

0 commit comments

Comments
 (0)