diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index aa8d4c9820..cca9f7f323 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -21,7 +21,6 @@ stdsimd, staged_api, doc_cfg, - mmx_target_feature, tbm_target_feature, sse4a_target_feature, arm_target_feature, diff --git a/crates/core_arch/src/x86/mmx.rs b/crates/core_arch/src/x86/mmx.rs deleted file mode 100644 index 3947f2bea6..0000000000 --- a/crates/core_arch/src/x86/mmx.rs +++ /dev/null @@ -1,786 +0,0 @@ -//! `i586` MMX instruction set. -//! -//! The intrinsics here roughly correspond to those in the `mmintrin.h` C -//! header. -//! -//! The reference is [Intel 64 and IA-32 Architectures Software Developer's -//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. -//! -//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - -use crate::{ - core_arch::{simd::*, x86::*}, - mem::transmute, -}; - -#[cfg(test)] -use stdarch_test::assert_instr; - -/// Constructs a 64-bit integer vector initialized to zero. -#[inline] -#[target_feature(enable = "mmx")] -// FIXME: this produces a movl instead of xorps on x86 -// FIXME: this produces a xor intrinsic instead of xorps on x86_64 -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))] -pub unsafe fn _mm_setzero_si64() -> __m64 { - transmute(0_i64) -} - -/// Adds packed 8-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddb))] -pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 { - paddb(a, b) -} - -/// Adds packed 8-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddb))] -pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { - _mm_add_pi8(a, b) -} - -/// Adds packed 16-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddw))] -pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 { - paddw(a, b) -} - -/// Adds packed 16-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddw))] -pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { - _mm_add_pi16(a, b) -} - -/// Adds packed 32-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddd))] -pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 { - paddd(a, b) -} - -/// Adds packed 32-bit integers in `a` and `b`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddd))] -pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 { - _mm_add_pi32(a, b) -} - -/// Adds packed 8-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddsb))] -pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 { - paddsb(a, b) -} - -/// Adds packed 8-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddsb))] -pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 { - _mm_adds_pi8(a, b) -} - -/// Adds packed 16-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddsw))] -pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 { - paddsw(a, b) -} - -/// Adds packed 16-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddsw))] -pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 { - _mm_adds_pi16(a, b) -} - -/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddusb))] -pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 { - paddusb(a, b) -} - -/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddusb))] -pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 { - _mm_adds_pu8(a, b) -} - -/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddusw))] -pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 { - paddusw(a, b) -} - -/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(paddusw))] -pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 { - _mm_adds_pu16(a, b) -} - -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubb))] -pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 { - psubb(a, b) -} - -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubb))] -pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 { - _mm_sub_pi8(a, b) -} - -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubw))] -pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 { - psubw(a, b) -} - -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubw))] -pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 { - _mm_sub_pi16(a, b) -} - -/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubd))] -pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 { - psubd(a, b) -} - -/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubd))] -pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 { - _mm_sub_pi32(a, b) -} - -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` -/// using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubsb))] -pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 { - psubsb(a, b) -} - -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` -/// using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubsb))] -pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 { - _mm_subs_pi8(a, b) -} - -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` -/// using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubsw))] -pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 { - psubsw(a, b) -} - -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` -/// using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubsw))] -pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 { - _mm_subs_pi16(a, b) -} - -/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit -/// integers in `a` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubusb))] -pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 { - psubusb(a, b) -} - -/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit -/// integers in `a` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubusb))] -pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 { - _mm_subs_pu8(a, b) -} - -/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned -/// 16-bit integers in `a` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubusw))] -pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 { - psubusw(a, b) -} - -/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned -/// 16-bit integers in `a` using saturation. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(psubusw))] -pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { - _mm_subs_pu16(a, b) -} - -/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers -/// using signed saturation. -/// -/// Positive values greater than 0x7F are saturated to 0x7F. Negative values -/// less than 0x80 are saturated to 0x80. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(packsswb))] -pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { - packsswb(a, b) -} - -/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers -/// using signed saturation. -/// -/// Positive values greater than 0x7F are saturated to 0x7F. Negative values -/// less than 0x80 are saturated to 0x80. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(packssdw))] -pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 { - packssdw(a, b) -} - -/// Compares whether each element of `a` is greater than the corresponding -/// element of `b` returning `0` for `false` and `-1` for `true`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(pcmpgtb))] -pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 { - pcmpgtb(a, b) -} - -/// Compares whether each element of `a` is greater than the corresponding -/// element of `b` returning `0` for `false` and `-1` for `true`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(pcmpgtw))] -pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 { - pcmpgtw(a, b) -} - -/// Compares whether each element of `a` is greater than the corresponding -/// element of `b` returning `0` for `false` and `-1` for `true`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(pcmpgtd))] -pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 { - pcmpgtd(a, b) -} - -/// Unpacks the upper two elements from two `i16x4` vectors and interleaves -/// them into the result: `[a.2, b.2, a.3, b.3]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected -pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 { - punpckhwd(a, b) -} - -/// Unpacks the upper four elements from two `i8x8` vectors and interleaves -/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpckhbw))] -pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 { - punpckhbw(a, b) -} - -/// Unpacks the lower four elements from two `i8x8` vectors and interleaves -/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpcklbw))] -pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 { - punpcklbw(a, b) -} - -/// Unpacks the lower two elements from two `i16x4` vectors and interleaves -/// them into the result: `[a.0 b.0 a.1 b.1]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpcklwd))] -pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 { - punpcklwd(a, b) -} - -/// Unpacks the upper element from two `i32x2` vectors and interleaves them -/// into the result: `[a.1, b.1]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpckhdq))] -pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 { - punpckhdq(a, b) -} - -/// Unpacks the lower element from two `i32x2` vectors and interleaves them -/// into the result: `[a.0, b.0]`. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(punpckldq))] -pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { - punpckldq(a, b) -} - -/// Sets packed 16-bit integers in dst with the supplied values. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { - _mm_setr_pi16(e0, e1, e2, e3) -} - -/// Sets packed 32-bit integers in dst with the supplied values. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { - _mm_setr_pi32(e0, e1) -} - -/// Sets packed 8-bit integers in dst with the supplied values. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { - _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) -} - -/// Broadcasts 16-bit integer a to all all elements of dst. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { - _mm_setr_pi16(a, a, a, a) -} - -/// Broadcasts 32-bit integer a to all all elements of dst. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { - _mm_setr_pi32(a, a) -} - -/// Broadcasts 8-bit integer a to all all elements of dst. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { - _mm_setr_pi8(a, a, a, a, a, a, a, a) -} - -/// Sets packed 16-bit integers in dst with the supplied values in reverse -/// order. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { - transmute(i16x4::new(e0, e1, e2, e3)) -} - -/// Sets packed 32-bit integers in dst with the supplied values in reverse -/// order. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { - transmute(i32x2::new(e0, e1)) -} - -/// Sets packed 8-bit integers in dst with the supplied values in reverse order. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_setr_pi8( - e0: i8, - e1: i8, - e2: i8, - e3: i8, - e4: i8, - e5: i8, - e6: i8, - e7: i8, -) -> __m64 { - transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) -} - -/// Empty the MMX state, which marks the x87 FPU registers as available for use -/// by x87 instructions. This instruction must be used at the end of all MMX -/// technology procedures. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(emms))] -pub unsafe fn _mm_empty() { - emms() -} - -/// Empty the MMX state, which marks the x87 FPU registers as available for use -/// by x87 instructions. This instruction must be used at the end of all MMX -/// technology procedures. -#[inline] -#[target_feature(enable = "mmx")] -#[cfg_attr(test, assert_instr(emms))] -pub unsafe fn _m_empty() { - emms() -} - -/// Copies 32-bit integer `a` to the lower elements of the return value, and zero -/// the upper element of the return value. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_cvtsi32_si64(a: i32) -> __m64 { - transmute(i32x2::new(a, 0)) -} - -/// Return the lower 32-bit integer in `a`. -#[inline] -#[target_feature(enable = "mmx")] -pub unsafe fn _mm_cvtsi64_si32(a: __m64) -> i32 { - let r: i32x2 = transmute(a); - r.0 -} - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.mmx.padd.b"] - fn paddb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.padd.w"] - fn paddw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.padd.d"] - fn paddd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.padds.b"] - fn paddsb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.padds.w"] - fn paddsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.paddus.b"] - fn paddusb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.paddus.w"] - fn paddusw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psub.b"] - fn psubb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psub.w"] - fn psubw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psub.d"] - fn psubd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psubs.b"] - fn psubsb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psubs.w"] - fn psubsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psubus.b"] - fn psubusb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psubus.w"] - fn psubusw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.packsswb"] - fn packsswb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.packssdw"] - fn packssdw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pcmpgt.b"] - fn pcmpgtb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pcmpgt.w"] - fn pcmpgtw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pcmpgt.d"] - fn pcmpgtd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpckhwd"] - fn punpckhwd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpcklwd"] - fn punpcklwd(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpckhbw"] - fn punpckhbw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpcklbw"] - fn punpcklbw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpckhdq"] - fn punpckhdq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.punpckldq"] - fn punpckldq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.emms"] - fn emms(); -} - -#[cfg(test)] -mod tests { - use crate::core_arch::x86::*; - use stdarch_test::simd_test; - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_setzero_si64() { - let r: __m64 = transmute(0_i64); - assert_eq_m64(r, _mm_setzero_si64()); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_add_pi8() { - let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0); - let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1); - let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1); - assert_eq_m64(e, _mm_add_pi8(a, b)); - assert_eq_m64(e, _m_paddb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_add_pi16() { - let a = _mm_setr_pi16(-1, -1, 1, 1); - let b = _mm_setr_pi16(i16::MIN + 1, 30001, -30001, i16::MAX - 1); - let e = _mm_setr_pi16(i16::MIN, 30000, -30000, i16::MAX); - assert_eq_m64(e, _mm_add_pi16(a, b)); - assert_eq_m64(e, _m_paddw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_add_pi32() { - let a = _mm_setr_pi32(1, -1); - let b = _mm_setr_pi32(i32::MAX - 1, i32::MIN + 1); - let e = _mm_setr_pi32(i32::MAX, i32::MIN); - assert_eq_m64(e, _mm_add_pi32(a, b)); - assert_eq_m64(e, _m_paddd(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_adds_pi8() { - let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0); - let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1); - let e = _mm_setr_pi8(i8::MIN, 0, 0, i8::MAX, -1, -1, 1, 1); - assert_eq_m64(e, _mm_adds_pi8(a, b)); - assert_eq_m64(e, _m_paddsb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_adds_pi16() { - let a = _mm_setr_pi16(-32000, 32000, 4, 0); - let b = _mm_setr_pi16(-32000, 32000, -5, 1); - let e = _mm_setr_pi16(i16::MIN, i16::MAX, -1, 1); - assert_eq_m64(e, _mm_adds_pi16(a, b)); - assert_eq_m64(e, _m_paddsw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_adds_pu8() { - let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8); - let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8); - let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::MAX as i8); - assert_eq_m64(e, _mm_adds_pu8(a, b)); - assert_eq_m64(e, _m_paddusb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_adds_pu16() { - let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16); - let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16); - let e = _mm_setr_pi16(0, 11, 22, u16::MAX as i16); - assert_eq_m64(e, _mm_adds_pu16(a, b)); - assert_eq_m64(e, _m_paddusw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_sub_pi8() { - let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0); - let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127); - let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127); - assert_eq_m64(e, _mm_sub_pi8(a, b)); - assert_eq_m64(e, _m_psubb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_sub_pi16() { - let a = _mm_setr_pi16(-20000, -20000, 20000, 30000); - let b = _mm_setr_pi16(-10000, 10000, -10000, 30000); - let e = _mm_setr_pi16(-10000, -30000, 30000, 0); - assert_eq_m64(e, _mm_sub_pi16(a, b)); - assert_eq_m64(e, _m_psubw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_sub_pi32() { - let a = _mm_setr_pi32(500_000, -500_000); - let b = _mm_setr_pi32(500_000, 500_000); - let e = _mm_setr_pi32(0, -1_000_000); - assert_eq_m64(e, _mm_sub_pi32(a, b)); - assert_eq_m64(e, _m_psubd(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_subs_pi8() { - let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5); - let b = _mm_setr_pi8(100, -100, i8::MIN, 127, -1, 1, 3, -3); - let e = _mm_setr_pi8(i8::MIN, i8::MAX, i8::MAX, -127, 1, -1, -8, 8); - assert_eq_m64(e, _mm_subs_pi8(a, b)); - assert_eq_m64(e, _m_psubsb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_subs_pi16() { - let a = _mm_setr_pi16(-20000, 20000, 0, 0); - let b = _mm_setr_pi16(20000, -20000, -1, 1); - let e = _mm_setr_pi16(i16::MIN, i16::MAX, 1, -1); - assert_eq_m64(e, _mm_subs_pi16(a, b)); - assert_eq_m64(e, _m_psubsw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_subs_pu8() { - let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80); - let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0); - let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80); - assert_eq_m64(e, _mm_subs_pu8(a, b)); - assert_eq_m64(e, _m_psubusb(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_subs_pu16() { - let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16); - let b = _mm_setr_pi16(20000, 300, 1, 11111); - let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16); - assert_eq_m64(e, _mm_subs_pu16(a, b)); - assert_eq_m64(e, _m_psubusw(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_packs_pi16() { - let a = _mm_setr_pi16(-1, 2, -3, 4); - let b = _mm_setr_pi16(-5, 6, -7, 8); - let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8); - assert_eq_m64(r, _mm_packs_pi16(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_packs_pi32() { - let a = _mm_setr_pi32(-1, 2); - let b = _mm_setr_pi32(-5, 6); - let r = _mm_setr_pi16(-1, 2, -5, 6); - assert_eq_m64(r, _mm_packs_pi32(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_cmpgt_pi8() { - let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); - let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1); - let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1); - assert_eq_m64(r, _mm_cmpgt_pi8(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_cmpgt_pi16() { - let a = _mm_setr_pi16(0, 1, 2, 3); - let b = _mm_setr_pi16(4, 3, 2, 1); - let r = _mm_setr_pi16(0, 0, 0, -1); - assert_eq_m64(r, _mm_cmpgt_pi16(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_cmpgt_pi32() { - let a = _mm_setr_pi32(0, 3); - let b = _mm_setr_pi32(1, 2); - let r0 = _mm_setr_pi32(0, -1); - let r1 = _mm_setr_pi32(-1, 0); - - assert_eq_m64(r0, _mm_cmpgt_pi32(a, b)); - assert_eq_m64(r1, _mm_cmpgt_pi32(b, a)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpackhi_pi8() { - let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15); - let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14); - let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14); - - assert_eq_m64(r, _mm_unpackhi_pi8(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpacklo_pi8() { - let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); - let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11); - assert_eq_m64(r, _mm_unpacklo_pi8(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpackhi_pi16() { - let a = _mm_setr_pi16(0, 1, 2, 3); - let b = _mm_setr_pi16(4, 5, 6, 7); - let r = _mm_setr_pi16(2, 6, 3, 7); - assert_eq_m64(r, _mm_unpackhi_pi16(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpacklo_pi16() { - let a = _mm_setr_pi16(0, 1, 2, 3); - let b = _mm_setr_pi16(4, 5, 6, 7); - let r = _mm_setr_pi16(0, 4, 1, 5); - assert_eq_m64(r, _mm_unpacklo_pi16(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpackhi_pi32() { - let a = _mm_setr_pi32(0, 3); - let b = _mm_setr_pi32(1, 2); - let r = _mm_setr_pi32(3, 2); - - assert_eq_m64(r, _mm_unpackhi_pi32(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_unpacklo_pi32() { - let a = _mm_setr_pi32(0, 3); - let b = _mm_setr_pi32(1, 2); - let r = _mm_setr_pi32(0, 1); - - assert_eq_m64(r, _mm_unpacklo_pi32(a, b)); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_empty() { - _mm_empty(); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_m_empty() { - _m_empty(); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_cvtsi32_si64() { - let a = _mm_cvtsi32_si64(42); - let b = _mm_setr_pi32(42, 0); - assert_eq_m64(a, b); - } - - #[simd_test(enable = "mmx")] - unsafe fn test_mm_cvtsi64_si32() { - let a = _mm_setr_pi32(42, 666); - let b = _mm_cvtsi64_si32(a); - assert_eq!(b, 42); - } -} diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 60eb890c2f..0edaa13689 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -6,50 +6,6 @@ use crate::{intrinsics, marker::Sized, mem::transmute}; mod macros; types! { - /// 64-bit wide integer vector type, x86-specific - /// - /// This type is the same as the `__m64` type defined by Intel, - /// representing a 64-bit SIMD register. Usage of this type typically - /// corresponds to the `mmx` target feature. - /// - /// Internally this type may be viewed as: - /// - /// * `i8x8` - eight `i8` variables packed together - /// * `i16x4` - four `i16` variables packed together - /// * `i32x2` - two `i32` variables packed together - /// - /// (as well as unsigned versions). Each intrinsic may interpret the - /// internal bits differently, check the documentation of the intrinsic - /// to see how it's being used. - /// - /// Note that this means that an instance of `__m64` typically just means - /// a "bag of bits" which is left up to interpretation at the point of use. - /// - /// Most intrinsics using `__m64` are prefixed with `_mm_` and the - /// integer types tend to correspond to suffixes like "pi8" or "pi32" (not - /// to be confused with "epiXX", used for `__m128i`). - /// - /// # Examples - /// - /// ``` - /// # #![feature(stdsimd, mmx_target_feature)] - /// #[cfg(target_arch = "x86")] - /// use std::arch::x86::*; - /// #[cfg(target_arch = "x86_64")] - /// use std::arch::x86_64::*; - /// - /// # fn main() { - /// # #[target_feature(enable = "mmx")] - /// # unsafe fn foo() { - /// let all_bytes_zero = _mm_setzero_si64(); - /// let all_bytes_one = _mm_set1_pi8(1); - /// let two_i32 = _mm_set_pi32(1, 2); - /// # } - /// # if is_x86_feature_detected!("mmx") { unsafe { foo() } } - /// # } - /// ``` - pub struct __m64(i64); - /// 128-bit wide integer vector type, x86-specific /// /// This type is the same as the `__m128i` type defined by Intel, @@ -359,49 +315,6 @@ mod test; #[cfg(test)] pub use self::test::*; -#[allow(non_camel_case_types)] -#[unstable(feature = "stdimd_internal", issue = "none")] -pub(crate) trait m64Ext: Sized { - fn as_m64(self) -> __m64; - - #[inline] - fn as_u8x8(self) -> crate::core_arch::simd::u8x8 { - unsafe { transmute(self.as_m64()) } - } - - #[inline] - fn as_u16x4(self) -> crate::core_arch::simd::u16x4 { - unsafe { transmute(self.as_m64()) } - } - - #[inline] - fn as_u32x2(self) -> crate::core_arch::simd::u32x2 { - unsafe { transmute(self.as_m64()) } - } - - #[inline] - fn as_i8x8(self) -> crate::core_arch::simd::i8x8 { - unsafe { transmute(self.as_m64()) } - } - - #[inline] - fn as_i16x4(self) -> crate::core_arch::simd::i16x4 { - unsafe { transmute(self.as_m64()) } - } - - #[inline] - fn as_i32x2(self) -> crate::core_arch::simd::i32x2 { - unsafe { transmute(self.as_m64()) } - } -} - -impl m64Ext for __m64 { - #[inline] - fn as_m64(self) -> Self { - self - } -} - #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "none")] pub(crate) trait m128iExt: Sized { @@ -649,9 +562,6 @@ mod tbm; #[cfg(not(stdarch_intel_sde))] pub use self::tbm::*; -mod mmx; -pub use self::mmx::*; - mod pclmulqdq; pub use self::pclmulqdq::*; diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index c01aa1bc9f..ba3efae3c9 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1115,33 +1115,6 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) } -/// Sets the upper two single-precision floating-point values with 64 bits of -/// data loaded from the address `p`; the lower two values are passed through -/// from `a`. -#[inline] -#[target_feature(enable = "sse")] -#[cfg_attr(test, assert_instr(movhps))] -// TODO: this function is actually not limited to floats, but that's what -// what matches the C type most closely: `(__m128, *const __m64) -> __m128`. -pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { - let q = p as *const f32x2; - let b: f32x2 = *q; - let bb = simd_shuffle4(b, b, [0, 1, 0, 1]); - simd_shuffle4(a, bb, [0, 1, 4, 5]) -} - -/// Loads two floats from `p` into the lower half of a `__m128`. The upper half -/// is copied from the upper half of `a`. -#[inline] -#[target_feature(enable = "sse")] -#[cfg_attr(test, assert_instr(movlps))] -pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 { - let q = p as *const f32x2; - let b: f32x2 = *q; - let bb = simd_shuffle4(b, b, [0, 1, 0, 1]); - simd_shuffle4(a, bb, [4, 5, 2, 3]) -} - /// Construct a `__m128` with the lowest element read from `p` and the other /// elements set to zero. /// @@ -1270,72 +1243,6 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { transmute(i64x2(0, ptr::read_unaligned(mem_addr as *const i64))) } -/// Stores the upper half of `a` (64 bits) into memory. -/// -/// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may -/// choose to generate an equivalent sequence of other instructions. -#[inline] -#[target_feature(enable = "sse")] -// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's -// fine. -// On i586 (no SSE2) it just generates plain MOV instructions. -#[cfg_attr( - all(test, any(target_arch = "x86_64", target_feature = "sse2"), - not(target_os = "windows")), - // assert_instr(movhpd) - assert_instr(movhps) // LLVM7 prefers single-precision instructions -)] -pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) { - #[cfg(target_arch = "x86")] - { - // If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which - // is just silly - let a64: u64x2 = mem::transmute(a); - let a_hi = a64.extract(1); - *(p as *mut u64) = a_hi; - } - #[cfg(target_arch = "x86_64")] - { - // If this is a `u64x2` LLVM generates a pshufd + movq, but we really - // want a a MOVHPD or MOVHPS here. - let a64: f64x2 = mem::transmute(a); - let a_hi = a64.extract(1); - *p = mem::transmute(a_hi); - } -} - -/// Stores the lower half of `a` (64 bits) into memory. -/// -/// This intrinsic corresponds to the `MOVQ` instruction. The compiler may -/// choose to generate an equivalent sequence of other instructions. -#[inline] -#[target_feature(enable = "sse")] -// On i586 the codegen just generates plane MOVs. No need to test for that. -#[cfg_attr( - all( - test, - any(target_arch = "x86_64", target_feature = "sse2"), - not(target_os = "windows") - ), - assert_instr(movlps) -)] -pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) { - #[cfg(target_arch = "x86")] - { - // Same as for _mm_storeh_pi: i586 code gen would use floating point - // stack. - let a64: u64x2 = mem::transmute(a); - let a_hi = a64.extract(0); - *(p as *mut u64) = a_hi; - } - #[cfg(target_arch = "x86_64")] - { - let a64: f64x2 = mem::transmute(a); - let a_hi = a64.extract(0); - *p = mem::transmute(a_hi); - } -} - /// Stores the lowest 32 bit float of `a` into memory. /// /// This intrinsic corresponds to the `MOVSS` instruction. @@ -1985,42 +1892,6 @@ extern "C" { fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32); #[link_name = "llvm.x86.sse.cmp.ss"] fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128; - #[link_name = "llvm.x86.mmx.movnt.dq"] - fn movntdq(a: *mut __m64, b: __m64); - #[link_name = "llvm.x86.sse.cvtpi2ps"] - fn cvtpi2ps(a: __m128, b: __m64) -> __m128; - #[link_name = "llvm.x86.mmx.maskmovq"] - fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8); - #[link_name = "llvm.x86.mmx.pextr.w"] - fn pextrw(a: __m64, imm8: i32) -> i32; - #[link_name = "llvm.x86.mmx.pinsr.w"] - fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64; - #[link_name = "llvm.x86.mmx.pmovmskb"] - fn pmovmskb(a: __m64) -> i32; - #[link_name = "llvm.x86.sse.pshuf.w"] - fn pshufw(a: __m64, imm8: i8) -> __m64; - #[link_name = "llvm.x86.mmx.pmaxs.w"] - fn pmaxsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmaxu.b"] - fn pmaxub(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmins.w"] - fn pminsw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pminu.b"] - fn pminub(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmulhu.w"] - fn pmulhuw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmull.w"] - fn pmullw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pavg.b"] - fn pavgb(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pavg.w"] - fn pavgw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psad.bw"] - fn psadbw(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.sse.cvtps2pi"] - fn cvtps2pi(a: __m128) -> __m64; - #[link_name = "llvm.x86.sse.cvttps2pi"] - fn cvttps2pi(a: __m128) -> __m64; } /// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint. @@ -2038,463 +1909,6 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } -/// Stores 64-bits of integer data from a into memory using a non-temporal -/// memory hint. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(movntq))] -pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) { - movntdq(mem_addr, a) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { - pmaxsw(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { - _mm_max_pi16(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { - pmaxub(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// greatest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { - _mm_max_pu8(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { - pminsw(a, b) -} - -/// Compares the packed 16-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { - _mm_min_pi16(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { - pminub(a, b) -} - -/// Compares the packed 8-bit signed integers of `a` and `b` writing the -/// smallest value into the result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { - _mm_min_pu8(a, b) -} - -/// Multiplies packed 16-bit unsigned integer values and writes the -/// high-order 16 bits of each 32-bit product to the corresponding bits in -/// the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { - pmulhuw(a, b) -} - -/// Multiplies packed 16-bit integer values and writes the -/// low-order 16 bits of each 32-bit product to the corresponding bits in -/// the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmullw))] -pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 { - pmullw(a, b) -} - -/// Multiplies packed 16-bit unsigned integer values and writes the -/// high-order 16 bits of each 32-bit product to the corresponding bits in -/// the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { - _mm_mulhi_pu16(a, b) -} - -/// Computes the rounded averages of the packed unsigned 8-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { - pavgb(a, b) -} - -/// Computes the rounded averages of the packed unsigned 8-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { - _mm_avg_pu8(a, b) -} - -/// Computes the rounded averages of the packed unsigned 16-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { - pavgw(a, b) -} - -/// Computes the rounded averages of the packed unsigned 16-bit integer -/// values and writes the averages to the corresponding bits in the -/// destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { - _mm_avg_pu16(a, b) -} - -/// Subtracts the corresponding 8-bit unsigned integer values of the two -/// 64-bit vector operands and computes the absolute value for each of the -/// difference. Then sum of the 8 absolute differences is written to the -/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { - psadbw(a, b) -} - -/// Subtracts the corresponding 8-bit unsigned integer values of the two -/// 64-bit vector operands and computes the absolute value for each of the -/// difference. Then sum of the 8 absolute differences is written to the -/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { - _mm_sad_pu8(a, b) -} - -/// Converts two elements of a 64-bit vector of `[2 x i32]` into two -/// floating point values and writes them to the lower 64-bits of the -/// destination. The remaining higher order elements of the destination are -/// copied from the corresponding elements in the first operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { - cvtpi2ps(a, b) -} - -/// Converts two elements of a 64-bit vector of `[2 x i32]` into two -/// floating point values and writes them to the lower 64-bits of the -/// destination. The remaining higher order elements of the destination are -/// copied from the corresponding elements in the first operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { - _mm_cvtpi32_ps(a, b) -} - -/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_cmpgt_pi8(b, a); - let b = _mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - -/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(b) -} - -/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let b = _mm_cmpgt_pi16(b, a); - let c = _mm_unpackhi_pi16(a, b); - let r = _mm_setzero_ps(); - let r = cvtpi2ps(r, c); - let r = _mm_movelh_ps(r, r); - let c = _mm_unpacklo_pi16(a, b); - cvtpi2ps(r, c) -} - -/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { - let b = _mm_setzero_si64(); - let c = _mm_unpackhi_pi16(a, b); - let r = _mm_setzero_ps(); - let r = cvtpi2ps(r, c); - let r = _mm_movelh_ps(r, r); - let c = _mm_unpacklo_pi16(a, b); - cvtpi2ps(r, c) -} - -/// Converts the two 32-bit signed integer values from each 64-bit vector -/// operand of `[2 x i32]` into a 128-bit vector of `[4 x float]`. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { - let c = _mm_setzero_ps(); - let c = _mm_cvtpi32_ps(c, b); - let c = _mm_movelh_ps(c, c); - _mm_cvtpi32_ps(c, a) -} - -/// Conditionally copies the values from each 8-bit element in the first -/// 64-bit integer vector operand to the specified memory location, as -/// specified by the most significant bit in the corresponding element in the -/// second 64-bit integer vector operand. -/// -/// To minimize caching, the data is flagged as non-temporal -/// (unlikely to be used again soon). -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { - maskmovq(a, mask, mem_addr) -} - -/// Conditionally copies the values from each 8-bit element in the first -/// 64-bit integer vector operand to the specified memory location, as -/// specified by the most significant bit in the corresponding element in the -/// second 64-bit integer vector operand. -/// -/// To minimize caching, the data is flagged as non-temporal -/// (unlikely to be used again soon). -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { - _mm_maskmove_si64(a, mask, mem_addr) -} - -/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and -/// returns it, as specified by the immediate integer operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 { - macro_rules! call { - ($imm2:expr) => { - pextrw(a, $imm2) as i32 - }; - } - constify_imm2!(imm2, call) -} - -/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and -/// returns it, as specified by the immediate integer operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 { - macro_rules! call { - ($imm2:expr) => { - pextrw(a, $imm2) as i32 - }; - } - constify_imm2!(imm2, call) -} - -/// Copies data from the 64-bit vector of `[4 x i16]` to the destination, -/// and inserts the lower 16-bits of an integer operand at the 16-bit offset -/// specified by the immediate operand `n`. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { - macro_rules! call { - ($imm2:expr) => { - pinsrw(a, d, $imm2) - }; - } - constify_imm2!(imm2, call) -} - -/// Copies data from the 64-bit vector of `[4 x i16]` to the destination, -/// and inserts the lower 16-bits of an integer operand at the 16-bit offset -/// specified by the immediate operand `n`. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { - macro_rules! call { - ($imm2:expr) => { - pinsrw(a, d, $imm2) - }; - } - constify_imm2!(imm2, call) -} - -/// Takes the most significant bit from each 8-bit element in a 64-bit -/// integer vector to create a 16-bit mask value. Zero-extends the value to -/// 32-bit integer and writes it to the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { - pmovmskb(a) -} - -/// Takes the most significant bit from each 8-bit element in a 64-bit -/// integer vector to create a 16-bit mask value. Zero-extends the value to -/// 32-bit integer and writes it to the destination. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { - _mm_movemask_pi8(a) -} - -/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the -/// destination, as specified by the immediate value operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { - pshufw(a, $imm8) - }; - } - constify_imm8!(imm8, call) -} - -/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the -/// destination, as specified by the immediate value operand. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { - pshufw(a, $imm8) - }; - } - constify_imm8!(imm8, call) -} - -/// Converts the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers with truncation. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { - cvttps2pi(a) -} - -/// Converts the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers with truncation. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { - _mm_cvttps_pi32(a) -} - -/// Converts the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { - cvtps2pi(a) -} - -/// Converts the two lower packed single-precision (32-bit) floating-point -/// elements in `a` to packed 32-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { - _mm_cvtps_pi32(a) -} - -/// Converts packed single-precision (32-bit) floating-point elements in `a` to -/// packed 16-bit integers. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { - let b = _mm_cvtps_pi32(a); - let a = _mm_movehl_ps(a, a); - let c = _mm_cvtps_pi32(a); - _mm_packs_pi32(b, c) -} - -/// Converts packed single-precision (32-bit) floating-point elements in `a` to -/// packed 8-bit integers, and returns theem in the lower 4 elements of the -/// result. -#[inline] -#[target_feature(enable = "sse,mmx")] -#[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { - let b = _mm_cvtps_pi16(a); - let c = _mm_setzero_si64(); - _mm_packs_pi16(b, c) -} - #[cfg(test)] mod tests { use crate::{hint::black_box, mem::transmute}; @@ -3593,24 +3007,6 @@ mod tests { assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0)); } - #[simd_test(enable = "sse")] - unsafe fn test_mm_loadh_pi() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0]; - let p = x[..].as_ptr(); - let r = _mm_loadh_pi(a, p as *const _); - assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0)); - } - - #[simd_test(enable = "sse")] - unsafe fn test_mm_loadl_pi() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0]; - let p = x[..].as_ptr(); - let r = _mm_loadl_pi(a, p as *const _); - assert_eq_m128(r, _mm_setr_ps(5.0, 6.0, 3.0, 4.0)); - } - #[simd_test(enable = "sse")] unsafe fn test_mm_load_ss() { let a = 42.0f32; @@ -3684,28 +3080,6 @@ mod tests { assert_eq_m128i(r, _mm_set_epi64x(5, 0)); } - #[simd_test(enable = "sse")] - unsafe fn test_mm_storeh_pi() { - let mut vals = [0.0f32; 8]; - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - _mm_storeh_pi(vals.as_mut_ptr() as *mut _, a); - - assert_eq!(vals[0], 3.0); - assert_eq!(vals[1], 4.0); - assert_eq!(vals[2], 0.0); - } - - #[simd_test(enable = "sse")] - unsafe fn test_mm_storel_pi() { - let mut vals = [0.0f32; 8]; - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - _mm_storel_pi(vals.as_mut_ptr() as *mut _, a); - - assert_eq!(vals[0], 1.0); - assert_eq!(vals[1], 2.0); - assert_eq!(vals[2], 0.0); - } - #[simd_test(enable = "sse")] unsafe fn test_mm_store_ss() { let mut vals = [0.0f32; 8]; @@ -3926,254 +3300,4 @@ mod tests { assert_eq!(mem.data[i], get_m128(a, i)); } } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_stream_pi() { - let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7)); - let mut mem = boxed::Box::<__m64>::new(transmute(i8x8::splat(1))); - _mm_stream_pi(&mut *mem as *mut _ as *mut _, a); - assert_eq_m64(a, *mem); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_max_pi16() { - let a = _mm_setr_pi16(-1, 6, -3, 8); - let b = _mm_setr_pi16(5, -2, 7, -4); - let r = _mm_setr_pi16(5, 6, 7, 8); - - assert_eq_m64(r, _mm_max_pi16(a, b)); - assert_eq_m64(r, _m_pmaxsw(a, b)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_max_pu8() { - let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); - let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); - let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8); - - assert_eq_m64(r, _mm_max_pu8(a, b)); - assert_eq_m64(r, _m_pmaxub(a, b)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_min_pi16() { - let a = _mm_setr_pi16(-1, 6, -3, 8); - let b = _mm_setr_pi16(5, -2, 7, -4); - let r = _mm_setr_pi16(-1, -2, -3, -4); - - assert_eq_m64(r, _mm_min_pi16(a, b)); - assert_eq_m64(r, _m_pminsw(a, b)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_min_pu8() { - let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); - let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); - let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4); - - assert_eq_m64(r, _mm_min_pu8(a, b)); - assert_eq_m64(r, _m_pminub(a, b)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_mulhi_pu16() { - let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); - let r = _mm_mulhi_pu16(a, b); - assert_eq_m64(r, _mm_set1_pi16(15)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_mullo_pi16() { - let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); - let r = _mm_mullo_pi16(a, b); - assert_eq_m64(r, _mm_set1_pi16(17960)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_m_pmulhuw() { - let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); - let r = _m_pmulhuw(a, b); - assert_eq_m64(r, _mm_set1_pi16(15)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_avg_pu8() { - let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); - let r = _mm_avg_pu8(a, b); - assert_eq_m64(r, _mm_set1_pi8(6)); - - let r = _m_pavgb(a, b); - assert_eq_m64(r, _mm_set1_pi8(6)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_avg_pu16() { - let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); - let r = _mm_avg_pu16(a, b); - assert_eq_m64(r, _mm_set1_pi16(6)); - - let r = _m_pavgw(a, b); - assert_eq_m64(r, _mm_set1_pi16(6)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_sad_pu8() { - #[rustfmt::skip] - let a = _mm_setr_pi8( - 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, - 1, 2, 3, 4, - ); - let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); - let r = _mm_sad_pu8(a, b); - assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); - - let r = _m_psadbw(a, b); - assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpi32_ps() { - let a = _mm_setr_ps(0., 0., 3., 4.); - let b = _mm_setr_pi32(1, 2); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi32_ps(a, b); - assert_eq_m128(r, expected); - - let r = _mm_cvt_pi2ps(a, b); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpi16_ps() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi16_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpu16_ps() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu16_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpi8_ps() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi8_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpu8_ps() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu8_ps(a); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtpi32x2_ps() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(3, 4); - let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi32x2_ps(a, b); - assert_eq_m128(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_maskmove_si64() { - let a = _mm_set1_pi8(9); - let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); - let mut r = _mm_set1_pi8(0); - _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); - let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); - assert_eq_m64(r, e); - - let mut r = _mm_set1_pi8(0); - _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); - assert_eq_m64(r, e); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_extract_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_extract_pi16(a, 0); - assert_eq!(r, 1); - let r = _mm_extract_pi16(a, 1); - assert_eq!(r, 2); - - let r = _m_pextrw(a, 1); - assert_eq!(r, 2); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_insert_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_insert_pi16(a, 0, 0b0); - let expected = _mm_setr_pi16(0, 2, 3, 4); - assert_eq_m64(r, expected); - let r = _mm_insert_pi16(a, 0, 0b10); - let expected = _mm_setr_pi16(1, 2, 0, 4); - assert_eq_m64(r, expected); - - let r = _m_pinsrw(a, 0, 0b10); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_movemask_pi8() { - let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); - let r = _mm_movemask_pi8(a); - assert_eq!(r, 0b10001); - - let r = _m_pmovmskb(a); - assert_eq!(r, 0b10001); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_shuffle_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let r = _mm_shuffle_pi16(a, 0b00_01_01_11); - let expected = _mm_setr_pi16(4, 2, 2, 1); - assert_eq_m64(r, expected); - - let r = _m_pshufw(a, 0b00_01_01_11); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtps_pi32() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi32(1, 2); - - assert_eq_m64(r, _mm_cvtps_pi32(a)); - assert_eq_m64(r, _mm_cvt_ps2pi(a)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvttps_pi32() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi32(7, 2); - - assert_eq_m64(r, _mm_cvttps_pi32(a)); - assert_eq_m64(r, _mm_cvtt_ps2pi(a)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtps_pi16() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi16(7, 2, 3, 4); - assert_eq_m64(r, _mm_cvtps_pi16(a)); - } - - #[simd_test(enable = "sse,mmx")] - unsafe fn test_mm_cvtps_pi8() { - let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); - assert_eq_m64(r, _mm_cvtps_pi8(a)); - } } diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 90a2cf7a70..b6c19cdef4 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2958,113 +2958,6 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [0, 2]) } -/// Adds two signed or unsigned 64-bit integer values, returning the -/// lower 64 bits of the sum. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(paddq))] -pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { - paddq(a, b) -} - -/// Multiplies 32-bit unsigned integer values contained in the lower bits -/// of the two 64-bit integer vectors and returns the 64-bit unsigned -/// product. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(pmuludq))] -pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { - pmuludq2(a, b) -} - -/// Subtracts signed or unsigned 64-bit integer values and writes the -/// difference to the corresponding bits in the destination. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(psubq))] -pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { - psubq(a, b) -} - -/// Converts the two signed 32-bit integer elements of a 64-bit vector of -/// `[2 x i32]` into two double-precision floating-point values, returned in a -/// 128-bit vector of `[2 x double]`. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvtpi2pd))] -pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { - cvtpi2pd(a) -} - -/// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with -/// the specified 64-bit integer values. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { - _mm_set_epi64x(transmute(e1), transmute(e0)) -} - -/// Initializes both values in a 128-bit vector of `[2 x i64]` with the -/// specified 64-bit value. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { - _mm_set_epi64x(transmute(a), transmute(a)) -} - -/// Constructs a 128-bit integer vector, initialized in reverse order -/// with the specified 64-bit integral values. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// no particular instruction to test -pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { - _mm_set_epi64x(transmute(e0), transmute(e1)) -} - -/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit -/// integer. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong -// instr? -pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { - transmute(simd_extract::<_, i64>(a.as_i64x2(), 0)) -} - -/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the -/// upper bits. -#[inline] -#[target_feature(enable = "sse2,mmx")] -// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong -// instr? -pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { - _mm_set_epi64x(0, transmute(a)) -} - -/// Converts the two double-precision floating-point elements of a -/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values, -/// returned in a 64-bit vector of `[2 x i32]`. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvtpd2pi))] -pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { - cvtpd2pi(a) -} - -/// Converts the two double-precision floating-point elements of a -/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values, -/// returned in a 64-bit vector of `[2 x i32]`. -/// If the result of either conversion is inexact, the result is truncated -/// (rounded towards zero) regardless of the current MXCSR setting. -#[inline] -#[target_feature(enable = "sse2,mmx")] -#[cfg_attr(test, assert_instr(cvttpd2pi))] -pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { - cvttpd2pi(a) -} - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse2.pause"] @@ -3207,18 +3100,6 @@ extern "C" { fn storeudq(mem_addr: *mut i8, a: __m128i); #[link_name = "llvm.x86.sse2.storeu.pd"] fn storeupd(mem_addr: *mut i8, a: __m128d); - #[link_name = "llvm.x86.mmx.padd.q"] - fn paddq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.pmulu.dq"] - fn pmuludq2(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.mmx.psub.q"] - fn psubq(a: __m64, b: __m64) -> __m64; - #[link_name = "llvm.x86.sse.cvtpi2pd"] - fn cvtpi2pd(a: __m64) -> __m128d; - #[link_name = "llvm.x86.sse.cvtpd2pi"] - fn cvtpd2pi(a: __m128d) -> __m64; - #[link_name = "llvm.x86.sse.cvttpd2pi"] - fn cvttpd2pi(a: __m128d) -> __m64; } #[cfg(test)] @@ -5208,87 +5089,4 @@ mod tests { let r = _mm_castsi128_ps(a); assert_eq_m128(r, expected); } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_add_si64() { - let a = 1i64; - let b = 2i64; - let expected = 3i64; - let r = _mm_add_si64(transmute(a), transmute(b)); - assert_eq!(transmute::<__m64, i64>(r), expected); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_mul_su32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(3, 4); - let expected = 3u64; - let r = _mm_mul_su32(a, b); - assert_eq_m64(r, transmute(expected)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_sub_si64() { - let a = 1i64; - let b = 2i64; - let expected = -1i64; - let r = _mm_sub_si64(transmute(a), transmute(b)); - assert_eq!(transmute::<__m64, i64>(r), expected); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_cvtpi32_pd() { - let a = _mm_setr_pi32(1, 2); - let expected = _mm_setr_pd(1., 2.); - let r = _mm_cvtpi32_pd(a); - assert_eq_m128d(r, expected); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_set_epi64() { - let r = _mm_set_epi64(transmute(1i64), transmute(2i64)); - assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_set1_epi64() { - let r = _mm_set1_epi64(transmute(1i64)); - assert_eq_m128i(r, _mm_setr_epi64x(1, 1)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_setr_epi64() { - let r = _mm_setr_epi64(transmute(1i64), transmute(2i64)); - assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_movepi64_pi64() { - let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); - assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_movpi64_epi64() { - let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); - assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_cvtpd_pi32() { - let a = _mm_setr_pd(5., 0.); - let r = _mm_cvtpd_pi32(a); - assert_eq_m64(r, _mm_setr_pi32(5, 0)); - } - - #[simd_test(enable = "sse2,mmx")] - unsafe fn test_mm_cvttpd_pi32() { - let a = _mm_setr_pd(5., 0.); - let r = _mm_cvttpd_pi32(a); - assert_eq_m64(r, _mm_setr_pi32(5, 0)); - - let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); - let r = _mm_cvttpd_pi32(a); - assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN)); - } } diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 6a45603e44..669f51ce7b 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -299,169 +299,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } -/// Computes the absolute value of packed 8-bit integers in `a` and -/// return the unsigned results. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsb))] -pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { - pabsb(a) -} - -/// Computes the absolute value of packed 8-bit integers in `a`, and returns the -/// unsigned results. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsw))] -pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { - pabsw(a) -} - -/// Computes the absolute value of packed 32-bit integers in `a`, and returns the -/// unsigned results. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pabsd))] -pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { - pabsd(a) -} - -/// Shuffles packed 8-bit integers in `a` according to shuffle control mask in -/// the corresponding 8-bit element of `b`, and returns the results -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pshufb))] -pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { - pshufb(a, b) -} - -/// Concatenates the two 64-bit integer vector operands, and right-shifts -/// the result by the number of bytes specified in the immediate operand. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(palignr, n = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { - macro_rules! call { - ($imm8:expr) => { - palignrb(a, b, $imm8) - }; - } - constify_imm8!(n, call) -} - -/// Horizontally adds the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of `[4 x i16]`. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddw))] -pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { - phaddw(a, b) -} - -/// Horizontally adds the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of `[2 x i32]`. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddd))] -pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { - phaddd(a, b) -} - -/// Horizontally adds the adjacent pairs of values contained in 2 packed -/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are -/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phaddsw))] -pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { - phaddsw(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of `[4 x i16]`. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubw))] -pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { - phsubw(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of `[2 x i32]`. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubd))] -pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { - phsubd(a, b) -} - -/// Horizontally subtracts the adjacent pairs of values contained in 2 -/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than -/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are -/// saturated to 8000h. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(phsubsw))] -pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { - phsubsw(a, b) -} - -/// Multiplies corresponding pairs of packed 8-bit unsigned integer -/// values contained in the first source operand and packed 8-bit signed -/// integer values contained in the second source operand, adds pairs of -/// contiguous products with signed saturation, and writes the 16-bit sums to -/// the corresponding bits in the destination. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pmaddubsw))] -pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { - pmaddubsw(a, b) -} - -/// Multiplies packed 16-bit signed integer values, truncates the 32-bit -/// products to the 18 most significant bits by right-shifting, rounds the -/// truncated value by adding 1, and writes bits `[16:1]` to the destination. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(pmulhrsw))] -pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { - pmulhrsw(a, b) -} - -/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and returns the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignb))] -pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { - psignb(a, b) -} - -/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and returns the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignw))] -pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { - psignw(a, b) -} - -/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and returns the results. -/// Element in result are zeroed out when the corresponding element in `b` is -/// zero. -#[inline] -#[target_feature(enable = "ssse3,mmx")] -#[cfg_attr(test, assert_instr(psignd))] -pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { - psignd(a, b) -} - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.ssse3.pabs.b.128"] @@ -508,54 +345,6 @@ extern "C" { #[link_name = "llvm.x86.ssse3.psign.d.128"] fn psignd128(a: i32x4, b: i32x4) -> i32x4; - - #[link_name = "llvm.x86.ssse3.pabs.b"] - fn pabsb(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pabs.w"] - fn pabsw(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pabs.d"] - fn pabsd(a: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pshuf.b"] - fn pshufb(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.mmx.palignr.b"] - fn palignrb(a: __m64, b: __m64, n: u8) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.w"] - fn phaddw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.d"] - fn phaddd(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phadd.sw"] - fn phaddsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.w"] - fn phsubw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.d"] - fn phsubd(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.phsub.sw"] - fn phsubsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"] - fn pmaddubsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.pmul.hr.sw"] - fn pmulhrsw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.b"] - fn psignb(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.w"] - fn psignw(a: __m64, b: __m64) -> __m64; - - #[link_name = "llvm.x86.ssse3.psign.d"] - fn psignd(a: __m64, b: __m64) -> __m64; } #[cfg(test)] @@ -761,138 +550,4 @@ mod tests { let r = _mm_sign_epi32(a, b); assert_eq_m128i(r, expected); } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_abs_pi8() { - let r = _mm_abs_pi8(_mm_set1_pi8(-5)); - assert_eq_m64(r, _mm_set1_pi8(5)); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_abs_pi16() { - let r = _mm_abs_pi16(_mm_set1_pi16(-5)); - assert_eq_m64(r, _mm_set1_pi16(5)); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_abs_pi32() { - let r = _mm_abs_pi32(_mm_set1_pi32(-5)); - assert_eq_m64(r, _mm_set1_pi32(5)); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_shuffle_pi8() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); - let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); - let r = _mm_shuffle_pi8(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_alignr_pi8() { - let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); - let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); - let r = _mm_alignr_pi8(a, b, 4); - assert_eq_m64(r, transmute(0x89abcdefffddeecc_u64)); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hadd_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(3, 7, 132, 7); - let r = _mm_hadd_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hadd_pi32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(4, 128); - let expected = _mm_setr_pi32(3, 132); - let r = _mm_hadd_pi32(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hadds_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(32767, 1, -32768, -1); - let expected = _mm_setr_pi16(3, 7, 32767, -32768); - let r = _mm_hadds_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hsub_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(-1, -1, -124, 1); - let r = _mm_hsub_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hsub_pi32() { - let a = _mm_setr_pi32(1, 2); - let b = _mm_setr_pi32(4, 128); - let expected = _mm_setr_pi32(-1, -124); - let r = _mm_hsub_pi32(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_hsubs_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 128, 4, 3); - let expected = _mm_setr_pi16(-1, -1, -124, 1); - let r = _mm_hsubs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_maddubs_pi16() { - let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); - let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); - let expected = _mm_setr_pi16(130, 24, 192, 194); - let r = _mm_maddubs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_mulhrs_pi16() { - let a = _mm_setr_pi16(1, 2, 3, 4); - let b = _mm_setr_pi16(4, 32767, -1, -32768); - let expected = _mm_setr_pi16(0, 2, 0, -4); - let r = _mm_mulhrs_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_sign_pi8() { - let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); - let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); - let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); - let r = _mm_sign_pi8(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_sign_pi16() { - let a = _mm_setr_pi16(-1, 2, 3, 4); - let b = _mm_setr_pi16(1, -1, 1, 0); - let expected = _mm_setr_pi16(-1, -2, 3, 0); - let r = _mm_sign_pi16(a, b); - assert_eq_m64(r, expected); - } - - #[simd_test(enable = "ssse3,mmx")] - unsafe fn test_mm_sign_pi32() { - let a = _mm_setr_pi32(-1, 2); - let b = _mm_setr_pi32(1, 0); - let expected = _mm_setr_pi32(-1, 0); - let r = _mm_sign_pi32(a, b); - assert_eq_m64(r, expected); - } } diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs index b9c4537da5..a3ca0e0820 100644 --- a/crates/core_arch/src/x86/test.rs +++ b/crates/core_arch/src/x86/test.rs @@ -2,15 +2,6 @@ use crate::core_arch::x86::*; -#[target_feature(enable = "mmx")] -pub unsafe fn assert_eq_m64(a: __m64, b: __m64) { - union A { - a: __m64, - b: u64, - } - assert_eq!(A { a }.b, A { a: b }.b) -} - #[target_feature(enable = "sse2")] pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { union A { diff --git a/crates/simd-test-macro/src/lib.rs b/crates/simd-test-macro/src/lib.rs index 6df48b1e6a..8e65c43992 100644 --- a/crates/simd-test-macro/src/lib.rs +++ b/crates/simd-test-macro/src/lib.rs @@ -44,8 +44,6 @@ pub fn simd_test( .map(String::from) .collect(); - let mmx = target_features.iter().any(|s| s.starts_with("mmx")); - let enable_feature = string(enable_feature); let item = TokenStream::from(item); let name = find_name(item.clone()); @@ -106,15 +104,6 @@ pub fn simd_test( TokenStream::new() }; - let emms = if mmx { - // note: if the test requires MMX we need to clear the FPU - // registers once the test finishes before interfacing with - // other x87 code: - quote! { unsafe { super::_mm_empty() }; } - } else { - TokenStream::new() - }; - let ret: TokenStream = quote_spanned! { proc_macro2::Span::call_site() => #[allow(non_snake_case)] @@ -123,7 +112,6 @@ pub fn simd_test( fn #name() { if #force_test | (#cfg_target_features) { let v = unsafe { #name() }; - #emms return v; } else { ::stdarch_test::assert_skip_test_ok(stringify!(#name));