diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs
index aa8d4c9820..cca9f7f323 100644
--- a/crates/core_arch/src/lib.rs
+++ b/crates/core_arch/src/lib.rs
@@ -21,7 +21,6 @@
     stdsimd,
     staged_api,
     doc_cfg,
-    mmx_target_feature,
     tbm_target_feature,
     sse4a_target_feature,
     arm_target_feature,
diff --git a/crates/core_arch/src/x86/mmx.rs b/crates/core_arch/src/x86/mmx.rs
deleted file mode 100644
index 3947f2bea6..0000000000
--- a/crates/core_arch/src/x86/mmx.rs
+++ /dev/null
@@ -1,786 +0,0 @@
-//! `i586` MMX instruction set.
-//!
-//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
-//! header.
-//!
-//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
-//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
-//!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-
-use crate::{
-    core_arch::{simd::*, x86::*},
-    mem::transmute,
-};
-
-#[cfg(test)]
-use stdarch_test::assert_instr;
-
-/// Constructs a 64-bit integer vector initialized to zero.
-#[inline]
-#[target_feature(enable = "mmx")]
-// FIXME: this produces a movl instead of xorps on x86
-// FIXME: this produces a xor intrinsic instead of xorps on x86_64
-#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
-pub unsafe fn _mm_setzero_si64() -> __m64 {
-    transmute(0_i64)
-}
-
-/// Adds packed 8-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddb))]
-pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
-    paddb(a, b)
-}
-
-/// Adds packed 8-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddb))]
-pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 {
-    _mm_add_pi8(a, b)
-}
-
-/// Adds packed 16-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddw))]
-pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
-    paddw(a, b)
-}
-
-/// Adds packed 16-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddw))]
-pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 {
-    _mm_add_pi16(a, b)
-}
-
-/// Adds packed 32-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddd))]
-pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
-    paddd(a, b)
-}
-
-/// Adds packed 32-bit integers in `a` and `b`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddd))]
-pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 {
-    _mm_add_pi32(a, b)
-}
-
-/// Adds packed 8-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddsb))]
-pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
-    paddsb(a, b)
-}
-
-/// Adds packed 8-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddsb))]
-pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 {
-    _mm_adds_pi8(a, b)
-}
-
-/// Adds packed 16-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddsw))]
-pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
-    paddsw(a, b)
-}
-
-/// Adds packed 16-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddsw))]
-pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 {
-    _mm_adds_pi16(a, b)
-}
-
-/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddusb))]
-pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
-    paddusb(a, b)
-}
-
-/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddusb))]
-pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 {
-    _mm_adds_pu8(a, b)
-}
-
-/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddusw))]
-pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
-    paddusw(a, b)
-}
-
-/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(paddusw))]
-pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 {
-    _mm_adds_pu16(a, b)
-}
-
-/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubb))]
-pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
-    psubb(a, b)
-}
-
-/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubb))]
-pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
-    _mm_sub_pi8(a, b)
-}
-
-/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubw))]
-pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
-    psubw(a, b)
-}
-
-/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubw))]
-pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
-    _mm_sub_pi16(a, b)
-}
-
-/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubd))]
-pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
-    psubd(a, b)
-}
-
-/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubd))]
-pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
-    _mm_sub_pi32(a, b)
-}
-
-/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
-/// using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubsb))]
-pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
-    psubsb(a, b)
-}
-
-/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
-/// using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubsb))]
-pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
-    _mm_subs_pi8(a, b)
-}
-
-/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
-/// using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubsw))]
-pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
-    psubsw(a, b)
-}
-
-/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
-/// using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubsw))]
-pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
-    _mm_subs_pi16(a, b)
-}
-
-/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
-/// integers in `a` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubusb))]
-pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
-    psubusb(a, b)
-}
-
-/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
-/// integers in `a` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubusb))]
-pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
-    _mm_subs_pu8(a, b)
-}
-
-/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
-/// 16-bit integers in `a` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubusw))]
-pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
-    psubusw(a, b)
-}
-
-/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
-/// 16-bit integers in `a` using saturation.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(psubusw))]
-pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
-    _mm_subs_pu16(a, b)
-}
-
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(packsswb))]
-pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
-    packsswb(a, b)
-}
-
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(packssdw))]
-pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
-    packssdw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(pcmpgtb))]
-pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
-    pcmpgtb(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(pcmpgtw))]
-pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
-    pcmpgtw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(pcmpgtd))]
-pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
-    pcmpgtd(a, b)
-}
-
-/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.2, b.2, a.3, b.3]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
-pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
-    punpckhwd(a, b)
-}
-
-/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpckhbw))]
-pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
-    punpckhbw(a, b)
-}
-
-/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpcklbw))]
-pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
-    punpcklbw(a, b)
-}
-
-/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.0 b.0 a.1 b.1]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpcklwd))]
-pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
-    punpcklwd(a, b)
-}
-
-/// Unpacks the upper element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.1, b.1]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpckhdq))]
-pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
-    punpckhdq(a, b)
-}
-
-/// Unpacks the lower element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.0, b.0]`.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(punpckldq))]
-pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
-    punpckldq(a, b)
-}
-
-/// Sets packed 16-bit integers in dst with the supplied values.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
-    _mm_setr_pi16(e0, e1, e2, e3)
-}
-
-/// Sets packed 32-bit integers in dst with the supplied values.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
-    _mm_setr_pi32(e0, e1)
-}
-
-/// Sets packed 8-bit integers in dst with the supplied values.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
-    _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
-}
-
-/// Broadcasts 16-bit integer a to all all elements of dst.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
-    _mm_setr_pi16(a, a, a, a)
-}
-
-/// Broadcasts 32-bit integer a to all all elements of dst.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
-    _mm_setr_pi32(a, a)
-}
-
-/// Broadcasts 8-bit integer a to all all elements of dst.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
-    _mm_setr_pi8(a, a, a, a, a, a, a, a)
-}
-
-/// Sets packed 16-bit integers in dst with the supplied values in reverse
-/// order.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
-    transmute(i16x4::new(e0, e1, e2, e3))
-}
-
-/// Sets packed 32-bit integers in dst with the supplied values in reverse
-/// order.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
-    transmute(i32x2::new(e0, e1))
-}
-
-/// Sets packed 8-bit integers in dst with the supplied values in reverse order.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_setr_pi8(
-    e0: i8,
-    e1: i8,
-    e2: i8,
-    e3: i8,
-    e4: i8,
-    e5: i8,
-    e6: i8,
-    e7: i8,
-) -> __m64 {
-    transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
-}
-
-/// Empty the MMX state, which marks the x87 FPU registers as available for use
-/// by x87 instructions. This instruction must be used at the end of all MMX
-/// technology procedures.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(emms))]
-pub unsafe fn _mm_empty() {
-    emms()
-}
-
-/// Empty the MMX state, which marks the x87 FPU registers as available for use
-/// by x87 instructions. This instruction must be used at the end of all MMX
-/// technology procedures.
-#[inline]
-#[target_feature(enable = "mmx")]
-#[cfg_attr(test, assert_instr(emms))]
-pub unsafe fn _m_empty() {
-    emms()
-}
-
-/// Copies 32-bit integer `a` to the lower elements of the return value, and zero
-/// the upper element of the return value.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_cvtsi32_si64(a: i32) -> __m64 {
-    transmute(i32x2::new(a, 0))
-}
-
-/// Return the lower 32-bit integer in `a`.
-#[inline]
-#[target_feature(enable = "mmx")]
-pub unsafe fn _mm_cvtsi64_si32(a: __m64) -> i32 {
-    let r: i32x2 = transmute(a);
-    r.0
-}
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.x86.mmx.padd.b"]
-    fn paddb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.padd.w"]
-    fn paddw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.padd.d"]
-    fn paddd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.padds.b"]
-    fn paddsb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.padds.w"]
-    fn paddsw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.paddus.b"]
-    fn paddusb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.paddus.w"]
-    fn paddusw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psub.b"]
-    fn psubb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psub.w"]
-    fn psubw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psub.d"]
-    fn psubd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psubs.b"]
-    fn psubsb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psubs.w"]
-    fn psubsw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psubus.b"]
-    fn psubusb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psubus.w"]
-    fn psubusw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.packsswb"]
-    fn packsswb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.packssdw"]
-    fn packssdw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pcmpgt.b"]
-    fn pcmpgtb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pcmpgt.w"]
-    fn pcmpgtw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pcmpgt.d"]
-    fn pcmpgtd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpckhwd"]
-    fn punpckhwd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpcklwd"]
-    fn punpcklwd(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpckhbw"]
-    fn punpckhbw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpcklbw"]
-    fn punpcklbw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpckhdq"]
-    fn punpckhdq(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.punpckldq"]
-    fn punpckldq(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.emms"]
-    fn emms();
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::core_arch::x86::*;
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_setzero_si64() {
-        let r: __m64 = transmute(0_i64);
-        assert_eq_m64(r, _mm_setzero_si64());
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_add_pi8() {
-        let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
-        let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
-        let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
-        assert_eq_m64(e, _mm_add_pi8(a, b));
-        assert_eq_m64(e, _m_paddb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_add_pi16() {
-        let a = _mm_setr_pi16(-1, -1, 1, 1);
-        let b = _mm_setr_pi16(i16::MIN + 1, 30001, -30001, i16::MAX - 1);
-        let e = _mm_setr_pi16(i16::MIN, 30000, -30000, i16::MAX);
-        assert_eq_m64(e, _mm_add_pi16(a, b));
-        assert_eq_m64(e, _m_paddw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_add_pi32() {
-        let a = _mm_setr_pi32(1, -1);
-        let b = _mm_setr_pi32(i32::MAX - 1, i32::MIN + 1);
-        let e = _mm_setr_pi32(i32::MAX, i32::MIN);
-        assert_eq_m64(e, _mm_add_pi32(a, b));
-        assert_eq_m64(e, _m_paddd(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_adds_pi8() {
-        let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
-        let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
-        let e = _mm_setr_pi8(i8::MIN, 0, 0, i8::MAX, -1, -1, 1, 1);
-        assert_eq_m64(e, _mm_adds_pi8(a, b));
-        assert_eq_m64(e, _m_paddsb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_adds_pi16() {
-        let a = _mm_setr_pi16(-32000, 32000, 4, 0);
-        let b = _mm_setr_pi16(-32000, 32000, -5, 1);
-        let e = _mm_setr_pi16(i16::MIN, i16::MAX, -1, 1);
-        assert_eq_m64(e, _mm_adds_pi16(a, b));
-        assert_eq_m64(e, _m_paddsw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_adds_pu8() {
-        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
-        let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
-        let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::MAX as i8);
-        assert_eq_m64(e, _mm_adds_pu8(a, b));
-        assert_eq_m64(e, _m_paddusb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_adds_pu16() {
-        let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
-        let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
-        let e = _mm_setr_pi16(0, 11, 22, u16::MAX as i16);
-        assert_eq_m64(e, _mm_adds_pu16(a, b));
-        assert_eq_m64(e, _m_paddusw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_sub_pi8() {
-        let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
-        let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
-        let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
-        assert_eq_m64(e, _mm_sub_pi8(a, b));
-        assert_eq_m64(e, _m_psubb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_sub_pi16() {
-        let a = _mm_setr_pi16(-20000, -20000, 20000, 30000);
-        let b = _mm_setr_pi16(-10000, 10000, -10000, 30000);
-        let e = _mm_setr_pi16(-10000, -30000, 30000, 0);
-        assert_eq_m64(e, _mm_sub_pi16(a, b));
-        assert_eq_m64(e, _m_psubw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_sub_pi32() {
-        let a = _mm_setr_pi32(500_000, -500_000);
-        let b = _mm_setr_pi32(500_000, 500_000);
-        let e = _mm_setr_pi32(0, -1_000_000);
-        assert_eq_m64(e, _mm_sub_pi32(a, b));
-        assert_eq_m64(e, _m_psubd(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_subs_pi8() {
-        let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
-        let b = _mm_setr_pi8(100, -100, i8::MIN, 127, -1, 1, 3, -3);
-        let e = _mm_setr_pi8(i8::MIN, i8::MAX, i8::MAX, -127, 1, -1, -8, 8);
-        assert_eq_m64(e, _mm_subs_pi8(a, b));
-        assert_eq_m64(e, _m_psubsb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_subs_pi16() {
-        let a = _mm_setr_pi16(-20000, 20000, 0, 0);
-        let b = _mm_setr_pi16(20000, -20000, -1, 1);
-        let e = _mm_setr_pi16(i16::MIN, i16::MAX, 1, -1);
-        assert_eq_m64(e, _mm_subs_pi16(a, b));
-        assert_eq_m64(e, _m_psubsw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_subs_pu8() {
-        let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
-        let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
-        let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
-        assert_eq_m64(e, _mm_subs_pu8(a, b));
-        assert_eq_m64(e, _m_psubusb(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_subs_pu16() {
-        let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
-        let b = _mm_setr_pi16(20000, 300, 1, 11111);
-        let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
-        assert_eq_m64(e, _mm_subs_pu16(a, b));
-        assert_eq_m64(e, _m_psubusw(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_packs_pi16() {
-        let a = _mm_setr_pi16(-1, 2, -3, 4);
-        let b = _mm_setr_pi16(-5, 6, -7, 8);
-        let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
-        assert_eq_m64(r, _mm_packs_pi16(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_packs_pi32() {
-        let a = _mm_setr_pi32(-1, 2);
-        let b = _mm_setr_pi32(-5, 6);
-        let r = _mm_setr_pi16(-1, 2, -5, 6);
-        assert_eq_m64(r, _mm_packs_pi32(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_cmpgt_pi8() {
-        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
-        let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
-        assert_eq_m64(r, _mm_cmpgt_pi8(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_cmpgt_pi16() {
-        let a = _mm_setr_pi16(0, 1, 2, 3);
-        let b = _mm_setr_pi16(4, 3, 2, 1);
-        let r = _mm_setr_pi16(0, 0, 0, -1);
-        assert_eq_m64(r, _mm_cmpgt_pi16(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_cmpgt_pi32() {
-        let a = _mm_setr_pi32(0, 3);
-        let b = _mm_setr_pi32(1, 2);
-        let r0 = _mm_setr_pi32(0, -1);
-        let r1 = _mm_setr_pi32(-1, 0);
-
-        assert_eq_m64(r0, _mm_cmpgt_pi32(a, b));
-        assert_eq_m64(r1, _mm_cmpgt_pi32(b, a));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpackhi_pi8() {
-        let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
-        let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
-        let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);
-
-        assert_eq_m64(r, _mm_unpackhi_pi8(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpacklo_pi8() {
-        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
-        let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
-        assert_eq_m64(r, _mm_unpacklo_pi8(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpackhi_pi16() {
-        let a = _mm_setr_pi16(0, 1, 2, 3);
-        let b = _mm_setr_pi16(4, 5, 6, 7);
-        let r = _mm_setr_pi16(2, 6, 3, 7);
-        assert_eq_m64(r, _mm_unpackhi_pi16(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpacklo_pi16() {
-        let a = _mm_setr_pi16(0, 1, 2, 3);
-        let b = _mm_setr_pi16(4, 5, 6, 7);
-        let r = _mm_setr_pi16(0, 4, 1, 5);
-        assert_eq_m64(r, _mm_unpacklo_pi16(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpackhi_pi32() {
-        let a = _mm_setr_pi32(0, 3);
-        let b = _mm_setr_pi32(1, 2);
-        let r = _mm_setr_pi32(3, 2);
-
-        assert_eq_m64(r, _mm_unpackhi_pi32(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_unpacklo_pi32() {
-        let a = _mm_setr_pi32(0, 3);
-        let b = _mm_setr_pi32(1, 2);
-        let r = _mm_setr_pi32(0, 1);
-
-        assert_eq_m64(r, _mm_unpacklo_pi32(a, b));
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_empty() {
-        _mm_empty();
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_m_empty() {
-        _m_empty();
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_cvtsi32_si64() {
-        let a = _mm_cvtsi32_si64(42);
-        let b = _mm_setr_pi32(42, 0);
-        assert_eq_m64(a, b);
-    }
-
-    #[simd_test(enable = "mmx")]
-    unsafe fn test_mm_cvtsi64_si32() {
-        let a = _mm_setr_pi32(42, 666);
-        let b = _mm_cvtsi64_si32(a);
-        assert_eq!(b, 42);
-    }
-}
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index 60eb890c2f..0edaa13689 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -6,50 +6,6 @@ use crate::{intrinsics, marker::Sized, mem::transmute};
 mod macros;
 
 types! {
-    /// 64-bit wide integer vector type, x86-specific
-    ///
-    /// This type is the same as the `__m64` type defined by Intel,
-    /// representing a 64-bit SIMD register. Usage of this type typically
-    /// corresponds to the `mmx` target feature.
-    ///
-    /// Internally this type may be viewed as:
-    ///
-    /// * `i8x8` - eight `i8` variables packed together
-    /// * `i16x4` - four `i16` variables packed together
-    /// * `i32x2` - two `i32` variables packed together
-    ///
-    /// (as well as unsigned versions). Each intrinsic may interpret the
-    /// internal bits differently, check the documentation of the intrinsic
-    /// to see how it's being used.
-    ///
-    /// Note that this means that an instance of `__m64` typically just means
-    /// a "bag of bits" which is left up to interpretation at the point of use.
-    ///
-    /// Most intrinsics using `__m64` are prefixed with `_mm_` and the
-    /// integer types tend to correspond to suffixes like "pi8" or "pi32" (not
-    /// to be confused with "epiXX", used for `__m128i`).
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # #![feature(stdsimd, mmx_target_feature)]
-    /// #[cfg(target_arch = "x86")]
-    /// use std::arch::x86::*;
-    /// #[cfg(target_arch = "x86_64")]
-    /// use std::arch::x86_64::*;
-    ///
-    /// # fn main() {
-    /// # #[target_feature(enable = "mmx")]
-    /// # unsafe fn foo() {
-    /// let all_bytes_zero = _mm_setzero_si64();
-    /// let all_bytes_one = _mm_set1_pi8(1);
-    /// let two_i32 = _mm_set_pi32(1, 2);
-    /// # }
-    /// # if is_x86_feature_detected!("mmx") { unsafe { foo() } }
-    /// # }
-    /// ```
-    pub struct __m64(i64);
-
     /// 128-bit wide integer vector type, x86-specific
     ///
     /// This type is the same as the `__m128i` type defined by Intel,
@@ -359,49 +315,6 @@ mod test;
 #[cfg(test)]
 pub use self::test::*;
 
-#[allow(non_camel_case_types)]
-#[unstable(feature = "stdimd_internal", issue = "none")]
-pub(crate) trait m64Ext: Sized {
-    fn as_m64(self) -> __m64;
-
-    #[inline]
-    fn as_u8x8(self) -> crate::core_arch::simd::u8x8 {
-        unsafe { transmute(self.as_m64()) }
-    }
-
-    #[inline]
-    fn as_u16x4(self) -> crate::core_arch::simd::u16x4 {
-        unsafe { transmute(self.as_m64()) }
-    }
-
-    #[inline]
-    fn as_u32x2(self) -> crate::core_arch::simd::u32x2 {
-        unsafe { transmute(self.as_m64()) }
-    }
-
-    #[inline]
-    fn as_i8x8(self) -> crate::core_arch::simd::i8x8 {
-        unsafe { transmute(self.as_m64()) }
-    }
-
-    #[inline]
-    fn as_i16x4(self) -> crate::core_arch::simd::i16x4 {
-        unsafe { transmute(self.as_m64()) }
-    }
-
-    #[inline]
-    fn as_i32x2(self) -> crate::core_arch::simd::i32x2 {
-        unsafe { transmute(self.as_m64()) }
-    }
-}
-
-impl m64Ext for __m64 {
-    #[inline]
-    fn as_m64(self) -> Self {
-        self
-    }
-}
-
 #[allow(non_camel_case_types)]
 #[unstable(feature = "stdimd_internal", issue = "none")]
 pub(crate) trait m128iExt: Sized {
@@ -649,9 +562,6 @@ mod tbm;
 #[cfg(not(stdarch_intel_sde))]
 pub use self::tbm::*;
 
-mod mmx;
-pub use self::mmx::*;
-
 mod pclmulqdq;
 pub use self::pclmulqdq::*;
 
diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs
index c01aa1bc9f..ba3efae3c9 100644
--- a/crates/core_arch/src/x86/sse.rs
+++ b/crates/core_arch/src/x86/sse.rs
@@ -1115,33 +1115,6 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
     movmskps(a)
 }
 
-/// Sets the upper two single-precision floating-point values with 64 bits of
-/// data loaded from the address `p`; the lower two values are passed through
-/// from `a`.
-#[inline]
-#[target_feature(enable = "sse")]
-#[cfg_attr(test, assert_instr(movhps))]
-// TODO: this function is actually not limited to floats, but that's what
-// what matches the C type most closely: `(__m128, *const __m64) -> __m128`.
-pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
-    let q = p as *const f32x2;
-    let b: f32x2 = *q;
-    let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
-    simd_shuffle4(a, bb, [0, 1, 4, 5])
-}
-
-/// Loads two floats from `p` into the lower half of a `__m128`. The upper half
-/// is copied from the upper half of `a`.
-#[inline]
-#[target_feature(enable = "sse")]
-#[cfg_attr(test, assert_instr(movlps))]
-pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
-    let q = p as *const f32x2;
-    let b: f32x2 = *q;
-    let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
-    simd_shuffle4(a, bb, [4, 5, 2, 3])
-}
-
 /// Construct a `__m128` with the lowest element read from `p` and the other
 /// elements set to zero.
 ///
@@ -1270,72 +1243,6 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
     transmute(i64x2(0, ptr::read_unaligned(mem_addr as *const i64)))
 }
 
-/// Stores the upper half of `a` (64 bits) into memory.
-///
-/// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
-/// choose to generate an equivalent sequence of other instructions.
-#[inline]
-#[target_feature(enable = "sse")]
-// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
-// fine.
-// On i586 (no SSE2) it just generates plain MOV instructions.
-#[cfg_attr(
-    all(test, any(target_arch = "x86_64", target_feature = "sse2"),
-        not(target_os = "windows")),
-    // assert_instr(movhpd)
-    assert_instr(movhps) // LLVM7 prefers single-precision instructions
-)]
-pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
-    #[cfg(target_arch = "x86")]
-    {
-        // If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which
-        // is just silly
-        let a64: u64x2 = mem::transmute(a);
-        let a_hi = a64.extract(1);
-        *(p as *mut u64) = a_hi;
-    }
-    #[cfg(target_arch = "x86_64")]
-    {
-        // If this is a `u64x2` LLVM generates a pshufd + movq, but we really
-        // want a a MOVHPD or MOVHPS here.
-        let a64: f64x2 = mem::transmute(a);
-        let a_hi = a64.extract(1);
-        *p = mem::transmute(a_hi);
-    }
-}
-
-/// Stores the lower half of `a` (64 bits) into memory.
-///
-/// This intrinsic corresponds to the `MOVQ` instruction. The compiler may
-/// choose to generate an equivalent sequence of other instructions.
-#[inline]
-#[target_feature(enable = "sse")]
-// On i586 the codegen just generates plane MOVs. No need to test for that.
-#[cfg_attr(
-    all(
-        test,
-        any(target_arch = "x86_64", target_feature = "sse2"),
-        not(target_os = "windows")
-    ),
-    assert_instr(movlps)
-)]
-pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
-    #[cfg(target_arch = "x86")]
-    {
-        // Same as for _mm_storeh_pi: i586 code gen would use floating point
-        // stack.
-        let a64: u64x2 = mem::transmute(a);
-        let a_hi = a64.extract(0);
-        *(p as *mut u64) = a_hi;
-    }
-    #[cfg(target_arch = "x86_64")]
-    {
-        let a64: f64x2 = mem::transmute(a);
-        let a_hi = a64.extract(0);
-        *p = mem::transmute(a_hi);
-    }
-}
-
 /// Stores the lowest 32 bit float of `a` into memory.
 ///
 /// This intrinsic corresponds to the `MOVSS` instruction.
@@ -1985,42 +1892,6 @@ extern "C" {
     fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
     #[link_name = "llvm.x86.sse.cmp.ss"]
     fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
-    #[link_name = "llvm.x86.mmx.movnt.dq"]
-    fn movntdq(a: *mut __m64, b: __m64);
-    #[link_name = "llvm.x86.sse.cvtpi2ps"]
-    fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
-    #[link_name = "llvm.x86.mmx.maskmovq"]
-    fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
-    #[link_name = "llvm.x86.mmx.pextr.w"]
-    fn pextrw(a: __m64, imm8: i32) -> i32;
-    #[link_name = "llvm.x86.mmx.pinsr.w"]
-    fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmovmskb"]
-    fn pmovmskb(a: __m64) -> i32;
-    #[link_name = "llvm.x86.sse.pshuf.w"]
-    fn pshufw(a: __m64, imm8: i8) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmaxs.w"]
-    fn pmaxsw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmaxu.b"]
-    fn pmaxub(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmins.w"]
-    fn pminsw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pminu.b"]
-    fn pminub(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmulhu.w"]
-    fn pmulhuw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmull.w"]
-    fn pmullw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pavg.b"]
-    fn pavgb(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pavg.w"]
-    fn pavgw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psad.bw"]
-    fn psadbw(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.sse.cvtps2pi"]
-    fn cvtps2pi(a: __m128) -> __m64;
-    #[link_name = "llvm.x86.sse.cvttps2pi"]
-    fn cvttps2pi(a: __m128) -> __m64;
 }
 
 /// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
@@ -2038,463 +1909,6 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
     intrinsics::nontemporal_store(mem_addr as *mut __m128, a);
 }
 
-/// Stores 64-bits of integer data from a into memory using a non-temporal
-/// memory hint.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(movntq))]
-pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
-    movntdq(mem_addr, a)
-}
-
-/// Compares the packed 16-bit signed integers of `a` and `b` writing the
-/// greatest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmaxsw))]
-pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
-    pmaxsw(a, b)
-}
-
-/// Compares the packed 16-bit signed integers of `a` and `b` writing the
-/// greatest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmaxsw))]
-pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
-    _mm_max_pi16(a, b)
-}
-
-/// Compares the packed 8-bit signed integers of `a` and `b` writing the
-/// greatest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmaxub))]
-pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
-    pmaxub(a, b)
-}
-
-/// Compares the packed 8-bit signed integers of `a` and `b` writing the
-/// greatest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmaxub))]
-pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
-    _mm_max_pu8(a, b)
-}
-
-/// Compares the packed 16-bit signed integers of `a` and `b` writing the
-/// smallest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pminsw))]
-pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
-    pminsw(a, b)
-}
-
-/// Compares the packed 16-bit signed integers of `a` and `b` writing the
-/// smallest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pminsw))]
-pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
-    _mm_min_pi16(a, b)
-}
-
-/// Compares the packed 8-bit signed integers of `a` and `b` writing the
-/// smallest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pminub))]
-pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
-    pminub(a, b)
-}
-
-/// Compares the packed 8-bit signed integers of `a` and `b` writing the
-/// smallest value into the result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pminub))]
-pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
-    _mm_min_pu8(a, b)
-}
-
-/// Multiplies packed 16-bit unsigned integer values and writes the
-/// high-order 16 bits of each 32-bit product to the corresponding bits in
-/// the destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmulhuw))]
-pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
-    pmulhuw(a, b)
-}
-
-/// Multiplies packed 16-bit integer values and writes the
-/// low-order 16 bits of each 32-bit product to the corresponding bits in
-/// the destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmullw))]
-pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 {
-    pmullw(a, b)
-}
-
-/// Multiplies packed 16-bit unsigned integer values and writes the
-/// high-order 16 bits of each 32-bit product to the corresponding bits in
-/// the destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmulhuw))]
-pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
-    _mm_mulhi_pu16(a, b)
-}
-
-/// Computes the rounded averages of the packed unsigned 8-bit integer
-/// values and writes the averages to the corresponding bits in the
-/// destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pavgb))]
-pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
-    pavgb(a, b)
-}
-
-/// Computes the rounded averages of the packed unsigned 8-bit integer
-/// values and writes the averages to the corresponding bits in the
-/// destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pavgb))]
-pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
-    _mm_avg_pu8(a, b)
-}
-
-/// Computes the rounded averages of the packed unsigned 16-bit integer
-/// values and writes the averages to the corresponding bits in the
-/// destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pavgw))]
-pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
-    pavgw(a, b)
-}
-
-/// Computes the rounded averages of the packed unsigned 16-bit integer
-/// values and writes the averages to the corresponding bits in the
-/// destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pavgw))]
-pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
-    _mm_avg_pu16(a, b)
-}
-
-/// Subtracts the corresponding 8-bit unsigned integer values of the two
-/// 64-bit vector operands and computes the absolute value for each of the
-/// difference. Then sum of the 8 absolute differences is written to the
-/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(psadbw))]
-pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
-    psadbw(a, b)
-}
-
-/// Subtracts the corresponding 8-bit unsigned integer values of the two
-/// 64-bit vector operands and computes the absolute value for each of the
-/// difference. Then sum of the 8 absolute differences is written to the
-/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(psadbw))]
-pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
-    _mm_sad_pu8(a, b)
-}
-
-/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
-/// floating point values and writes them to the lower 64-bits of the
-/// destination. The remaining higher order elements of the destination are
-/// copied from the corresponding elements in the first operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
-    cvtpi2ps(a, b)
-}
-
-/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
-/// floating point values and writes them to the lower 64-bits of the
-/// destination. The remaining higher order elements of the destination are
-/// copied from the corresponding elements in the first operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
-    _mm_cvtpi32_ps(a, b)
-}
-
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
-    let b = _mm_setzero_si64();
-    let b = _mm_cmpgt_pi8(b, a);
-    let b = _mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
-    let b = _mm_setzero_si64();
-    let b = _mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
-    let b = _mm_setzero_si64();
-    let b = _mm_cmpgt_pi16(b, a);
-    let c = _mm_unpackhi_pi16(a, b);
-    let r = _mm_setzero_ps();
-    let r = cvtpi2ps(r, c);
-    let r = _mm_movelh_ps(r, r);
-    let c = _mm_unpacklo_pi16(a, b);
-    cvtpi2ps(r, c)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
-    let b = _mm_setzero_si64();
-    let c = _mm_unpackhi_pi16(a, b);
-    let r = _mm_setzero_ps();
-    let r = cvtpi2ps(r, c);
-    let r = _mm_movelh_ps(r, r);
-    let c = _mm_unpacklo_pi16(a, b);
-    cvtpi2ps(r, c)
-}
-
-/// Converts the two 32-bit signed integer values from each 64-bit vector
-/// operand of `[2 x i32]` into a 128-bit vector of `[4 x float]`.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
-    let c = _mm_setzero_ps();
-    let c = _mm_cvtpi32_ps(c, b);
-    let c = _mm_movelh_ps(c, c);
-    _mm_cvtpi32_ps(c, a)
-}
-
-/// Conditionally copies the values from each 8-bit element in the first
-/// 64-bit integer vector operand to the specified memory location, as
-/// specified by the most significant bit in the corresponding element in the
-/// second 64-bit integer vector operand.
-///
-/// To minimize caching, the data is flagged as non-temporal
-/// (unlikely to be used again soon).
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(maskmovq))]
-pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
-    maskmovq(a, mask, mem_addr)
-}
-
-/// Conditionally copies the values from each 8-bit element in the first
-/// 64-bit integer vector operand to the specified memory location, as
-/// specified by the most significant bit in the corresponding element in the
-/// second 64-bit integer vector operand.
-///
-/// To minimize caching, the data is flagged as non-temporal
-/// (unlikely to be used again soon).
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(maskmovq))]
-pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
-    _mm_maskmove_si64(a, mask, mem_addr)
-}
-
-/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
-/// returns it, as specified by the immediate integer operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
-    macro_rules! call {
-        ($imm2:expr) => {
-            pextrw(a, $imm2) as i32
-        };
-    }
-    constify_imm2!(imm2, call)
-}
-
-/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
-/// returns it, as specified by the immediate integer operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
-    macro_rules! call {
-        ($imm2:expr) => {
-            pextrw(a, $imm2) as i32
-        };
-    }
-    constify_imm2!(imm2, call)
-}
-
-/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
-/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
-/// specified by the immediate operand `n`.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
-    macro_rules! call {
-        ($imm2:expr) => {
-            pinsrw(a, d, $imm2)
-        };
-    }
-    constify_imm2!(imm2, call)
-}
-
-/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
-/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
-/// specified by the immediate operand `n`.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
-    macro_rules! call {
-        ($imm2:expr) => {
-            pinsrw(a, d, $imm2)
-        };
-    }
-    constify_imm2!(imm2, call)
-}
-
-/// Takes the most significant bit from each 8-bit element in a 64-bit
-/// integer vector to create a 16-bit mask value. Zero-extends the value to
-/// 32-bit integer and writes it to the destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmovmskb))]
-pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
-    pmovmskb(a)
-}
-
-/// Takes the most significant bit from each 8-bit element in a 64-bit
-/// integer vector to create a 16-bit mask value. Zero-extends the value to
-/// 32-bit integer and writes it to the destination.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pmovmskb))]
-pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
-    _mm_movemask_pi8(a)
-}
-
-/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
-/// destination, as specified by the immediate value operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
-    macro_rules! call {
-        ($imm8:expr) => {
-            pshufw(a, $imm8)
-        };
-    }
-    constify_imm8!(imm8, call)
-}
-
-/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
-/// destination, as specified by the immediate value operand.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
-    macro_rules! call {
-        ($imm8:expr) => {
-            pshufw(a, $imm8)
-        };
-    }
-    constify_imm8!(imm8, call)
-}
-
-/// Converts the two lower packed single-precision (32-bit) floating-point
-/// elements in `a` to packed 32-bit integers with truncation.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvttps2pi))]
-pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
-    cvttps2pi(a)
-}
-
-/// Converts the two lower packed single-precision (32-bit) floating-point
-/// elements in `a` to packed 32-bit integers with truncation.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvttps2pi))]
-pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
-    _mm_cvttps_pi32(a)
-}
-
-/// Converts the two lower packed single-precision (32-bit) floating-point
-/// elements in `a` to packed 32-bit integers.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
-    cvtps2pi(a)
-}
-
-/// Converts the two lower packed single-precision (32-bit) floating-point
-/// elements in `a` to packed 32-bit integers.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
-    _mm_cvtps_pi32(a)
-}
-
-/// Converts packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 16-bit integers.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
-    let b = _mm_cvtps_pi32(a);
-    let a = _mm_movehl_ps(a, a);
-    let c = _mm_cvtps_pi32(a);
-    _mm_packs_pi32(b, c)
-}
-
-/// Converts packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 8-bit integers, and returns theem in the lower 4 elements of the
-/// result.
-#[inline]
-#[target_feature(enable = "sse,mmx")]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
-    let b = _mm_cvtps_pi16(a);
-    let c = _mm_setzero_si64();
-    _mm_packs_pi16(b, c)
-}
-
 #[cfg(test)]
 mod tests {
     use crate::{hint::black_box, mem::transmute};
@@ -3593,24 +3007,6 @@ mod tests {
         assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
     }
 
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_loadh_pi() {
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
-        let p = x[..].as_ptr();
-        let r = _mm_loadh_pi(a, p as *const _);
-        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
-    }
-
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_loadl_pi() {
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
-        let p = x[..].as_ptr();
-        let r = _mm_loadl_pi(a, p as *const _);
-        assert_eq_m128(r, _mm_setr_ps(5.0, 6.0, 3.0, 4.0));
-    }
-
     #[simd_test(enable = "sse")]
     unsafe fn test_mm_load_ss() {
         let a = 42.0f32;
@@ -3684,28 +3080,6 @@ mod tests {
         assert_eq_m128i(r, _mm_set_epi64x(5, 0));
     }
 
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_storeh_pi() {
-        let mut vals = [0.0f32; 8];
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        _mm_storeh_pi(vals.as_mut_ptr() as *mut _, a);
-
-        assert_eq!(vals[0], 3.0);
-        assert_eq!(vals[1], 4.0);
-        assert_eq!(vals[2], 0.0);
-    }
-
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_storel_pi() {
-        let mut vals = [0.0f32; 8];
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        _mm_storel_pi(vals.as_mut_ptr() as *mut _, a);
-
-        assert_eq!(vals[0], 1.0);
-        assert_eq!(vals[1], 2.0);
-        assert_eq!(vals[2], 0.0);
-    }
-
     #[simd_test(enable = "sse")]
     unsafe fn test_mm_store_ss() {
         let mut vals = [0.0f32; 8];
@@ -3926,254 +3300,4 @@ mod tests {
             assert_eq!(mem.data[i], get_m128(a, i));
         }
     }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_stream_pi() {
-        let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
-        let mut mem = boxed::Box::<__m64>::new(transmute(i8x8::splat(1)));
-        _mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
-        assert_eq_m64(a, *mem);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_max_pi16() {
-        let a = _mm_setr_pi16(-1, 6, -3, 8);
-        let b = _mm_setr_pi16(5, -2, 7, -4);
-        let r = _mm_setr_pi16(5, 6, 7, 8);
-
-        assert_eq_m64(r, _mm_max_pi16(a, b));
-        assert_eq_m64(r, _m_pmaxsw(a, b));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_max_pu8() {
-        let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
-        let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
-        let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
-
-        assert_eq_m64(r, _mm_max_pu8(a, b));
-        assert_eq_m64(r, _m_pmaxub(a, b));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_min_pi16() {
-        let a = _mm_setr_pi16(-1, 6, -3, 8);
-        let b = _mm_setr_pi16(5, -2, 7, -4);
-        let r = _mm_setr_pi16(-1, -2, -3, -4);
-
-        assert_eq_m64(r, _mm_min_pi16(a, b));
-        assert_eq_m64(r, _m_pminsw(a, b));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_min_pu8() {
-        let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
-        let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
-        let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
-
-        assert_eq_m64(r, _mm_min_pu8(a, b));
-        assert_eq_m64(r, _m_pminub(a, b));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_mulhi_pu16() {
-        let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
-        let r = _mm_mulhi_pu16(a, b);
-        assert_eq_m64(r, _mm_set1_pi16(15));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_mullo_pi16() {
-        let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
-        let r = _mm_mullo_pi16(a, b);
-        assert_eq_m64(r, _mm_set1_pi16(17960));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_m_pmulhuw() {
-        let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
-        let r = _m_pmulhuw(a, b);
-        assert_eq_m64(r, _mm_set1_pi16(15));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_avg_pu8() {
-        let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
-        let r = _mm_avg_pu8(a, b);
-        assert_eq_m64(r, _mm_set1_pi8(6));
-
-        let r = _m_pavgb(a, b);
-        assert_eq_m64(r, _mm_set1_pi8(6));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_avg_pu16() {
-        let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
-        let r = _mm_avg_pu16(a, b);
-        assert_eq_m64(r, _mm_set1_pi16(6));
-
-        let r = _m_pavgw(a, b);
-        assert_eq_m64(r, _mm_set1_pi16(6));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_sad_pu8() {
-        #[rustfmt::skip]
-        let a = _mm_setr_pi8(
-            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
-            1, 2, 3, 4,
-        );
-        let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
-        let r = _mm_sad_pu8(a, b);
-        assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
-
-        let r = _m_psadbw(a, b);
-        assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpi32_ps() {
-        let a = _mm_setr_ps(0., 0., 3., 4.);
-        let b = _mm_setr_pi32(1, 2);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpi32_ps(a, b);
-        assert_eq_m128(r, expected);
-
-        let r = _mm_cvt_pi2ps(a, b);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpi16_ps() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpi16_ps(a);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpu16_ps() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpu16_ps(a);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpi8_ps() {
-        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpi8_ps(a);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpu8_ps() {
-        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpu8_ps(a);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtpi32x2_ps() {
-        let a = _mm_setr_pi32(1, 2);
-        let b = _mm_setr_pi32(3, 4);
-        let expected = _mm_setr_ps(1., 2., 3., 4.);
-        let r = _mm_cvtpi32x2_ps(a, b);
-        assert_eq_m128(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_maskmove_si64() {
-        let a = _mm_set1_pi8(9);
-        let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
-        let mut r = _mm_set1_pi8(0);
-        _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
-        let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
-        assert_eq_m64(r, e);
-
-        let mut r = _mm_set1_pi8(0);
-        _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
-        assert_eq_m64(r, e);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_extract_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let r = _mm_extract_pi16(a, 0);
-        assert_eq!(r, 1);
-        let r = _mm_extract_pi16(a, 1);
-        assert_eq!(r, 2);
-
-        let r = _m_pextrw(a, 1);
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_insert_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let r = _mm_insert_pi16(a, 0, 0b0);
-        let expected = _mm_setr_pi16(0, 2, 3, 4);
-        assert_eq_m64(r, expected);
-        let r = _mm_insert_pi16(a, 0, 0b10);
-        let expected = _mm_setr_pi16(1, 2, 0, 4);
-        assert_eq_m64(r, expected);
-
-        let r = _m_pinsrw(a, 0, 0b10);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_movemask_pi8() {
-        let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
-        let r = _mm_movemask_pi8(a);
-        assert_eq!(r, 0b10001);
-
-        let r = _m_pmovmskb(a);
-        assert_eq!(r, 0b10001);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_shuffle_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
-        let expected = _mm_setr_pi16(4, 2, 2, 1);
-        assert_eq_m64(r, expected);
-
-        let r = _m_pshufw(a, 0b00_01_01_11);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtps_pi32() {
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        let r = _mm_setr_pi32(1, 2);
-
-        assert_eq_m64(r, _mm_cvtps_pi32(a));
-        assert_eq_m64(r, _mm_cvt_ps2pi(a));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvttps_pi32() {
-        let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
-        let r = _mm_setr_pi32(7, 2);
-
-        assert_eq_m64(r, _mm_cvttps_pi32(a));
-        assert_eq_m64(r, _mm_cvtt_ps2pi(a));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtps_pi16() {
-        let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
-        let r = _mm_setr_pi16(7, 2, 3, 4);
-        assert_eq_m64(r, _mm_cvtps_pi16(a));
-    }
-
-    #[simd_test(enable = "sse,mmx")]
-    unsafe fn test_mm_cvtps_pi8() {
-        let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
-        let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
-        assert_eq_m64(r, _mm_cvtps_pi8(a));
-    }
 }
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index 90a2cf7a70..b6c19cdef4 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -2958,113 +2958,6 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
     simd_shuffle2(a, b, [0, 2])
 }
 
-/// Adds two signed or unsigned 64-bit integer values, returning the
-/// lower 64 bits of the sum.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(paddq))]
-pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
-    paddq(a, b)
-}
-
-/// Multiplies 32-bit unsigned integer values contained in the lower bits
-/// of the two 64-bit integer vectors and returns the 64-bit unsigned
-/// product.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(pmuludq))]
-pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
-    pmuludq2(a, b)
-}
-
-/// Subtracts signed or unsigned 64-bit integer values and writes the
-/// difference to the corresponding bits in the destination.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(psubq))]
-pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
-    psubq(a, b)
-}
-
-/// Converts the two signed 32-bit integer elements of a 64-bit vector of
-/// `[2 x i32]` into two double-precision floating-point values, returned in a
-/// 128-bit vector of `[2 x double]`.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(cvtpi2pd))]
-pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
-    cvtpi2pd(a)
-}
-
-/// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with
-/// the specified 64-bit integer values.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-// no particular instruction to test
-pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
-    _mm_set_epi64x(transmute(e1), transmute(e0))
-}
-
-/// Initializes both values in a 128-bit vector of `[2 x i64]` with the
-/// specified 64-bit value.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-// no particular instruction to test
-pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
-    _mm_set_epi64x(transmute(a), transmute(a))
-}
-
-/// Constructs a 128-bit integer vector, initialized in reverse order
-/// with the specified 64-bit integral values.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-// no particular instruction to test
-pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
-    _mm_set_epi64x(transmute(e0), transmute(e1))
-}
-
-/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
-/// integer.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
-// instr?
-pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
-    transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
-}
-
-/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
-/// upper bits.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
-// instr?
-pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
-    _mm_set_epi64x(0, transmute(a))
-}
-
-/// Converts the two double-precision floating-point elements of a
-/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
-/// returned in a 64-bit vector of `[2 x i32]`.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(cvtpd2pi))]
-pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
-    cvtpd2pi(a)
-}
-
-/// Converts the two double-precision floating-point elements of a
-/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
-/// returned in a 64-bit vector of `[2 x i32]`.
-/// If the result of either conversion is inexact, the result is truncated
-/// (rounded towards zero) regardless of the current MXCSR setting.
-#[inline]
-#[target_feature(enable = "sse2,mmx")]
-#[cfg_attr(test, assert_instr(cvttpd2pi))]
-pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
-    cvttpd2pi(a)
-}
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.sse2.pause"]
@@ -3207,18 +3100,6 @@ extern "C" {
     fn storeudq(mem_addr: *mut i8, a: __m128i);
     #[link_name = "llvm.x86.sse2.storeu.pd"]
     fn storeupd(mem_addr: *mut i8, a: __m128d);
-    #[link_name = "llvm.x86.mmx.padd.q"]
-    fn paddq(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.pmulu.dq"]
-    fn pmuludq2(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.mmx.psub.q"]
-    fn psubq(a: __m64, b: __m64) -> __m64;
-    #[link_name = "llvm.x86.sse.cvtpi2pd"]
-    fn cvtpi2pd(a: __m64) -> __m128d;
-    #[link_name = "llvm.x86.sse.cvtpd2pi"]
-    fn cvtpd2pi(a: __m128d) -> __m64;
-    #[link_name = "llvm.x86.sse.cvttpd2pi"]
-    fn cvttpd2pi(a: __m128d) -> __m64;
 }
 
 #[cfg(test)]
@@ -5208,87 +5089,4 @@ mod tests {
         let r = _mm_castsi128_ps(a);
         assert_eq_m128(r, expected);
     }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_add_si64() {
-        let a = 1i64;
-        let b = 2i64;
-        let expected = 3i64;
-        let r = _mm_add_si64(transmute(a), transmute(b));
-        assert_eq!(transmute::<__m64, i64>(r), expected);
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_mul_su32() {
-        let a = _mm_setr_pi32(1, 2);
-        let b = _mm_setr_pi32(3, 4);
-        let expected = 3u64;
-        let r = _mm_mul_su32(a, b);
-        assert_eq_m64(r, transmute(expected));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_sub_si64() {
-        let a = 1i64;
-        let b = 2i64;
-        let expected = -1i64;
-        let r = _mm_sub_si64(transmute(a), transmute(b));
-        assert_eq!(transmute::<__m64, i64>(r), expected);
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_cvtpi32_pd() {
-        let a = _mm_setr_pi32(1, 2);
-        let expected = _mm_setr_pd(1., 2.);
-        let r = _mm_cvtpi32_pd(a);
-        assert_eq_m128d(r, expected);
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_set_epi64() {
-        let r = _mm_set_epi64(transmute(1i64), transmute(2i64));
-        assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_set1_epi64() {
-        let r = _mm_set1_epi64(transmute(1i64));
-        assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_setr_epi64() {
-        let r = _mm_setr_epi64(transmute(1i64), transmute(2i64));
-        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_movepi64_pi64() {
-        let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
-        assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_movpi64_epi64() {
-        let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
-        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_cvtpd_pi32() {
-        let a = _mm_setr_pd(5., 0.);
-        let r = _mm_cvtpd_pi32(a);
-        assert_eq_m64(r, _mm_setr_pi32(5, 0));
-    }
-
-    #[simd_test(enable = "sse2,mmx")]
-    unsafe fn test_mm_cvttpd_pi32() {
-        let a = _mm_setr_pd(5., 0.);
-        let r = _mm_cvttpd_pi32(a);
-        assert_eq_m64(r, _mm_setr_pi32(5, 0));
-
-        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
-        let r = _mm_cvttpd_pi32(a);
-        assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
-    }
 }
diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs
index 6a45603e44..669f51ce7b 100644
--- a/crates/core_arch/src/x86/ssse3.rs
+++ b/crates/core_arch/src/x86/ssse3.rs
@@ -299,169 +299,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
     transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
 }
 
-/// Computes the absolute value of packed 8-bit integers in `a` and
-/// return the unsigned results.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pabsb))]
-pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
-    pabsb(a)
-}
-
-/// Computes the absolute value of packed 8-bit integers in `a`, and returns the
-/// unsigned results.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pabsw))]
-pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
-    pabsw(a)
-}
-
-/// Computes the absolute value of packed 32-bit integers in `a`, and returns the
-/// unsigned results.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pabsd))]
-pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
-    pabsd(a)
-}
-
-/// Shuffles packed 8-bit integers in `a` according to shuffle control mask in
-/// the corresponding 8-bit element of `b`, and returns the results
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pshufb))]
-pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
-    pshufb(a, b)
-}
-
-/// Concatenates the two 64-bit integer vector operands, and right-shifts
-/// the result by the number of bytes specified in the immediate operand.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(palignr, n = 15))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
-    macro_rules! call {
-        ($imm8:expr) => {
-            palignrb(a, b, $imm8)
-        };
-    }
-    constify_imm8!(n, call)
-}
-
-/// Horizontally adds the adjacent pairs of values contained in 2 packed
-/// 64-bit vectors of `[4 x i16]`.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phaddw))]
-pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
-    phaddw(a, b)
-}
-
-/// Horizontally adds the adjacent pairs of values contained in 2 packed
-/// 64-bit vectors of `[2 x i32]`.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phaddd))]
-pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
-    phaddd(a, b)
-}
-
-/// Horizontally adds the adjacent pairs of values contained in 2 packed
-/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
-/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phaddsw))]
-pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
-    phaddsw(a, b)
-}
-
-/// Horizontally subtracts the adjacent pairs of values contained in 2
-/// packed 64-bit vectors of `[4 x i16]`.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phsubw))]
-pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
-    phsubw(a, b)
-}
-
-/// Horizontally subtracts the adjacent pairs of values contained in 2
-/// packed 64-bit vectors of `[2 x i32]`.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phsubd))]
-pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
-    phsubd(a, b)
-}
-
-/// Horizontally subtracts the adjacent pairs of values contained in 2
-/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
-/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
-/// saturated to 8000h.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(phsubsw))]
-pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
-    phsubsw(a, b)
-}
-
-/// Multiplies corresponding pairs of packed 8-bit unsigned integer
-/// values contained in the first source operand and packed 8-bit signed
-/// integer values contained in the second source operand, adds pairs of
-/// contiguous products with signed saturation, and writes the 16-bit sums to
-/// the corresponding bits in the destination.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pmaddubsw))]
-pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
-    pmaddubsw(a, b)
-}
-
-/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
-/// products to the 18 most significant bits by right-shifting, rounds the
-/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(pmulhrsw))]
-pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
-    pmulhrsw(a, b)
-}
-
-/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
-/// integer in `b` is negative, and returns the results.
-/// Element in result are zeroed out when the corresponding element in `b` is
-/// zero.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(psignb))]
-pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
-    psignb(a, b)
-}
-
-/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
-/// integer in `b` is negative, and returns the results.
-/// Element in result are zeroed out when the corresponding element in `b` is
-/// zero.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(psignw))]
-pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
-    psignw(a, b)
-}
-
-/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
-/// integer in `b` is negative, and returns the results.
-/// Element in result are zeroed out when the corresponding element in `b` is
-/// zero.
-#[inline]
-#[target_feature(enable = "ssse3,mmx")]
-#[cfg_attr(test, assert_instr(psignd))]
-pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
-    psignd(a, b)
-}
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.ssse3.pabs.b.128"]
@@ -508,54 +345,6 @@ extern "C" {
 
     #[link_name = "llvm.x86.ssse3.psign.d.128"]
     fn psignd128(a: i32x4, b: i32x4) -> i32x4;
-
-    #[link_name = "llvm.x86.ssse3.pabs.b"]
-    fn pabsb(a: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.pabs.w"]
-    fn pabsw(a: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.pabs.d"]
-    fn pabsd(a: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.pshuf.b"]
-    fn pshufb(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.mmx.palignr.b"]
-    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phadd.w"]
-    fn phaddw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phadd.d"]
-    fn phaddd(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phadd.sw"]
-    fn phaddsw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phsub.w"]
-    fn phsubw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phsub.d"]
-    fn phsubd(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.phsub.sw"]
-    fn phsubsw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
-    fn pmaddubsw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
-    fn pmulhrsw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.psign.b"]
-    fn psignb(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.psign.w"]
-    fn psignw(a: __m64, b: __m64) -> __m64;
-
-    #[link_name = "llvm.x86.ssse3.psign.d"]
-    fn psignd(a: __m64, b: __m64) -> __m64;
 }
 
 #[cfg(test)]
@@ -761,138 +550,4 @@ mod tests {
         let r = _mm_sign_epi32(a, b);
         assert_eq_m128i(r, expected);
     }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_abs_pi8() {
-        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
-        assert_eq_m64(r, _mm_set1_pi8(5));
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_abs_pi16() {
-        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
-        assert_eq_m64(r, _mm_set1_pi16(5));
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_abs_pi32() {
-        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
-        assert_eq_m64(r, _mm_set1_pi32(5));
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_shuffle_pi8() {
-        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
-        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
-        let r = _mm_shuffle_pi8(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_alignr_pi8() {
-        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
-        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
-        let r = _mm_alignr_pi8(a, b, 4);
-        assert_eq_m64(r, transmute(0x89abcdefffddeecc_u64));
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hadd_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let b = _mm_setr_pi16(4, 128, 4, 3);
-        let expected = _mm_setr_pi16(3, 7, 132, 7);
-        let r = _mm_hadd_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hadd_pi32() {
-        let a = _mm_setr_pi32(1, 2);
-        let b = _mm_setr_pi32(4, 128);
-        let expected = _mm_setr_pi32(3, 132);
-        let r = _mm_hadd_pi32(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hadds_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let b = _mm_setr_pi16(32767, 1, -32768, -1);
-        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
-        let r = _mm_hadds_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hsub_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let b = _mm_setr_pi16(4, 128, 4, 3);
-        let expected = _mm_setr_pi16(-1, -1, -124, 1);
-        let r = _mm_hsub_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hsub_pi32() {
-        let a = _mm_setr_pi32(1, 2);
-        let b = _mm_setr_pi32(4, 128);
-        let expected = _mm_setr_pi32(-1, -124);
-        let r = _mm_hsub_pi32(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_hsubs_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let b = _mm_setr_pi16(4, 128, 4, 3);
-        let expected = _mm_setr_pi16(-1, -1, -124, 1);
-        let r = _mm_hsubs_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_maddubs_pi16() {
-        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
-        let expected = _mm_setr_pi16(130, 24, 192, 194);
-        let r = _mm_maddubs_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_mulhrs_pi16() {
-        let a = _mm_setr_pi16(1, 2, 3, 4);
-        let b = _mm_setr_pi16(4, 32767, -1, -32768);
-        let expected = _mm_setr_pi16(0, 2, 0, -4);
-        let r = _mm_mulhrs_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_sign_pi8() {
-        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
-        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
-        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
-        let r = _mm_sign_pi8(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_sign_pi16() {
-        let a = _mm_setr_pi16(-1, 2, 3, 4);
-        let b = _mm_setr_pi16(1, -1, 1, 0);
-        let expected = _mm_setr_pi16(-1, -2, 3, 0);
-        let r = _mm_sign_pi16(a, b);
-        assert_eq_m64(r, expected);
-    }
-
-    #[simd_test(enable = "ssse3,mmx")]
-    unsafe fn test_mm_sign_pi32() {
-        let a = _mm_setr_pi32(-1, 2);
-        let b = _mm_setr_pi32(1, 0);
-        let expected = _mm_setr_pi32(-1, 0);
-        let r = _mm_sign_pi32(a, b);
-        assert_eq_m64(r, expected);
-    }
 }
diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs
index b9c4537da5..a3ca0e0820 100644
--- a/crates/core_arch/src/x86/test.rs
+++ b/crates/core_arch/src/x86/test.rs
@@ -2,15 +2,6 @@
 
 use crate::core_arch::x86::*;
 
-#[target_feature(enable = "mmx")]
-pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
-    union A {
-        a: __m64,
-        b: u64,
-    }
-    assert_eq!(A { a }.b, A { a: b }.b)
-}
-
 #[target_feature(enable = "sse2")]
 pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
     union A {
diff --git a/crates/simd-test-macro/src/lib.rs b/crates/simd-test-macro/src/lib.rs
index 6df48b1e6a..8e65c43992 100644
--- a/crates/simd-test-macro/src/lib.rs
+++ b/crates/simd-test-macro/src/lib.rs
@@ -44,8 +44,6 @@ pub fn simd_test(
         .map(String::from)
         .collect();
 
-    let mmx = target_features.iter().any(|s| s.starts_with("mmx"));
-
     let enable_feature = string(enable_feature);
     let item = TokenStream::from(item);
     let name = find_name(item.clone());
@@ -106,15 +104,6 @@ pub fn simd_test(
         TokenStream::new()
     };
 
-    let emms = if mmx {
-        // note: if the test requires MMX we need to clear the FPU
-        // registers once the test finishes before interfacing with
-        // other x87 code:
-        quote! { unsafe { super::_mm_empty() }; }
-    } else {
-        TokenStream::new()
-    };
-
     let ret: TokenStream = quote_spanned! {
         proc_macro2::Span::call_site() =>
         #[allow(non_snake_case)]
@@ -123,7 +112,6 @@ pub fn simd_test(
         fn #name() {
             if #force_test | (#cfg_target_features) {
                 let v = unsafe { #name() };
-                #emms
                 return v;
             } else {
                 ::stdarch_test::assert_skip_test_ok(stringify!(#name));