From 9971391025f50848f792d215a8794c9dbd1287c4 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Tue, 17 Jul 2018 15:34:21 +0200
Subject: [PATCH 1/2] remove portable vector types

---
 coresimd/aarch64/crypto.rs                 | 111 ++--
 coresimd/aarch64/neon.rs                   | 133 ++---
 coresimd/arm/neon.rs                       | 395 ++-----------
 coresimd/macros.rs                         |  41 --
 coresimd/mips/msa.rs                       |  20 +-
 coresimd/mod.rs                            |  13 +-
 coresimd/powerpc/altivec.rs                | 379 ++----------
 coresimd/powerpc64/vsx.rs                  | 204 +------
 coresimd/ppsv/api/arithmetic_ops.rs        | 147 -----
 coresimd/ppsv/api/arithmetic_reductions.rs | 261 ---------
 coresimd/ppsv/api/arithmetic_scalar_ops.rs | 202 -------
 coresimd/ppsv/api/bitwise_ops.rs           | 179 ------
 coresimd/ppsv/api/bitwise_reductions.rs    | 194 ------
 coresimd/ppsv/api/bitwise_scalar_ops.rs    | 222 -------
 coresimd/ppsv/api/cmp.rs                   | 142 -----
 coresimd/ppsv/api/default.rs               |  27 -
 coresimd/ppsv/api/eq.rs                    |   8 -
 coresimd/ppsv/api/float_math.rs            | 182 ------
 coresimd/ppsv/api/fmt.rs                   | 152 -----
 coresimd/ppsv/api/from.rs                  |  48 --
 coresimd/ppsv/api/from_bits.rs             |  47 --
 coresimd/ppsv/api/hash.rs                  |  40 --
 coresimd/ppsv/api/load_store.rs            | 312 ----------
 coresimd/ppsv/api/masks.rs                 | 144 -----
 coresimd/ppsv/api/masks_reductions.rs      |  84 ---
 coresimd/ppsv/api/masks_select.rs          |  59 --
 coresimd/ppsv/api/minimal.rs               | 141 -----
 coresimd/ppsv/api/minmax.rs                | 148 -----
 coresimd/ppsv/api/minmax_reductions.rs     |  85 ---
 coresimd/ppsv/api/mod.rs                   | 266 ---------
 coresimd/ppsv/api/neg.rs                   |  43 --
 coresimd/ppsv/api/partial_eq.rs            |  47 --
 coresimd/ppsv/api/scalar_shifts.rs         | 120 ----
 coresimd/ppsv/api/shifts.rs                |  95 ---
 coresimd/ppsv/api/swap_bytes.rs            | 130 ----
 coresimd/ppsv/codegen/abs.rs               |  77 ---
 coresimd/ppsv/codegen/cos.rs               |  78 ---
 coresimd/ppsv/codegen/fma.rs               |  51 --
 coresimd/ppsv/codegen/masks_reductions.rs  | 651 ---------------------
 coresimd/ppsv/codegen/mod.rs               |  13 -
 coresimd/ppsv/codegen/sin.rs               |  78 ---
 coresimd/ppsv/codegen/sqrt.rs              |  77 ---
 coresimd/ppsv/codegen/swap_bytes.rs        | 140 -----
 coresimd/ppsv/codegen/wrapping.rs          |  42 --
 coresimd/ppsv/mod.rs                       |  96 ---
 coresimd/ppsv/v128.rs                      | 550 -----------------
 coresimd/ppsv/v16.rs                       |  60 --
 coresimd/ppsv/v256.rs                      | 472 ---------------
 coresimd/ppsv/v32.rs                       | 156 -----
 coresimd/ppsv/v512.rs                      | 451 --------------
 coresimd/ppsv/v64.rs                       | 388 ------------
 coresimd/simd.rs                           | 161 +++++
 coresimd/x86/avx.rs                        |  16 +-
 coresimd/x86/mod.rs                        | 117 ----
 crates/coresimd/src/lib.rs                 |  24 -
 crates/coresimd/tests/endian_tests.rs      | 278 ---------
 crates/coresimd/tests/reductions.rs        | 510 ----------------
 crates/coresimd/tests/v128.rs              |  56 --
 crates/coresimd/tests/v16.rs               |  56 --
 crates/coresimd/tests/v256.rs              |  56 --
 crates/coresimd/tests/v32.rs               |  56 --
 crates/coresimd/tests/v512.rs              |  56 --
 crates/coresimd/tests/v64.rs               |  56 --
 crates/stdsimd/Cargo.toml                  |   4 -
 examples/nbody.rs                          | 243 --------
 stdsimd/mod.rs                             |   3 -
 66 files changed, 387 insertions(+), 9506 deletions(-)
 delete mode 100644 coresimd/ppsv/api/arithmetic_ops.rs
 delete mode 100644 coresimd/ppsv/api/arithmetic_reductions.rs
 delete mode 100644 coresimd/ppsv/api/arithmetic_scalar_ops.rs
 delete mode 100644 coresimd/ppsv/api/bitwise_ops.rs
 delete mode 100644 coresimd/ppsv/api/bitwise_reductions.rs
 delete mode 100644 coresimd/ppsv/api/bitwise_scalar_ops.rs
 delete mode 100644 coresimd/ppsv/api/cmp.rs
 delete mode 100644 coresimd/ppsv/api/default.rs
 delete mode 100644 coresimd/ppsv/api/eq.rs
 delete mode 100644 coresimd/ppsv/api/float_math.rs
 delete mode 100644 coresimd/ppsv/api/fmt.rs
 delete mode 100644 coresimd/ppsv/api/from.rs
 delete mode 100644 coresimd/ppsv/api/from_bits.rs
 delete mode 100644 coresimd/ppsv/api/hash.rs
 delete mode 100644 coresimd/ppsv/api/load_store.rs
 delete mode 100644 coresimd/ppsv/api/masks.rs
 delete mode 100644 coresimd/ppsv/api/masks_reductions.rs
 delete mode 100644 coresimd/ppsv/api/masks_select.rs
 delete mode 100644 coresimd/ppsv/api/minimal.rs
 delete mode 100755 coresimd/ppsv/api/minmax.rs
 delete mode 100644 coresimd/ppsv/api/minmax_reductions.rs
 delete mode 100644 coresimd/ppsv/api/mod.rs
 delete mode 100644 coresimd/ppsv/api/neg.rs
 delete mode 100644 coresimd/ppsv/api/partial_eq.rs
 delete mode 100644 coresimd/ppsv/api/scalar_shifts.rs
 delete mode 100644 coresimd/ppsv/api/shifts.rs
 delete mode 100644 coresimd/ppsv/api/swap_bytes.rs
 delete mode 100644 coresimd/ppsv/codegen/abs.rs
 delete mode 100644 coresimd/ppsv/codegen/cos.rs
 delete mode 100644 coresimd/ppsv/codegen/fma.rs
 delete mode 100644 coresimd/ppsv/codegen/masks_reductions.rs
 delete mode 100644 coresimd/ppsv/codegen/mod.rs
 delete mode 100644 coresimd/ppsv/codegen/sin.rs
 delete mode 100644 coresimd/ppsv/codegen/sqrt.rs
 delete mode 100644 coresimd/ppsv/codegen/swap_bytes.rs
 delete mode 100644 coresimd/ppsv/codegen/wrapping.rs
 delete mode 100644 coresimd/ppsv/mod.rs
 delete mode 100644 coresimd/ppsv/v128.rs
 delete mode 100644 coresimd/ppsv/v16.rs
 delete mode 100644 coresimd/ppsv/v256.rs
 delete mode 100644 coresimd/ppsv/v32.rs
 delete mode 100644 coresimd/ppsv/v512.rs
 delete mode 100644 coresimd/ppsv/v64.rs
 create mode 100644 coresimd/simd.rs
 delete mode 100644 crates/coresimd/tests/endian_tests.rs
 delete mode 100644 crates/coresimd/tests/reductions.rs
 delete mode 100644 crates/coresimd/tests/v128.rs
 delete mode 100644 crates/coresimd/tests/v16.rs
 delete mode 100644 crates/coresimd/tests/v256.rs
 delete mode 100644 crates/coresimd/tests/v32.rs
 delete mode 100644 crates/coresimd/tests/v512.rs
 delete mode 100644 crates/coresimd/tests/v64.rs
 delete mode 100644 examples/nbody.rs

diff --git a/coresimd/aarch64/crypto.rs b/coresimd/aarch64/crypto.rs
index 75f247585c..8752945146 100644
--- a/coresimd/aarch64/crypto.rs
+++ b/coresimd/aarch64/crypto.rs
@@ -185,17 +185,15 @@ pub unsafe fn vsha256su1q_u32(
 #[cfg(test)]
 mod tests {
     use coresimd::aarch64::*;
-    use simd::*;
+    use coresimd::simd::*;
     use std::mem;
     use stdsimd_test::simd_test;
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vaeseq_u8() {
-        let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)
-            .into_bits();
-        let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)
-            .into_bits();
-        let r: u8x16 = vaeseq_u8(data, key).into_bits();
+        let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
+        let key =  ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+        let r: u8x16 = ::mem::transmute(vaeseq_u8(data, key));
         assert_eq!(
             r,
             u8x16::new(
@@ -207,11 +205,9 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vaesdq_u8() {
-        let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)
-            .into_bits();
-        let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)
-            .into_bits();
-        let r: u8x16 = vaesdq_u8(data, key).into_bits();
+        let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
+        let key = ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+        let r: u8x16 = ::mem::transmute(vaesdq_u8(data, key));
         assert_eq!(
             r,
             u8x16::new(
@@ -222,9 +218,8 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vaesmcq_u8() {
-        let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)
-            .into_bits();
-        let r: u8x16 = vaesmcq_u8(data).into_bits();
+        let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
+        let r: u8x16 = ::mem::transmute(vaesmcq_u8(data));
         assert_eq!(
             r,
             u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30)
@@ -233,9 +228,8 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vaesimcq_u8() {
-        let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)
-            .into_bits();
-        let r: u8x16 = vaesimcq_u8(data).into_bits();
+        let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
+        let r: u8x16 = ::mem::transmute(vaesimcq_u8(data));
         assert_eq!(
             r,
             u8x16::new(
@@ -253,23 +247,20 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha1su0q_u32() {
-        let r: u32x4 = vsha1su0q_u32(
-            u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)
-                .into_bits(),
-            u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)
-                .into_bits(),
-            u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)
-                .into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha1su0q_u32(
+            ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
+            ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
+            ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
+        ));
         assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678));
     }
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha1su1q_u32() {
-        let r: u32x4 = vsha1su1q_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha1su1q_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0)
@@ -278,11 +269,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha1cq_u32() {
-        let r: u32x4 = vsha1cq_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
+        let r: u32x4 = ::mem::transmute(vsha1cq_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
             0x1234,
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168)
@@ -291,11 +282,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha1pq_u32() {
-        let r: u32x4 = vsha1pq_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
+        let r: u32x4 = ::mem::transmute(vsha1pq_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
             0x1234,
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47)
@@ -304,11 +295,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha1mq_u32() {
-        let r: u32x4 = vsha1mq_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
+        let r: u32x4 = ::mem::transmute(vsha1mq_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
             0x1234,
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278)
@@ -317,11 +308,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha256hq_u32() {
-        let r: u32x4 = vsha256hq_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha256hq_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef)
@@ -330,11 +321,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha256h2q_u32() {
-        let r: u32x4 = vsha256h2q_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha256h2q_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516)
@@ -343,10 +334,10 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha256su0q_u32() {
-        let r: u32x4 = vsha256su0q_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha256su0q_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152)
@@ -355,11 +346,11 @@ mod tests {
 
     #[simd_test(enable = "crypto")]
     unsafe fn test_vsha256su1q_u32() {
-        let r: u32x4 = vsha256su1q_u32(
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-            u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(),
-        ).into_bits();
+        let r: u32x4 = ::mem::transmute(vsha256su1q_u32(
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+            ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
+        ));
         assert_eq!(
             r,
             u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f)
diff --git a/coresimd/aarch64/neon.rs b/coresimd/aarch64/neon.rs
index 9656c36302..cc43530bbc 100644
--- a/coresimd/aarch64/neon.rs
+++ b/coresimd/aarch64/neon.rs
@@ -3,7 +3,6 @@
 // FIXME: replace neon with asimd
 
 use coresimd::arm::*;
-use coresimd::simd::*;
 use coresimd::simd_llvm::simd_add;
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -14,34 +13,6 @@ types! {
     /// ARM-specific 128-bit wide vector of two packed `f64`.
     pub struct float64x2_t(f64, f64);
 }
-impl_from_bits_!(
-    float64x1_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    float64x2_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
 
 #[allow(improper_ctypes)]
 extern "C" {
@@ -549,7 +520,7 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(test)]
 mod tests {
     use coresimd::aarch64::*;
-    use simd::*;
+    use coresimd::simd::*;
     use std::mem;
     use stdsimd_test::simd_test;
 
@@ -568,7 +539,7 @@ mod tests {
         let a = f64x2::new(1., 2.);
         let b = f64x2::new(8., 7.);
         let e = f64x2::new(9., 9.);
-        let r: f64x2 = vaddq_f64(a.into_bits(), b.into_bits()).into_bits();
+        let r: f64x2 = ::mem::transmute(vaddq_f64(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -594,205 +565,205 @@ mod tests {
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_s8() {
-        let r = vmaxv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits());
+        let r = vmaxv_s8(::mem::transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
         assert_eq!(r, 7_i8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_s8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = vmaxvq_s8(i8x16::new(
+        let r = vmaxvq_s8(::mem::transmute(i8x16::new(
             1, 2, 3, 4,
             -16, 6, 7, 5,
             8, 1, 1, 1,
             1, 1, 1, 1,
-        ).into_bits());
+        )));
         assert_eq!(r, 8_i8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_s16() {
-        let r = vmaxv_s16(i16x4::new(1, 2, -4, 3).into_bits());
+        let r = vmaxv_s16(::mem::transmute(i16x4::new(1, 2, -4, 3)));
         assert_eq!(r, 3_i16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_s16() {
-        let r = vmaxvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits());
+        let r = vmaxvq_s16(::mem::transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
         assert_eq!(r, 7_i16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_s32() {
-        let r = vmaxv_s32(i32x2::new(1, -4).into_bits());
+        let r = vmaxv_s32(::mem::transmute(i32x2::new(1, -4)));
         assert_eq!(r, 1_i32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_s32() {
-        let r = vmaxvq_s32(i32x4::new(1, 2, -32, 4).into_bits());
+        let r = vmaxvq_s32(::mem::transmute(i32x4::new(1, 2, -32, 4)));
         assert_eq!(r, 4_i32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_u8() {
-        let r = vmaxv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits());
+        let r = vmaxv_u8(::mem::transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
         assert_eq!(r, 8_u8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_u8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = vmaxvq_u8(u8x16::new(
+        let r = vmaxvq_u8(::mem::transmute(u8x16::new(
             1, 2, 3, 4,
             16, 6, 7, 5,
             8, 1, 1, 1,
             1, 1, 1, 1,
-        ).into_bits());
+        )));
         assert_eq!(r, 16_u8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_u16() {
-        let r = vmaxv_u16(u16x4::new(1, 2, 4, 3).into_bits());
+        let r = vmaxv_u16(::mem::transmute(u16x4::new(1, 2, 4, 3)));
         assert_eq!(r, 4_u16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_u16() {
-        let r = vmaxvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits());
+        let r = vmaxvq_u16(::mem::transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
         assert_eq!(r, 16_u16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_u32() {
-        let r = vmaxv_u32(u32x2::new(1, 4).into_bits());
+        let r = vmaxv_u32(::mem::transmute(u32x2::new(1, 4)));
         assert_eq!(r, 4_u32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_u32() {
-        let r = vmaxvq_u32(u32x4::new(1, 2, 32, 4).into_bits());
+        let r = vmaxvq_u32(::mem::transmute(u32x4::new(1, 2, 32, 4)));
         assert_eq!(r, 32_u32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxv_f32() {
-        let r = vmaxv_f32(f32x2::new(1., 4.).into_bits());
+        let r = vmaxv_f32(::mem::transmute(f32x2::new(1., 4.)));
         assert_eq!(r, 4_f32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_f32() {
-        let r = vmaxvq_f32(f32x4::new(1., 2., 32., 4.).into_bits());
+        let r = vmaxvq_f32(::mem::transmute(f32x4::new(1., 2., 32., 4.)));
         assert_eq!(r, 32_f32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmaxvq_f64() {
-        let r = vmaxvq_f64(f64x2::new(1., 4.).into_bits());
+        let r = vmaxvq_f64(::mem::transmute(f64x2::new(1., 4.)));
         assert_eq!(r, 4_f64);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_s8() {
-        let r = vminv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits());
+        let r = vminv_s8(::mem::transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
         assert_eq!(r, -8_i8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_s8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = vminvq_s8(i8x16::new(
+        let r = vminvq_s8(::mem::transmute(i8x16::new(
             1, 2, 3, 4,
             -16, 6, 7, 5,
             8, 1, 1, 1,
             1, 1, 1, 1,
-        ).into_bits());
+        )));
         assert_eq!(r, -16_i8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_s16() {
-        let r = vminv_s16(i16x4::new(1, 2, -4, 3).into_bits());
+        let r = vminv_s16(::mem::transmute(i16x4::new(1, 2, -4, 3)));
         assert_eq!(r, -4_i16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_s16() {
-        let r = vminvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits());
+        let r = vminvq_s16(::mem::transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
         assert_eq!(r, -16_i16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_s32() {
-        let r = vminv_s32(i32x2::new(1, -4).into_bits());
+        let r = vminv_s32(::mem::transmute(i32x2::new(1, -4)));
         assert_eq!(r, -4_i32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_s32() {
-        let r = vminvq_s32(i32x4::new(1, 2, -32, 4).into_bits());
+        let r = vminvq_s32(::mem::transmute(i32x4::new(1, 2, -32, 4)));
         assert_eq!(r, -32_i32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_u8() {
-        let r = vminv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits());
+        let r = vminv_u8(::mem::transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
         assert_eq!(r, 1_u8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_u8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = vminvq_u8(u8x16::new(
+        let r = vminvq_u8(::mem::transmute(u8x16::new(
             1, 2, 3, 4,
             16, 6, 7, 5,
             8, 1, 1, 1,
             1, 1, 1, 1,
-        ).into_bits());
+        )));
         assert_eq!(r, 1_u8);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_u16() {
-        let r = vminv_u16(u16x4::new(1, 2, 4, 3).into_bits());
+        let r = vminv_u16(::mem::transmute(u16x4::new(1, 2, 4, 3)));
         assert_eq!(r, 1_u16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_u16() {
-        let r = vminvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits());
+        let r = vminvq_u16(::mem::transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
         assert_eq!(r, 1_u16);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_u32() {
-        let r = vminv_u32(u32x2::new(1, 4).into_bits());
+        let r = vminv_u32(::mem::transmute(u32x2::new(1, 4)));
         assert_eq!(r, 1_u32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_u32() {
-        let r = vminvq_u32(u32x4::new(1, 2, 32, 4).into_bits());
+        let r = vminvq_u32(::mem::transmute(u32x4::new(1, 2, 32, 4)));
         assert_eq!(r, 1_u32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminv_f32() {
-        let r = vminv_f32(f32x2::new(1., 4.).into_bits());
+        let r = vminv_f32(::mem::transmute(f32x2::new(1., 4.)));
         assert_eq!(r, 1_f32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_f32() {
-        let r = vminvq_f32(f32x4::new(1., 2., 32., 4.).into_bits());
+        let r = vminvq_f32(::mem::transmute(f32x4::new(1., 2., 32., 4.)));
         assert_eq!(r, 1_f32);
     }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vminvq_f64() {
-        let r = vminvq_f64(f64x2::new(1., 4.).into_bits());
+        let r = vminvq_f64(::mem::transmute(f64x2::new(1., 4.)));
         assert_eq!(r, 1_f64);
     }
 
@@ -804,7 +775,7 @@ mod tests {
         let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
         let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
-        let r: i8x16 = vpminq_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x16 = ::mem::transmute(vpminq_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -813,7 +784,7 @@ mod tests {
         let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
         let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = i16x8::new(-2, 3, 5, 7, 0, 2, 4, 6);
-        let r: i16x8 = vpminq_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x8 = ::mem::transmute(vpminq_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -822,7 +793,7 @@ mod tests {
         let a = i32x4::new(1, -2, 3, 4);
         let b = i32x4::new(0, 3, 2, 5);
         let e = i32x4::new(-2, 3, 0, 2);
-        let r: i32x4 = vpminq_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x4 = ::mem::transmute(vpminq_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -834,7 +805,7 @@ mod tests {
         let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
         let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
-        let r: u8x16 = vpminq_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x16 = ::mem::transmute(vpminq_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -843,7 +814,7 @@ mod tests {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = u16x8::new(1, 3, 5, 7, 0, 2, 4, 6);
-        let r: u16x8 = vpminq_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x8 = ::mem::transmute(vpminq_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -852,7 +823,7 @@ mod tests {
         let a = u32x4::new(1, 2, 3, 4);
         let b = u32x4::new(0, 3, 2, 5);
         let e = u32x4::new(1, 3, 0, 2);
-        let r: u32x4 = vpminq_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x4 = ::mem::transmute(vpminq_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -861,7 +832,7 @@ mod tests {
         let a = f32x4::new(1., -2., 3., 4.);
         let b = f32x4::new(0., 3., 2., 5.);
         let e = f32x4::new(-2., 3., 0., 2.);
-        let r: f32x4 = vpminq_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x4 = ::mem::transmute(vpminq_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -870,7 +841,7 @@ mod tests {
         let a = f64x2::new(1., -2.);
         let b = f64x2::new(0., 3.);
         let e = f64x2::new(-2., 0.);
-        let r: f64x2 = vpminq_f64(a.into_bits(), b.into_bits()).into_bits();
+        let r: f64x2 = ::mem::transmute(vpminq_f64(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -882,7 +853,7 @@ mod tests {
         let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
         let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
-        let r: i8x16 = vpmaxq_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x16 = ::mem::transmute(vpmaxq_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -891,7 +862,7 @@ mod tests {
         let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
         let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = i16x8::new(1, 4, 6, 8, 3, 5, 7, 9);
-        let r: i16x8 = vpmaxq_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x8 = ::mem::transmute(vpmaxq_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -900,7 +871,7 @@ mod tests {
         let a = i32x4::new(1, -2, 3, 4);
         let b = i32x4::new(0, 3, 2, 5);
         let e = i32x4::new(1, 4, 3, 5);
-        let r: i32x4 = vpmaxq_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x4 = ::mem::transmute(vpmaxq_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -912,7 +883,7 @@ mod tests {
         let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
         let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
-        let r: u8x16 = vpmaxq_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x16 = ::mem::transmute(vpmaxq_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -921,7 +892,7 @@ mod tests {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = u16x8::new(2, 4, 6, 8, 3, 5, 7, 9);
-        let r: u16x8 = vpmaxq_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x8 = ::mem::transmute(vpmaxq_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -930,7 +901,7 @@ mod tests {
         let a = u32x4::new(1, 2, 3, 4);
         let b = u32x4::new(0, 3, 2, 5);
         let e = u32x4::new(2, 4, 3, 5);
-        let r: u32x4 = vpmaxq_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x4 = ::mem::transmute(vpmaxq_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -939,7 +910,7 @@ mod tests {
         let a = f32x4::new(1., -2., 3., 4.);
         let b = f32x4::new(0., 3., 2., 5.);
         let e = f32x4::new(1., 4., 3., 5.);
-        let r: f32x4 = vpmaxq_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x4 = ::mem::transmute(vpmaxq_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -948,7 +919,7 @@ mod tests {
         let a = f64x2::new(1., -2.);
         let b = f64x2::new(0., 3.);
         let e = f64x2::new(1., 3.);
-        let r: f64x2 = vpmaxq_f64(a.into_bits(), b.into_bits()).into_bits();
+        let r: f64x2 = ::mem::transmute(vpmaxq_f64(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 }
diff --git a/coresimd/arm/neon.rs b/coresimd/arm/neon.rs
index f00096505a..1734de1efb 100644
--- a/coresimd/arm/neon.rs
+++ b/coresimd/arm/neon.rs
@@ -1,6 +1,5 @@
 //! ARMv7 NEON intrinsics
 
-use coresimd::simd::*;
 use coresimd::simd_llvm::*;
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -66,304 +65,6 @@ types! {
     pub struct uint64x2_t(u64, u64);
 }
 
-impl_from_bits_!(
-    int8x8_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    uint8x8_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    int16x4_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    uint16x4_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    int32x2_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    uint32x2_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    int64x1_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    float32x2_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    poly8x8_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from_bits_!(
-    poly16x4_t: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-
-impl_from_bits_!(
-    int8x16_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    uint8x16_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    poly8x16_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    int16x8_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    uint16x8_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    poly16x8_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    int32x4_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    uint32x4_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    float32x4_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    int64x2_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    uint64x2_t: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[cfg_attr(
@@ -949,7 +650,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[cfg(test)]
 mod tests {
     use coresimd::arm::*;
-    use simd::*;
+    use coresimd::simd::*;
     use std::mem;
     use stdsimd_test::simd_test;
 
@@ -958,7 +659,7 @@ mod tests {
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r: i8x8 = vadd_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x8 = ::mem::transmute(vadd_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -967,7 +668,7 @@ mod tests {
         let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
         let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
-        let r: i8x16 = vaddq_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x16 = ::mem::transmute(vaddq_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -976,7 +677,7 @@ mod tests {
         let a = i16x4::new(1, 2, 3, 4);
         let b = i16x4::new(8, 7, 6, 5);
         let e = i16x4::new(9, 9, 9, 9);
-        let r: i16x4 = vadd_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x4 = ::mem::transmute(vadd_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -985,7 +686,7 @@ mod tests {
         let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r: i16x8 = vaddq_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x8 = ::mem::transmute(vaddq_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -994,7 +695,7 @@ mod tests {
         let a = i32x2::new(1, 2);
         let b = i32x2::new(8, 7);
         let e = i32x2::new(9, 9);
-        let r: i32x2 = vadd_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x2 = ::mem::transmute(vadd_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1003,7 +704,7 @@ mod tests {
         let a = i32x4::new(1, 2, 3, 4);
         let b = i32x4::new(8, 7, 6, 5);
         let e = i32x4::new(9, 9, 9, 9);
-        let r: i32x4 = vaddq_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x4 = ::mem::transmute(vaddq_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1012,7 +713,7 @@ mod tests {
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r: u8x8 = vadd_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x8 = ::mem::transmute(vadd_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1021,7 +722,7 @@ mod tests {
         let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
         let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
-        let r: u8x16 = vaddq_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x16 = ::mem::transmute(vaddq_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1030,7 +731,7 @@ mod tests {
         let a = u16x4::new(1, 2, 3, 4);
         let b = u16x4::new(8, 7, 6, 5);
         let e = u16x4::new(9, 9, 9, 9);
-        let r: u16x4 = vadd_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x4 = ::mem::transmute(vadd_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1039,7 +740,7 @@ mod tests {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r: u16x8 = vaddq_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x8 = ::mem::transmute(vaddq_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1048,7 +749,7 @@ mod tests {
         let a = u32x2::new(1, 2);
         let b = u32x2::new(8, 7);
         let e = u32x2::new(9, 9);
-        let r: u32x2 = vadd_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x2 = ::mem::transmute(vadd_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1057,7 +758,7 @@ mod tests {
         let a = u32x4::new(1, 2, 3, 4);
         let b = u32x4::new(8, 7, 6, 5);
         let e = u32x4::new(9, 9, 9, 9);
-        let r: u32x4 = vaddq_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x4 = ::mem::transmute(vaddq_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1066,7 +767,7 @@ mod tests {
         let a = f32x2::new(1., 2.);
         let b = f32x2::new(8., 7.);
         let e = f32x2::new(9., 9.);
-        let r: f32x2 = vadd_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x2 = ::mem::transmute(vadd_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1075,7 +776,7 @@ mod tests {
         let a = f32x4::new(1., 2., 3., 4.);
         let b = f32x4::new(8., 7., 6., 5.);
         let e = f32x4::new(9., 9., 9., 9.);
-        let r: f32x4 = vaddq_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x4 = ::mem::transmute(vaddq_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1085,7 +786,7 @@ mod tests {
         let a = i8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as i16);
         let e = i16x8::new(v, v, v, v, v, v, v, v);
-        let r: i16x8 = vaddl_s8(a.into_bits(), a.into_bits()).into_bits();
+        let r: i16x8 = ::mem::transmute(vaddl_s8(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1095,7 +796,7 @@ mod tests {
         let a = i16x4::new(v, v, v, v);
         let v = 2 * (v as i32);
         let e = i32x4::new(v, v, v, v);
-        let r: i32x4 = vaddl_s16(a.into_bits(), a.into_bits()).into_bits();
+        let r: i32x4 = ::mem::transmute(vaddl_s16(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1105,7 +806,7 @@ mod tests {
         let a = i32x2::new(v, v);
         let v = 2 * (v as i64);
         let e = i64x2::new(v, v);
-        let r: i64x2 = vaddl_s32(a.into_bits(), a.into_bits()).into_bits();
+        let r: i64x2 = ::mem::transmute(vaddl_s32(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1115,7 +816,7 @@ mod tests {
         let a = u8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as u16);
         let e = u16x8::new(v, v, v, v, v, v, v, v);
-        let r: u16x8 = vaddl_u8(a.into_bits(), a.into_bits()).into_bits();
+        let r: u16x8 = ::mem::transmute(vaddl_u8(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1125,7 +826,7 @@ mod tests {
         let a = u16x4::new(v, v, v, v);
         let v = 2 * (v as u32);
         let e = u32x4::new(v, v, v, v);
-        let r: u32x4 = vaddl_u16(a.into_bits(), a.into_bits()).into_bits();
+        let r: u32x4 = ::mem::transmute(vaddl_u16(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1135,7 +836,7 @@ mod tests {
         let a = u32x2::new(v, v);
         let v = 2 * (v as u64);
         let e = u64x2::new(v, v);
-        let r: u64x2 = vaddl_u32(a.into_bits(), a.into_bits()).into_bits();
+        let r: u64x2 = ::mem::transmute(vaddl_u32(::mem::transmute(a), ::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1143,7 +844,7 @@ mod tests {
     unsafe fn test_vmovn_s16() {
         let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = vmovn_s16(a.into_bits()).into_bits();
+        let r: i8x8 = ::mem::transmute(vmovn_s16(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1151,7 +852,7 @@ mod tests {
     unsafe fn test_vmovn_s32() {
         let a = i32x4::new(1, 2, 3, 4);
         let e = i16x4::new(1, 2, 3, 4);
-        let r: i16x4 = vmovn_s32(a.into_bits()).into_bits();
+        let r: i16x4 = ::mem::transmute(vmovn_s32(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1159,7 +860,7 @@ mod tests {
     unsafe fn test_vmovn_s64() {
         let a = i64x2::new(1, 2);
         let e = i32x2::new(1, 2);
-        let r: i32x2 = vmovn_s64(a.into_bits()).into_bits();
+        let r: i32x2 = ::mem::transmute(vmovn_s64(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1167,7 +868,7 @@ mod tests {
     unsafe fn test_vmovn_u16() {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = vmovn_u16(a.into_bits()).into_bits();
+        let r: u8x8 = ::mem::transmute(vmovn_u16(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1175,7 +876,7 @@ mod tests {
     unsafe fn test_vmovn_u32() {
         let a = u32x4::new(1, 2, 3, 4);
         let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = vmovn_u32(a.into_bits()).into_bits();
+        let r: u16x4 = ::mem::transmute(vmovn_u32(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1183,7 +884,7 @@ mod tests {
     unsafe fn test_vmovn_u64() {
         let a = u64x2::new(1, 2);
         let e = u32x2::new(1, 2);
-        let r: u32x2 = vmovn_u64(a.into_bits()).into_bits();
+        let r: u32x2 = ::mem::transmute(vmovn_u64(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1191,7 +892,7 @@ mod tests {
     unsafe fn test_vmovl_s8() {
         let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = vmovl_s8(a.into_bits()).into_bits();
+        let r: i16x8 = ::mem::transmute(vmovl_s8(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1199,7 +900,7 @@ mod tests {
     unsafe fn test_vmovl_s16() {
         let e = i32x4::new(1, 2, 3, 4);
         let a = i16x4::new(1, 2, 3, 4);
-        let r: i32x4 = vmovl_s16(a.into_bits()).into_bits();
+        let r: i32x4 = ::mem::transmute(vmovl_s16(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1207,7 +908,7 @@ mod tests {
     unsafe fn test_vmovl_s32() {
         let e = i64x2::new(1, 2);
         let a = i32x2::new(1, 2);
-        let r: i64x2 = vmovl_s32(a.into_bits()).into_bits();
+        let r: i64x2 = ::mem::transmute(vmovl_s32(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1215,7 +916,7 @@ mod tests {
     unsafe fn test_vmovl_u8() {
         let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16x8 = vmovl_u8(a.into_bits()).into_bits();
+        let r: u16x8 = ::mem::transmute(vmovl_u8(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1223,7 +924,7 @@ mod tests {
     unsafe fn test_vmovl_u16() {
         let e = u32x4::new(1, 2, 3, 4);
         let a = u16x4::new(1, 2, 3, 4);
-        let r: u32x4 = vmovl_u16(a.into_bits()).into_bits();
+        let r: u32x4 = ::mem::transmute(vmovl_u16(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1231,7 +932,7 @@ mod tests {
     unsafe fn test_vmovl_u32() {
         let e = u64x2::new(1, 2);
         let a = u32x2::new(1, 2);
-        let r: u64x2 = vmovl_u32(a.into_bits()).into_bits();
+        let r: u64x2 = ::mem::transmute(vmovl_u32(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1239,7 +940,7 @@ mod tests {
     unsafe fn test_vrsqrt_f32() {
         let a = f32x2::new(1.0, 2.0);
         let e = f32x2::new(0.9980469, 0.7050781);
-        let r: f32x2 = vrsqrte_f32(a.into_bits()).into_bits();
+        let r: f32x2 = ::mem::transmute(vrsqrte_f32(::mem::transmute(a)));
         assert_eq!(r, e);
     }
 
@@ -1248,7 +949,7 @@ mod tests {
         let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
         let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6);
-        let r: i8x8 = vpmin_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x8 = ::mem::transmute(vpmin_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1257,7 +958,7 @@ mod tests {
         let a = i16x4::new(1, 2, 3, -4);
         let b = i16x4::new(0, 3, 2, 5);
         let e = i16x4::new(1, -4, 0, 2);
-        let r: i16x4 = vpmin_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x4 = ::mem::transmute(vpmin_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1266,7 +967,7 @@ mod tests {
         let a = i32x2::new(1, -2);
         let b = i32x2::new(0, 3);
         let e = i32x2::new(-2, 0);
-        let r: i32x2 = vpmin_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x2 = ::mem::transmute(vpmin_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1275,7 +976,7 @@ mod tests {
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6);
-        let r: u8x8 = vpmin_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x8 = ::mem::transmute(vpmin_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1284,7 +985,7 @@ mod tests {
         let a = u16x4::new(1, 2, 3, 4);
         let b = u16x4::new(0, 3, 2, 5);
         let e = u16x4::new(1, 3, 0, 2);
-        let r: u16x4 = vpmin_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x4 = ::mem::transmute(vpmin_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1293,7 +994,7 @@ mod tests {
         let a = u32x2::new(1, 2);
         let b = u32x2::new(0, 3);
         let e = u32x2::new(1, 0);
-        let r: u32x2 = vpmin_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x2 = ::mem::transmute(vpmin_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1302,7 +1003,7 @@ mod tests {
         let a = f32x2::new(1., -2.);
         let b = f32x2::new(0., 3.);
         let e = f32x2::new(-2., 0.);
-        let r: f32x2 = vpmin_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x2 = ::mem::transmute(vpmin_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1311,7 +1012,7 @@ mod tests {
         let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
         let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9);
-        let r: i8x8 = vpmax_s8(a.into_bits(), b.into_bits()).into_bits();
+        let r: i8x8 = ::mem::transmute(vpmax_s8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1320,7 +1021,7 @@ mod tests {
         let a = i16x4::new(1, 2, 3, -4);
         let b = i16x4::new(0, 3, 2, 5);
         let e = i16x4::new(2, 3, 3, 5);
-        let r: i16x4 = vpmax_s16(a.into_bits(), b.into_bits()).into_bits();
+        let r: i16x4 = ::mem::transmute(vpmax_s16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1329,7 +1030,7 @@ mod tests {
         let a = i32x2::new(1, -2);
         let b = i32x2::new(0, 3);
         let e = i32x2::new(1, 3);
-        let r: i32x2 = vpmax_s32(a.into_bits(), b.into_bits()).into_bits();
+        let r: i32x2 = ::mem::transmute(vpmax_s32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1338,7 +1039,7 @@ mod tests {
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
         let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9);
-        let r: u8x8 = vpmax_u8(a.into_bits(), b.into_bits()).into_bits();
+        let r: u8x8 = ::mem::transmute(vpmax_u8(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1347,7 +1048,7 @@ mod tests {
         let a = u16x4::new(1, 2, 3, 4);
         let b = u16x4::new(0, 3, 2, 5);
         let e = u16x4::new(2, 4, 3, 5);
-        let r: u16x4 = vpmax_u16(a.into_bits(), b.into_bits()).into_bits();
+        let r: u16x4 = ::mem::transmute(vpmax_u16(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1356,7 +1057,7 @@ mod tests {
         let a = u32x2::new(1, 2);
         let b = u32x2::new(0, 3);
         let e = u32x2::new(2, 3);
-        let r: u32x2 = vpmax_u32(a.into_bits(), b.into_bits()).into_bits();
+        let r: u32x2 = ::mem::transmute(vpmax_u32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -1365,7 +1066,7 @@ mod tests {
         let a = f32x2::new(1., -2.);
         let b = f32x2::new(0., 3.);
         let e = f32x2::new(1., 3.);
-        let r: f32x2 = vpmax_f32(a.into_bits(), b.into_bits()).into_bits();
+        let r: f32x2 = ::mem::transmute(vpmax_f32(::mem::transmute(a), ::mem::transmute(b)));
         assert_eq!(r, e);
     }
 }
diff --git a/coresimd/macros.rs b/coresimd/macros.rs
index fa96f50c81..343f425c1a 100644
--- a/coresimd/macros.rs
+++ b/coresimd/macros.rs
@@ -13,44 +13,3 @@ macro_rules! types {
         pub struct $name($($fields)*);
     )*)
 }
-
-macro_rules! cfg_if {
-    ($(
-        if #[cfg($($meta:meta),*)] { $($it:item)* }
-    ) else * else {
-        $($it2:item)*
-    }) => {
-        __cfg_if_items! {
-            () ;
-            $( ( ($($meta),*) ($($it)*) ), )*
-            ( () ($($it2)*) ),
-        }
-    };
-    (
-        if #[cfg($($i_met:meta),*)] { $($i_it:item)* }
-        $(
-            else if #[cfg($($e_met:meta),*)] { $($e_it:item)* }
-        )*
-    ) => {
-        __cfg_if_items! {
-            () ;
-            ( ($($i_met),*) ($($i_it)*) ),
-            $( ( ($($e_met),*) ($($e_it)*) ), )*
-            ( () () ),
-        }
-    }
-}
-
-macro_rules! __cfg_if_items {
-    (($($not:meta,)*) ; ) => {};
-    (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => {
-        __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* }
-        __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* }
-    }
-}
-
-macro_rules! __cfg_if_apply {
-    ($m:meta, $($it:item)*) => {
-        $(#[$m] $it)*
-    }
-}
diff --git a/coresimd/mips/msa.rs b/coresimd/mips/msa.rs
index d26ad305d6..cad533dd82 100644
--- a/coresimd/mips/msa.rs
+++ b/coresimd/mips/msa.rs
@@ -5,10 +5,17 @@
 //!
 //! [msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf
 
-use coresimd::simd::*;
 #[cfg(test)]
 use stdsimd_test::assert_instr;
 
+types! {
+    /// MIPS-specific 128-bit wide vector of 16 packed `i8`.
+    pub struct i8x16(
+        i8, i8, i8, i8, i8, i8, i8, i8,
+        i8, i8, i8, i8, i8, i8, i8, i8,
+    );
+}
+
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.mips.add.a.b"]
@@ -35,20 +42,25 @@ mod tests {
     #[simd_test(enable = "msa")]
     unsafe fn __msa_add_a_b() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let a = i8x16::new(
+        let a = i8x16(
             1, 2, 3, 4,
             1, 2, 3, 4,
             1, 2, 3, 4,
             1, 2, 3, 4,
         );
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let b = i8x16::new(
+        let b = i8x16(
             -4, -3, -2, -1,
             -4, -3, -2, -1,
             -4, -3, -2, -1,
             -4, -3, -2, -1,
         );
-        let r = i8x16::splat(5);
+        let r = i8x16(
+            5, 5, 5, 5,
+            5, 5, 5, 5,
+            5, 5, 5, 5,
+            5, 5, 5, 5,
+        );
 
         assert_eq!(r, msa::__msa_add_a_b(a, b));
     }
diff --git a/coresimd/mod.rs b/coresimd/mod.rs
index 6fc312f420..67bdbf3114 100644
--- a/coresimd/mod.rs
+++ b/coresimd/mod.rs
@@ -3,18 +3,7 @@
 #[macro_use]
 mod macros;
 
-#[macro_use]
-mod ppsv;
-
-/// Platform independent SIMD vector types and operations.
-///
-/// This is an **unstable** module for portable SIMD operations. This module
-/// has not yet gone through an RFC and is likely to change, but feedback is
-/// always welcome!
-#[unstable(feature = "stdsimd", issue = "27731")]
-pub mod simd {
-    pub use coresimd::ppsv::*;
-}
+mod simd;
 
 /// Platform dependent vendor intrinsics.
 ///
diff --git a/coresimd/powerpc/altivec.rs b/coresimd/powerpc/altivec.rs
index 62371a6a87..1049a023b4 100644
--- a/coresimd/powerpc/altivec.rs
+++ b/coresimd/powerpc/altivec.rs
@@ -13,8 +13,8 @@
 
 #![allow(non_camel_case_types)]
 
-use coresimd::simd::*;
 use coresimd::simd_llvm::*;
+use coresimd::simd::*;
 
 use mem;
 
@@ -49,304 +49,6 @@ types! {
     pub struct vector_float(f32, f32, f32, f32);
 }
 
-impl_from_bits_!(
-    vector_signed_char: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    i8x16: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_unsigned_char: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    u8x16: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_bool_char: m64x2,
-    m32x4,
-    m16x8,
-    m8x16,
-    vector_bool_short,
-    vector_bool_int
-);
-impl_from_bits_!(m8x16: vector_bool_char, vector_bool_short, vector_bool_int);
-
-impl_from_bits_!(
-    vector_signed_short: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    i16x8: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_unsigned_short: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    u16x8: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_bool_short: m64x2,
-    m32x4,
-    m16x8,
-    m8x16,
-    vector_bool_int
-);
-impl_from_bits_!(m16x8: vector_bool_short, vector_bool_int);
-
-impl_from_bits_!(
-    vector_signed_int: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    i32x4: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_unsigned_int: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_float,
-    vector_bool_int
-);
-impl_from_bits_!(
-    u32x4: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
-impl_from_bits_!(
-    vector_bool_int: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(m32x4: vector_bool_int);
-
-impl_from_bits_!(
-    vector_float: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_signed_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_bool_int
-);
-impl_from_bits_!(
-    f32x4: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int
-);
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.ppc.altivec.vperm"]
@@ -455,7 +157,7 @@ mod sealed {
     pub unsafe fn vec_add_bc_sc(
         a: vector_bool_char, b: vector_signed_char,
     ) -> vector_signed_char {
-        simd_add(a.into_bits(), b)
+        simd_add(::mem::transmute(a), b)
     }
     impl VectorAdd<vector_signed_char> for vector_bool_char {
         type Result = vector_signed_char;
@@ -497,7 +199,7 @@ mod sealed {
     pub unsafe fn vec_add_bc_uc(
         a: vector_bool_char, b: vector_unsigned_char,
     ) -> vector_unsigned_char {
-        simd_add(a.into_bits(), b)
+        simd_add(::mem::transmute(a), b)
     }
     impl VectorAdd<vector_unsigned_char> for vector_bool_char {
         type Result = vector_unsigned_char;
@@ -539,7 +241,7 @@ mod sealed {
     pub unsafe fn vec_add_bs_ss(
         a: vector_bool_short, b: vector_signed_short,
     ) -> vector_signed_short {
-        let a: i16x8 = a.into_bits();
+        let a: i16x8 = ::mem::transmute(a);
         let a: vector_signed_short = simd_cast(a);
         simd_add(a, b)
     }
@@ -584,7 +286,7 @@ mod sealed {
     pub unsafe fn vec_add_bs_us(
         a: vector_bool_short, b: vector_unsigned_short,
     ) -> vector_unsigned_short {
-        let a: i16x8 = a.into_bits();
+        let a: i16x8 = ::mem::transmute(a);
         let a: vector_unsigned_short = simd_cast(a);
         simd_add(a, b)
     }
@@ -629,7 +331,7 @@ mod sealed {
     pub unsafe fn vec_add_bi_si(
         a: vector_bool_int, b: vector_signed_int,
     ) -> vector_signed_int {
-        let a: i32x4 = a.into_bits();
+        let a: i32x4 = ::mem::transmute(a);
         let a: vector_signed_int = simd_cast(a);
         simd_add(a, b)
     }
@@ -673,7 +375,7 @@ mod sealed {
     pub unsafe fn vec_add_bi_ui(
         a: vector_bool_int, b: vector_unsigned_int,
     ) -> vector_unsigned_int {
-        let a: i32x4 = a.into_bits();
+        let a: i32x4 = ::mem::transmute(a);
         let a: vector_unsigned_int = simd_cast(a);
         simd_add(a, b)
     }
@@ -754,10 +456,10 @@ mod endian {
         // vperm has big-endian bias
         //
         // Xor the mask and flip the arguments
-        let d = u8x16::new(
+        let d = ::mem::transmute(u8x16::new(
             255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
             255, 255, 255,
-        ).into_bits();
+        ));
         let c = simd_xor(c, d);
 
         b.vec_vperm(a, c)
@@ -816,19 +518,22 @@ mod tests {
     #[cfg(target_arch = "powerpc64")]
     use coresimd::arch::powerpc64::*;
 
-    use simd::*;
+    use coresimd::simd::*;
     use stdsimd_test::simd_test;
 
     macro_rules! test_vec_perm {
-        {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
+        {$name:ident,
+         $shorttype:ident, $longtype:ident,
+         [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "altivec")]
             unsafe fn $name() {
-                let a: $longtype = $shorttype::new($($a),+).into_bits();
-                let b = $shorttype::new($($b),+).into_bits();
-                let c = u8x16::new($($c),+).into_bits();
+                let a: $longtype = ::mem::transmute($shorttype::new($($a),+));
+                let b: $longtype = ::mem::transmute($shorttype::new($($b),+));
+                let c: vector_unsigned_char = ::mem::transmute(u8x16::new($($c),+));
                 let d = $shorttype::new($($d),+);
 
-                assert_eq!(d, vec_perm(a, b, c).into_bits());
+                let r: $shorttype = ::mem::transmute(vec_perm(a, b, c));
+                assert_eq!(d, r);
             }
         }
     }
@@ -847,6 +552,7 @@ mod tests {
     [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
      0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
     [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]}
+
     test_vec_perm!{test_vec_perm_m8x16,
     m8x16, vector_bool_char,
     [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false],
@@ -854,7 +560,6 @@ mod tests {
     [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
      0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
     [false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]}
-
     test_vec_perm!{test_vec_perm_u16x8,
     u16x8, vector_unsigned_short,
     [0, 1, 2, 3, 4, 5, 6, 7],
@@ -908,7 +613,7 @@ mod tests {
 
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_madds() {
-        let a: vector_signed_short = i16x8::new(
+        let a: vector_signed_short = ::mem::transmute(i16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -917,20 +622,20 @@ mod tests {
             5 * 256,
             6 * 256,
             7 * 256,
-        ).into_bits();
+        ));
         let b: vector_signed_short =
-            i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
+            ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
         let c: vector_signed_short =
-            i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
+            ::mem::transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
 
         let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21);
 
-        assert_eq!(d, vec_madds(a, b, c).into_bits());
+        assert_eq!(d, ::mem::transmute(vec_madds(a, b, c)));
     }
 
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_mradds() {
-        let a: vector_signed_short = i16x8::new(
+        let a: vector_signed_short = ::mem::transmute(i16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -939,20 +644,20 @@ mod tests {
             5 * 256,
             6 * 256,
             7 * 256,
-        ).into_bits();
+        ));
         let b: vector_signed_short =
-            i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
+            ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
         let c: vector_signed_short =
-            i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::max_value() - 1).into_bits();
+            ::mem::transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::max_value() - 1));
 
         let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, i16::max_value());
 
-        assert_eq!(d, vec_mradds(a, b, c).into_bits());
+        assert_eq!(d, ::mem::transmute(vec_mradds(a, b, c)));
     }
 
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_msums_unsigned() {
-        let a: vector_unsigned_short = u16x8::new(
+        let a: vector_unsigned_short = ::mem::transmute(u16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -961,10 +666,10 @@ mod tests {
             5 * 256,
             6 * 256,
             7 * 256,
-        ).into_bits();
+        ));
         let b: vector_unsigned_short =
-            u16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
-        let c: vector_unsigned_int = u32x4::new(0, 1, 2, 3).into_bits();
+            ::mem::transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
         let d = u32x4::new(
             (0 + 1) * 256 * 256 + 0,
             (2 + 3) * 256 * 256 + 1,
@@ -972,12 +677,12 @@ mod tests {
             (6 + 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, vec_msums(a, b, c).into_bits());
+        assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
     }
 
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_msums_signed() {
-        let a: vector_signed_short = i16x8::new(
+        let a: vector_signed_short = ::mem::transmute(i16x8::new(
             0 * 256,
            -1 * 256,
             2 * 256,
@@ -986,10 +691,10 @@ mod tests {
            -5 * 256,
             6 * 256,
            -7 * 256,
-        ).into_bits();
+        ));
         let b: vector_signed_short =
-            i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
-        let c: vector_signed_int = i32x4::new(0, 1, 2, 3).into_bits();
+            ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(
             (0 - 1) * 256 * 256 + 0,
             (2 - 3) * 256 * 256 + 1,
@@ -997,16 +702,16 @@ mod tests {
             (6 - 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, vec_msums(a, b, c).into_bits());
+        assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
     }
 
     #[simd_test(enable = "altivec")]
     unsafe fn vec_add_i32x4_i32x4() {
         let x = i32x4::new(1, 2, 3, 4);
         let y = i32x4::new(4, 3, 2, 1);
-        let x: vector_signed_int = x.into_bits();
-        let y: vector_signed_int = y.into_bits();
+        let x: vector_signed_int = ::mem::transmute(x);
+        let y: vector_signed_int = ::mem::transmute(y);
         let z = vec_add(x, y);
-        assert_eq!(i32x4::splat(5), z.into_bits());
+        assert_eq!(i32x4::splat(5), ::mem::transmute(z));
     }
 }
diff --git a/coresimd/powerpc64/vsx.rs b/coresimd/powerpc64/vsx.rs
index 51a8e824c1..b5363f5ad6 100644
--- a/coresimd/powerpc64/vsx.rs
+++ b/coresimd/powerpc64/vsx.rs
@@ -8,8 +8,6 @@
 
 #![allow(non_camel_case_types)]
 
-use coresimd::powerpc::*;
-use coresimd::simd::*;
 use coresimd::simd_llvm::*;
 
 #[cfg(test)]
@@ -34,194 +32,8 @@ types! {
     // pub struct vector_unsigned___int128 = i128x1;
 }
 
-impl_from_bits_!(
-    vector_signed_long: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-impl_from_bits_!(
-    i64x2: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_unsigned_long: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_signed_long,
-    vector_bool_long,
-    vector_double
-);
-impl_from_bits_!(
-    u64x2: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_double: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long
-);
-impl_from_bits_!(
-    f64x2: vector_signed_char,
-    vector_unsigned_char,
-    vector_bool_char,
-    vector_signed_short,
-    vector_unsigned_short,
-    vector_bool_short,
-    vector_signed_int,
-    vector_unsigned_int,
-    vector_float,
-    vector_bool_int,
-    vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(vector_bool_long: m64x2);
-impl_from_bits_!(m64x2: vector_bool_long);
-impl_from_bits_!(m32x4: vector_bool_long);
-impl_from_bits_!(m16x8: vector_bool_long);
-impl_from_bits_!(m8x16: vector_bool_long);
-impl_from_bits_!(vector_bool_char: vector_bool_long);
-impl_from_bits_!(vector_bool_short: vector_bool_long);
-impl_from_bits_!(vector_bool_int: vector_bool_long);
-
-impl_from_bits_!(
-    vector_signed_char: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_unsigned_char: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_signed_short: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_unsigned_short: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_signed_int: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
-impl_from_bits_!(
-    vector_unsigned_int: vector_signed_long,
-    vector_unsigned_long,
-    vector_bool_long,
-    vector_double
-);
-
 mod sealed {
-
+    use coresimd::simd::*;
     use super::*;
 
     pub trait VectorPermDI {
@@ -283,20 +95,20 @@ mod tests {
     #[cfg(target_arch = "powerpc64")]
     use coresimd::arch::powerpc64::*;
 
-    use simd::*;
+    use coresimd::simd::*;
     use stdsimd_test::simd_test;
 
     macro_rules! test_vec_xxpermdi {
         {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "vsx")]
             unsafe fn $name() {
-                let a: $longtype = $shorttype::new($($a),+, $($b),+).into_bits();
-                let b = $shorttype::new($($c),+, $($d),+).into_bits();
+                let a: $longtype = ::mem::transmute($shorttype::new($($a),+, $($b),+));
+                let b = ::mem::transmute($shorttype::new($($c),+, $($d),+));
 
-                assert_eq!($shorttype::new($($a),+, $($c),+), vec_xxpermdi(a, b, 0).into_bits());
-                assert_eq!($shorttype::new($($b),+, $($c),+), vec_xxpermdi(a, b, 1).into_bits());
-                assert_eq!($shorttype::new($($a),+, $($d),+), vec_xxpermdi(a, b, 2).into_bits());
-                assert_eq!($shorttype::new($($b),+, $($d),+), vec_xxpermdi(a, b, 3).into_bits());
+                assert_eq!($shorttype::new($($a),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 0)));
+                assert_eq!($shorttype::new($($b),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 1)));
+                assert_eq!($shorttype::new($($a),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 2)));
+                assert_eq!($shorttype::new($($b),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 3)));
             }
         }
     }
diff --git a/coresimd/ppsv/api/arithmetic_ops.rs b/coresimd/ppsv/api/arithmetic_ops.rs
deleted file mode 100644
index 28c97c1740..0000000000
--- a/coresimd/ppsv/api/arithmetic_ops.rs
+++ /dev/null
@@ -1,147 +0,0 @@
-//! Lane-wise arithmetic operations.
-#![allow(unused)]
-
-macro_rules! impl_arithmetic_ops {
-    ($id:ident) => {
-        impl ::ops::Add for $id {
-            type Output = Self;
-            #[inline]
-            fn add(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_add;
-                unsafe { simd_add(self, other) }
-            }
-        }
-
-        impl ::ops::Sub for $id {
-            type Output = Self;
-            #[inline]
-            fn sub(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_sub;
-                unsafe { simd_sub(self, other) }
-            }
-        }
-
-        impl ::ops::Mul for $id {
-            type Output = Self;
-            #[inline]
-            fn mul(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_mul;
-                unsafe { simd_mul(self, other) }
-            }
-        }
-
-        impl ::ops::Div for $id {
-            type Output = Self;
-            #[inline]
-            fn div(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_div;
-                unsafe { simd_div(self, other) }
-            }
-        }
-
-        impl ::ops::Rem for $id {
-            type Output = Self;
-            #[inline]
-            fn rem(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_rem;
-                unsafe { simd_rem(self, other) }
-            }
-        }
-
-        impl ::ops::AddAssign for $id {
-            #[inline]
-            fn add_assign(&mut self, other: Self) {
-                *self = *self + other;
-            }
-        }
-
-        impl ::ops::SubAssign for $id {
-            #[inline]
-            fn sub_assign(&mut self, other: Self) {
-                *self = *self - other;
-            }
-        }
-
-        impl ::ops::MulAssign for $id {
-            #[inline]
-            fn mul_assign(&mut self, other: Self) {
-                *self = *self * other;
-            }
-        }
-
-        impl ::ops::DivAssign for $id {
-            #[inline]
-            fn div_assign(&mut self, other: Self) {
-                *self = *self / other;
-            }
-        }
-
-        impl ::ops::RemAssign for $id {
-            #[inline]
-            fn rem_assign(&mut self, other: Self) {
-                *self = *self % other;
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_arithmetic_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn arithmetic() {
-            use coresimd::simd::$id;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let f = $id::splat(4 as $elem_ty);
-
-            // add
-            assert_eq!(z + z, z);
-            assert_eq!(o + z, o);
-            assert_eq!(t + z, t);
-            assert_eq!(t + t, f);
-            // sub
-            assert_eq!(z - z, z);
-            assert_eq!(o - z, o);
-            assert_eq!(t - z, t);
-            assert_eq!(f - t, t);
-            assert_eq!(f - o - o, t);
-            // mul
-            assert_eq!(z * z, z);
-            assert_eq!(z * o, z);
-            assert_eq!(z * t, z);
-            assert_eq!(o * t, t);
-            assert_eq!(t * t, f);
-            // div
-            assert_eq!(z / o, z);
-            assert_eq!(t / o, t);
-            assert_eq!(f / o, f);
-            assert_eq!(t / t, o);
-            assert_eq!(f / t, t);
-            // rem
-            assert_eq!(o % o, z);
-            assert_eq!(f % t, z);
-
-            {
-                let mut v = z;
-                assert_eq!(v, z);
-                v += o; // add_assign
-                assert_eq!(v, o);
-                v -= o; // sub_assign
-                assert_eq!(v, z);
-                v = t;
-                v *= o; // mul_assign
-                assert_eq!(v, t);
-                v *= t;
-                assert_eq!(v, f);
-                v /= o; // div_assign
-                assert_eq!(v, f);
-                v /= t;
-                assert_eq!(v, t);
-                v %= t; // rem_assign
-                assert_eq!(v, z);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs
deleted file mode 100644
index 7b324a7bab..0000000000
--- a/coresimd/ppsv/api/arithmetic_reductions.rs
+++ /dev/null
@@ -1,261 +0,0 @@
-//! Implements portable arithmetic vector reductions.
-#![allow(unused)]
-
-macro_rules! impl_int_arithmetic_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        impl $id {
-            /// Horizontal sum of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
-            ///
-            /// If an operation overflows it returns the mathematical result
-            /// modulo `2^n` where `n` is the number of times it overflows.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn wrapping_sum(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_add_ordered;
-                unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
-            }
-            /// Horizontal sum of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
-            ///
-            /// If an operation overflows it returns the mathematical result
-            /// modulo `2^n` where `n` is the number of times it overflows.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn wrapping_sum(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use super::codegen::wrapping::Wrapping;
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x = Wrapping::add(x, self.extract(i) as $elem_ty);
-                }
-                x
-            }
-
-            /// Horizontal product of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
-            ///
-            /// If an operation overflows it returns the mathematical result
-            /// modulo `2^n` where `n` is the number of times it overflows.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn wrapping_product(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_mul_ordered;
-                unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
-            }
-            /// Horizontal product of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
-            ///
-            /// If an operation overflows it returns the mathematical result
-            /// modulo `2^n` where `n` is the number of times it overflows.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn wrapping_product(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use super::codegen::wrapping::Wrapping;
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x = Wrapping::mul(x, self.extract(i) as $elem_ty);
-                }
-                x
-            }
-        }
-    };
-}
-
-macro_rules! impl_float_arithmetic_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        impl $id {
-            /// Horizontal sum of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`. The resulting `NaN` is not required to be equal to any
-            /// of the `NaN`s in the vector.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn sum(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_add_ordered;
-                unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
-            }
-            /// Horizontal sum of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`. The resulting `NaN` is not required to be equal to any
-            /// of the `NaN`s in the vector.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn sum(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use super::codegen::wrapping::Wrapping;
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x = Wrapping::add(x, self.extract(i) as $elem_ty);
-                }
-                x
-            }
-
-            /// Horizontal product of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`. The resulting `NaN` is not required to be equal to any
-            /// of the `NaN`s in the vector.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn product(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_mul_ordered;
-                unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
-            }
-            /// Horizontal product of the vector elements.
-            ///
-            /// The intrinsic performs a tree-reduction of the vector elements.
-            /// That is, for an 8 element vector:
-            ///
-            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`. The resulting `NaN` is not required to be equal to any
-            /// of the `NaN`s in the vector.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn product(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use super::codegen::wrapping::Wrapping;
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x = Wrapping::mul(x, self.extract(i) as $elem_ty);
-                }
-                x
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_int_arithmetic_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        fn alternating(x: usize) -> ::coresimd::simd::$id {
-            use coresimd::simd::$id;
-            let mut v = $id::splat(1 as $elem_ty);
-            for i in 0..$id::lanes() {
-                if i % x == 0 {
-                    v = v.replace(i, 2 as $elem_ty);
-                }
-            }
-            v
-        }
-
-        #[test]
-        fn wrapping_sum() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
-            let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
-            let v = alternating(2);
-            assert_eq!(
-                v.wrapping_sum(),
-                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
-            );
-        }
-        #[test]
-        fn wrapping_product() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.wrapping_product(), 0 as $elem_ty);
-            let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.wrapping_product(), 1 as $elem_ty);
-            let f = match $id::lanes() {
-                64 => 16,
-                32 => 8,
-                16 => 4,
-                _ => 2,
-            };
-            let v = alternating(f);
-            assert_eq!(
-                v.wrapping_product(),
-                (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
-            );
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_float_arithmetic_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        fn alternating(x: usize) -> ::coresimd::simd::$id {
-            use coresimd::simd::$id;
-            let mut v = $id::splat(1 as $elem_ty);
-            for i in 0..$id::lanes() {
-                if i % x == 0 {
-                    v = v.replace(i, 2 as $elem_ty);
-                }
-            }
-            v
-        }
-
-        #[test]
-        fn sum() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.sum(), 0 as $elem_ty);
-            let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.sum(), $id::lanes() as $elem_ty);
-            let v = alternating(2);
-            assert_eq!(v.sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty);
-        }
-        #[test]
-        fn product() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.product(), 0 as $elem_ty);
-            let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.product(), 1 as $elem_ty);
-            let f = match $id::lanes() {
-                64 => 16,
-                32 => 8,
-                16 => 4,
-                _ => 2,
-            };
-            let v = alternating(f);
-            assert_eq!(
-                v.product(),
-                (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
-            );
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/arithmetic_scalar_ops.rs b/coresimd/ppsv/api/arithmetic_scalar_ops.rs
deleted file mode 100644
index 6498801673..0000000000
--- a/coresimd/ppsv/api/arithmetic_scalar_ops.rs
+++ /dev/null
@@ -1,202 +0,0 @@
-//! Lane-wise arithmetic operations.
-#![allow(unused)]
-
-macro_rules! impl_arithmetic_scalar_ops {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::ops::Add<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn add(self, other: $elem_ty) -> Self {
-                self + $id::splat(other)
-            }
-        }
-        impl ::ops::Add<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn add(self, other: $id) -> $id {
-                $id::splat(self) + other
-            }
-        }
-
-        impl ::ops::Sub<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn sub(self, other: $elem_ty) -> Self {
-                self - $id::splat(other)
-            }
-        }
-        impl ::ops::Sub<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn sub(self, other: $id) -> $id {
-                $id::splat(self) - other
-            }
-        }
-
-        impl ::ops::Mul<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn mul(self, other: $elem_ty) -> Self {
-                self * $id::splat(other)
-            }
-        }
-        impl ::ops::Mul<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn mul(self, other: $id) -> $id {
-                $id::splat(self) * other
-            }
-        }
-
-        impl ::ops::Div<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn div(self, other: $elem_ty) -> Self {
-                self / $id::splat(other)
-            }
-        }
-        impl ::ops::Div<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn div(self, other: $id) -> $id {
-                $id::splat(self) / other
-            }
-        }
-
-        impl ::ops::Rem<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn rem(self, other: $elem_ty) -> Self {
-                self % $id::splat(other)
-            }
-        }
-        impl ::ops::Rem<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn rem(self, other: $id) -> $id {
-                $id::splat(self) % other
-            }
-        }
-
-        impl ::ops::AddAssign<$elem_ty> for $id {
-            #[inline]
-            fn add_assign(&mut self, other: $elem_ty) {
-                *self = *self + other;
-            }
-        }
-
-        impl ::ops::SubAssign<$elem_ty> for $id {
-            #[inline]
-            fn sub_assign(&mut self, other: $elem_ty) {
-                *self = *self - other;
-            }
-        }
-
-        impl ::ops::MulAssign<$elem_ty> for $id {
-            #[inline]
-            fn mul_assign(&mut self, other: $elem_ty) {
-                *self = *self * other;
-            }
-        }
-
-        impl ::ops::DivAssign<$elem_ty> for $id {
-            #[inline]
-            fn div_assign(&mut self, other: $elem_ty) {
-                *self = *self / other;
-            }
-        }
-
-        impl ::ops::RemAssign<$elem_ty> for $id {
-            #[inline]
-            fn rem_assign(&mut self, other: $elem_ty) {
-                *self = *self % other;
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_arithmetic_scalar_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn arithmetic_scalar() {
-            use coresimd::simd::$id;
-            let zi = 0 as $elem_ty;
-            let oi = 1 as $elem_ty;
-            let ti = 2 as $elem_ty;
-            let fi = 4 as $elem_ty;
-            let z = $id::splat(zi);
-            let o = $id::splat(oi);
-            let t = $id::splat(ti);
-            let f = $id::splat(fi);
-
-            // add
-            assert_eq!(zi + z, z);
-            assert_eq!(z + zi, z);
-            assert_eq!(oi + z, o);
-            assert_eq!(o + zi, o);
-            assert_eq!(ti + z, t);
-            assert_eq!(t + zi, t);
-            assert_eq!(ti + t, f);
-            assert_eq!(t + ti, f);
-            // sub
-            assert_eq!(zi - z, z);
-            assert_eq!(z - zi, z);
-            assert_eq!(oi - z, o);
-            assert_eq!(o - zi, o);
-            assert_eq!(ti - z, t);
-            assert_eq!(t - zi, t);
-            assert_eq!(fi - t, t);
-            assert_eq!(f - ti, t);
-            assert_eq!(f - o - o, t);
-            assert_eq!(f - oi - oi, t);
-            // mul
-            assert_eq!(zi * z, z);
-            assert_eq!(z * zi, z);
-            assert_eq!(zi * o, z);
-            assert_eq!(z * oi, z);
-            assert_eq!(zi * t, z);
-            assert_eq!(z * ti, z);
-            assert_eq!(oi * t, t);
-            assert_eq!(o * ti, t);
-            assert_eq!(ti * t, f);
-            assert_eq!(t * ti, f);
-            // div
-            assert_eq!(zi / o, z);
-            assert_eq!(z / oi, z);
-            assert_eq!(ti / o, t);
-            assert_eq!(t / oi, t);
-            assert_eq!(fi / o, f);
-            assert_eq!(f / oi, f);
-            assert_eq!(ti / t, o);
-            assert_eq!(t / ti, o);
-            assert_eq!(fi / t, t);
-            assert_eq!(f / ti, t);
-            // rem
-            assert_eq!(oi % o, z);
-            assert_eq!(o % oi, z);
-            assert_eq!(fi % t, z);
-            assert_eq!(f % ti, z);
-
-            {
-                let mut v = z;
-                assert_eq!(v, z);
-                v += oi; // add_assign
-                assert_eq!(v, o);
-                v -= oi; // sub_assign
-                assert_eq!(v, z);
-                v = t;
-                v *= oi; // mul_assign
-                assert_eq!(v, t);
-                v *= ti;
-                assert_eq!(v, f);
-                v /= oi; // div_assign
-                assert_eq!(v, f);
-                v /= ti;
-                assert_eq!(v, t);
-                v %= ti; // rem_assign
-                assert_eq!(v, z);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/bitwise_ops.rs b/coresimd/ppsv/api/bitwise_ops.rs
deleted file mode 100644
index 67b4a1909f..0000000000
--- a/coresimd/ppsv/api/bitwise_ops.rs
+++ /dev/null
@@ -1,179 +0,0 @@
-//! Lane-wise bitwise operations for integer and boolean vectors.
-#![allow(unused)]
-
-macro_rules! impl_bitwise_ops {
-    ($id:ident, $true_val:expr) => {
-        impl ::ops::Not for $id {
-            type Output = Self;
-            #[inline]
-            fn not(self) -> Self {
-                Self::splat($true_val) ^ self
-            }
-        }
-        impl ::ops::BitXor for $id {
-            type Output = Self;
-            #[inline]
-            fn bitxor(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_xor;
-                unsafe { simd_xor(self, other) }
-            }
-        }
-        impl ::ops::BitAnd for $id {
-            type Output = Self;
-            #[inline]
-            fn bitand(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_and;
-                unsafe { simd_and(self, other) }
-            }
-        }
-        impl ::ops::BitOr for $id {
-            type Output = Self;
-            #[inline]
-            fn bitor(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_or;
-                unsafe { simd_or(self, other) }
-            }
-        }
-        impl ::ops::BitAndAssign for $id {
-            #[inline]
-            fn bitand_assign(&mut self, other: Self) {
-                *self = *self & other;
-            }
-        }
-        impl ::ops::BitOrAssign for $id {
-            #[inline]
-            fn bitor_assign(&mut self, other: Self) {
-                *self = *self | other;
-            }
-        }
-        impl ::ops::BitXorAssign for $id {
-            #[inline]
-            fn bitxor_assign(&mut self, other: Self) {
-                *self = *self ^ other;
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_int_bitwise_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn bitwise_ops() {
-            use coresimd::simd::$id;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let m = $id::splat(!z.extract(0));
-
-            // Not:
-            assert_eq!(!z, m);
-            assert_eq!(!m, z);
-
-            // BitAnd:
-            assert_eq!(o & o, o);
-            assert_eq!(o & z, z);
-            assert_eq!(z & o, z);
-            assert_eq!(z & z, z);
-
-            assert_eq!(t & t, t);
-            assert_eq!(t & o, z);
-            assert_eq!(o & t, z);
-
-            // BitOr:
-            assert_eq!(o | o, o);
-            assert_eq!(o | z, o);
-            assert_eq!(z | o, o);
-            assert_eq!(z | z, z);
-
-            assert_eq!(t | t, t);
-            assert_eq!(z | t, t);
-            assert_eq!(t | z, t);
-
-            // BitXOR:
-            assert_eq!(o ^ o, z);
-            assert_eq!(z ^ z, z);
-            assert_eq!(z ^ o, o);
-            assert_eq!(o ^ z, o);
-
-            assert_eq!(t ^ t, z);
-            assert_eq!(t ^ z, t);
-            assert_eq!(z ^ t, t);
-
-            {
-                // AndAssign:
-                let mut v = o;
-                v &= t;
-                assert_eq!(v, z);
-            }
-            {
-                // OrAssign:
-                let mut v = z;
-                v |= o;
-                assert_eq!(v, o);
-            }
-            {
-                // XORAssign:
-                let mut v = z;
-                v ^= o;
-                assert_eq!(v, o);
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_mask_bitwise_ops {
-    ($id:ident) => {
-        #[test]
-        fn mask_bitwise_ops() {
-            use coresimd::simd::*;
-
-            let t = $id::splat(true);
-            let f = $id::splat(false);
-            assert!(t != f);
-            assert!(!(t == f));
-
-            // Not:
-            assert_eq!(!t, f);
-            assert_eq!(t, !f);
-
-            // BitAnd:
-            assert_eq!(t & f, f);
-            assert_eq!(f & t, f);
-            assert_eq!(t & t, t);
-            assert_eq!(f & f, f);
-
-            // BitOr:
-            assert_eq!(t | f, t);
-            assert_eq!(f | t, t);
-            assert_eq!(t | t, t);
-            assert_eq!(f | f, f);
-
-            // BitXOR:
-            assert_eq!(t ^ f, t);
-            assert_eq!(f ^ t, t);
-            assert_eq!(t ^ t, f);
-            assert_eq!(f ^ f, f);
-
-            {
-                // AndAssign:
-                let mut v = f;
-                v &= t;
-                assert_eq!(v, f);
-            }
-            {
-                // OrAssign:
-                let mut v = f;
-                v |= t;
-                assert_eq!(v, t);
-            }
-            {
-                // XORAssign:
-                let mut v = f;
-                v ^= t;
-                assert_eq!(v, t);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/bitwise_reductions.rs b/coresimd/ppsv/api/bitwise_reductions.rs
deleted file mode 100644
index 840746ab7a..0000000000
--- a/coresimd/ppsv/api/bitwise_reductions.rs
+++ /dev/null
@@ -1,194 +0,0 @@
-//! Implements portable horizontal bitwise vector reductions.
-#![allow(unused)]
-
-macro_rules! impl_bitwise_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        impl $id {
-            /// Lane-wise bitwise `and` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn and(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_and;
-                unsafe { simd_reduce_and(self) }
-            }
-            /// Lane-wise bitwise `and` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn and(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x &= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-
-            /// Lane-wise bitwise `or` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn or(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_or;
-                unsafe { simd_reduce_or(self) }
-            }
-            /// Lane-wise bitwise `or` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn or(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x |= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-
-            /// Lane-wise bitwise `xor` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn xor(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_xor;
-                unsafe { simd_reduce_xor(self) }
-            }
-            /// Lane-wise bitwise `xor` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn xor(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x ^= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-        }
-    };
-}
-
-macro_rules! impl_mask_bitwise_reductions {
-    ($id:ident, $elem_ty:ident, $internal_ty:ident) => {
-        impl $id {
-            /// Lane-wise bitwise `and` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn and(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_and;
-                unsafe {
-                    let r: $internal_ty = simd_reduce_and(self);
-                    r != 0
-                }
-            }
-            /// Lane-wise bitwise `and` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn and(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x &= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-
-            /// Lane-wise bitwise `or` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn or(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_or;
-                unsafe {
-                    let r: $internal_ty = simd_reduce_or(self);
-                    r != 0
-                }
-            }
-            /// Lane-wise bitwise `or` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn or(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x |= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-
-            /// Lane-wise bitwise `xor` of the vector elements.
-            #[cfg(not(target_arch = "aarch64"))]
-            #[inline]
-            pub fn xor(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_xor;
-                unsafe {
-                    let r: $internal_ty = simd_reduce_xor(self);
-                    r != 0
-                }
-            }
-            /// Lane-wise bitwise `xor` of the vector elements.
-            #[cfg(target_arch = "aarch64")]
-            #[inline]
-            pub fn xor(self) -> $elem_ty {
-                // FIXME: broken on aarch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                let mut x = self.extract(0) as $elem_ty;
-                for i in 1..$id::lanes() {
-                    x ^= self.extract(i) as $elem_ty;
-                }
-                x
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_bitwise_reductions {
-    ($id:ident, $true:expr) => {
-        #[test]
-        fn and() {
-            let false_ = !$true;
-            use coresimd::simd::$id;
-            let v = $id::splat(false_);
-            assert_eq!(v.and(), false_);
-            let v = $id::splat($true);
-            assert_eq!(v.and(), $true);
-            let v = $id::splat(false_);
-            let v = v.replace(0, $true);
-            assert_eq!(v.and(), false_);
-            let v = $id::splat($true);
-            let v = v.replace(0, false_);
-            assert_eq!(v.and(), false_);
-        }
-        #[test]
-        fn or() {
-            let false_ = !$true;
-            use coresimd::simd::$id;
-            let v = $id::splat(false_);
-            assert_eq!(v.or(), false_);
-            let v = $id::splat($true);
-            assert_eq!(v.or(), $true);
-            let v = $id::splat(false_);
-            let v = v.replace(0, $true);
-            assert_eq!(v.or(), $true);
-            let v = $id::splat($true);
-            let v = v.replace(0, false_);
-            assert_eq!(v.or(), $true);
-        }
-        #[test]
-        fn xor() {
-            let false_ = !$true;
-            use coresimd::simd::$id;
-            let v = $id::splat(false_);
-            assert_eq!(v.xor(), false_);
-            let v = $id::splat($true);
-            assert_eq!(v.xor(), false_);
-            let v = $id::splat(false_);
-            let v = v.replace(0, $true);
-            assert_eq!(v.xor(), $true);
-            let v = $id::splat($true);
-            let v = v.replace(0, false_);
-            assert_eq!(v.xor(), $true);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/bitwise_scalar_ops.rs b/coresimd/ppsv/api/bitwise_scalar_ops.rs
deleted file mode 100644
index 55efa752da..0000000000
--- a/coresimd/ppsv/api/bitwise_scalar_ops.rs
+++ /dev/null
@@ -1,222 +0,0 @@
-//! Lane-wise bitwise operations for integer vectors and vector masks.
-#![allow(unused)]
-
-macro_rules! impl_bitwise_scalar_ops {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::ops::BitXor<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn bitxor(self, other: $elem_ty) -> Self {
-                self ^ $id::splat(other)
-            }
-        }
-        impl ::ops::BitXor<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn bitxor(self, other: $id) -> $id {
-                $id::splat(self) ^ other
-            }
-        }
-
-        impl ::ops::BitAnd<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn bitand(self, other: $elem_ty) -> Self {
-                self & $id::splat(other)
-            }
-        }
-        impl ::ops::BitAnd<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn bitand(self, other: $id) -> $id {
-                $id::splat(self) & other
-            }
-        }
-
-        impl ::ops::BitOr<$elem_ty> for $id {
-            type Output = Self;
-            #[inline]
-            fn bitor(self, other: $elem_ty) -> Self {
-                self | $id::splat(other)
-            }
-        }
-        impl ::ops::BitOr<$id> for $elem_ty {
-            type Output = $id;
-            #[inline]
-            fn bitor(self, other: $id) -> $id {
-                $id::splat(self) | other
-            }
-        }
-
-        impl ::ops::BitAndAssign<$elem_ty> for $id {
-            #[inline]
-            fn bitand_assign(&mut self, other: $elem_ty) {
-                *self = *self & other;
-            }
-        }
-        impl ::ops::BitOrAssign<$elem_ty> for $id {
-            #[inline]
-            fn bitor_assign(&mut self, other: $elem_ty) {
-                *self = *self | other;
-            }
-        }
-        impl ::ops::BitXorAssign<$elem_ty> for $id {
-            #[inline]
-            fn bitxor_assign(&mut self, other: $elem_ty) {
-                *self = *self ^ other;
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_int_bitwise_scalar_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn bitwise_scalar_ops() {
-            use coresimd::simd::$id;
-            let zi = 0 as $elem_ty;
-            let oi = 1 as $elem_ty;
-            let ti = 2 as $elem_ty;
-            let z = $id::splat(zi);
-            let o = $id::splat(oi);
-            let t = $id::splat(ti);
-
-            // BitAnd:
-            assert_eq!(oi & o, o);
-            assert_eq!(o & oi, o);
-            assert_eq!(oi & z, z);
-            assert_eq!(o & zi, z);
-            assert_eq!(zi & o, z);
-            assert_eq!(z & oi, z);
-            assert_eq!(zi & z, z);
-            assert_eq!(z & zi, z);
-
-            assert_eq!(ti & t, t);
-            assert_eq!(t & ti, t);
-            assert_eq!(ti & o, z);
-            assert_eq!(t & oi, z);
-            assert_eq!(oi & t, z);
-            assert_eq!(o & ti, z);
-
-            // BitOr:
-            assert_eq!(oi | o, o);
-            assert_eq!(o | oi, o);
-            assert_eq!(oi | z, o);
-            assert_eq!(o | zi, o);
-            assert_eq!(zi | o, o);
-            assert_eq!(z | oi, o);
-            assert_eq!(zi | z, z);
-            assert_eq!(z | zi, z);
-
-            assert_eq!(ti | t, t);
-            assert_eq!(t | ti, t);
-            assert_eq!(zi | t, t);
-            assert_eq!(z | ti, t);
-            assert_eq!(ti | z, t);
-            assert_eq!(t | zi, t);
-
-            // BitXOR:
-            assert_eq!(oi ^ o, z);
-            assert_eq!(o ^ oi, z);
-            assert_eq!(zi ^ z, z);
-            assert_eq!(z ^ zi, z);
-            assert_eq!(zi ^ o, o);
-            assert_eq!(z ^ oi, o);
-            assert_eq!(oi ^ z, o);
-            assert_eq!(o ^ zi, o);
-
-            assert_eq!(ti ^ t, z);
-            assert_eq!(t ^ ti, z);
-            assert_eq!(ti ^ z, t);
-            assert_eq!(t ^ zi, t);
-            assert_eq!(zi ^ t, t);
-            assert_eq!(z ^ ti, t);
-
-            {
-                // AndAssign:
-                let mut v = o;
-                v &= ti;
-                assert_eq!(v, z);
-            }
-            {
-                // OrAssign:
-                let mut v = z;
-                v |= oi;
-                assert_eq!(v, o);
-            }
-            {
-                // XORAssign:
-                let mut v = z;
-                v ^= oi;
-                assert_eq!(v, o);
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_mask_bitwise_scalar_ops {
-    ($id:ident) => {
-        #[test]
-        fn bool_scalar_arithmetic() {
-            use coresimd::simd::*;
-
-            let ti = true;
-            let fi = false;
-            let t = $id::splat(ti);
-            let f = $id::splat(fi);
-            assert!(t != f);
-            assert!(!(t == f));
-
-            // BitAnd:
-            assert_eq!(ti & f, f);
-            assert_eq!(t & fi, f);
-            assert_eq!(fi & t, f);
-            assert_eq!(f & ti, f);
-            assert_eq!(ti & t, t);
-            assert_eq!(t & ti, t);
-            assert_eq!(fi & f, f);
-            assert_eq!(f & fi, f);
-
-            // BitOr:
-            assert_eq!(ti | f, t);
-            assert_eq!(t | fi, t);
-            assert_eq!(fi | t, t);
-            assert_eq!(f | ti, t);
-            assert_eq!(ti | t, t);
-            assert_eq!(t | ti, t);
-            assert_eq!(fi | f, f);
-            assert_eq!(f | fi, f);
-
-            // BitXOR:
-            assert_eq!(ti ^ f, t);
-            assert_eq!(t ^ fi, t);
-            assert_eq!(fi ^ t, t);
-            assert_eq!(f ^ ti, t);
-            assert_eq!(ti ^ t, f);
-            assert_eq!(t ^ ti, f);
-            assert_eq!(fi ^ f, f);
-            assert_eq!(f ^ fi, f);
-
-            {
-                // AndAssign:
-                let mut v = f;
-                v &= ti;
-                assert_eq!(v, f);
-            }
-            {
-                // OrAssign:
-                let mut v = f;
-                v |= ti;
-                assert_eq!(v, t);
-            }
-            {
-                // XORAssign:
-                let mut v = f;
-                v ^= ti;
-                assert_eq!(v, t);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/cmp.rs b/coresimd/ppsv/api/cmp.rs
deleted file mode 100644
index f6b42d5fa0..0000000000
--- a/coresimd/ppsv/api/cmp.rs
+++ /dev/null
@@ -1,142 +0,0 @@
-//! Lane-wise vector comparisons returning vector masks.
-#![allow(unused)]
-
-macro_rules! impl_cmp {
-    ($id:ident, $bool_ty:ident) => {
-        impl $id {
-            /// Lane-wise equality comparison.
-            #[inline]
-            pub fn eq(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_eq;
-                unsafe { simd_eq(self, other) }
-            }
-
-            /// Lane-wise inequality comparison.
-            #[inline]
-            pub fn ne(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_ne;
-                unsafe { simd_ne(self, other) }
-            }
-
-            /// Lane-wise less-than comparison.
-            #[inline]
-            pub fn lt(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_lt;
-                unsafe { simd_lt(self, other) }
-            }
-
-            /// Lane-wise less-than-or-equals comparison.
-            #[inline]
-            pub fn le(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_le;
-                unsafe { simd_le(self, other) }
-            }
-
-            /// Lane-wise greater-than comparison.
-            #[inline]
-            pub fn gt(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_gt;
-                unsafe { simd_gt(self, other) }
-            }
-
-            /// Lane-wise greater-than-or-equals comparison.
-            #[inline]
-            pub fn ge(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_ge;
-                unsafe { simd_ge(self, other) }
-            }
-        }
-    };
-}
-
-macro_rules! impl_mask_cmp {
-    ($id:ident, $bool_ty:ident) => {
-        impl $id {
-            /// Lane-wise equality comparison.
-            #[inline]
-            pub fn eq(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_eq;
-                unsafe { simd_eq(self, other) }
-            }
-
-            /// Lane-wise inequality comparison.
-            #[inline]
-            pub fn ne(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_ne;
-                unsafe { simd_ne(self, other) }
-            }
-
-            /// Lane-wise less-than comparison.
-            #[inline]
-            pub fn lt(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_gt;
-                unsafe { simd_gt(self, other) }
-            }
-
-            /// Lane-wise less-than-or-equals comparison.
-            #[inline]
-            pub fn le(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_ge;
-                unsafe { simd_ge(self, other) }
-            }
-
-            /// Lane-wise greater-than comparison.
-            #[inline]
-            pub fn gt(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_lt;
-                unsafe { simd_lt(self, other) }
-            }
-
-            /// Lane-wise greater-than-or-equals comparison.
-            #[inline]
-            pub fn ge(self, other: $id) -> $bool_ty {
-                use coresimd::simd_llvm::simd_le;
-                unsafe { simd_le(self, other) }
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_cmp {
-    ($id:ident, $elem_ty:ident, $bool_ty:ident, $true:expr, $false:expr) => {
-        #[test]
-        fn cmp() {
-            use coresimd::simd::*;
-
-            let a = $id::splat($false);
-            let b = $id::splat($true);
-
-            let r = a.lt(b);
-            let e = $bool_ty::splat(true);
-            assert!(r == e);
-            let r = a.le(b);
-            assert!(r == e);
-
-            let e = $bool_ty::splat(false);
-            let r = a.gt(b);
-            assert!(r == e);
-            let r = a.ge(b);
-            assert!(r == e);
-            let r = a.eq(b);
-            assert!(r == e);
-
-            let mut a = a;
-            let mut b = b;
-            let mut e = e;
-            for i in 0..$id::lanes() {
-                if i % 2 == 0 {
-                    a = a.replace(i, $false);
-                    b = b.replace(i, $true);
-                    e = e.replace(i, true);
-                } else {
-                    a = a.replace(i, $true);
-                    b = b.replace(i, $false);
-                    e = e.replace(i, false);
-                }
-            }
-            let r = a.lt(b);
-            assert!(r == e);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/default.rs b/coresimd/ppsv/api/default.rs
deleted file mode 100644
index 3e655e26cb..0000000000
--- a/coresimd/ppsv/api/default.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-//! Implements `Default` for vector types.
-#![allow(unused)]
-
-macro_rules! impl_default {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::default::Default for $id {
-            #[inline]
-            fn default() -> Self {
-                Self::splat($elem_ty::default())
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_default {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn default() {
-            use coresimd::simd::$id;
-            let a = $id::default();
-            for i in 0..$id::lanes() {
-                assert_eq!(a.extract(i), $elem_ty::default());
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/eq.rs b/coresimd/ppsv/api/eq.rs
deleted file mode 100644
index 3d14e34031..0000000000
--- a/coresimd/ppsv/api/eq.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-//! Implements `Eq` for vector types.
-#![allow(unused)]
-
-macro_rules! impl_eq {
-    ($id:ident) => {
-        impl ::cmp::Eq for $id {}
-    };
-}
diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs
deleted file mode 100644
index 9092460a76..0000000000
--- a/coresimd/ppsv/api/float_math.rs
+++ /dev/null
@@ -1,182 +0,0 @@
-//! Float math
-
-macro_rules! impl_float_math {
-    ($id:ident) => {
-        impl $id {
-            /// Absolute-value
-            #[inline]
-            pub fn abs(self) -> Self {
-                use coresimd::ppsv::codegen::abs::FloatAbs;
-                FloatAbs::abs(self)
-            }
-
-            /// Square-root
-            #[inline]
-            pub fn sqrt(self) -> Self {
-                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
-                FloatSqrt::sqrt(self)
-            }
-
-            /// Square-root estimate
-            #[inline]
-            pub fn sqrte(self) -> Self {
-                use coresimd::simd_llvm::simd_fsqrt;
-                unsafe { simd_fsqrt(self) }
-            }
-
-            /// Reciprocal square-root estimate
-            #[inline]
-            pub fn rsqrte(self) -> Self {
-                unsafe {
-                    use coresimd::simd_llvm::simd_fsqrt;
-                    $id::splat(1.) / simd_fsqrt(self)
-                }
-            }
-
-            /// Fused multiply add: `self * y + z`
-            #[inline]
-            pub fn fma(self, y: Self, z: Self) -> Self {
-                use coresimd::ppsv::codegen::fma::FloatFma;
-                FloatFma::fma(self, y, z)
-            }
-
-            /// Sin
-            #[inline(always)]
-            pub fn sin(self) -> Self {
-                use coresimd::ppsv::codegen::sin::FloatSin;
-                FloatSin::sin(self)
-            }
-
-            /// Cos
-            #[inline]
-            pub fn cos(self) -> Self {
-                use coresimd::ppsv::codegen::cos::FloatCos;
-                FloatCos::cos(self)
-            }
-        }
-    };
-}
-
-macro_rules! test_float_math {
-    ($id:ident, $elem_ty:ident) => {
-        fn sqrt2() -> $elem_ty {
-            match ::mem::size_of::<$elem_ty>() {
-                4 => 1.4142135 as $elem_ty,
-                8 => 1.4142135623730951 as $elem_ty,
-                _ => unreachable!(),
-            }
-        }
-
-        fn pi() -> $elem_ty {
-            match ::mem::size_of::<$elem_ty>() {
-                4 => ::std::f32::consts::PI as $elem_ty,
-                8 => ::std::f64::consts::PI as $elem_ty,
-                _ => unreachable!(),
-            }
-        }
-
-        #[test]
-        fn abs() {
-            use coresimd::simd::*;
-            let o = $id::splat(1 as $elem_ty);
-            assert_eq!(o, o.abs());
-
-            let mo = $id::splat(-1 as $elem_ty);
-            assert_eq!(o, mo.abs());
-        }
-
-        #[test]
-        fn sqrt() {
-            use coresimd::simd::*;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            assert_eq!(z, z.sqrt());
-            assert_eq!(o, o.sqrt());
-
-            let t = $id::splat(2 as $elem_ty);
-            let e = $id::splat(sqrt2() as $elem_ty);
-            assert_eq!(e, t.sqrt());
-        }
-
-        #[test]
-        fn sqrte() {
-            use coresimd::simd::*;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            assert_eq!(z, z.sqrte());
-            assert_eq!(o, o.sqrte());
-
-            let t = $id::splat(2 as $elem_ty);
-            let e = $id::splat(sqrt2() as $elem_ty);
-            let error = (e - t.sqrte()).abs();
-            let tol = $id::splat(2.4e-4 as $elem_ty);
-
-            assert!(error.le(tol).all());
-        }
-
-        #[test]
-        fn rsqrte() {
-            use coresimd::simd::*;
-            let o = $id::splat(1 as $elem_ty);
-            assert_eq!(o, o.rsqrte());
-
-            let t = $id::splat(2 as $elem_ty);
-            let e = 1. / sqrt2();
-            let error = (e - t.rsqrte()).abs();
-            let tol = $id::splat(2.4e-4 as $elem_ty);
-            assert!(error.le(tol).all());
-        }
-
-        #[test]
-        fn fma() {
-            use coresimd::simd::*;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let t3 = $id::splat(3 as $elem_ty);
-            let f = $id::splat(4 as $elem_ty);
-
-            assert_eq!(z, z.fma(z, z));
-            assert_eq!(o, o.fma(o, z));
-            assert_eq!(o, o.fma(z, o));
-            assert_eq!(o, z.fma(o, o));
-
-            assert_eq!(t, o.fma(o, o));
-            assert_eq!(t, o.fma(t, z));
-            assert_eq!(t, t.fma(o, z));
-
-            assert_eq!(f, t.fma(t, z));
-            assert_eq!(f, t.fma(o, t));
-            assert_eq!(t3, t.fma(o, o));
-        }
-
-        #[test]
-        fn sin() {
-            use coresimd::simd::*;
-            let z = $id::splat(0 as $elem_ty);
-            let p = $id::splat(pi() as $elem_ty);
-            let ph = $id::splat(pi() as $elem_ty / 2.);
-            let o_r = $id::splat((pi() as $elem_ty / 2.).sin());
-            let z_r = $id::splat((pi() as $elem_ty).sin());
-
-            assert_eq!(z, z.sin());
-            assert_eq!(o_r, ph.sin());
-            assert_eq!(z_r, p.sin());
-        }
-
-        #[test]
-        fn cos() {
-            use coresimd::simd::*;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let p = $id::splat(pi() as $elem_ty);
-            let ph = $id::splat(pi() as $elem_ty / 2.);
-            let z_r = $id::splat((pi() as $elem_ty / 2.).cos());
-            let o_r = $id::splat((pi() as $elem_ty).cos());
-
-            assert_eq!(o, z.cos());
-            assert_eq!(z_r, ph.cos());
-            assert_eq!(o_r, p.cos());
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/fmt.rs b/coresimd/ppsv/api/fmt.rs
deleted file mode 100644
index 3005042309..0000000000
--- a/coresimd/ppsv/api/fmt.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-//! Implements formating traits.
-#![allow(unused)]
-
-macro_rules! impl_hex_fmt {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::fmt::LowerHex for $id {
-            fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result {
-                use mem;
-                write!(f, "{}(", stringify!($id))?;
-                let n = mem::size_of_val(self) / mem::size_of::<$elem_ty>();
-                for i in 0..n {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    self.extract(i).fmt(f)?;
-                }
-                write!(f, ")")
-            }
-        }
-        impl ::fmt::UpperHex for $id {
-            fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result {
-                write!(f, "{}(", stringify!($id))?;
-                for i in 0..$id::lanes() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    self.extract(i).fmt(f)?;
-                }
-                write!(f, ")")
-            }
-        }
-        impl ::fmt::Octal for $id {
-            fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result {
-                write!(f, "{}(", stringify!($id))?;
-                for i in 0..$id::lanes() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    self.extract(i).fmt(f)?;
-                }
-                write!(f, ")")
-            }
-        }
-        impl ::fmt::Binary for $id {
-            fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result {
-                write!(f, "{}(", stringify!($id))?;
-                for i in 0..$id::lanes() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    self.extract(i).fmt(f)?;
-                }
-                write!(f, ")")
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_hex_fmt_impl {
-    ($id:ident, $elem_ty:ident, $($values:expr),+) => {
-        #[test]
-        fn hex_fmt() {
-            use ::std::prelude::v1::*;
-            use ::coresimd::simd::$id;
-            for &i in [$($values),+].iter() {
-                let vec = $id::splat(i as $elem_ty);
-
-                let s = format!("{:#x}", vec);
-                let beg = format!("{}(", stringify!($id));
-                assert!(s.starts_with(&beg));
-                assert!(s.ends_with(")"));
-                let s: Vec<String> = s.replace(&beg, "").replace(")", "").split(",")
-                    .map(|v| v.trim().to_string()).collect();
-                assert_eq!(s.len(), $id::lanes());
-                for (index, ss) in s.into_iter().enumerate() {
-                    assert_eq!(ss, format!("{:#x}", vec.extract(index)));
-                }
-            }
-        }
-        #[test]
-        fn upper_hex_fmt() {
-            use ::std::prelude::v1::*;
-            use ::coresimd::simd::$id;
-            for &i in [$($values),+].iter() {
-                let vec = $id::splat(i as $elem_ty);
-
-                let s = format!("{:#X}", vec);
-                let beg = format!("{}(", stringify!($id));
-                assert!(s.starts_with(&beg));
-                assert!(s.ends_with(")"));
-                let s: Vec<String> = s.replace(&beg, "").replace(")", "").split(",")
-                    .map(|v| v.trim().to_string()).collect();
-                assert_eq!(s.len(), $id::lanes());
-                for (index, ss) in s.into_iter().enumerate() {
-                    assert_eq!(ss, format!("{:#X}", vec.extract(index)));
-                }
-            }
-        }
-        #[test]
-        fn octal_fmt() {
-            use ::std::prelude::v1::*;
-            use ::coresimd::simd::$id;
-            for &i in [$($values),+].iter() {
-                let vec = $id::splat(i as $elem_ty);
-
-                let s = format!("{:#o}", vec);
-                let beg = format!("{}(", stringify!($id));
-                assert!(s.starts_with(&beg));
-                assert!(s.ends_with(")"));
-                let s: Vec<String> = s.replace(&beg, "").replace(")", "").split(",")
-                    .map(|v| v.trim().to_string()).collect();
-                assert_eq!(s.len(), $id::lanes());
-                for (index, ss) in s.into_iter().enumerate() {
-                    assert_eq!(ss, format!("{:#o}", vec.extract(index)));
-                }
-            }
-        }
-        #[test]
-        fn binary_fmt() {
-            use ::std::prelude::v1::*;
-            use ::coresimd::simd::$id;
-            for &i in [$($values),+].iter() {
-                let vec = $id::splat(i as $elem_ty);
-
-                let s = format!("{:#b}", vec);
-                let beg = format!("{}(", stringify!($id));
-                assert!(s.starts_with(&beg));
-                assert!(s.ends_with(")"));
-                let s: Vec<String> = s.replace(&beg, "").replace(")", "").split(",")
-                    .map(|v| v.trim().to_string()).collect();
-                assert_eq!(s.len(), $id::lanes());
-                for (index, ss) in s.into_iter().enumerate() {
-                    assert_eq!(ss, format!("{:#b}", vec.extract(index)));
-                }
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-macro_rules! test_hex_fmt {
-    ($id:ident, $elem_ty:ident) => {
-        test_hex_fmt_impl!(
-            $id,
-            $elem_ty,
-            0 as $elem_ty,
-            !(0 as $elem_ty),
-            (1 as $elem_ty)
-        );
-    };
-}
diff --git a/coresimd/ppsv/api/from.rs b/coresimd/ppsv/api/from.rs
deleted file mode 100644
index dff93e8547..0000000000
--- a/coresimd/ppsv/api/from.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-//! Implements the From trait for vector types, which performs a lane-wise
-//! cast vector types with the same number of lanes.
-#![allow(unused)]
-
-macro_rules! impl_from_impl {
-    ($from:ident, $to:ident) => {
-        impl ::convert::From<::simd::$from> for $to {
-            #[inline]
-            fn from(f: ::simd::$from) -> $to {
-                use coresimd::simd_llvm::simd_cast;
-                unsafe { simd_cast(f) }
-            }
-        }
-    };
-}
-
-macro_rules! impl_from_ {
-    ($to:ident, $from:ident) => {
-        vector_impl!([impl_from_impl, $to, $from]);
-    };
-}
-
-macro_rules! impl_from {
-    ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => {
-        $(
-            impl_from_!($from, $to);
-        )+
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                $(
-                    #[test]
-                    fn $from() {
-                        use std::convert::{From, Into};
-                        use ::coresimd::simd::{$from, $to};
-                        use ::std::default::Default;
-                        assert_eq!($to::lanes(), $from::lanes());
-                        let a: $from = $from::default();
-                        let b_0: $to = From::from(a);
-                        let b_1: $to = a.into();
-                        assert_eq!(b_0, b_1);
-                    }
-                )+
-            }
-        );
-    }
-}
diff --git a/coresimd/ppsv/api/from_bits.rs b/coresimd/ppsv/api/from_bits.rs
deleted file mode 100644
index 2658b61fe8..0000000000
--- a/coresimd/ppsv/api/from_bits.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-//! Implements the `FromBits` trait for vector types, which performs bitwise
-//! lossless transmutes between equally-sized vector types.
-#![allow(unused)]
-
-macro_rules! impl_from_bits__ {
-    ($to:ident: $($from:ident),+) => {
-        $(
-            impl ::simd::FromBits<$from> for $to {
-                #[inline]
-                fn from_bits(f: $from) -> $to {
-                    unsafe { ::mem::transmute(f) }
-                }
-            }
-        )+
-    }
-}
-
-macro_rules! impl_from_bits_ {
-    ($to:ident: $($from:ident),+) => {
-        vector_impl!([impl_from_bits__, $to: $($from),+]);
-    }
-}
-
-macro_rules! impl_from_bits {
-    ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => {
-        impl_from_bits_!($to: $($from),+);
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                $(
-                    #[test]
-                    fn $from() {
-                        use ::coresimd::simd::*;
-                        use ::std::mem;
-                        assert_eq!(mem::size_of::<$from>(),
-                                   mem::size_of::<$to>());
-                        let a: $from = $from::default();
-                        let b_0: $to = FromBits::from_bits(a);
-                        let b_1: $to = a.into_bits();
-                        assert_eq!(b_0, b_1);
-                    }
-                )+
-            }
-        );
-    }
-}
diff --git a/coresimd/ppsv/api/hash.rs b/coresimd/ppsv/api/hash.rs
deleted file mode 100644
index 0dd8e05388..0000000000
--- a/coresimd/ppsv/api/hash.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-//! Implements `Hash`.
-#![allow(unused)]
-
-macro_rules! impl_hash {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::hash::Hash for $id {
-            #[inline]
-            fn hash<H: ::hash::Hasher>(&self, state: &mut H) {
-                union A {
-                    data: [$elem_ty; $id::lanes()],
-                    vec: $id,
-                }
-                unsafe { A { vec: *self }.data.hash(state) }
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_hash {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn hash() {
-            use coresimd::simd::$id;
-            use std::collections::hash_map::DefaultHasher;
-            use std::hash::{Hash, Hasher};
-            use std::mem;
-            type A = [$elem_ty; $id::lanes()];
-            let a: A = [42 as $elem_ty; $id::lanes()];
-            assert!(mem::size_of::<A>() == mem::size_of::<$id>());
-            let mut a_hash = DefaultHasher::new();
-            let mut v_hash = a_hash.clone();
-            a.hash(&mut a_hash);
-
-            let v = $id::splat(42 as $elem_ty);
-            v.hash(&mut v_hash);
-            assert_eq!(a_hash.finish(), v_hash.finish());
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/load_store.rs b/coresimd/ppsv/api/load_store.rs
deleted file mode 100644
index 59749da0e1..0000000000
--- a/coresimd/ppsv/api/load_store.rs
+++ /dev/null
@@ -1,312 +0,0 @@
-//! Implements the load/store API.
-#![allow(unused)]
-
-macro_rules! impl_load_store {
-    ($id:ident, $elem_ty:ident, $elem_count:expr) => {
-        impl $id {
-            /// Writes the values of the vector to the `slice`.
-            ///
-            /// # Panics
-            ///
-            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
-            /// aligned to an `align_of::<Self>()` boundary.
-            #[inline]
-            pub fn store_aligned(self, slice: &mut [$elem_ty]) {
-                unsafe {
-                    assert!(slice.len() >= $elem_count);
-                    let target_ptr =
-                        slice.get_unchecked_mut(0) as *mut $elem_ty;
-                    assert!(
-                        target_ptr.align_offset(::mem::align_of::<Self>())
-                            == 0
-                    );
-                    self.store_aligned_unchecked(slice);
-                }
-            }
-
-            /// Writes the values of the vector to the `slice`.
-            ///
-            /// # Panics
-            ///
-            /// If `slice.len() < Self::lanes()`.
-            #[inline]
-            pub fn store_unaligned(self, slice: &mut [$elem_ty]) {
-                unsafe {
-                    assert!(slice.len() >= $elem_count);
-                    self.store_unaligned_unchecked(slice);
-                }
-            }
-
-            /// Writes the values of the vector to the `slice`.
-            ///
-            /// # Precondition
-            ///
-            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
-            /// aligned to an `align_of::<Self>()` boundary, the behavior is
-            /// undefined.
-            #[inline]
-            pub unsafe fn store_aligned_unchecked(
-                self, slice: &mut [$elem_ty],
-            ) {
-                *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) =
-                    self;
-            }
-
-            /// Writes the values of the vector to the `slice`.
-            ///
-            /// # Precondition
-            ///
-            /// If `slice.len() < Self::lanes()` the behavior is undefined.
-            #[inline]
-            pub unsafe fn store_unaligned_unchecked(
-                self, slice: &mut [$elem_ty],
-            ) {
-                let target_ptr =
-                    slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
-                let self_ptr = &self as *const Self as *const u8;
-                ::ptr::copy_nonoverlapping(
-                    self_ptr,
-                    target_ptr,
-                    ::mem::size_of::<Self>(),
-                );
-            }
-
-            /// Instantiates a new vector with the values of the `slice`.
-            ///
-            /// # Panics
-            ///
-            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
-            /// to an `align_of::<Self>()` boundary.
-            #[inline]
-            pub fn load_aligned(slice: &[$elem_ty]) -> Self {
-                unsafe {
-                    assert!(slice.len() >= $elem_count);
-                    let target_ptr = slice.get_unchecked(0) as *const $elem_ty;
-                    assert!(
-                        target_ptr.align_offset(::mem::align_of::<Self>())
-                            == 0
-                    );
-                    Self::load_aligned_unchecked(slice)
-                }
-            }
-
-            /// Instantiates a new vector with the values of the `slice`.
-            ///
-            /// # Panics
-            ///
-            /// If `slice.len() < Self::lanes()`.
-            #[inline]
-            pub fn load_unaligned(slice: &[$elem_ty]) -> Self {
-                unsafe {
-                    assert!(slice.len() >= $elem_count);
-                    Self::load_unaligned_unchecked(slice)
-                }
-            }
-
-            /// Instantiates a new vector with the values of the `slice`.
-            ///
-            /// # Precondition
-            ///
-            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
-            /// to an `align_of::<Self>()` boundary, the behavior is undefined.
-            #[inline]
-            pub unsafe fn load_aligned_unchecked(slice: &[$elem_ty]) -> Self {
-                *(slice.get_unchecked(0) as *const $elem_ty as *const Self)
-            }
-
-            /// Instantiates a new vector with the values of the `slice`.
-            ///
-            /// # Precondition
-            ///
-            /// If `slice.len() < Self::lanes()` the behavior is undefined.
-            #[inline]
-            pub unsafe fn load_unaligned_unchecked(
-                slice: &[$elem_ty],
-            ) -> Self {
-                use mem::size_of;
-                let target_ptr =
-                    slice.get_unchecked(0) as *const $elem_ty as *const u8;
-                let mut x = Self::splat(0 as $elem_ty);
-                let self_ptr = &mut x as *mut Self as *mut u8;
-                ::ptr::copy_nonoverlapping(
-                    target_ptr,
-                    self_ptr,
-                    size_of::<Self>(),
-                );
-                x
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_load_store {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn store_unaligned() {
-            use coresimd::simd::$id;
-            use std::iter::Iterator;
-            let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
-            let vec = $id::splat(42 as $elem_ty);
-            vec.store_unaligned(&mut unaligned[1..]);
-            for (index, &b) in unaligned.iter().enumerate() {
-                if index == 0 {
-                    assert_eq!(b, 0 as $elem_ty);
-                } else {
-                    assert_eq!(b, vec.extract(index - 1));
-                }
-            }
-        }
-
-        #[test]
-        #[should_panic]
-        fn store_unaligned_fail() {
-            use coresimd::simd::$id;
-            let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
-            let vec = $id::splat(42 as $elem_ty);
-            vec.store_unaligned(&mut unaligned[2..]);
-        }
-
-        #[test]
-        fn load_unaligned() {
-            use coresimd::simd::$id;
-            use std::iter::Iterator;
-            let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
-            unaligned[0] = 0 as $elem_ty;
-            let vec = $id::load_unaligned(&unaligned[1..]);
-            for (index, &b) in unaligned.iter().enumerate() {
-                if index == 0 {
-                    assert_eq!(b, 0 as $elem_ty);
-                } else {
-                    assert_eq!(b, vec.extract(index - 1));
-                }
-            }
-        }
-
-        #[test]
-        #[should_panic]
-        fn load_unaligned_fail() {
-            use coresimd::simd::$id;
-            let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
-            unaligned[0] = 0 as $elem_ty;
-            let _vec = $id::load_unaligned(&unaligned[2..]);
-        }
-
-        union A {
-            data: [$elem_ty; 2 * ::coresimd::simd::$id::lanes()],
-            _vec: ::coresimd::simd::$id,
-        }
-
-        #[test]
-        fn store_aligned() {
-            use coresimd::simd::$id;
-            use std::iter::Iterator;
-            let mut aligned = A {
-                data: [0 as $elem_ty; 2 * $id::lanes()],
-            };
-            let vec = $id::splat(42 as $elem_ty);
-            unsafe { vec.store_aligned(&mut aligned.data[$id::lanes()..]) };
-            for (index, &b) in unsafe { aligned.data.iter().enumerate() } {
-                if index < $id::lanes() {
-                    assert_eq!(b, 0 as $elem_ty);
-                } else {
-                    assert_eq!(b, vec.extract(index - $id::lanes()));
-                }
-            }
-        }
-
-        #[test]
-        #[should_panic]
-        fn store_aligned_fail_lanes() {
-            use coresimd::simd::$id;
-            let mut aligned = A {
-                data: [0 as $elem_ty; 2 * $id::lanes()],
-            };
-            let vec = $id::splat(42 as $elem_ty);
-            unsafe {
-                vec.store_aligned(&mut aligned.data[2 * $id::lanes()..])
-            };
-        }
-
-        #[test]
-        #[should_panic]
-        fn store_aligned_fail_align() {
-            unsafe {
-                use coresimd::simd::$id;
-                use std::{mem, slice};
-                let mut aligned = A {
-                    data: [0 as $elem_ty; 2 * $id::lanes()],
-                };
-                // offset the aligned data by one byte:
-                let s: &mut [u8; 2
-                                * $id::lanes()
-                                * mem::size_of::<$elem_ty>()] =
-                    mem::transmute(&mut aligned.data);
-                let s: &mut [$elem_ty] = slice::from_raw_parts_mut(
-                    s.get_unchecked_mut(1) as *mut u8 as *mut $elem_ty,
-                    $id::lanes(),
-                );
-                let vec = $id::splat(42 as $elem_ty);
-                vec.store_aligned(s);
-            }
-        }
-
-        #[test]
-        fn load_aligned() {
-            use coresimd::simd::$id;
-            use std::iter::Iterator;
-            let mut aligned = A {
-                data: [0 as $elem_ty; 2 * $id::lanes()],
-            };
-            for i in $id::lanes()..(2 * $id::lanes()) {
-                unsafe {
-                    aligned.data[i] = 42 as $elem_ty;
-                }
-            }
-
-            let vec =
-                unsafe { $id::load_aligned(&aligned.data[$id::lanes()..]) };
-            for (index, &b) in unsafe { aligned.data.iter().enumerate() } {
-                if index < $id::lanes() {
-                    assert_eq!(b, 0 as $elem_ty);
-                } else {
-                    assert_eq!(b, vec.extract(index - $id::lanes()));
-                }
-            }
-        }
-
-        #[test]
-        #[should_panic]
-        fn load_aligned_fail_lanes() {
-            use coresimd::simd::$id;
-            let aligned = A {
-                data: [0 as $elem_ty; 2 * $id::lanes()],
-            };
-            let _vec = unsafe {
-                $id::load_aligned(&aligned.data[2 * $id::lanes()..])
-            };
-        }
-
-        #[test]
-        #[should_panic]
-        fn load_aligned_fail_align() {
-            unsafe {
-                use coresimd::simd::$id;
-                use std::{mem, slice};
-                let aligned = A {
-                    data: [0 as $elem_ty; 2 * $id::lanes()],
-                };
-                // offset the aligned data by one byte:
-                let s: &[u8; 2
-                            * $id::lanes()
-                            * mem::size_of::<$elem_ty>()] =
-                    mem::transmute(&aligned.data);
-                let s: &[$elem_ty] = slice::from_raw_parts(
-                    s.get_unchecked(1) as *const u8 as *const $elem_ty,
-                    $id::lanes(),
-                );
-                let _vec = $id::load_aligned(s);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/masks.rs b/coresimd/ppsv/api/masks.rs
deleted file mode 100644
index a287e0feeb..0000000000
--- a/coresimd/ppsv/api/masks.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-//! Minimal boolean vector implementation
-#![allow(unused)]
-
-/// Minimal interface: all packed SIMD mask types implement this.
-macro_rules! impl_mask_minimal {
-    ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => {
-
-        impl super::api::Lanes<[u32; $elem_count]> for $id {}
-
-        impl $id {
-            /// Creates a new instance with each vector elements initialized
-            /// with the provided values.
-            #[inline]
-            pub const fn new($($elem_name: bool),*) -> Self {
-                $id($(Self::bool_to_internal($elem_name)),*)
-            }
-
-            /// Converts a boolean type into the type of the vector lanes.
-            #[inline]
-            const fn bool_to_internal(x: bool) -> $elem_ty {
-                [0 as $elem_ty, !(0 as $elem_ty)][x as usize]
-            }
-
-            /// Returns the number of vector lanes.
-            #[inline]
-            pub const fn lanes() -> usize {
-                $elem_count
-            }
-
-            /// Constructs a new instance with each element initialized to
-            /// `value`.
-            #[inline]
-            pub const fn splat(value: bool) -> Self {
-                $id($({
-                    #[allow(non_camel_case_types, dead_code)]
-                    struct $elem_name;
-                    Self::bool_to_internal(value)
-                }),*)
-            }
-
-            /// Extracts the value at `index`.
-            ///
-            /// # Panics
-            ///
-            /// If `index >= Self::lanes()`.
-            #[inline]
-            pub fn extract(self, index: usize) -> bool {
-                assert!(index < $elem_count);
-                unsafe { self.extract_unchecked(index) }
-            }
-
-            /// Extracts the value at `index`.
-            ///
-            /// If `index >= Self::lanes()` the behavior is undefined.
-            #[inline]
-            pub unsafe fn extract_unchecked(self, index: usize) -> bool {
-                use coresimd::simd_llvm::simd_extract;
-                let x: $elem_ty = simd_extract(self, index as u32);
-                x != 0
-            }
-
-            /// Returns a new vector where the value at `index` is replaced by `new_value`.
-            ///
-            /// # Panics
-            ///
-            /// If `index >= Self::lanes()`.
-            #[inline]
-            #[must_use = "replace does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"]
-            pub fn replace(self, index: usize, new_value: bool) -> Self {
-                assert!(index < $elem_count);
-                unsafe { self.replace_unchecked(index, new_value) }
-            }
-
-            /// Returns a new vector where the value at `index` is replaced by `new_value`.
-            ///
-            /// # Panics
-            ///
-            /// If `index >= Self::lanes()`.
-            #[inline]
-            #[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"]
-            pub unsafe fn replace_unchecked(
-                self,
-                index: usize,
-                new_value: bool,
-            ) -> Self {
-                use coresimd::simd_llvm::simd_insert;
-                simd_insert(self, index as u32, Self::bool_to_internal(new_value))
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-macro_rules! test_mask_minimal {
-    ($id:ident, $elem_count:expr) => {
-        #[test]
-        fn minimal() {
-            use coresimd::simd::$id;
-            // TODO: test new
-
-            // lanes:
-            assert_eq!($elem_count, $id::lanes());
-
-            // splat and extract / extract_unchecked:
-            let vec = $id::splat(true);
-            for i in 0..$id::lanes() {
-                assert_eq!(true, vec.extract(i));
-                assert_eq!(true, unsafe { vec.extract_unchecked(i) });
-            }
-
-            // replace / replace_unchecked
-            let new_vec = vec.replace(1, false);
-            for i in 0..$id::lanes() {
-                if i == 1 {
-                    assert_eq!(false, new_vec.extract(i));
-                } else {
-                    assert_eq!(true, new_vec.extract(i));
-                }
-            }
-            let new_vec = unsafe { vec.replace_unchecked(1, false) };
-            for i in 0..$id::lanes() {
-                if i == 1 {
-                    assert_eq!(false, new_vec.extract(i));
-                } else {
-                    assert_eq!(true, new_vec.extract(i));
-                }
-            }
-        }
-        #[test]
-        #[should_panic]
-        fn minimal_extract_panic_on_out_of_bounds() {
-            use coresimd::simd::$id;
-            let vec = $id::splat(false);
-            let _ = vec.extract($id::lanes());
-        }
-        #[test]
-        #[should_panic]
-        fn minimal_replace_panic_on_out_of_bounds() {
-            use coresimd::simd::$id;
-            let vec = $id::splat(false);
-            let _ = vec.replace($id::lanes(), true);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/masks_reductions.rs b/coresimd/ppsv/api/masks_reductions.rs
deleted file mode 100644
index 85ba11c4a7..0000000000
--- a/coresimd/ppsv/api/masks_reductions.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-//! Horizontal mask reductions.
-#![allow(unused)]
-
-macro_rules! impl_mask_reductions {
-    ($id:ident) => {
-        impl $id {
-            /// Are `all` vector lanes `true`?
-            #[inline]
-            pub fn all(self) -> bool {
-                unsafe { super::codegen::masks_reductions::All::all(self) }
-            }
-            /// Is `any` vector lane `true`?
-            #[inline]
-            pub fn any(self) -> bool {
-                unsafe { super::codegen::masks_reductions::Any::any(self) }
-            }
-            /// Are `all` vector lanes `false`?
-            #[inline]
-            pub fn none(self) -> bool {
-                !self.any()
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_mask_reductions {
-    ($id:ident) => {
-        #[test]
-        fn all() {
-            use coresimd::simd::$id;
-
-            let a = $id::splat(true);
-            assert!(a.all());
-            let a = $id::splat(false);
-            assert!(!a.all());
-
-            for i in 0..$id::lanes() {
-                let mut a = $id::splat(true);
-                a = a.replace(i, false);
-                assert!(!a.all());
-                let mut a = $id::splat(false);
-                a = a.replace(i, true);
-                assert!(!a.all());
-            }
-        }
-        #[test]
-        fn any() {
-            use coresimd::simd::$id;
-
-            let a = $id::splat(true);
-            assert!(a.any());
-            let a = $id::splat(false);
-            assert!(!a.any());
-
-            for i in 0..$id::lanes() {
-                let mut a = $id::splat(true);
-                a = a.replace(i, false);
-                assert!(a.any());
-                let mut a = $id::splat(false);
-                a = a.replace(i, true);
-                assert!(a.any());
-            }
-        }
-        #[test]
-        fn none() {
-            use coresimd::simd::$id;
-
-            let a = $id::splat(true);
-            assert!(!a.none());
-            let a = $id::splat(false);
-            assert!(a.none());
-
-            for i in 0..$id::lanes() {
-                let mut a = $id::splat(true);
-                a = a.replace(i, false);
-                assert!(!a.none());
-                let mut a = $id::splat(false);
-                a = a.replace(i, true);
-                assert!(!a.none());
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/masks_select.rs b/coresimd/ppsv/api/masks_select.rs
deleted file mode 100644
index 517fd997c5..0000000000
--- a/coresimd/ppsv/api/masks_select.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-//! Mask select method
-#![allow(unused)]
-
-/// Implements mask select method
-macro_rules! impl_mask_select {
-    ($id:ident, $elem_ty:ident, $elem_count:expr) => {
-        impl $id {
-            /// Selects elements of `a` and `b` using mask.
-            ///
-            /// For each lane, the result contains the element of `a` if the
-            /// mask is true, and the element of `b` otherwise.
-            #[inline]
-            pub fn select<T>(self, a: T, b: T) -> T
-            where
-                T: super::api::Lanes<[u32; $elem_count]>,
-            {
-                use coresimd::simd_llvm::simd_select;
-                unsafe { simd_select(self, a, b) }
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_mask_select {
-    ($mask_id:ident, $vec_id:ident, $elem_ty:ident) => {
-        #[test]
-        fn select() {
-            use coresimd::simd::{$mask_id, $vec_id};
-            let o = 1 as $elem_ty;
-            let t = 2 as $elem_ty;
-
-            let a = $vec_id::splat(o);
-            let b = $vec_id::splat(t);
-            let m = a.lt(b);
-            assert_eq!(m.select(a, b), a);
-
-            let m = b.lt(a);
-            assert_eq!(m.select(b, a), a);
-
-            let mut c = a;
-            let mut d = b;
-            let mut m_e = $mask_id::splat(false);
-            for i in 0..$vec_id::lanes() {
-                if i % 2 == 0 {
-                    let c_tmp = c.extract(i);
-                    c = c.replace(i, d.extract(i));
-                    d = d.replace(i, c_tmp);
-                } else {
-                    m_e = m_e.replace(i, true);
-                }
-            }
-
-            let m = c.lt(d);
-            assert_eq!(m_e, m);
-            assert_eq!(m.select(c, d), a);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/minimal.rs b/coresimd/ppsv/api/minimal.rs
deleted file mode 100644
index 4470bd6c31..0000000000
--- a/coresimd/ppsv/api/minimal.rs
+++ /dev/null
@@ -1,141 +0,0 @@
-//! Minimal portable vector types API.
-#![allow(unused)]
-
-/// Minimal interface: all packed SIMD vector types implement this.
-macro_rules! impl_minimal {
-    ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => {
-        impl super::api::Lanes<[u32; $elem_count]> for $id {}
-
-        impl $id {
-            /// Creates a new instance with each vector elements initialized
-            /// with the provided values.
-            #[inline]
-            pub const fn new($($elem_name: $elem_ty),*) -> Self {
-                $id($($elem_name),*)
-            }
-
-            /// Returns the number of vector lanes.
-            #[inline]
-            pub const fn lanes() -> usize {
-                $elem_count
-            }
-
-            /// Constructs a new instance with each element initialized to
-            /// `value`.
-            #[inline]
-            pub const fn splat(value: $elem_ty) -> Self {
-                $id($({
-                    #[allow(non_camel_case_types, dead_code)]
-                    struct $elem_name;
-                    value
-                }),*)
-            }
-
-            /// Extracts the value at `index`.
-            ///
-            /// # Panics
-            ///
-            /// If `index >= Self::lanes()`.
-            #[inline]
-            pub fn extract(self, index: usize) -> $elem_ty {
-                assert!(index < $elem_count);
-                unsafe { self.extract_unchecked(index) }
-            }
-
-            /// Extracts the value at `index`.
-            ///
-            /// # Precondition
-            ///
-            /// If `index >= Self::lanes()` the behavior is undefined.
-            #[inline]
-            pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
-                use coresimd::simd_llvm::simd_extract;
-                simd_extract(self, index as u32)
-            }
-
-            /// Returns a new vector where the value at `index` is replaced by `new_value`.
-            ///
-            /// # Panics
-            ///
-            /// If `index >= Self::lanes()`.
-            #[inline]
-            #[must_use = "replace does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"]
-            pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
-                assert!(index < $elem_count);
-                unsafe { self.replace_unchecked(index, new_value) }
-            }
-
-            /// Returns a new vector where the value at `index` is replaced by `new_value`.
-            ///
-            /// # Precondition
-            ///
-            /// If `index >= Self::lanes()` the behavior is undefined.
-            #[inline]
-            #[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"]
-            pub unsafe fn replace_unchecked(
-                self,
-                index: usize,
-                new_value: $elem_ty,
-            ) -> Self {
-                use coresimd::simd_llvm::simd_insert;
-                simd_insert(self, index as u32, new_value)
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-macro_rules! test_minimal {
-    ($id:ident, $elem_ty:ident, $elem_count:expr) => {
-        #[test]
-        fn minimal() {
-            use coresimd::simd::$id;
-            // TODO: test new
-
-            // lanes:
-            assert_eq!($elem_count, $id::lanes());
-
-            // splat and extract / extract_unchecked:
-            const VAL: $elem_ty = 7 as $elem_ty;
-            const VEC: $id = $id::splat(VAL);
-            for i in 0..$id::lanes() {
-                assert_eq!(VAL, VEC.extract(i));
-                assert_eq!(VAL, unsafe { VEC.extract_unchecked(i) });
-            }
-
-            // replace / replace_unchecked
-            let new_vec = VEC.replace(1, 42 as $elem_ty);
-            for i in 0..$id::lanes() {
-                if i == 1 {
-                    assert_eq!(42 as $elem_ty, new_vec.extract(i));
-                } else {
-                    assert_eq!(VAL, new_vec.extract(i));
-                }
-            }
-            let new_vec = unsafe { VEC.replace_unchecked(1, 42 as $elem_ty) };
-            for i in 0..$id::lanes() {
-                if i == 1 {
-                    assert_eq!(42 as $elem_ty, new_vec.extract(i));
-                } else {
-                    assert_eq!(VAL, new_vec.extract(i));
-                }
-            }
-        }
-        #[test]
-        #[should_panic]
-        fn minimal_extract_panic_on_out_of_bounds() {
-            use coresimd::simd::$id;
-            const VAL: $elem_ty = 7 as $elem_ty;
-            const VEC: $id = $id::splat(VAL);
-            let _ = VEC.extract($id::lanes());
-        }
-        #[test]
-        #[should_panic]
-        fn minimal_replace_panic_on_out_of_bounds() {
-            use coresimd::simd::$id;
-            const VAL: $elem_ty = 7 as $elem_ty;
-            const VEC: $id = $id::splat(VAL);
-            let _ = VEC.replace($id::lanes(), 42 as $elem_ty);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/minmax.rs b/coresimd/ppsv/api/minmax.rs
deleted file mode 100755
index c1c7499c06..0000000000
--- a/coresimd/ppsv/api/minmax.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-//! Lane-wise arithmetic operations.
-#![allow(unused)]
-
-macro_rules! impl_int_minmax_ops {
-    ($id:ident) => {
-        impl $id {
-            // Note:
-            //
-            // * if two elements are equal min returns
-            //   always the second element
-            // * if two elements are equal max returns
-            //   always the second element
-            //
-            // Since we are dealing with integers here, and `min` and `max`
-            // construct a new integer vector, whether the first or the
-            // second element is returned when two elements compare equal
-            // does not matter.
-
-            /// Minimum of two vectors.
-            ///
-            /// Returns a new vector containing the minimum value of each of
-            /// the input vector lanes.
-            #[inline]
-            pub fn min(self, x: Self) -> Self {
-                self.lt(x).select(self, x)
-            }
-
-            /// Maximum of two vectors.
-            ///
-            /// Returns a new vector containing the maximum value of each of
-            /// the input vector lanes.
-            #[inline]
-            pub fn max(self, x: Self) -> Self {
-                self.gt(x).select(self, x)
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_int_minmax_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn minmax() {
-            use coresimd::simd::$id;
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-
-            let mut m = o;
-            for i in 0..$id::lanes() {
-                if i % 2 == 0 {
-                    m = m.replace(i, 2 as $elem_ty);
-                }
-            }
-
-            assert_eq!(o.min(t), o);
-            assert_eq!(t.min(o), o);
-            assert_eq!(m.min(o), o);
-            assert_eq!(o.min(m), o);
-            assert_eq!(m.min(t), m);
-            assert_eq!(t.min(m), m);
-
-            assert_eq!(o.max(t), t);
-            assert_eq!(t.max(o), t);
-            assert_eq!(m.max(o), m);
-            assert_eq!(o.max(m), m);
-            assert_eq!(m.max(t), t);
-            assert_eq!(t.max(m), t);
-        }
-    };
-}
-
-macro_rules! impl_float_minmax_ops {
-    ($id:ident) => {
-        impl $id {
-            /// Minimum of two vectors.
-            ///
-            /// Returns a new vector containing the minimum value of each of the
-            /// input vector lanes. The lane-wise semantics are the same as that
-            /// of `min` for the primitive floating-point types.
-            #[inline]
-            pub fn min(self, x: Self) -> Self {
-                use coresimd::simd_llvm::simd_fmin;
-                unsafe { simd_fmin(self, x) }
-            }
-
-            /// Maximum of two vectors.
-            ///
-            /// Returns a new vector containing the maximum value of each of the
-            /// input vector lanes. The lane-wise semantics are the same as that
-            /// of `max` for the primitive floating-point types.
-            #[inline]
-            pub fn max(self, x: Self) -> Self {
-                // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-                // use coresimd::simd_llvm::simd_fmax;
-                // unsafe { simd_fmax(self, x) }
-                let mut r = self;
-                for i in 0..$id::lanes() {
-                    let a = self.extract(i);
-                    let b = x.extract(i);
-                    r = r.replace(i, a.max(b))
-                }
-                r
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-macro_rules! test_float_minmax_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn minmax() {
-            use coresimd::simd::$id;
-            let n = ::std::$elem_ty::NAN;
-            let o = $id::splat(1. as $elem_ty);
-            let t = $id::splat(2. as $elem_ty);
-
-            let mut m = o;
-            let mut on = o;
-            for i in 0..$id::lanes() {
-                if i % 2 == 0 {
-                    m = m.replace(i, 2. as $elem_ty);
-                    on = on.replace(i, n);
-                }
-            }
-
-            assert_eq!(o.min(t), o);
-            assert_eq!(t.min(o), o);
-            assert_eq!(m.min(o), o);
-            assert_eq!(o.min(m), o);
-            assert_eq!(m.min(t), m);
-            assert_eq!(t.min(m), m);
-
-            assert_eq!(o.max(t), t);
-            assert_eq!(t.max(o), t);
-            assert_eq!(m.max(o), m);
-            assert_eq!(o.max(m), m);
-            assert_eq!(m.max(t), t);
-            assert_eq!(t.max(m), t);
-
-            assert_eq!(on.min(o), o);
-            assert_eq!(o.min(on), o);
-            assert_eq!(on.max(o), o);
-            assert_eq!(o.max(on), o);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs
deleted file mode 100644
index dd461b30dd..0000000000
--- a/coresimd/ppsv/api/minmax_reductions.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-//! Implements portable horizontal arithmetic reductions.
-#![allow(unused)]
-
-macro_rules! impl_minmax_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        impl $id {
-            /// Largest vector element value.
-            #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
-            #[inline]
-            pub fn max_element(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_max;
-                unsafe { simd_reduce_max(self) }
-            }
-
-            /// Largest vector element value.
-            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
-            #[allow(unused_imports)]
-            #[inline]
-            pub fn max_element(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use cmp::Ord;
-                let mut x = self.extract(0);
-                for i in 1..$id::lanes() {
-                    x = x.max(self.extract(i));
-                }
-                x
-            }
-
-            /// Smallest vector element value.
-            #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
-            #[inline]
-            pub fn min_element(self) -> $elem_ty {
-                use coresimd::simd_llvm::simd_reduce_min;
-                unsafe { simd_reduce_min(self) }
-            }
-
-            /// Smallest vector element value.
-            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
-            #[allow(unused_imports)]
-            #[inline]
-            pub fn min_element(self) -> $elem_ty {
-                // FIXME: broken on AArch64
-                // https://bugs.llvm.org/show_bug.cgi?id=36796
-                use cmp::Ord;
-                let mut x = self.extract(0);
-                for i in 1..$id::lanes() {
-                    x = x.min(self.extract(i));
-                }
-                x
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_minmax_reductions {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn max_element() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.max_element(), 0 as $elem_ty);
-            let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.max_element(), 1 as $elem_ty);
-            let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.max_element(), 2 as $elem_ty);
-        }
-
-        #[test]
-        fn min_element() {
-            use coresimd::simd::$id;
-            let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.min_element(), 0 as $elem_ty);
-            let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min_element(), 0 as $elem_ty);
-            let v = $id::splat(1 as $elem_ty);
-            let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.min_element(), 1 as $elem_ty);
-            let v = $id::splat(2 as $elem_ty);
-            let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min_element(), 1 as $elem_ty);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs
deleted file mode 100644
index 1c38926a65..0000000000
--- a/coresimd/ppsv/api/mod.rs
+++ /dev/null
@@ -1,266 +0,0 @@
-//! This module defines the API of portable vector types.
-#![allow(unused)]
-
-/// Adds the vector type `$id`, with elements of types `$elem_tys`.
-macro_rules! define_ty {
-    ($id:ident, $($elem_tys:ident),+ | $(#[$doc:meta])*) => {
-        $(#[$doc])*
-        #[repr(simd)]
-        #[derive(Copy, Clone, Debug,
-                 /*FIXME: manually implement and add tests*/ PartialOrd)]
-        #[allow(non_camel_case_types)]
-        pub struct $id($($elem_tys),*);
-    }
-}
-
-#[macro_use]
-mod arithmetic_ops;
-#[macro_use]
-mod arithmetic_scalar_ops;
-#[macro_use]
-mod arithmetic_reductions;
-#[macro_use]
-mod bitwise_ops;
-#[macro_use]
-mod bitwise_scalar_ops;
-#[macro_use]
-mod bitwise_reductions;
-#[macro_use]
-mod cmp;
-#[macro_use]
-mod default;
-#[macro_use]
-mod eq;
-#[macro_use]
-mod float_math;
-#[macro_use]
-mod fmt;
-#[macro_use]
-mod from;
-#[macro_use]
-mod from_bits;
-#[macro_use]
-mod hash;
-#[macro_use]
-mod load_store;
-#[macro_use]
-mod masks;
-#[macro_use]
-mod masks_reductions;
-#[macro_use]
-mod minimal;
-#[macro_use]
-mod minmax;
-#[macro_use]
-mod minmax_reductions;
-#[macro_use]
-mod neg;
-#[macro_use]
-mod partial_eq;
-// TODO:
-//#[macro_use]
-//mod partial_ord;
-// TODO:
-//#[macro_use]
-//mod shuffles;
-// TODO:
-//#[macro_use]
-//mod gather_scatter;
-#[macro_use]
-mod masks_select;
-#[macro_use]
-mod scalar_shifts;
-#[macro_use]
-mod shifts;
-#[macro_use]
-mod swap_bytes;
-
-/// Sealed trait used for constraining select implementations.
-pub trait Lanes<A> {}
-
-/// Defines a portable packed SIMD floating-point vector type.
-macro_rules! simd_f_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
-     $test_macro:ident |
-     $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
-        vector_impl!(
-            [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
-            [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
-            [impl_load_store, $id, $elem_ty, $elem_count],
-            [impl_cmp, $id, $mask_ty],
-            [impl_arithmetic_ops, $id],
-            [impl_arithmetic_scalar_ops, $id, $elem_ty],
-            [impl_float_arithmetic_reductions, $id, $elem_ty],
-            [impl_minmax_reductions, $id, $elem_ty],
-            [impl_neg_op, $id, $elem_ty],
-            [impl_partial_eq, $id],
-            [impl_default, $id, $elem_ty],
-            [impl_float_minmax_ops, $id],
-            [impl_float_math, $id]
-        );
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                test_minimal!($id, $elem_ty, $elem_count);
-                test_load_store!($id, $elem_ty);
-                test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty);
-                test_arithmetic_ops!($id, $elem_ty);
-                test_arithmetic_scalar_ops!($id, $elem_ty);
-                test_float_arithmetic_reductions!($id, $elem_ty);
-                test_minmax_reductions!($id, $elem_ty);
-                test_neg_op!($id, $elem_ty);
-                test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
-                test_default!($id, $elem_ty);
-                test_mask_select!($mask_ty, $id, $elem_ty);
-                test_float_minmax_ops!($id, $elem_ty);
-                test_float_math!($id, $elem_ty);
-            }
-        );
-    }
-}
-
-/// Defines a portable packed SIMD signed-integer vector type.
-macro_rules! simd_i_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
-     $test_macro:ident |
-     $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
-        vector_impl!(
-            [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
-            [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
-            [impl_load_store, $id, $elem_ty, $elem_count],
-            [impl_cmp, $id, $mask_ty],
-            [impl_hash, $id, $elem_ty],
-            [impl_arithmetic_ops, $id],
-            [impl_arithmetic_scalar_ops, $id, $elem_ty],
-            [impl_int_arithmetic_reductions, $id, $elem_ty],
-            [impl_minmax_reductions, $id, $elem_ty],
-            [impl_neg_op, $id, $elem_ty],
-            [impl_bitwise_ops, $id, !(0 as $elem_ty)],
-            [impl_bitwise_scalar_ops, $id, $elem_ty],
-            [impl_bitwise_reductions, $id, $elem_ty],
-            [impl_all_scalar_shifts, $id, $elem_ty],
-            [impl_vector_shifts, $id, $elem_ty],
-            [impl_hex_fmt, $id, $elem_ty],
-            [impl_eq, $id],
-            [impl_partial_eq, $id],
-            [impl_default, $id, $elem_ty],
-            [impl_int_minmax_ops, $id],
-            [impl_swap_bytes, $id]
-        );
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                test_minimal!($id, $elem_ty, $elem_count);
-                test_load_store!($id, $elem_ty);
-                test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty);
-                test_hash!($id, $elem_ty);
-                test_arithmetic_ops!($id, $elem_ty);
-                test_arithmetic_scalar_ops!($id, $elem_ty);
-                test_int_arithmetic_reductions!($id, $elem_ty);
-                test_minmax_reductions!($id, $elem_ty);
-                test_neg_op!($id, $elem_ty);
-                test_int_bitwise_ops!($id, $elem_ty);
-                test_int_bitwise_scalar_ops!($id, $elem_ty);
-                test_bitwise_reductions!($id, !(0 as $elem_ty));
-                test_all_scalar_shift_ops!($id, $elem_ty);
-                test_vector_shift_ops!($id, $elem_ty);
-                test_hex_fmt!($id, $elem_ty);
-                test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
-                test_default!($id, $elem_ty);
-                test_mask_select!($mask_ty, $id, $elem_ty);
-                test_int_minmax_ops!($id, $elem_ty);
-            }
-        );
-    }
-}
-
-/// Defines a portable packed SIMD unsigned-integer vector type.
-macro_rules! simd_u_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
-     $test_macro:ident |
-     $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
-        vector_impl!(
-            [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
-            [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
-            [impl_load_store, $id, $elem_ty, $elem_count],
-            [impl_cmp, $id, $mask_ty],
-            [impl_hash, $id, $elem_ty],
-            [impl_arithmetic_ops, $id],
-            [impl_arithmetic_scalar_ops, $id, $elem_ty],
-            [impl_int_arithmetic_reductions, $id, $elem_ty],
-            [impl_minmax_reductions, $id, $elem_ty],
-            [impl_bitwise_scalar_ops, $id, $elem_ty],
-            [impl_bitwise_ops, $id, !(0 as $elem_ty)],
-            [impl_bitwise_reductions, $id, $elem_ty],
-            [impl_all_scalar_shifts, $id, $elem_ty],
-            [impl_vector_shifts, $id, $elem_ty],
-            [impl_hex_fmt, $id, $elem_ty],
-            [impl_eq, $id],
-            [impl_partial_eq, $id],
-            [impl_default, $id, $elem_ty],
-            [impl_int_minmax_ops, $id],
-            [impl_swap_bytes, $id]
-        );
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                test_minimal!($id, $elem_ty, $elem_count);
-                test_load_store!($id, $elem_ty);
-                test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty);
-                test_hash!($id, $elem_ty);
-                test_arithmetic_ops!($id, $elem_ty);
-                test_arithmetic_scalar_ops!($id, $elem_ty);
-                test_int_arithmetic_reductions!($id, $elem_ty);
-                test_minmax_reductions!($id, $elem_ty);
-                test_int_bitwise_ops!($id, $elem_ty);
-                test_int_bitwise_scalar_ops!($id, $elem_ty);
-                test_bitwise_reductions!($id, !(0 as $elem_ty));
-                test_all_scalar_shift_ops!($id, $elem_ty);
-                test_vector_shift_ops!($id, $elem_ty);
-                test_hex_fmt!($id, $elem_ty);
-                test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
-                test_default!($id, $elem_ty);
-                test_mask_select!($mask_ty, $id, $elem_ty);
-                test_int_minmax_ops!($id, $elem_ty);
-                test_swap_bytes!($id, $elem_ty);
-            }
-        );
-    }
-}
-
-/// Defines a portable packed SIMD mask type.
-macro_rules! simd_m_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident, $test_macro:ident |
-     $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
-        vector_impl!(
-            [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
-            [impl_mask_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
-            [impl_bitwise_ops, $id, true],
-            [impl_bitwise_scalar_ops, $id, bool],
-            [impl_mask_bitwise_reductions, $id, bool, $elem_ty],
-            [impl_mask_reductions, $id],
-            [impl_mask_select, $id, $elem_ty, $elem_count],
-            [impl_mask_cmp, $id, $id],
-            [impl_eq, $id],
-            [impl_partial_eq, $id],
-            [impl_default, $id, bool]
-        );
-
-        $test_macro!(
-            #[cfg(test)]
-            mod $test_mod {
-                test_mask_minimal!($id, $elem_count);
-                test_mask_bitwise_ops!($id);
-                test_mask_bitwise_scalar_ops!($id);
-                test_mask_reductions!($id);
-                test_bitwise_reductions!($id, true);
-                test_cmp!($id, $elem_ty, $id, true, false);
-                test_partial_eq!($id, true, false);
-                test_default!($id, bool);
-            }
-        );
-    }
-}
diff --git a/coresimd/ppsv/api/neg.rs b/coresimd/ppsv/api/neg.rs
deleted file mode 100644
index 55dd4d65bd..0000000000
--- a/coresimd/ppsv/api/neg.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-//! Implements `std::ops::Neg` for signed vector types.
-#![allow(unused)]
-
-macro_rules! impl_neg_op {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::ops::Neg for $id {
-            type Output = Self;
-            #[inline]
-            fn neg(self) -> Self {
-                Self::splat(-1 as $elem_ty) * self
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_neg_op {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn neg() {
-            use coresimd::simd::$id;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let f = $id::splat(4 as $elem_ty);
-
-            let nz = $id::splat(-(0 as $elem_ty));
-            let no = $id::splat(-(1 as $elem_ty));
-            let nt = $id::splat(-(2 as $elem_ty));
-            let nf = $id::splat(-(4 as $elem_ty));
-
-            assert_eq!(-z, nz);
-            assert_eq!(-o, no);
-            assert_eq!(-t, nt);
-            assert_eq!(-f, nf);
-
-            assert_eq!(z, -nz);
-            assert_eq!(o, -no);
-            assert_eq!(t, -nt);
-            assert_eq!(f, -nf);
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/partial_eq.rs b/coresimd/ppsv/api/partial_eq.rs
deleted file mode 100644
index ab4997b72a..0000000000
--- a/coresimd/ppsv/api/partial_eq.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-//! Implements `PartialEq` for vector types.
-#![allow(unused)]
-
-macro_rules! impl_partial_eq {
-    ($id:ident) => {
-        impl ::cmp::PartialEq<$id> for $id {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                $id::eq(*self, *other).all()
-            }
-            #[inline]
-            fn ne(&self, other: &Self) -> bool {
-                $id::ne(*self, *other).any()
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_partial_eq {
-    ($id:ident, $true:expr, $false:expr) => {
-        #[test]
-        fn partial_eq() {
-            use coresimd::simd::*;
-
-            let a = $id::splat($false);
-            let b = $id::splat($true);
-
-            assert!(a != b);
-            assert!(!(a == b));
-            assert!(a == a);
-            assert!(!(a != a));
-
-            // Test further to make sure comparisons work with non-splatted
-            // values.
-            // This is to test the fix for #511
-
-            let a = $id::splat($false).replace(0, $true);
-            let b = $id::splat($true);
-
-            assert!(a != b);
-            assert!(!(a == b));
-            assert!(a == a);
-            assert!(!(a != a));
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/scalar_shifts.rs b/coresimd/ppsv/api/scalar_shifts.rs
deleted file mode 100644
index 586d909c32..0000000000
--- a/coresimd/ppsv/api/scalar_shifts.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-//! Implements integer shifts.
-#![allow(unused)]
-
-macro_rules! impl_shifts {
-    ($id:ident, $elem_ty:ident, $($by:ident),+) => {
-        $(
-            impl ::ops::Shl<$by> for $id {
-                type Output = Self;
-                #[inline]
-                fn shl(self, other: $by) -> Self {
-                    use coresimd::simd_llvm::simd_shl;
-                    unsafe { simd_shl(self, $id::splat(other as $elem_ty)) }
-                }
-            }
-            impl ::ops::Shr<$by> for $id {
-                type Output = Self;
-                #[inline]
-                fn shr(self, other: $by) -> Self {
-                    use coresimd::simd_llvm::simd_shr;
-                    unsafe { simd_shr(self, $id::splat(other as $elem_ty)) }
-                }
-            }
-
-            impl ::ops::ShlAssign<$by> for $id {
-                #[inline]
-                fn shl_assign(&mut self, other: $by) {
-                    *self = *self << other;
-                }
-            }
-            impl ::ops::ShrAssign<$by> for $id {
-                #[inline]
-                fn shr_assign(&mut self, other: $by) {
-                    *self = *self >> other;
-                }
-            }
-
-        )+
-    }
-}
-
-macro_rules! impl_all_scalar_shifts {
-    ($id:ident, $elem_ty:ident) => {
-        impl_shifts!(
-            $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
-        );
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_shift_ops {
-    ($id:ident, $elem_ty:ident, $($index_ty:ident),+) => {
-        #[test]
-        fn scalar_shift_ops() {
-            use ::coresimd::simd::$id;
-            use ::std::mem;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let f = $id::splat(4 as $elem_ty);
-
-            $(
-                {
-                    let zi = 0 as $index_ty;
-                    let oi = 1 as $index_ty;
-                    let ti = 2 as $index_ty;
-                    let maxi = (mem::size_of::<$elem_ty>() * 8 - 1) as $index_ty;
-
-                    // shr
-                    assert_eq!(z >> zi, z);
-                    assert_eq!(z >> oi, z);
-                    assert_eq!(z >> ti, z);
-                    assert_eq!(z >> ti, z);
-
-                    assert_eq!(o >> zi, o);
-                    assert_eq!(t >> zi, t);
-                    assert_eq!(f >> zi, f);
-                    assert_eq!(f >> maxi, z);
-
-                    assert_eq!(o >> oi, z);
-                    assert_eq!(t >> oi, o);
-                    assert_eq!(t >> ti, z);
-                    assert_eq!(f >> oi, t);
-                    assert_eq!(f >> ti, o);
-                    assert_eq!(f >> maxi, z);
-
-                    // shl
-                    assert_eq!(z << zi, z);
-                    assert_eq!(o << zi, o);
-                    assert_eq!(t << zi, t);
-                    assert_eq!(f << zi, f);
-                    assert_eq!(f << maxi, z);
-
-                    assert_eq!(o << oi, t);
-                    assert_eq!(o << ti, f);
-                    assert_eq!(t << oi, f);
-
-                    {  // shr_assign
-                        let mut v = o;
-                        v >>= oi;
-                        assert_eq!(v, z);
-                    }
-                    {  // shl_assign
-                        let mut v = o;
-                        v <<= oi;
-                        assert_eq!(v, t);
-                    }
-                }
-            )+
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_all_scalar_shift_ops {
-    ($id:ident, $elem_ty:ident) => {
-        test_shift_ops!(
-            $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
-        );
-    };
-}
diff --git a/coresimd/ppsv/api/shifts.rs b/coresimd/ppsv/api/shifts.rs
deleted file mode 100644
index 70850d8b74..0000000000
--- a/coresimd/ppsv/api/shifts.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-//! Implements integer shifts.
-#![allow(unused)]
-
-macro_rules! impl_vector_shifts {
-    ($id:ident, $elem_ty:ident) => {
-        impl ::ops::Shl<$id> for $id {
-            type Output = Self;
-            #[inline]
-            fn shl(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_shl;
-                unsafe { simd_shl(self, other) }
-            }
-        }
-        impl ::ops::Shr<$id> for $id {
-            type Output = Self;
-            #[inline]
-            fn shr(self, other: Self) -> Self {
-                use coresimd::simd_llvm::simd_shr;
-                unsafe { simd_shr(self, other) }
-            }
-        }
-        impl ::ops::ShlAssign<$id> for $id {
-            #[inline]
-            fn shl_assign(&mut self, other: Self) {
-                *self = *self << other;
-            }
-        }
-        impl ::ops::ShrAssign<$id> for $id {
-            #[inline]
-            fn shr_assign(&mut self, other: Self) {
-                *self = *self >> other;
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_vector_shift_ops {
-    ($id:ident, $elem_ty:ident) => {
-        #[test]
-        fn shift_ops() {
-            use coresimd::simd::$id;
-            use std::mem;
-            let z = $id::splat(0 as $elem_ty);
-            let o = $id::splat(1 as $elem_ty);
-            let t = $id::splat(2 as $elem_ty);
-            let f = $id::splat(4 as $elem_ty);
-
-            let max =
-                $id::splat((mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty);
-
-            // shr
-            assert_eq!(z >> z, z);
-            assert_eq!(z >> o, z);
-            assert_eq!(z >> t, z);
-            assert_eq!(z >> t, z);
-
-            assert_eq!(o >> z, o);
-            assert_eq!(t >> z, t);
-            assert_eq!(f >> z, f);
-            assert_eq!(f >> max, z);
-
-            assert_eq!(o >> o, z);
-            assert_eq!(t >> o, o);
-            assert_eq!(t >> t, z);
-            assert_eq!(f >> o, t);
-            assert_eq!(f >> t, o);
-            assert_eq!(f >> max, z);
-
-            // shl
-            assert_eq!(z << z, z);
-            assert_eq!(o << z, o);
-            assert_eq!(t << z, t);
-            assert_eq!(f << z, f);
-            assert_eq!(f << max, z);
-
-            assert_eq!(o << o, t);
-            assert_eq!(o << t, f);
-            assert_eq!(t << o, f);
-
-            {
-                // shr_assign
-                let mut v = o;
-                v >>= o;
-                assert_eq!(v, z);
-            }
-            {
-                // shl_assign
-                let mut v = o;
-                v <<= o;
-                assert_eq!(v, t);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/api/swap_bytes.rs b/coresimd/ppsv/api/swap_bytes.rs
deleted file mode 100644
index d94dbb592f..0000000000
--- a/coresimd/ppsv/api/swap_bytes.rs
+++ /dev/null
@@ -1,130 +0,0 @@
-//! Horizontal swap bytes.
-
-macro_rules! impl_swap_bytes {
-    ($id:ident) => {
-        impl $id {
-            /// Reverses the byte order of the vector.
-            #[inline]
-            pub fn swap_bytes(self) -> Self {
-                unsafe {
-                    super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
-                }
-            }
-
-            /// Converts self to little endian from the target's endianness.
-            ///
-            /// On little endian this is a no-op. On big endian the bytes are
-            /// swapped.
-            #[inline]
-            pub fn to_le(self) -> Self {
-                #[cfg(target_endian = "little")]
-                {
-                    self
-                }
-                #[cfg(not(target_endian = "little"))]
-                {
-                    self.swap_bytes()
-                }
-            }
-
-            /// Converts self to big endian from the target's endianness.
-            ///
-            /// On big endian this is a no-op. On little endian the bytes are
-            /// swapped.
-            #[inline]
-            pub fn to_be(self) -> Self {
-                #[cfg(target_endian = "big")]
-                {
-                    self
-                }
-                #[cfg(not(target_endian = "big"))]
-                {
-                    self.swap_bytes()
-                }
-            }
-        }
-    };
-}
-
-#[cfg(test)]
-macro_rules! test_swap_bytes {
-    ($id:ident, $elem_ty:ty) => {
-        use coresimd::simd::$id;
-        use std::{mem, slice};
-
-        const BYTES: [u8; 64] = [
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
-            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
-            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-        ];
-
-        macro_rules! swap {
-            ($func: ident) => {{
-                // catch possible future >512 vectors
-                assert!(mem::size_of::<$id>() <= 64);
-
-                let mut actual = BYTES;
-                let elems: &mut [$elem_ty] = unsafe {
-                    slice::from_raw_parts_mut(
-                        actual.as_mut_ptr() as *mut $elem_ty,
-                        $id::lanes(),
-                    )
-                };
-
-                let vec = $id::load_unaligned(elems);
-                vec.$func().store_unaligned(elems);
-
-                actual
-            }};
-        }
-
-        macro_rules! test_swap {
-            ($func: ident) => {{
-                let actual = swap!($func);
-                let expected =
-                    BYTES.iter().rev().skip(64 - mem::size_of::<$id>());
-
-                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
-            }};
-        }
-
-        macro_rules! test_no_swap {
-            ($func: ident) => {{
-                let actual = swap!($func);
-                let expected = BYTES.iter().take(mem::size_of::<$id>());
-
-                assert!(actual.iter().zip(expected).all(|(x, y)| x == y));
-            }};
-        }
-
-        #[test]
-        fn swap_bytes() {
-            test_swap!(swap_bytes);
-        }
-
-        #[test]
-        fn to_le() {
-            #[cfg(target_endian = "little")]
-            {
-                test_no_swap!(to_le);
-            }
-            #[cfg(not(target_endian = "little"))]
-            {
-                test_swap!(to_le);
-            }
-        }
-
-        #[test]
-        fn to_be() {
-            #[cfg(target_endian = "big")]
-            {
-                test_no_swap!(to_be);
-            }
-            #[cfg(not(target_endian = "big"))]
-            {
-                test_swap!(to_be);
-            }
-        }
-    };
-}
diff --git a/coresimd/ppsv/codegen/abs.rs b/coresimd/ppsv/codegen/abs.rs
deleted file mode 100644
index c829ff8c5b..0000000000
--- a/coresimd/ppsv/codegen/abs.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-//! Vector absolute value
-#![allow(dead_code)]
-use coresimd::simd::*;
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.fabs.f32"]
-    fn abs_f32(x: f32) -> f32;
-    #[link_name = "llvm.fabs.f64"]
-    fn abs_f64(x: f64) -> f64;
-
-    #[link_name = "llvm.fabs.v2f32"]
-    fn abs_v2f32(x: f32x2) -> f32x2;
-    #[link_name = "llvm.fabs.v4f32"]
-    fn abs_v4f32(x: f32x4) -> f32x4;
-    #[link_name = "llvm.fabs.v8f32"]
-    fn abs_v8f32(x: f32x8) -> f32x8;
-    #[link_name = "llvm.fabs.v16f32"]
-    fn abs_v16f32(x: f32x16) -> f32x16;
-    #[link_name = "llvm.fabs.v2f64"]
-    fn abs_v2f64(x: f64x2) -> f64x2;
-    #[link_name = "llvm.fabs.v4f64"]
-    fn abs_v4f64(x: f64x4) -> f64x4;
-    #[link_name = "llvm.fabs.v8f64"]
-    fn abs_v8f64(x: f64x8) -> f64x8;
-}
-
-pub(crate) trait FloatAbs {
-    fn abs(self) -> Self;
-}
-
-trait RawAbs {
-    fn raw_abs(self) -> Self;
-}
-
-impl RawAbs for f32 {
-    fn raw_abs(self) -> Self {
-        unsafe { abs_f32(self) }
-    }
-}
-
-impl RawAbs for f64 {
-    fn raw_abs(self) -> Self {
-        unsafe { abs_f64(self) }
-    }
-}
-
-
-macro_rules! impl_fabs {
-    ($id:ident : $fn:ident) => {
-        #[cfg(not(target_arch = "s390x"))]
-        impl FloatAbs for $id {
-            fn abs(self) -> Self {
-                unsafe { $fn(self) }
-            }
-        }
-        // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501
-        #[cfg(target_arch = "s390x")]
-        impl FloatAbs for $id {
-            fn abs(self) -> Self {
-                let mut v = $id::splat(0.);
-                for i in 0..$id::lanes() {
-                    v = v.replace(i, self.extract(i).raw_abs())
-                }
-                v
-            }
-        }
-    };
-}
-
-impl_fabs!(f32x2: abs_v2f32);
-impl_fabs!(f32x4: abs_v4f32);
-impl_fabs!(f32x8: abs_v8f32);
-impl_fabs!(f32x16: abs_v16f32);
-impl_fabs!(f64x2: abs_v2f64);
-impl_fabs!(f64x4: abs_v4f64);
-impl_fabs!(f64x8: abs_v8f64);
diff --git a/coresimd/ppsv/codegen/cos.rs b/coresimd/ppsv/codegen/cos.rs
deleted file mode 100644
index 38dce584f8..0000000000
--- a/coresimd/ppsv/codegen/cos.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-//! Exact vector cos
-#![allow(dead_code)]
-use coresimd::simd::*;
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.cos.f32"]
-    fn cos_f32(x: f32) -> f32;
-    #[link_name = "llvm.cos.f64"]
-    fn cos_f64(x: f64) -> f64;
-
-    #[link_name = "llvm.cos.v2f32"]
-    fn cos_v2f32(x: f32x2) -> f32x2;
-    #[link_name = "llvm.cos.v4f32"]
-    fn cos_v4f32(x: f32x4) -> f32x4;
-    #[link_name = "llvm.cos.v8f32"]
-    fn cos_v8f32(x: f32x8) -> f32x8;
-    #[link_name = "llvm.cos.v16f32"]
-    fn cos_v16f32(x: f32x16) -> f32x16;
-    #[link_name = "llvm.cos.v2f64"]
-    fn cos_v2f64(x: f64x2) -> f64x2;
-    #[link_name = "llvm.cos.v4f64"]
-    fn cos_v4f64(x: f64x4) -> f64x4;
-    #[link_name = "llvm.cos.v8f64"]
-    fn cos_v8f64(x: f64x8) -> f64x8;
-}
-
-pub(crate) trait FloatCos {
-    fn cos(self) -> Self;
-}
-
-trait RawCos {
-    fn raw_cos(self) -> Self;
-}
-
-impl RawCos for f32 {
-    fn raw_cos(self) -> Self {
-        unsafe { cos_f32(self) }
-    }
-}
-
-impl RawCos for f64 {
-    fn raw_cos(self) -> Self {
-        unsafe { cos_f64(self) }
-    }
-}
-
-
-macro_rules! impl_fcos {
-    ($id:ident : $fn:ident) => {
-        #[cfg(not(target_arch = "s390x"))]
-        impl FloatCos for $id {
-            fn cos(self) -> Self {
-                unsafe { $fn(self) }
-            }
-        }
-
-        // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501
-        #[cfg(target_arch = "s390x")]
-        impl FloatCos for $id {
-            fn cos(self) -> Self {
-                let mut v = $id::splat(0.);
-                for i in 0..$id::lanes() {
-                    v = v.replace(i, self.extract(i).raw_cos())
-                }
-                v
-            }
-        }
-    };
-}
-
-impl_fcos!(f32x2: cos_v2f32);
-impl_fcos!(f32x4: cos_v4f32);
-impl_fcos!(f32x8: cos_v8f32);
-impl_fcos!(f32x16: cos_v16f32);
-impl_fcos!(f64x2: cos_v2f64);
-impl_fcos!(f64x4: cos_v4f64);
-impl_fcos!(f64x8: cos_v8f64);
diff --git a/coresimd/ppsv/codegen/fma.rs b/coresimd/ppsv/codegen/fma.rs
deleted file mode 100644
index a0f0e8f729..0000000000
--- a/coresimd/ppsv/codegen/fma.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-//! Vector fused multiply add
-#![allow(dead_code)]
-use coresimd::simd::*;
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.fma.v2f32"]
-    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
-    #[link_name = "llvm.fma.v4f32"]
-    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
-    #[link_name = "llvm.fma.v8f32"]
-    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
-    #[link_name = "llvm.fma.v16f32"]
-    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
-    #[link_name = "llvm.fma.v2f64"]
-    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
-    #[link_name = "llvm.fma.v4f64"]
-    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
-    #[link_name = "llvm.fma.v8f64"]
-    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
-}
-
-pub(crate) trait FloatFma {
-    fn fma(self, y: Self, z: Self) -> Self;
-}
-
-macro_rules! impl_fma {
-    ($id:ident : $fn:ident) => {
-        #[cfg(not(target_arch = "s390x"))]
-        impl FloatFma for $id {
-            fn fma(self, y: Self, z: Self) -> Self {
-                unsafe { $fn(self, y, z) }
-            }
-        }
-        // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501
-        #[cfg(target_arch = "s390x")]
-        impl FloatFma for $id {
-            fn fma(self, y: Self, z: Self) -> Self {
-                self * y + z
-            }
-        }
-    };
-}
-
-impl_fma!(f32x2: fma_v2f32);
-impl_fma!(f32x4: fma_v4f32);
-impl_fma!(f32x8: fma_v8f32);
-impl_fma!(f32x16: fma_v16f32);
-impl_fma!(f64x2: fma_v2f64);
-impl_fma!(f64x4: fma_v4f64);
-impl_fma!(f64x8: fma_v8f64);
diff --git a/coresimd/ppsv/codegen/masks_reductions.rs b/coresimd/ppsv/codegen/masks_reductions.rs
deleted file mode 100644
index 617f1fd300..0000000000
--- a/coresimd/ppsv/codegen/masks_reductions.rs
+++ /dev/null
@@ -1,651 +0,0 @@
-//! LLVM6 currently generates sub-optimal code for the `all` mask reductions.
-//!
-//! See https://github.com/rust-lang-nursery/stdsimd/issues/362#issuecomment-372774371
-//! and the associated LLVM bug:
-//! https://bugs.llvm.org/show_bug.cgi?id=36702
-
-#![allow(unused)]
-
-use coresimd::simd::*;
-
-pub trait All: ::marker::Sized {
-    unsafe fn all(self) -> bool;
-}
-
-pub trait Any: ::marker::Sized {
-    unsafe fn any(self) -> bool;
-}
-
-// By default we use the simd_reduce_{all,any} intrinsics, which produces
-// sub-optimal code, except on aarch64 where that intrinsic is broken
-// due to https://bugs.llvm.org/show_bug.cgi?id=36796 so we just use
-// full-blown bitwise and/or reduction there.
-macro_rules! default_impl {
-    ($id:ident) => {
-        impl All for $id {
-            #[inline]
-            unsafe fn all(self) -> bool {
-                #[cfg(not(target_arch = "aarch64"))]
-                {
-                    use coresimd::simd_llvm::simd_reduce_all;
-                    simd_reduce_all(self)
-                }
-                #[cfg(target_arch = "aarch64")]
-                {
-                    // FIXME: Broken on AArch64
-                    // https://bugs.llvm.org/show_bug.cgi?id=36796
-                    self.and()
-                }
-            }
-        }
-
-        impl Any for $id {
-            #[inline]
-            unsafe fn any(self) -> bool {
-                #[cfg(not(target_arch = "aarch64"))]
-                {
-                    use coresimd::simd_llvm::simd_reduce_any;
-                    simd_reduce_any(self)
-                }
-                #[cfg(target_arch = "aarch64")]
-                {
-                    // FIXME: Broken on AArch64
-                    // https://bugs.llvm.org/show_bug.cgi?id=36796
-                    self.or()
-                }
-            }
-        }
-    };
-}
-
-// On x86 both SSE2 and AVX2 provide movemask instructions that can be used
-// here. The AVX2 instructions aren't necessarily better than the AVX
-// instructions below, so they aren't implemented here.
-//
-// FIXME: for mask generated from f32x4 LLVM6 emits pmovmskb but should emit
-// movmskps. Since the masks don't track whether they were produced by integer
-// or floating point vectors, we can't currently work around this yet. The
-// performance impact for this shouldn't be large, but this is filled as:
-// https://bugs.llvm.org/show_bug.cgi?id=37087
-#[cfg(
-    all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        target_feature = "sse2"
-    )
-)]
-macro_rules! x86_128_sse2_movemask_impl {
-    ($id:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "sse2")]
-            unsafe fn all(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm_movemask_epi8;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm_movemask_epi8;
-                // _mm_movemask_epi8(a) creates a 16bit mask containing the
-                // most significant bit of each byte of `a`. If all
-                // bits are set, then all 16 lanes of the mask are
-                // true.
-                _mm_movemask_epi8(::mem::transmute(self))
-                    == u16::max_value() as i32
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "sse2")]
-            unsafe fn any(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm_movemask_epi8;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm_movemask_epi8;
-
-                _mm_movemask_epi8(::mem::transmute(self)) != 0
-            }
-        }
-    };
-}
-
-// On x86 with AVX we use _mm256_testc_si256 and _mm256_testz_si256.
-//
-// FIXME: for masks generated from floating point vectors one should use
-// x86_mm256_testc_ps, x86_mm256_testz_ps, x86_mm256_testc_pd,
-// x86_mm256_testz_pd.Since the masks don't track whether they were produced by
-// integer or floating point vectors, we can't currently work around this yet.
-//
-// TODO: investigate perf impact and fill LLVM bugs as necessary.
-#[cfg(
-    all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        target_feature = "avx"
-    )
-)]
-macro_rules! x86_256_avx_test_impl {
-    ($id:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "avx")]
-            unsafe fn all(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm256_testc_si256;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm256_testc_si256;
-                _mm256_testc_si256(
-                    ::mem::transmute(self),
-                    ::mem::transmute($id::splat(true)),
-                ) != 0
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "avx")]
-            unsafe fn any(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm256_testz_si256;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm256_testz_si256;
-                _mm256_testz_si256(
-                    ::mem::transmute(self),
-                    ::mem::transmute(self),
-                ) == 0
-            }
-        }
-    };
-}
-
-// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by
-// executing the algorithm for 128-bit on the higher and lower elements of the
-// vector independently.
-#[cfg(
-    all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        target_feature = "sse2"
-    )
-)]
-macro_rules! x86_256_sse2_impl {
-    ($id:ident, $v128:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "sse2")]
-            unsafe fn all(self) -> bool {
-                unsafe {
-                    union U {
-                        halves: ($v128, $v128),
-                        vec: $id,
-                    }
-                    let halves = U { vec: self }.halves;
-                    halves.0.all() && halves.1.all()
-                }
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "sse2")]
-            unsafe fn any(self) -> bool {
-                unsafe {
-                    union U {
-                        halves: ($v128, $v128),
-                        vec: $id,
-                    }
-                    let halves = U { vec: self }.halves;
-                    halves.0.any() || halves.1.any()
-                }
-            }
-        }
-    };
-}
-
-// Implementation for 64-bit wide masks on x86.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! x86_64_mmx_movemask_impl {
-    ($id:ident, $vec128:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "mmx")]
-            unsafe fn all(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm_movemask_pi8;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm_movemask_pi8;
-                // _mm_movemask_pi8(a) creates an 8bit mask containing the most
-                // significant bit of each byte of `a`. If all bits are set,
-                // then all 8 lanes of the mask are true.
-                _mm_movemask_pi8(::mem::transmute(self))
-                    == u8::max_value() as i32
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "mmx")]
-            unsafe fn any(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use coresimd::arch::x86::_mm_movemask_pi8;
-                #[cfg(target_arch = "x86_64")]
-                use coresimd::arch::x86_64::_mm_movemask_pi8;
-
-                _mm_movemask_pi8(::mem::transmute(self)) != 0
-            }
-        }
-    };
-}
-
-// Implementation for 128-bit wide masks on x86
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! x86_128_impl {
-    ($id:ident) => {
-        cfg_if! {
-            if #[cfg(target_feature = "sse2")] {
-                x86_128_sse2_movemask_impl!($id);
-            }  else {
-                default_impl!($id);
-            }
-        }
-    };
-}
-
-// Implementation for 256-bit wide masks on x86
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! x86_256_impl {
-    ($id:ident, $half_id:ident) => {
-        cfg_if! {
-            if #[cfg(target_feature = "avx")] {
-                x86_256_avx_test_impl!($id);
-            } else if #[cfg(target_feature = "sse2")] {
-                x86_256_sse2_impl!($id, $half_id);
-            } else {
-                default_impl!($id);
-            }
-        }
-    };
-}
-
-// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
-// minimum/maximum of adjacent pairs) for 64-bit wide two-element vectors.
-#[cfg(
-    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
-)]
-macro_rules! arm_64_x2_v7_neon_impl {
-    ($id:ident, $vpmin:ident, $vpmax:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn all(self) -> bool {
-                use coresimd::arch::arm::$vpmin;
-                use mem::transmute;
-                // pmin((a, b), (-,-)) => (b, -).0 => b
-                let tmp: $id =
-                    transmute($vpmin(transmute(self), ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn any(self) -> bool {
-                use coresimd::arch::arm::$vpmax;
-                use mem::transmute;
-                // pmax((a, b), (-,-)) => (b, -).0 => b
-                let tmp: $id =
-                    transmute($vpmax(transmute(self), ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-    };
-}
-
-// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
-// minimum/maximum of adjacent pairs) for 64-bit wide four-element vectors.
-#[cfg(
-    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
-)]
-macro_rules! arm_64_x4_v7_neon_impl {
-    ($id:ident, $vpmin:ident, $vpmax:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn all(self) -> bool {
-                use coresimd::arch::arm::$vpmin;
-                use mem::transmute;
-                // tmp = pmin((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
-                let tmp = $vpmin(transmute(self), ::mem::uninitialized());
-                // tmp = pmin((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
-                let tmp: $id = transmute($vpmin(tmp, ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn any(self) -> bool {
-                use coresimd::arch::arm::$vpmax;
-                use mem::transmute;
-                // tmp = pmax((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
-                let tmp = $vpmax(transmute(self), ::mem::uninitialized());
-                // tmp = pmax((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
-                let tmp: $id = transmute($vpmax(tmp, ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-    };
-}
-
-// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
-// minimum/maximum of adjacent pairs) for 64-bit wide eight-element vectors.
-#[cfg(
-    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
-)]
-macro_rules! arm_64_x8_v7_neon_impl {
-    ($id:ident, $vpmin:ident, $vpmax:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn all(self) -> bool {
-                use coresimd::arch::arm::$vpmin;
-                use mem::transmute;
-                // tmp = pmin(
-                //     (a, b, c, d, e, f, g, h),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (a, c, e, g, -, -, -, -)
-                let tmp = $vpmin(transmute(self), ::mem::uninitialized());
-                // tmp = pmin(
-                //     (a, c, e, g, -, -, -, -),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (c, g, -, -, -, -, -, -)
-                let tmp = $vpmin(tmp, ::mem::uninitialized());
-                // tmp = pmin(
-                //     (c, g, -, -, -, -, -, -),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (g, -, -, -, -, -, -, -).0 => g
-                let tmp: $id = transmute($vpmin(tmp, ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn any(self) -> bool {
-                use coresimd::arch::arm::$vpmax;
-                use mem::transmute;
-                // tmp = pmax(
-                //     (a, b, c, d, e, f, g, h),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (a, c, e, g, -, -, -, -)
-                let tmp = $vpmax(transmute(self), ::mem::uninitialized());
-                // tmp = pmax(
-                //     (a, c, e, g, -, -, -, -),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (c, g, -, -, -, -, -, -)
-                let tmp = $vpmax(tmp, ::mem::uninitialized());
-                // tmp = pmax(
-                //     (c, g, -, -, -, -, -, -),
-                //     (-, -, -, -, -, -, -, -)
-                // ) => (g, -, -, -, -, -, -, -).0 => g
-                let tmp: $id = transmute($vpmax(tmp, ::mem::uninitialized()));
-                tmp.extract(0)
-            }
-        }
-    };
-}
-
-// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
-// minimum/maximum of adjacent pairs) for 64-bit or 128-bit wide vectors with
-// more than two elements.
-#[cfg(
-    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
-)]
-macro_rules! arm_128_v7_neon_impl {
-    ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn all(self) -> bool {
-                use coresimd::arch::arm::$vpmin;
-                use mem::transmute;
-                union U {
-                    halves: ($half, $half),
-                    vec: $id,
-                }
-                let halves = U { vec: self }.halves;
-                let h: $half = transmute($vpmin(
-                    transmute(halves.0),
-                    transmute(halves.1),
-                ));
-                h.all()
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "v7,neon")]
-            unsafe fn any(self) -> bool {
-                use coresimd::arch::arm::$vpmax;
-                use mem::transmute;
-                union U {
-                    halves: ($half, $half),
-                    vec: $id,
-                }
-                let halves = U { vec: self }.halves;
-                let h: $half = transmute($vpmax(
-                    transmute(halves.0),
-                    transmute(halves.1),
-                ));
-                h.any()
-            }
-        }
-    };
-}
-
-// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
-// min/max) for 128-bit wide vectors.
-#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
-macro_rules! aarch64_128_neon_impl {
-    ($id:ident, $vmin:ident, $vmax:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "neon")]
-            unsafe fn all(self) -> bool {
-                use coresimd::arch::aarch64::$vmin;
-                $vmin(::mem::transmute(self)) != 0
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "neon")]
-            unsafe fn any(self) -> bool {
-                use coresimd::arch::aarch64::$vmax;
-                $vmax(::mem::transmute(self)) != 0
-            }
-        }
-    };
-}
-
-// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
-// min/max) for 64-bit wide vectors.
-//
-// This impl duplicates the 64-bit vector into a 128-bit one and calls
-// all/any on that.
-#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
-macro_rules! aarch64_64_neon_impl {
-    ($id:ident, $vec128:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "neon")]
-            unsafe fn all(self) -> bool {
-                union U {
-                    halves: ($id, $id),
-                    vec: $vec128,
-                }
-                U {
-                    halves: (self, self),
-                }.vec
-                    .all()
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "neon")]
-            unsafe fn any(self) -> bool {
-                union U {
-                    halves: ($id, $id),
-                    vec: $vec128,
-                }
-                U {
-                    halves: (self, self),
-                }.vec
-                    .any()
-            }
-        }
-    };
-}
-
-macro_rules! impl_mask_all_any {
-    // 64-bit wide masks
-    (m8x8) => {
-        cfg_if! {
-            if #[cfg(target_arch = "x86_64")] {
-                x86_64_mmx_movemask_impl!(m8x8, m8x16);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_64_x8_v7_neon_impl!(m8x8, vpmin_u8, vpmax_u8);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_64_neon_impl!(m8x8, m8x16);
-            } else {
-                default_impl!(m8x8);
-            }
-        }
-    };
-    (m16x4) => {
-        cfg_if! {
-            if #[cfg(target_arch = "x86_64")] {
-                x86_64_mmx_movemask_impl!(m16x4, m16x8);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_64_x4_v7_neon_impl!(m16x4, vpmin_u16, vpmax_u16);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_64_neon_impl!(m16x4, m16x8);
-            } else {
-                default_impl!(m16x4);
-            }
-        }
-    };
-    (m32x2) => {
-        cfg_if! {
-            if #[cfg(all(target_arch = "x86_64", not(target_os = "macos")))] {
-                // FIXME: this fails on travis-ci osx build bots.
-                x86_64_mmx_movemask_impl!(m32x2, m32x4);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_64_x2_v7_neon_impl!(m32x2, vpmin_u32, vpmax_u32);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_64_neon_impl!(m32x2, m32x4);
-            } else {
-                default_impl!(m32x2);
-            }
-        }
-    };
-    // 128-bit wide masks
-    (m8x16) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_128_impl!(m8x16);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
-            } else {
-                default_impl!(m8x16);
-            }
-        }
-    };
-    (m16x8) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_128_impl!(m16x8);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
-            } else {
-                default_impl!(m16x8);
-            }
-        }
-    };
-    (m32x4) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_128_impl!(m32x4);
-            } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] {
-                arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32);
-            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-                aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
-            } else {
-                default_impl!(m32x4);
-            }
-        }
-    };
-    (m64x2) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_128_impl!(m64x2);
-            } else {
-                default_impl!(m64x2);
-            }
-        }
-    };
-    // 256-bit wide masks:
-    (m8x32) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_256_impl!(m8x32, m8x16);
-            } else {
-                default_impl!(m8x32);
-            }
-        }
-    };
-    (m16x16) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_256_impl!(m16x16, m16x8);
-            } else {
-                default_impl!(m16x16);
-            }
-        }
-    };
-    (m32x8) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_256_impl!(m32x8, m32x4);
-            } else {
-                default_impl!(m32x8);
-            }
-        }
-    };
-    (m64x4) => {
-        cfg_if! {
-            if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
-                x86_256_impl!(m64x4, m64x2);
-            } else {
-                default_impl!(m64x4);
-            }
-        }
-    };
-    // Fallback to LLVM's default code-generation:
-    ($id:ident) => { default_impl!($id); };
-}
-
-vector_impl!(
-    [impl_mask_all_any, m1x8],
-    [impl_mask_all_any, m1x16],
-    [impl_mask_all_any, m1x32],
-    [impl_mask_all_any, m1x64],
-    [impl_mask_all_any, m8x2],
-    [impl_mask_all_any, m8x4],
-    [impl_mask_all_any, m8x8],
-    [impl_mask_all_any, m8x16],
-    [impl_mask_all_any, m8x32],
-    [impl_mask_all_any, m16x2],
-    [impl_mask_all_any, m16x4],
-    [impl_mask_all_any, m16x8],
-    [impl_mask_all_any, m16x16],
-    [impl_mask_all_any, m32x2],
-    [impl_mask_all_any, m32x4],
-    [impl_mask_all_any, m32x8],
-    [impl_mask_all_any, m64x2],
-    [impl_mask_all_any, m64x4]
-);
diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs
deleted file mode 100644
index 6e9a73fe5c..0000000000
--- a/coresimd/ppsv/codegen/mod.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-//! Work arounds for code generation issues
-
-#[cfg(target_arch = "aarch64")]
-pub mod wrapping;
-
-pub mod masks_reductions;
-pub mod swap_bytes;
-
-pub mod abs;
-pub mod cos;
-pub mod fma;
-pub mod sin;
-pub mod sqrt;
diff --git a/coresimd/ppsv/codegen/sin.rs b/coresimd/ppsv/codegen/sin.rs
deleted file mode 100644
index c13ae31d34..0000000000
--- a/coresimd/ppsv/codegen/sin.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-//! Exact vector sin
-#![allow(dead_code)]
-use coresimd::simd::*;
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.sin.f32"]
-    fn sin_f32(x: f32) -> f32;
-    #[link_name = "llvm.sin.f64"]
-    fn sin_f64(x: f64) -> f64;
-
-    #[link_name = "llvm.sin.v2f32"]
-    fn sin_v2f32(x: f32x2) -> f32x2;
-    #[link_name = "llvm.sin.v4f32"]
-    fn sin_v4f32(x: f32x4) -> f32x4;
-    #[link_name = "llvm.sin.v8f32"]
-    fn sin_v8f32(x: f32x8) -> f32x8;
-    #[link_name = "llvm.sin.v16f32"]
-    fn sin_v16f32(x: f32x16) -> f32x16;
-    #[link_name = "llvm.sin.v2f64"]
-    fn sin_v2f64(x: f64x2) -> f64x2;
-    #[link_name = "llvm.sin.v4f64"]
-    fn sin_v4f64(x: f64x4) -> f64x4;
-    #[link_name = "llvm.sin.v8f64"]
-    fn sin_v8f64(x: f64x8) -> f64x8;
-}
-
-pub(crate) trait FloatSin {
-    fn sin(self) -> Self;
-}
-
-trait RawSin {
-    fn raw_sin(self) -> Self;
-}
-
-impl RawSin for f32 {
-    fn raw_sin(self) -> Self {
-        unsafe { sin_f32(self) }
-    }
-}
-
-impl RawSin for f64 {
-    fn raw_sin(self) -> Self {
-        unsafe { sin_f64(self) }
-    }
-}
-
-macro_rules! impl_fsin {
-    ($id:ident : $fn:ident) => {
-        #[cfg(not(target_arch = "s390x"))]
-        impl FloatSin for $id {
-            fn sin(self) -> Self {
-                unsafe { $fn(self) }
-            }
-        }
-
-        // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501
-        #[cfg(target_arch = "s390x")]
-        impl FloatSin for $id {
-            fn sin(self) -> Self {
-                let mut v = $id::splat(0.);
-                for i in 0..$id::lanes() {
-                    v = v.replace(i, self.extract(i).raw_sin())
-                }
-                v
-            }
-        }
-
-    };
-}
-
-impl_fsin!(f32x2: sin_v2f32);
-impl_fsin!(f32x4: sin_v4f32);
-impl_fsin!(f32x8: sin_v8f32);
-impl_fsin!(f32x16: sin_v16f32);
-impl_fsin!(f64x2: sin_v2f64);
-impl_fsin!(f64x4: sin_v4f64);
-impl_fsin!(f64x8: sin_v8f64);
diff --git a/coresimd/ppsv/codegen/sqrt.rs b/coresimd/ppsv/codegen/sqrt.rs
deleted file mode 100644
index 6a18589e71..0000000000
--- a/coresimd/ppsv/codegen/sqrt.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-//! Exact vector square-root
-#![allow(dead_code)]
-use coresimd::simd::*;
-
-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.sqrt.f32"]
-    fn sqrt_f32(x: f32) -> f32;
-    #[link_name = "llvm.sqrt.f64"]
-    fn sqrt_f64(x: f64) -> f64;
-
-    #[link_name = "llvm.sqrt.v2f32"]
-    fn sqrt_v2f32(x: f32x2) -> f32x2;
-    #[link_name = "llvm.sqrt.v4f32"]
-    fn sqrt_v4f32(x: f32x4) -> f32x4;
-    #[link_name = "llvm.sqrt.v8f32"]
-    fn sqrt_v8f32(x: f32x8) -> f32x8;
-    #[link_name = "llvm.sqrt.v16f32"]
-    fn sqrt_v16f32(x: f32x16) -> f32x16;
-    #[link_name = "llvm.sqrt.v2f64"]
-    fn sqrt_v2f64(x: f64x2) -> f64x2;
-    #[link_name = "llvm.sqrt.v4f64"]
-    fn sqrt_v4f64(x: f64x4) -> f64x4;
-    #[link_name = "llvm.sqrt.v8f64"]
-    fn sqrt_v8f64(x: f64x8) -> f64x8;
-}
-
-pub(crate) trait FloatSqrt {
-    fn sqrt(self) -> Self;
-}
-
-trait RawSqrt {
-    fn raw_sqrt(self) -> Self;
-}
-
-impl RawSqrt for f32 {
-    fn raw_sqrt(self) -> Self {
-        unsafe { sqrt_f32(self) }
-    }
-}
-
-impl RawSqrt for f64 {
-    fn raw_sqrt(self) -> Self {
-        unsafe { sqrt_f64(self) }
-    }
-}
-
-macro_rules! impl_fsqrt {
-    ($id:ident : $fn:ident) => {
-        #[cfg(not(target_arch = "s390x"))]
-        impl FloatSqrt for $id {
-            fn sqrt(self) -> Self {
-                unsafe { $fn(self) }
-            }
-        }
-        // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501
-        #[cfg(target_arch = "s390x")]
-        impl FloatSqrt for $id {
-            fn sqrt(self) -> Self {
-                let mut v = $id::splat(0.);
-                for i in 0..$id::lanes() {
-                    v = v.replace(i, self.extract(i).raw_sqrt());
-                }
-                v
-            }
-        }
-
-    };
-}
-
-impl_fsqrt!(f32x2: sqrt_v2f32);
-impl_fsqrt!(f32x4: sqrt_v4f32);
-impl_fsqrt!(f32x8: sqrt_v8f32);
-impl_fsqrt!(f32x16: sqrt_v16f32);
-impl_fsqrt!(f64x2: sqrt_v2f64);
-impl_fsqrt!(f64x4: sqrt_v4f64);
-impl_fsqrt!(f64x8: sqrt_v8f64);
diff --git a/coresimd/ppsv/codegen/swap_bytes.rs b/coresimd/ppsv/codegen/swap_bytes.rs
deleted file mode 100644
index e9d2918737..0000000000
--- a/coresimd/ppsv/codegen/swap_bytes.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-//! Horizontal mask reductions.
-
-#![allow(unused)]
-
-use coresimd::simd::*;
-
-pub trait SwapBytes {
-    unsafe fn swap_bytes(self) -> Self;
-}
-
-// TODO: switch to shuffle API once it lands
-// TODO: investigate `llvm.bswap`
-macro_rules! impl_swap_bytes {
-    (v16, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle2;
-
-                const INDICES: [u32; 2] = [1, 0];
-                simd_shuffle2(self, self, INDICES)
-            }
-        }
-    )+};
-    (v32, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle4;
-
-                const INDICES: [u32; 4] = [3, 2, 1, 0];
-                let vec8 = u8x4::from_bits(self);
-                let shuffled: u8x4 = simd_shuffle4(vec8, vec8, INDICES);
-                $id::from_bits(shuffled)
-            }
-        }
-    )+};
-    (v64, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle8;
-
-                const INDICES: [u32; 8] = [7, 6, 5, 4, 3, 2, 1, 0];
-                let vec8 = u8x8::from_bits(self);
-                let shuffled: u8x8 = simd_shuffle8(vec8, vec8, INDICES);
-                $id::from_bits(shuffled)
-            }
-        }
-    )+};
-    (v128, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle16;
-
-                const INDICES: [u32; 16] = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0];
-                let vec8 = u8x16::from_bits(self);
-                let shuffled: u8x16 = simd_shuffle16(vec8, vec8, INDICES);
-                $id::from_bits(shuffled)
-            }
-        }
-    )+};
-    (v256, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle32;
-
-                const INDICES: [u32; 32] = [
-                    31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
-                    15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
-                ];
-                let vec8 = u8x32::from_bits(self);
-                let shuffled: u8x32 = simd_shuffle32(vec8, vec8, INDICES);
-                $id::from_bits(shuffled)
-            }
-        }
-    )+};
-    (v512, $($id:ident,)+) => {$(
-        impl SwapBytes for $id {
-            #[inline]
-            unsafe fn swap_bytes(self) -> Self {
-                use coresimd::simd_llvm::simd_shuffle64;
-
-                const INDICES: [u32; 64] = [
-                    63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
-                    47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
-                    31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
-                    15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0,
-                ];
-                let vec8 = u8x64::from_bits(self);
-                let shuffled: u8x64 = simd_shuffle64(vec8, vec8, INDICES);
-                $id::from_bits(shuffled)
-            }
-        }
-    )+};
-}
-
-vector_impl!(
-    [impl_swap_bytes, v16, u8x2, i8x2,],
-    [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,],
-    [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,],
-    [
-        impl_swap_bytes,
-        v128,
-        u8x16,
-        i8x16,
-        u16x8,
-        i16x8,
-        u32x4,
-        i32x4,
-        u64x2,
-        i64x2,
-    ],
-    [
-        impl_swap_bytes,
-        v256,
-        u8x32,
-        i8x32,
-        u16x16,
-        i16x16,
-        u32x8,
-        i32x8,
-        u64x4,
-        i64x4,
-    ],
-    [
-        impl_swap_bytes,
-        v512,
-        u8x64,
-        i8x64,
-        u16x32,
-        i16x32,
-        u32x16,
-        i32x16,
-        u64x8,
-        i64x8,
-    ]
-);
diff --git a/coresimd/ppsv/codegen/wrapping.rs b/coresimd/ppsv/codegen/wrapping.rs
deleted file mode 100644
index 0e2f306eb0..0000000000
--- a/coresimd/ppsv/codegen/wrapping.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-//! Used by the wrapping_sum and wrapping_product algorithms for AArch64.
-
-pub(crate) trait Wrapping {
-    fn add(self, other: Self) -> Self;
-    fn mul(self, other: Self) -> Self;
-}
-
-macro_rules! int_impl {
-    ($id:ident) => {
-        impl Wrapping for $id {
-            fn add(self, other: Self) -> Self {
-                self.wrapping_add(other)
-            }
-            fn mul(self, other: Self) -> Self {
-                self.wrapping_mul(other)
-            }
-        }
-    };
-}
-int_impl!(i8);
-int_impl!(i16);
-int_impl!(i32);
-int_impl!(i64);
-int_impl!(u8);
-int_impl!(u16);
-int_impl!(u32);
-int_impl!(u64);
-
-macro_rules! float_impl {
-    ($id:ident) => {
-        impl Wrapping for $id {
-            fn add(self, other: Self) -> Self {
-                self + other
-            }
-            fn mul(self, other: Self) -> Self {
-                self * other
-            }
-        }
-    };
-}
-float_impl!(f32);
-float_impl!(f64);
diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs
deleted file mode 100644
index eb2ba49541..0000000000
--- a/coresimd/ppsv/mod.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-//! Portable Packed-SIMD Vectors.
-//!
-//! These types are:
-//!
-//! * portable: work correctly on all architectures,
-//! * packed: have a size fixed at compile-time.
-//!
-//! These two terms are the opposites of:
-//!
-//! * architecture-specific: only available in a particular architecture,
-//! * scalable: the vector's size is dynamic.
-//!
-//! This module is structured as follows:
-//!
-//! * `api`: defines the API of the portable packed vector types.
-//! * `v{width}`: defines the portable vector types for a particular `width`.
-//!
-//! The portable packed vector types are named using the following schema:
-//! `{t}{l_w}x{l_n}`:
-//!
-//! * `t`: type - single letter corresponding to the following Rust literal
-//! types:
-//!   * `i`: signed integer
-//!   * `u`: unsigned integer
-//!   * `f`: floating point
-//!   * `m`: vector mask
-//! * `l_w`: lane width in bits
-//! * `l_n`: number of lanes
-//!
-//! For example, `f32x4` is a vector type containing four 32-bit wide
-//! floating-point numbers. The total width of this type is 32 bit times 4
-//! lanes, that is, 128 bits, and is thus defined in the `v128` module.
-
-#[macro_use]
-mod api;
-
-mod v128;
-mod v16;
-mod v256;
-mod v32;
-mod v512;
-mod v64;
-
-pub use self::v128::*;
-pub use self::v16::*;
-pub use self::v256::*;
-pub use self::v32::*;
-pub use self::v512::*;
-pub use self::v64::*;
-
-/// Safe lossless bitwise conversion from `T` to `Self`.
-pub trait FromBits<T>: ::marker::Sized {
-    /// Safe lossless bitwise from `T` to `Self`.
-    fn from_bits(T) -> Self;
-}
-
-/// Safe lossless bitwise conversion from `Self` to `T`.
-pub trait IntoBits<T>: ::marker::Sized {
-    /// Safe lossless bitwise transmute from `self` to `T`.
-    fn into_bits(self) -> T;
-}
-
-// FromBits implies IntoBits.
-impl<T, U> IntoBits<U> for T
-where
-    U: FromBits<T>,
-{
-    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
-    #[cfg_attr(
-        any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
-    )]
-    #[cfg_attr(
-        not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
-    )]
-    fn into_bits(self) -> U {
-        debug_assert!(::mem::size_of::<Self>() == ::mem::size_of::<U>());
-        U::from_bits(self)
-    }
-}
-
-// FromBits (and thus IntoBits) is reflexive.
-impl<T> FromBits<T> for T {
-    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
-    #[cfg_attr(
-        any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
-    )]
-    #[cfg_attr(
-        not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
-    )]
-    fn from_bits(t: Self) -> Self {
-        t
-    }
-}
-
-/// Work arounds code generation issues.
-mod codegen;
diff --git a/coresimd/ppsv/v128.rs b/coresimd/ppsv/v128.rs
deleted file mode 100644
index 08593bcef7..0000000000
--- a/coresimd/ppsv/v128.rs
+++ /dev/null
@@ -1,550 +0,0 @@
-//! 128-bit wide portable packed vector types.
-
-simd_i_ty! {
-    i8x16: 16, i8, m8x16, i8x16_tests, test_v128 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8  |
-    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 128-bit vector with 16 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x16: 16, u8, m8x16, u8x16_tests, test_v128 |
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 |
-    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 128-bit vector with 16 `u8` lanes.
-}
-
-simd_m_ty! {
-    m8x16: 16, i8, m8x16_tests, test_v128 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8  |
-    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 128-bit vector mask with 16 lanes.
-}
-
-simd_i_ty! {
-    i16x8: 8, i16, m16x8, i16x8_tests, test_v128 |
-    i16, i16, i16, i16, i16, i16, i16, i16 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 128-bit vector with 8 `i16` lanes.
-}
-
-simd_u_ty! {
-    u16x8: 8, u16, m16x8, u16x8_tests, test_v128 |
-    u16, u16, u16, u16, u16, u16, u16, u16 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 128-bit vector with 8 `u16` lanes.
-}
-
-simd_m_ty! {
-    m16x8: 8, i16, m16x8_tests, test_v128 |
-    i16, i16, i16, i16, i16, i16, i16, i16  |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 128-bit vector mask with 8 lanes.
-}
-
-simd_i_ty! {
-    i32x4: 4, i32, m32x4, i32x4_tests, test_v128 |
-    i32, i32, i32, i32 |
-    x0, x1, x2, x3 |
-    /// A 128-bit vector with 4 `i32` lanes.
-}
-
-simd_u_ty! {
-    u32x4: 4, u32, m32x4, u32x4_tests, test_v128 |
-    u32, u32, u32, u32 |
-    x0, x1, x2, x3 |
-    /// A 128-bit vector with 4 `u32` lanes.
-}
-
-simd_f_ty! {
-    f32x4: 4, f32, m32x4, f32x4_tests, test_v128 |
-    f32, f32, f32, f32 |
-    x0, x1, x2, x3 |
-    /// A 128-bit vector with 4 `f32` lanes.
-}
-
-simd_m_ty! {
-    m32x4: 4, i32, m32x4_tests, test_v128 |
-    i32, i32, i32, i32  |
-    x0, x1, x2, x3 |
-    /// A 128-bit vector mask with 4 lanes.
-}
-
-simd_i_ty! {
-    i64x2: 2, i64, m64x2, i64x2_tests, test_v128 |
-    i64, i64 |
-    x0, x1 |
-    /// A 128-bit vector with 2 `u64` lanes.
-}
-
-simd_u_ty! {
-    u64x2: 2, u64, m64x2, u64x2_tests, test_v128 |
-    u64, u64 |
-    x0, x1 |
-    /// A 128-bit vector with 2 `u64` lanes.
-}
-
-simd_f_ty! {
-    f64x2: 2, f64, m64x2, f64x2_tests, test_v128 |
-    f64, f64 |
-    x0, x1 |
-    /// A 128-bit vector with 2 `f64` lanes.
-}
-
-simd_m_ty! {
-    m64x2: 2, i64, m64x4_tests, test_v128 |
-    i64, i64  |
-    x0, x1 |
-    /// A 128-bit vector mask with 2 lanes.
-}
-
-#[cfg(target_arch = "x86")]
-use coresimd::arch::x86::{__m128, __m128d, __m128i};
-#[cfg(target_arch = "x86_64")]
-use coresimd::arch::x86_64::{__m128, __m128d, __m128i};
-
-macro_rules! from_bits_x86 {
-    ($id:ident, $elem_ty:ident, $test_mod:ident) => {
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        impl_from_bits_!($id: __m128, __m128i, __m128d);
-    };
-}
-
-#[cfg(
-    all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
-)]
-use coresimd::arch::arm::{
-    // FIXME: float16x8_t,
-    float32x4_t,
-    int16x8_t,
-    int32x4_t,
-    int64x2_t,
-    int8x16_t,
-    poly16x8_t,
-    poly8x16_t,
-    uint16x8_t,
-    uint32x4_t,
-    uint64x2_t,
-    uint8x16_t,
-};
-
-#[cfg(target_arch = "aarch64")]
-use coresimd::arch::aarch64::{
-    // FIXME: float16x8_t,
-    float32x4_t,
-    float64x2_t,
-    int16x8_t,
-    int32x4_t,
-    int64x2_t,
-    int8x16_t,
-    poly16x8_t,
-    poly8x16_t,
-    uint16x8_t,
-    uint32x4_t,
-    uint64x2_t,
-    uint8x16_t,
-};
-
-macro_rules! from_bits_arm {
-    ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
-        #[cfg(
-            any(
-                all(
-                    target_arch = "arm",
-                    target_feature = "neon",
-                    target_feature = "v7"
-                ),
-                target_arch = "aarch64"
-            )
-        )]
-        impl_from_bits_!(
-            $id: int8x16_t,
-            uint8x16_t,
-            int16x8_t,
-            uint16x8_t,
-            int32x4_t,
-            uint32x4_t,
-            int64x2_t,
-            uint64x2_t,
-            // FIXME: float16x8_t,
-            float32x4_t,
-            poly8x16_t,
-            poly16x8_t
-        );
-        #[cfg(target_arch = "aarch64")]
-        impl_from_bits_!($id: float64x2_t);
-    };
-}
-
-impl_from_bits!(
-    u64x2: u64,
-    u64x2_from_bits,
-    test_v128 | i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(u64x2, u64, u64x2_from_bits_x86);
-from_bits_arm!(u64x2, u64, u64x2_from_bits_arm, u64x2_from_bits_aarch64);
-
-impl_from_bits!(
-    i64x2: i64,
-    i64x2_from_bits,
-    test_v128 | u64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(i64x2, i64, i64x2_from_bits_x86);
-from_bits_arm!(i64x2, i64, i64x2_from_bits_arm, i64x2_from_bits_aarch64);
-
-impl_from_bits!(
-    f64x2: f64,
-    f64x2_from_bits,
-    test_v128 | i64x2,
-    u64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(f64x2, f64, f64x2_from_bits_x86);
-from_bits_arm!(f64x2, f64, f64x2_from_bits_arm, f64x2_from_bits_aarch64);
-
-impl_from_bits!(
-    u32x4: u32,
-    u32x4_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(u32x4, u32, u32x4_from_bits_x86);
-from_bits_arm!(u32x4, u32, u32x4_from_bits_arm, u32x4_from_bits_aarch64);
-
-impl_from_bits!(
-    i32x4: i32,
-    i32x4_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(i32x4, i32, i32x4_from_bits_x86);
-from_bits_arm!(i32x4, i32, i32x4_from_bits_arm, i32x4_from_bits_aarch64);
-
-impl_from_bits!(
-    f32x4: f32,
-    f32x4_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    i32x4,
-    u32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(f32x4, f32, f32x4_from_bits_x86);
-from_bits_arm!(f32x4, f32, f32x4_from_bits_arm, f32x4_from_bits_aarch64);
-
-impl_from_bits!(
-    u16x8: u16,
-    u16x8_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(u16x8, u16, u16x8_from_bits_x86);
-from_bits_arm!(u16x8, u16, u16x8_from_bits_arm, u16x8_from_bits_aarch64);
-
-impl_from_bits!(
-    i16x8: i16,
-    i16x8_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(i16x8, i16, i16x8_from_bits_x86);
-from_bits_arm!(i16x8, i16, i16x8_from_bits_arm, i16x8_from_bits_aarch64);
-
-impl_from_bits!(
-    u8x16: u8,
-    u8x16_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    i8x16,
-    m8x16
-);
-from_bits_x86!(u8x16, u8, u8x16_from_bits_x86);
-from_bits_arm!(u8x16, u8, u8x16_from_bits_arm, u8x16_from_bits_aarch64);
-
-impl_from_bits!(
-    i8x16: i8,
-    i8x16_from_bits,
-    test_v128 | u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    m8x16
-);
-from_bits_x86!(i8x16, i8, i8x16_from_bits_x86);
-from_bits_arm!(i8x16, i8, i8x16_from_bits_arm, i8x16_from_bits_aarch64);
-
-impl_from!(
-    f64x2: f64,
-    f64x2_from,
-    test_v128 | f32x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    i16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-impl_from!(
-    u64x2: u64,
-    u64x2_from,
-    test_v128 | f64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    i32x2,
-    u32x2,
-    m32x2,
-    i16x2,
-    u16x2,
-    m16x2,
-    i8x2,
-    u8x2,
-    m8x2
-);
-impl_from!(
-    i64x2: i64,
-    i64x2_from,
-    test_v128 | f64x2,
-    u64x2,
-    m64x2,
-    i32x2,
-    u32x2,
-    f32x2,
-    m32x2,
-    i16x2,
-    u16x2,
-    m16x2,
-    i8x2,
-    u8x2,
-    m8x2
-);
-impl_from!(
-    u32x4: u32,
-    u32x4_from,
-    test_v128 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    f32x4,
-    i32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    i32x4: i32,
-    i32x4_from,
-    test_v128 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    f32x4,
-    u32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    f32x4: f32,
-    f32x4_from,
-    test_v128 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-
-impl_from!(
-    i16x8: i16,
-    i16x8_from,
-    test_v128 | f64x8,
-    u64x8,
-    i64x8,
-    m1x8,
-    f32x8,
-    u32x8,
-    i32x8,
-    m32x8,
-    u16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    u16x8: u16,
-    u16x8_from,
-    test_v128 | f64x8,
-    u64x8,
-    i64x8,
-    m1x8,
-    f32x8,
-    u32x8,
-    i32x8,
-    m32x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-
-impl_from!(
-    u8x16: u8,
-    u8x16_from,
-    test_v128 | i32x16,
-    u32x16,
-    f32x16,
-    m1x16,
-    i16x16,
-    u16x16,
-    m16x16,
-    i8x16,
-    m8x16
-);
-impl_from!(
-    i8x16: i8,
-    i8x16_from,
-    test_v128 | i32x16,
-    u32x16,
-    f32x16,
-    m1x16,
-    i16x16,
-    u16x16,
-    m16x16,
-    u8x16,
-    m8x16
-);
-
-impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16);
-
-impl_from!(m16x8: i16, m16x8_from, test_v128 | m1x8, m32x8, m8x8);
-
-impl_from!(m32x4: i32, m32x4_from, test_v128 | m64x4, m16x4, m8x4);
-
-impl_from!(m64x2: i64, m64x2_from, test_v128 | m32x2, m16x2, m8x2);
diff --git a/coresimd/ppsv/v16.rs b/coresimd/ppsv/v16.rs
deleted file mode 100644
index a2baf8dfc5..0000000000
--- a/coresimd/ppsv/v16.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-//! 16-bit wide portable packed vector types.
-
-simd_i_ty! {
-    i8x2: 2, i8, m8x2, i8x2_tests, test_v16 |
-    i8, i8 |
-    x0, x1 |
-    /// A 16-bit wide vector with 2 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x2: 2, u8, m8x2, u8x2_tests, test_v16 |
-    u8, u8 |
-    x0, x1 |
-    /// A 16-bit wide vector with 2 `u8` lanes.
-}
-
-simd_m_ty! {
-    m8x2: 2, i8, m8x2_tests, test_v16 |
-    i8, i8 |
-    x0, x1 |
-    /// A 16-bit wide vector mask with 2 lanes.
-}
-
-impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, m8x2);
-impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, m8x2);
-
-impl_from!(
-    i8x2: i8,
-    i8x2_from,
-    test_v16 | f64x2,
-    u64x2,
-    m64x2,
-    i64x2,
-    f32x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    m16x2,
-    u8x2,
-    m8x2
-);
-impl_from!(
-    u8x2: u8,
-    u8x2_from,
-    test_v16 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    m16x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(m8x2: i8, m8x2_from, test_v16 | m64x2, m32x2, m16x2);
diff --git a/coresimd/ppsv/v256.rs b/coresimd/ppsv/v256.rs
deleted file mode 100644
index c68ec9118e..0000000000
--- a/coresimd/ppsv/v256.rs
+++ /dev/null
@@ -1,472 +0,0 @@
-//! 256-bit wide portable packed vector types.
-
-simd_i_ty! {
-    i8x32: 32, i8, m8x32, i8x32_tests, test_v256 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 256-bit vector with 32 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x32: 32, u8, m8x32, u8x32_tests, test_v256 |
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 256-bit vector with 32 `u8` lanes.
-}
-
-simd_m_ty! {
-    m8x32: 32, i8, m8x32_tests, test_v256 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 256-bit vector mask with 32 lanes.
-}
-
-simd_i_ty! {
-    i16x16: 16, i16, m16x16, i16x16_tests, test_v256 |
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 256-bit vector with 16 `i16` lanes.
-}
-
-simd_u_ty! {
-    u16x16: 16, u16, m16x16, u16x16_tests, test_v256 |
-    u16, u16, u16, u16, u16, u16, u16, u16,
-    u16, u16, u16, u16, u16, u16, u16, u16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 256-bit vector with 16 `u16` lanes.
-}
-
-simd_m_ty! {
-    m16x16: 16, i16, m16x16_tests, test_v256 |
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 256-bit vector mask with 16 lanes.
-}
-
-simd_i_ty! {
-    i32x8: 8, i32, m32x8, i32x8_tests, test_v256 |
-    i32, i32, i32, i32, i32, i32, i32, i32 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 256-bit vector with 8 `i32` lanes.
-}
-
-simd_u_ty! {
-    u32x8: 8, u32, m32x8, u32x8_tests, test_v256 |
-    u32, u32, u32, u32, u32, u32, u32, u32 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 256-bit vector with 8 `u32` lanes.
-}
-
-simd_f_ty! {
-    f32x8: 8, f32, m32x8, f32x8_tests, test_v256 |
-    f32, f32, f32, f32, f32, f32, f32, f32 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 256-bit vector with 8 `f32` lanes.
-}
-
-simd_m_ty! {
-    m32x8: 8, i32, m32x8_tests, test_v256 |
-    i32, i32, i32, i32, i32, i32, i32, i32 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 256-bit vector mask with 8 lanes.
-}
-
-simd_i_ty! {
-    i64x4: 4, i64, m64x4, i64x4_tests, test_v256 |
-    i64, i64, i64, i64 |
-    x0, x1, x2, x3 |
-    /// A 256-bit vector with 4 `i64` lanes.
-}
-
-simd_u_ty! {
-    u64x4: 4, u64, m64x4, u64x4_tests, test_v256 |
-    u64, u64, u64, u64 |
-    x0, x1, x2, x3 |
-    /// A 256-bit vector with 4 `u64` lanes.
-}
-
-simd_f_ty! {
-    f64x4: 4, f64, m64x4, f64x4_tests, test_v256 |
-    f64, f64, f64, f64 |
-    x0, x1, x2, x3 |
-    /// A 256-bit vector with 4 `f64` lanes.
-}
-
-simd_m_ty! {
-    m64x4: 4, i64, m64x4_tests, test_v256 |
-    i64, i64, i64, i64 |
-    x0, x1, x2, x3 |
-    /// A 256-bit vector mask with 4 lanes.
-}
-
-#[cfg(target_arch = "x86")]
-use coresimd::arch::x86::{__m256, __m256d, __m256i};
-#[cfg(target_arch = "x86_64")]
-use coresimd::arch::x86_64::{__m256, __m256d, __m256i};
-
-macro_rules! from_bits_x86 {
-    ($id:ident, $elem_ty:ident, $test_mod:ident) => {
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        impl_from_bits_!($id: __m256, __m256i, __m256d);
-    };
-}
-
-impl_from_bits!(
-    i8x32: i8,
-    i8x32_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    m8x32
-);
-from_bits_x86!(i8x32, i8, i8x32_from_bits_x86);
-
-impl_from_bits!(
-    u8x32: u8,
-    u8x32_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(u8x32, u8, u8x32_from_bits_x86);
-
-impl_from_bits!(
-    i16x16: i16,
-    i16x16_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(i16x16, i16, i16x16_from_bits_x86);
-
-impl_from_bits!(
-    u16x16: u16,
-    u16x16_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(u16x16, u16, u16x16_from_bits_x86);
-
-impl_from_bits!(
-    i32x8: i32,
-    i32x8_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(i32x8, i32, i32x8_from_bits_x86);
-
-impl_from_bits!(
-    u32x8: u32,
-    u32x8_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(u32x8, u32, u32x8_from_bits_x86);
-
-impl_from_bits!(
-    f32x8: f32,
-    f32x8_from_bits,
-    test_v256 | u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    i32x8,
-    u32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(f32x8, f32, f32x8_from_bits_x86);
-
-impl_from_bits!(
-    i64x4: i64,
-    i64x4_from_bits,
-    test_v256 | u64x4,
-    f64x4,
-    m64x4,
-    i32x8,
-    u32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(i64x4, i64, i64x4_from_bits_x86);
-
-impl_from_bits!(
-    u64x4: u64,
-    u64x4_from_bits,
-    test_v256 | i64x4,
-    f64x4,
-    m64x4,
-    i32x8,
-    u32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(u64x4, u64, u64x4_from_bits_x86);
-
-impl_from_bits!(
-    f64x4: f64,
-    f64x4_from_bits,
-    test_v256 | i64x4,
-    u64x4,
-    m64x4,
-    i32x8,
-    u32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-from_bits_x86!(f64x4, f64, f64x4_from_bits_x86);
-
-impl_from!(
-    f64x4: f64,
-    f64x4_from,
-    test_v256 | u64x4,
-    i64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    i64x4: i64,
-    i64x4_from,
-    test_v256 | u64x4,
-    f64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    u64x4: u64,
-    u64x4_from,
-    test_v256 | i64x4,
-    f64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    f32x8: f32,
-    f32x8_from,
-    test_v256 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8,
-    u32x8,
-    i32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    i32x8: i32,
-    i32x8_from,
-    test_v256 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8,
-    u32x8,
-    f32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    u32x8: u32,
-    u32x8_from,
-    test_v256 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    i16x16: i16,
-    i16x16_from,
-    test_v256 | u32x16,
-    i32x16,
-    f32x16,
-    m1x16,
-    u16x16,
-    m16x16,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from!(
-    u16x16: u16,
-    u16x16_from,
-    test_v256 | u32x16,
-    i32x16,
-    f32x16,
-    m1x16,
-    i16x16,
-    m16x16,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from!(
-    i8x32: i8,
-    i8x32_from,
-    test_v256 | u16x32,
-    i16x32,
-    u8x32,
-    m8x32
-);
-impl_from!(
-    u8x32: u8,
-    u8x32_from,
-    test_v256 | u16x32,
-    i16x32,
-    i8x32,
-    m8x32
-);
-
-impl_from!(m8x32: i8, m8x32_from, test_v256 | m1x32);
-
-impl_from!(m16x16: i16, m16x16_from, test_v256 | m1x16, m8x16);
-
-impl_from!(m32x8: i32, m32x8_from, test_v256 | m1x8, m16x8, m8x8);
-
-impl_from!(m64x4: i64, m64x4_from, test_v256 | m32x4, m16x4, m8x4);
diff --git a/coresimd/ppsv/v32.rs b/coresimd/ppsv/v32.rs
deleted file mode 100644
index ab56b5ad80..0000000000
--- a/coresimd/ppsv/v32.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-//! 32-bit wide portable packed vector types.
-
-simd_i_ty! {
-    i16x2: 2, i16, m16x2, i16x2_tests, test_v32 |
-    i16, i16 |
-    x0, x1 |
-    /// A 32-bit wide vector with 2 `i16` lanes.
-}
-
-simd_u_ty! {
-    u16x2: 2, u16, m16x2, u16x2_tests, test_v32 |
-    u16, u16 |
-    x0, x1 |
-    /// A 32-bit wide vector with 2 `u16` lanes.
-}
-
-simd_m_ty! {
-    m16x2: 2, i16, m16x2_tests, test_v32 |
-    i16, i16  |
-    x0, x1 |
-    /// A 32-bit wide vector mask with 2 lanes.
-}
-
-simd_i_ty! {
-    i8x4: 4, i8, m8x4, i8x4_tests, test_v32 |
-    i8, i8, i8, i8  |
-    x0, x1, x2, x3 |
-    /// A 32-bit wide vector with 4 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x4: 4, u8, m8x4, u8x4_tests, test_v32 |
-    u8, u8, u8, u8  |
-    x0, x1, x2, x3 |
-    /// A 32-bit wide vector with 4 `u8` lanes.
-}
-
-simd_m_ty! {
-    m8x4: 4, i8, m8x4_tests, test_v32 |
-    i8, i8, i8, i8  |
-    x0, x1, x2, x3 |
-    /// A 32-bit wide vector mask 4 lanes.
-}
-
-impl_from_bits!(
-    i16x2: i16,
-    i16x2_from_bits,
-    test_v32 | u16x2,
-    m16x2,
-    i8x4,
-    u8x4,
-    m8x4
-);
-impl_from_bits!(
-    u16x2: u16,
-    u16x2_from_bits,
-    test_v32 | i16x2,
-    m16x2,
-    i8x4,
-    u8x4,
-    m8x4
-);
-impl_from_bits!(
-    i8x4: i8,
-    i8x2_from_bits,
-    test_v32 | i16x2,
-    u16x2,
-    m16x2,
-    u8x4,
-    m8x4
-);
-impl_from_bits!(
-    u8x4: u8,
-    u8x2_from_bits,
-    test_v32 | i16x2,
-    u16x2,
-    m16x2,
-    i8x4,
-    m8x4
-);
-
-impl_from!(
-    i16x2: i16,
-    i16x2_from,
-    test_v32 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(
-    u16x2: u16,
-    u16x2_from,
-    test_v32 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    i16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(
-    i8x4: i8,
-    i8x4_from,
-    test_v32 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    m8x4
-);
-
-impl_from!(
-    u8x4: u8,
-    u8x4_from,
-    test_v32 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x4,
-    i16x4,
-    m16x4,
-    i8x4,
-    m8x4
-);
-
-impl_from!(m8x4: i8, m8x4_from, test_v32 | m64x4, m32x4, m16x4);
-
-impl_from!(m16x2: i16, m16x2_from, test_v32 | m64x2, m32x2, m8x2);
diff --git a/coresimd/ppsv/v512.rs b/coresimd/ppsv/v512.rs
deleted file mode 100644
index 6bea72c73b..0000000000
--- a/coresimd/ppsv/v512.rs
+++ /dev/null
@@ -1,451 +0,0 @@
-//! 512-bit wide portable packed vector types.
-
-// FIXME: Here the m1xN masks should map to AVX-512 m1xN registers,
-// but due to lack of rustc support (shouldn't be hard to add) these masks
-// are currently implemented as being 512-bit wide.
-
-simd_i_ty! {
-    i8x64: 64, i8, m1x64, i8x64_tests, test_v512 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31,
-    x32, x33, x34, x35, x36, x37, x38, x39,
-    x40, x41, x42, x43, x44, x45, x46, x47,
-    x48, x49, x50, x51, x52, x53, x54, x55,
-    x56, x57, x58, x59, x60, x61, x62, x63 |
-    /// A 512-bit vector with 64 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x64: 64, u8, m1x64, u8x64_tests, test_v512 |
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
-    u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31,
-    x32, x33, x34, x35, x36, x37, x38, x39,
-    x40, x41, x42, x43, x44, x45, x46, x47,
-    x48, x49, x50, x51, x52, x53, x54, x55,
-    x56, x57, x58, x59, x60, x61, x62, x63 |
-    /// A 512-bit vector with 64 `u8` lanes.
-}
-
-simd_m_ty! {
-    m1x64: 64, i8, m1x64_tests, test_v512 |
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
-    i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31,
-    x32, x33, x34, x35, x36, x37, x38, x39,
-    x40, x41, x42, x43, x44, x45, x46, x47,
-    x48, x49, x50, x51, x52, x53, x54, x55,
-    x56, x57, x58, x59, x60, x61, x62, x63 |
-    /// A 64-bit vector mask with 64 lanes (FIXME: 512-bit wide).
-}
-
-simd_i_ty! {
-    i16x32: 32, i16, m1x32, i16x32_tests, test_v512 |
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 512-bit vector with 32 `i16` lanes.
-}
-
-simd_u_ty! {
-    u16x32: 32, u16, m1x32, u16x32_tests, test_v512 |
-    u16, u16, u16, u16, u16, u16, u16, u16,
-    u16, u16, u16, u16, u16, u16, u16, u16,
-    u16, u16, u16, u16, u16, u16, u16, u16,
-    u16, u16, u16, u16, u16, u16, u16, u16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 512-bit vector with 32 `u16` lanes.
-}
-
-simd_m_ty! {
-    m1x32: 32, i16, m1x32_tests, test_v512 |
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16,
-    i16, i16, i16, i16, i16, i16, i16, i16 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15,
-    x16, x17, x18, x19, x20, x21, x22, x23,
-    x24, x25, x26, x27, x28, x29, x30, x31 |
-    /// A 32-bit vector mask with 32 lanes (FIXME: 512-bit wide).
-}
-
-simd_i_ty! {
-    i32x16: 16, i32, m1x16, i32x16_tests, test_v512 |
-    i32, i32, i32, i32, i32, i32, i32, i32,
-    i32, i32, i32, i32, i32, i32, i32, i32 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 512-bit vector with 16 `i32` lanes.
-}
-
-simd_u_ty! {
-    u32x16: 16, u32, m1x16, u32x16_tests, test_v512 |
-    u32, u32, u32, u32, u32, u32, u32, u32,
-    u32, u32, u32, u32, u32, u32, u32, u32 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 512-bit vector with 16 `u32` lanes.
-}
-
-simd_f_ty! {
-    f32x16: 16, f32, m1x16, f32x16_tests, test_v512 |
-    f32, f32, f32, f32, f32, f32, f32, f32,
-    f32, f32, f32, f32, f32, f32, f32, f32 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 512-bit vector with 16 `f32` lanes.
-}
-
-simd_m_ty! {
-    m1x16: 16, i32, m1x16_tests, test_v512 |
-    i32, i32, i32, i32, i32, i32, i32, i32,
-    i32, i32, i32, i32, i32, i32, i32, i32 |
-    x0, x1, x2, x3, x4, x5, x6, x7,
-    x8, x9, x10, x11, x12, x13, x14, x15 |
-    /// A 16-bit vector mask with 16 lanes (FIXME: 512-bit wide).
-}
-
-simd_i_ty! {
-    i64x8: 8, i64, m1x8, i64x8_tests, test_v512 |
-    i64, i64, i64, i64, i64, i64, i64, i64 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 512-bit vector with 8 `i64` lanes.
-}
-
-simd_u_ty! {
-    u64x8: 8, u64, m1x8, u64x8_tests, test_v512 |
-    u64, u64, u64, u64, u64, u64, u64, u64 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 512-bit vector with 8 `u64` lanes.
-}
-
-simd_f_ty! {
-    f64x8: 8, f64, m1x8, f64x8_tests, test_v512 |
-    f64, f64, f64, f64, f64, f64, f64, f64 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 512-bit vector with 8 `f64` lanes.
-}
-
-simd_m_ty! {
-    m1x8: 8, i64, m1x8_tests, test_v512 |
-    i64, i64, i64, i64,
-    i64, i64, i64, i64 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 8-bit vector mask with 8 lanes (FIXME: 512-bit wide).
-}
-
-impl_from_bits!(
-    i8x64: i8,
-    i8x64_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    u8x64: u8,
-    u8x64_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    i16x32: i16,
-    i16x32_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    u16x32: u16,
-    u16x32_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    i32x16: i32,
-    i32x16_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    u32x16: u32,
-    u32x16_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    f32x16: f32,
-    f32x16_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    i64x8: i64,
-    i64x8_from_bits,
-    test_v512 | u64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    u64x8: u64,
-    u64x8_from_bits,
-    test_v512 | i64x8,
-    f64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-impl_from_bits!(
-    f64x8: f64,
-    f64x8_from_bits,
-    test_v512 | u64x8,
-    i64x8,
-    m1x8, // FIXME
-    u32x16,
-    i32x16,
-    f32x16,
-    m1x16, // FIXME
-    u16x32,
-    i16x32,
-    m1x32, // FIXME
-    i8x64,
-    u8x64,
-    m1x64 // FIXME
-);
-
-impl_from!(
-    f64x8: f64,
-    f64x8_from,
-    test_v512 | u64x8,
-    i64x8,
-    m1x8,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    i64x8: i64,
-    i64x8_from,
-    test_v512 | u64x8,
-    f64x8,
-    m1x8,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-impl_from!(
-    u64x8: u64,
-    u64x8_from,
-    test_v512 | i64x8,
-    f64x8,
-    m1x8,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x8,
-    i8x8,
-    m8x8
-);
-
-impl_from!(
-    f32x16: f32,
-    f32x16_from,
-    test_v512 | u32x16,
-    i32x16,
-    m1x16,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from!(
-    i32x16: i32,
-    i32x16_from,
-    test_v512 | u32x16,
-    f32x16,
-    m1x16,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from!(
-    u32x16: u32,
-    u32x16_from,
-    test_v512 | i32x16,
-    f32x16,
-    m1x16,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x16,
-    i8x16,
-    m8x16
-);
-
-impl_from!(
-    i16x32: i16,
-    i16x32_from,
-    test_v512 | u16x32,
-    u8x32,
-    i8x32,
-    m1x32,
-    m8x32
-);
-impl_from!(
-    u16x32: u16,
-    u16x32_from,
-    test_v512 | i16x32,
-    u8x32,
-    i8x32,
-    m1x32,
-    m8x32
-);
-
-impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64, m1x64);
-impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64, m1x64);
-
-impl_from!(m1x32: i16, m1x32_from, test_v512 | m8x32);
-
-impl_from!(m1x16: i32, m1x16_from, test_v512 | m16x16, m8x16);
-
-impl_from!(m1x8: i64, m1x8_from, test_v512 | m32x8, m16x8, m8x8);
diff --git a/coresimd/ppsv/v64.rs b/coresimd/ppsv/v64.rs
deleted file mode 100644
index 64a86b601d..0000000000
--- a/coresimd/ppsv/v64.rs
+++ /dev/null
@@ -1,388 +0,0 @@
-//! 64-bit wide portable packed vector types.
-
-simd_i_ty! {
-    i8x8: 8, i8, m8x8, i8x8_tests, test_v64 |
-    i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 64-bit vector with 8 `i8` lanes.
-}
-
-simd_u_ty! {
-    u8x8: 8, u8, m8x8, u8x8_tests, test_v64 |
-    u8, u8, u8, u8, u8, u8, u8, u8 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 64-bit vector with 8 `u8` lanes.
-}
-
-simd_m_ty! {
-    m8x8: 8, i8, m8x8_tests, test_v64 |
-    i8, i8, i8, i8, i8, i8, i8, i8 |
-    x0, x1, x2, x3, x4, x5, x6, x7 |
-    /// A 64-bit vector mask with 8 lanes.
-}
-
-simd_i_ty! {
-    i16x4: 4, i16, m16x4, i16x4_tests, test_v64 |
-    i16, i16, i16, i16 |
-    x0, x1, x2, x3 |
-    /// A 64-bit vector with 4 `i16` lanes.
-}
-
-simd_u_ty! {
-    u16x4: 4, u16, m16x4, u16x4_tests, test_v64 |
-    u16, u16, u16, u16 |
-    x0, x1, x2, x3 |
-    /// A 64-bit vector with 4 `u16` lanes.
-}
-
-simd_m_ty! {
-    m16x4: 4, i16, m16x4_tests, test_v64 |
-    i16, i16, i16, i16 |
-    x0, x1, x2, x3 |
-    /// A 64-bit vector mask with 4 lanes.
-}
-
-simd_i_ty! {
-    i32x2: 2, i32, m32x2, i32x2_tests, test_v64 |
-    i32, i32 |
-    x0, x1 |
-    /// A 64-bit vector with 2 `i32` lanes.
-}
-
-simd_u_ty! {
-    u32x2: 2, u32, m32x2, u32x2_tests, test_v64 |
-    u32, u32 |
-    x0, x1 |
-    /// A 64-bit vector with 2 `u32` lanes.
-}
-
-simd_m_ty! {
-    m32x2: 2, i32, m32x2_tests, test_v64 |
-    i32, i32 |
-    x0, x1 |
-    /// A 64-bit vector mask with 2 lanes.
-}
-
-simd_f_ty! {
-    f32x2: 2, f32, m32x2, f32x2_tests, test_v64 |
-    f32, f32 |
-    x0, x1 |
-    /// A 64-bit vector with 2 `f32` lanes.
-}
-
-#[cfg(target_arch = "x86")]
-use coresimd::arch::x86::__m64;
-
-#[cfg(target_arch = "x86_64")]
-use coresimd::arch::x86_64::__m64;
-
-macro_rules! from_bits_x86 {
-    ($id:ident, $elem_ty:ident, $test_mod:ident) => {
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        impl_from_bits_!($id: __m64);
-    };
-}
-
-#[cfg(
-    all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
-)]
-use coresimd::arch::arm::{
-    // FIXME: float16x4_t,
-    float32x2_t,
-    int16x4_t,
-    int32x2_t,
-    int64x1_t,
-    int8x8_t,
-    poly16x4_t,
-    poly8x8_t,
-    uint16x4_t,
-    uint32x2_t,
-    uint64x1_t,
-    uint8x8_t,
-};
-
-#[cfg(target_arch = "aarch64")]
-use coresimd::arch::aarch64::{
-    // FIXME: float16x4_t,
-    float32x2_t,
-    float64x1_t,
-    int16x4_t,
-    int32x2_t,
-    int64x1_t,
-    int8x8_t,
-    poly16x4_t,
-    poly8x8_t,
-    uint16x4_t,
-    uint32x2_t,
-    uint64x1_t,
-    uint8x8_t,
-};
-
-macro_rules! from_bits_arm {
-    ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
-        #[cfg(
-            any(
-                all(
-                    target_arch = "arm",
-                    target_feature = "neon",
-                    target_feature = "v7"
-                ),
-                target_arch = "aarch64"
-            )
-        )]
-        impl_from_bits_!(
-            $id: int64x1_t,
-            uint64x1_t,
-            uint32x2_t,
-            int32x2_t,
-            float32x2_t,
-            uint16x4_t,
-            int16x4_t,
-            // FIXME: float16x4_t
-            poly16x4_t,
-            uint8x8_t,
-            int8x8_t,
-            poly8x8_t
-        );
-        #[cfg(target_arch = "aarch64")]
-        impl_from_bits_!($id: float64x1_t);
-    };
-}
-
-impl_from_bits!(
-    u32x2: u32,
-    u32x2_from_bits,
-    test_v64 | i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(u32x2, u32, u32x2_from_bits_x86);
-from_bits_arm!(u32x2, u32, u32x2_from_bits_arm, u32x2_from_bits_aarch64);
-
-impl_from_bits!(
-    i32x2: i32,
-    i32x2_from_bits,
-    test_v64 | u32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(i32x2, i32, i32x2_from_bits_x86);
-from_bits_arm!(i32x2, i32, i32x2_from_bits_arm, i32x2_from_bits_aarch64);
-
-impl_from_bits!(
-    f32x2: f32,
-    f32x2_from_bits,
-    test_v64 | i32x2,
-    u32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(f32x2, f32, f32x2_from_bits_x86);
-from_bits_arm!(f32x2, f32, f32x2_from_bits_arm, f32x2_from_bits_aarch64);
-
-impl_from_bits!(
-    u16x4: u16,
-    u16x4_from_bits,
-    test_v64 | u32x2,
-    i32x2,
-    m32x2,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(u16x4, u16, u16x4_from_bits_x86);
-from_bits_arm!(u16x4, u16, u16x4_from_bits_arm, u16x4_from_bits_aarch64);
-
-impl_from_bits!(
-    i16x4: i16,
-    i16x4_from_bits,
-    test_v64 | u32x2,
-    i32x2,
-    m32x2,
-    u16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(i16x4, i16, i16x4_from_bits_x86);
-from_bits_arm!(i16x4, i16, i16x4_from_bits_arm, i16x4_from_bits_aarch64);
-
-impl_from_bits!(
-    u8x8: u8,
-    u8x8_from_bits,
-    test_v64 | u32x2,
-    i32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    i8x8,
-    m8x8
-);
-from_bits_x86!(u8x8, u8, u8x8_from_bits_x86);
-from_bits_arm!(u8x8, u8, u8x8_from_bits_arm, u8x8_from_bits_aarch64);
-
-impl_from_bits!(
-    i8x8: i8,
-    i8x8_from_bits,
-    test_v64 | u32x2,
-    i32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    m8x8
-);
-from_bits_x86!(i8x8, i8, i8x8_from_bits_x86);
-from_bits_arm!(i8x8, i8, i8x8_from_bits_arm, i8x8_from_bits_aarch64);
-
-impl_from!(
-    f32x2: f32,
-    f32x2_from,
-    test_v64 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    u32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    i16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(
-    u32x2: u32,
-    u32x2_from,
-    test_v64 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    i32x2,
-    m32x2,
-    u16x2,
-    i16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(
-    i32x2: i32,
-    i32x2_from,
-    test_v64 | f64x2,
-    u64x2,
-    i64x2,
-    m64x2,
-    f32x2,
-    u32x2,
-    m32x2,
-    u16x2,
-    i16x2,
-    m16x2,
-    u8x2,
-    i8x2,
-    m8x2
-);
-
-impl_from!(
-    u16x4: u16,
-    u16x4_from,
-    test_v64 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    f32x4,
-    i32x4,
-    u32x4,
-    m32x4,
-    i16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-
-impl_from!(
-    i16x4: i16,
-    i16x4_from,
-    test_v64 | f64x4,
-    u64x4,
-    i64x4,
-    m64x4,
-    f32x4,
-    i32x4,
-    u32x4,
-    m32x4,
-    u16x4,
-    m16x4,
-    u8x4,
-    i8x4,
-    m8x4
-);
-impl_from!(
-    i8x8: i8,
-    i8x8_from,
-    test_v64 | f64x8,
-    u64x8,
-    i64x8,
-    m1x8,
-    f32x8,
-    u32x8,
-    i32x8,
-    m32x8,
-    i16x8,
-    u16x8,
-    m16x8,
-    u8x8,
-    m8x8
-);
-impl_from!(
-    u8x8: u8,
-    u8x8_from,
-    test_v64 | f64x8,
-    u64x8,
-    i64x8,
-    m1x8,
-    f32x8,
-    u32x8,
-    i32x8,
-    m32x8,
-    i16x8,
-    u16x8,
-    m16x8,
-    i8x8,
-    m8x8
-);
-
-impl_from!(m8x8: i8, m8x8_from, test_v64 | m1x8, m32x8, m16x8);
-
-impl_from!(m16x4: i16, m16x4_from, test_v64 | m64x4, m32x4, m8x4);
-
-impl_from!(m32x2: i32, m32x2_from, test_v64 | m64x2, m16x2, m8x2);
diff --git a/coresimd/simd.rs b/coresimd/simd.rs
new file mode 100644
index 0000000000..6bdac0bfd6
--- /dev/null
+++ b/coresimd/simd.rs
@@ -0,0 +1,185 @@
+//! Internal `#[repr(simd)]` types
+
+#![allow(non_camel_case_types)]
+
+// Defines a crate-private `#[repr(simd)]` tuple struct `$id` whose lanes have
+// type `$ety`, together with `new`, `splat` and `extract` helpers.
+//
+// Invocation shape: `simd_ty!(name[elem]: elem, elem, ... | x0, x1, ...);`
+// with one `$elem_ty` and one `$elem_name` per lane.
+macro_rules! simd_ty {
+    ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
+        #[repr(simd)]
+        #[derive(Copy, Clone, Debug, PartialEq)]
+        pub(crate) struct $id($(pub $elem_ty),*);
+
+        impl $id {
+            /// Builds a vector from one value per lane, in field order.
+            #[inline]
+            pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self {
+                $id($($elem_name),*)
+            }
+
+            /// Builds a vector with every lane set to `value`.
+            #[inline]
+            pub(crate) const fn splat(value: $ety) -> Self {
+                $id($({
+                    // The unit struct is never used: it only mentions
+                    // `$elem_name` so this block is repeated once per lane.
+                    #[allow(non_camel_case_types, dead_code)]
+                    struct $elem_name;
+                    value
+                }),*)
+            }
+
+            /// Returns the lane at `index`, read with the `simd_extract` intrinsic.
+            // NOTE(review): `index` is not range-checked here; presumably callers
+            // must pass a value below the lane count - confirm.
+            #[inline]
+            pub(crate) fn extract(self, index: usize) -> $ety {
+                unsafe { ::coresimd::simd_llvm::simd_extract(self, index as u32) }
+            }
+        }
+    }
+}
+
+// Defines a crate-private `#[repr(simd)]` mask type `$id` whose lanes are
+// stored as `$ety` integers but are built from, and read back as, `bool`s.
+//
+// Invocation shape is the same as for `simd_ty!`.
+macro_rules! simd_m_ty {
+    ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
+        #[repr(simd)]
+        #[derive(Copy, Clone, Debug, PartialEq)]
+        pub(crate) struct $id($(pub $elem_ty),*);
+
+        impl $id {
+            /// Maps `false` to `0` and `true` to a value with every bit set.
+            #[inline]
+            const fn bool_to_internal(x: bool) -> $ety {
+                // Table lookup indexed by the bool. NOTE(review): presumably
+                // used because `if` was not usable in a `const fn` - confirm.
+                [0 as $ety, !(0 as $ety)][x as usize]
+            }
+
+            /// Builds a mask from one `bool` per lane, in field order.
+            #[inline]
+            pub(crate) const fn new($($elem_name: bool),*) -> Self {
+                $id($(Self::bool_to_internal($elem_name)),*)
+            }
+
+            /// Builds a mask with every lane set to `value`.
+            #[inline]
+            pub(crate) const fn splat(value: bool) -> Self {
+                $id($({
+                    // The unit struct is never used: it only mentions
+                    // `$elem_name` so this block is repeated once per lane.
+                    #[allow(non_camel_case_types, dead_code)]
+                    struct $elem_name;
+                    Self::bool_to_internal(value)
+                }),*)
+            }
+
+            /// Returns `true` when the lane at `index` is non-zero.
+            #[inline]
+            pub(crate) fn extract(self, index: usize) -> bool {
+                let r: $ety = unsafe { ::coresimd::simd_llvm::simd_extract(self, index as u32) };
+                r != 0
+            }
+        }
+    }
+}
+
+// 16-bit wide types:
+
+simd_ty!(u8x2[u8]: u8, u8 | x0, x1);
+simd_ty!(i8x2[i8]: i8, i8 | x0, x1);
+
+// 32-bit wide types:
+
+simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3);
+simd_ty!(u16x2[u16]: u16, u16 | x0, x1);
+
+simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3);
+simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
+
+// 64-bit wide types:
+
+simd_ty!(u8x8[u8]: u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
+simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
+simd_ty!(u64x1[u64]: u64 | x0);
+
+simd_ty!(i8x8[i8]: i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
+simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
+simd_ty!(i64x1[i64]: i64 | x0);
+
+simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
+
+// 128-bit wide types:
+
+simd_ty!(u8x16[u8]:
+         u8, u8, u8, u8, u8, u8, u8, u8,
+         u8, u8, u8, u8, u8, u8, u8, u8
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+);
+simd_ty!(u16x8[u16]: u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
+simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
+
+simd_ty!(i8x16[i8]:
+         i8, i8, i8, i8, i8, i8, i8, i8,
+         i8, i8, i8, i8, i8, i8, i8, i8
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+);
+simd_ty!(i16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
+simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
+
+simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
+simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
+
+simd_m_ty!(m8x16[i8]:
+           i8, i8, i8, i8, i8, i8, i8, i8,
+           i8, i8, i8, i8, i8, i8, i8, i8
+           | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+);
+simd_m_ty!(m16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
+simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
+
+// 256-bit wide types:
+
+simd_ty!(u8x32[u8]:
+         u8, u8, u8, u8, u8, u8, u8, u8,
+         u8, u8, u8, u8, u8, u8, u8, u8,
+         u8, u8, u8, u8, u8, u8, u8, u8,
+         u8, u8, u8, u8, u8, u8, u8, u8
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+         x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
+);
+simd_ty!(u16x16[u16]:
+         u16, u16, u16, u16, u16, u16, u16, u16,
+         u16, u16, u16, u16, u16, u16, u16, u16
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+);
+simd_ty!(u32x8[u32]: u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
+
+simd_ty!(i8x32[i8]:
+         i8, i8, i8, i8, i8, i8, i8, i8,
+         i8, i8, i8, i8, i8, i8, i8, i8,
+         i8, i8, i8, i8, i8, i8, i8, i8,
+         i8, i8, i8, i8, i8, i8, i8, i8
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+         x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
+);
+simd_ty!(i16x16[i16]:
+         i16, i16, i16, i16, i16, i16, i16, i16,
+         i16, i16, i16, i16, i16, i16, i16, i16
+         | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
+);
+simd_ty!(i32x8[i32]: i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7);
+simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
+
diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs
index f41ebb8974..58d9482ff1 100644
--- a/coresimd/x86/avx.rs
+++ b/coresimd/x86/avx.rs
@@ -61,7 +61,7 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
 pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
     let a: u64x4 = mem::transmute(a);
     let b: u64x4 = mem::transmute(b);
-    mem::transmute(a & b)
+    mem::transmute(simd_and(a, b))
 }
 
 /// Compute the bitwise AND of packed single-precision (32-bit) floating-point
@@ -75,7 +75,7 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
 pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
     let a: u32x8 = mem::transmute(a);
     let b: u32x8 = mem::transmute(b);
-    mem::transmute(a & b)
+    mem::transmute(simd_and(a, b))
 }
 
 /// Compute the bitwise OR packed double-precision (64-bit) floating-point
@@ -91,7 +91,7 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
 pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
     let a: u64x4 = mem::transmute(a);
     let b: u64x4 = mem::transmute(b);
-    mem::transmute(a | b)
+    mem::transmute(simd_or(a, b))
 }
 
 /// Compute the bitwise OR packed single-precision (32-bit) floating-point
@@ -105,7 +105,7 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
 pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
     let a: u32x8 = mem::transmute(a);
     let b: u32x8 = mem::transmute(b);
-    mem::transmute(a | b)
+    mem::transmute(simd_or(a, b))
 }
 
 /// Shuffle double-precision (64-bit) floating-point elements within 128-bit
@@ -230,7 +230,7 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
 pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
     let a: u64x4 = mem::transmute(a);
     let b: u64x4 = mem::transmute(b);
-    mem::transmute((!a) & b)
+    mem::transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
 }
 
 /// Compute the bitwise NOT of packed single-precision (32-bit) floating-point
@@ -245,7 +245,7 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
 pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
     let a: u32x8 = mem::transmute(a);
     let b: u32x8 = mem::transmute(b);
-    mem::transmute((!a) & b)
+    mem::transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
 }
 
 /// Compare packed double-precision (64-bit) floating-point elements
@@ -741,7 +741,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
 pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
     let a: u64x4 = mem::transmute(a);
     let b: u64x4 = mem::transmute(b);
-    mem::transmute(a ^ b)
+    mem::transmute(simd_xor(a, b))
 }
 
 /// Compute the bitwise XOR of packed single-precision (32-bit) floating-point
@@ -755,7 +755,7 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
 pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
     let a: u32x8 = mem::transmute(a);
     let b: u32x8 = mem::transmute(b);
-    mem::transmute(a ^ b)
+    mem::transmute(simd_xor(a, b))
 }
 
 /// Equal (ordered, non-signaling)
diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs
index 31d950a2c6..83aea858f4 100644
--- a/coresimd/x86/mod.rs
+++ b/coresimd/x86/mod.rs
@@ -444,123 +444,6 @@ impl m256iExt for __m256i {
     }
 }
 
-use coresimd::simd::{
-    f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8, i32x2, i32x4,
-    i32x8, i64x2, i64x4, i8x16, i8x32, i8x8, m16x16, m16x4, m16x8, m32x2,
-    m32x4, m32x8, m64x2, m64x4, m8x16, m8x32, m8x8, u16x16, u16x4, u16x8,
-    u32x2, u32x4, u32x8, u64x2, u64x4, u8x16, u8x32, u8x8,
-};
-
-impl_from_bits_!(
-    __m64: u32x2,
-    i32x2,
-    f32x2,
-    m32x2,
-    u16x4,
-    i16x4,
-    m16x4,
-    u8x8,
-    i8x8,
-    m8x8
-);
-
-impl_from_bits_!(
-    __m128: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    __m128i: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    __m128d: u64x2,
-    i64x2,
-    f64x2,
-    m64x2,
-    u32x4,
-    i32x4,
-    f32x4,
-    m32x4,
-    u16x8,
-    i16x8,
-    m16x8,
-    u8x16,
-    i8x16,
-    m8x16
-);
-impl_from_bits_!(
-    __m256: u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-impl_from_bits_!(
-    __m256i: u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-impl_from_bits_!(
-    __m256d: u64x4,
-    i64x4,
-    f64x4,
-    m64x4,
-    u32x8,
-    i32x8,
-    f32x8,
-    m32x8,
-    u16x16,
-    i16x16,
-    m16x16,
-    u8x32,
-    i8x32,
-    m8x32
-);
-
 mod eflags;
 pub use self::eflags::*;
 
diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs
index e31cf5fbed..c45a1dd1ff 100644
--- a/crates/coresimd/src/lib.rs
+++ b/crates/coresimd/src/lib.rs
@@ -39,7 +39,6 @@
     test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
 )]
 
-#[cfg_attr(not(test), macro_use)]
 extern crate core as _core;
 #[cfg(test)]
 #[macro_use]
@@ -52,33 +51,10 @@ extern crate stdsimd_test;
 #[cfg(test)]
 extern crate test;
 
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => { $($f!($($args)*);)* }
-}
-
 #[path = "../../../coresimd/mod.rs"]
 mod coresimd;
 
 pub use coresimd::arch;
-pub use coresimd::simd;
 
 #[allow(unused_imports)]
 use _core::clone;
diff --git a/crates/coresimd/tests/endian_tests.rs b/crates/coresimd/tests/endian_tests.rs
deleted file mode 100644
index 8bb9c49051..0000000000
--- a/crates/coresimd/tests/endian_tests.rs
+++ /dev/null
@@ -1,278 +0,0 @@
-#![feature(stdsimd)]
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-
-extern crate core;
-extern crate coresimd;
-
-use core::{mem, slice};
-use coresimd::simd::*;
-
-#[test]
-fn endian_indexing() {
-    let v = i32x4::new(0, 1, 2, 3);
-    assert_eq!(v.extract(0), 0);
-    assert_eq!(v.extract(1), 1);
-    assert_eq!(v.extract(2), 2);
-    assert_eq!(v.extract(3), 3);
-}
-
-#[test]
-fn endian_bitcasts() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let t: i16x8 = unsafe { mem::transmute(x) };
-    let e: i16x8 = if cfg!(target_endian = "little") {
-        i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854)
-    } else {
-        i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599)
-    };
-    assert_eq!(t, e);
-}
-
-#[test]
-fn endian_casts() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let t: i16x16 = x.into(); // simd_cast
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let e = i16x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    assert_eq!(t, e);
-}
-
-#[test]
-fn endian_load_and_stores() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let mut y: [i16; 8] = [0; 8];
-    x.store_unaligned(unsafe {
-        slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16)
-    });
-
-    let e: [i16; 8] = if cfg!(target_endian = "little") {
-        [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
-    } else {
-        [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
-    };
-    assert_eq!(y, e);
-
-    let z = i8x16::load_unaligned(unsafe {
-        slice::from_raw_parts(&y as *const _ as *const i8, 16)
-    });
-    assert_eq!(z, x);
-}
-
-#[test]
-fn endian_array_union() {
-    union A {
-        data: [f32; 4],
-        vec: f32x4,
-    }
-    let x: [f32; 4] = unsafe {
-        A {
-            vec: f32x4::new(0., 1., 2., 3.),
-        }.data
-    };
-    assert_eq!(x[0], 0_f32);
-    assert_eq!(x[1], 1_f32);
-    assert_eq!(x[2], 2_f32);
-    assert_eq!(x[3], 3_f32);
-    let y: f32x4 = unsafe {
-        A {
-            data: [3., 2., 1., 0.],
-        }.vec
-    };
-    assert_eq!(y, f32x4::new(3., 2., 1., 0.));
-
-    union B {
-        data: [i8; 16],
-        vec: i8x16,
-    }
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let x: [i8; 16] = unsafe { B { vec: x }.data };
-
-    for i in 0..16 {
-        assert_eq!(x[i], i as i8);
-    }
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let y = [
-        15, 14, 13, 12, 11, 19, 9, 8,
-        7, 6, 5, 4, 3, 2, 1, 0
-    ];
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let e = i8x16::new(
-        15, 14, 13, 12, 11, 19, 9, 8,
-        7, 6, 5, 4, 3, 2, 1, 0
-    );
-    let z = unsafe { B { data: y }.vec };
-    assert_eq!(z, e);
-
-    union C {
-        data: [i16; 8],
-        vec: i8x16,
-    }
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let x: [i16; 8] = unsafe { C { vec: x }.data };
-
-    let e: [i16; 8] = if cfg!(target_endian = "little") {
-        [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
-    } else {
-        [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
-    };
-    assert_eq!(x, e);
-}
-
-#[test]
-fn endian_tuple_access() {
-    type F32x4T = (f32, f32, f32, f32);
-    union A {
-        data: F32x4T,
-        vec: f32x4,
-    }
-    let x: F32x4T = unsafe {
-        A {
-            vec: f32x4::new(0., 1., 2., 3.),
-        }.data
-    };
-    assert_eq!(x.0, 0_f32);
-    assert_eq!(x.1, 1_f32);
-    assert_eq!(x.2, 2_f32);
-    assert_eq!(x.3, 3_f32);
-    let y: f32x4 = unsafe {
-        A {
-            data: (3., 2., 1., 0.),
-        }.vec
-    };
-    assert_eq!(y, f32x4::new(3., 2., 1., 0.));
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
-    union B {
-        data: I8x16T,
-        vec: i8x16,
-    }
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let x: I8x16T = unsafe { B { vec: x }.data };
-
-    assert_eq!(x.0, 0);
-    assert_eq!(x.1, 1);
-    assert_eq!(x.2, 2);
-    assert_eq!(x.3, 3);
-    assert_eq!(x.4, 4);
-    assert_eq!(x.5, 5);
-    assert_eq!(x.6, 6);
-    assert_eq!(x.7, 7);
-    assert_eq!(x.8, 8);
-    assert_eq!(x.9, 9);
-    assert_eq!(x.10, 10);
-    assert_eq!(x.11, 11);
-    assert_eq!(x.12, 12);
-    assert_eq!(x.13, 13);
-    assert_eq!(x.14, 14);
-    assert_eq!(x.15, 15);
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let y = (
-        15, 14, 13, 12, 11, 10, 9, 8,
-        7, 6, 5, 4, 3, 2, 1, 0
-    );
-    let z: i8x16 = unsafe { B { data: y }.vec };
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let e = i8x16::new(
-        15, 14, 13, 12, 11, 10, 9, 8,
-        7, 6, 5, 4, 3, 2, 1, 0
-    );
-    assert_eq!(e, z);
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16);
-    union C {
-        data: I16x8T,
-        vec: i8x16,
-    }
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let x: I16x8T = unsafe { C { vec: x }.data };
-
-    let e: [i16; 8] = if cfg!(target_endian = "little") {
-        [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
-    } else {
-        [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
-    };
-    assert_eq!(x.0, e[0]);
-    assert_eq!(x.1, e[1]);
-    assert_eq!(x.2, e[2]);
-    assert_eq!(x.3, e[3]);
-    assert_eq!(x.4, e[4]);
-    assert_eq!(x.5, e[5]);
-    assert_eq!(x.6, e[6]);
-    assert_eq!(x.7, e[7]);
-
-    // Without repr(C) this produces total garbage.
-    // FIXME: investigate more, this is maybe due to
-    // to tuple field reordering to minimize padding.
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    #[repr(C)]
-    #[derive(Copy ,Clone)]
-    pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16,
-                   pub i8, pub i8, pub i16, pub i8, pub i8, pub i16);
-
-    union D {
-        data: Tup,
-        vec: i8x16,
-    }
-
-    #[cfg_attr(rustfmt, rustfmt_skip)]
-    let x = i8x16::new(
-        0, 1, 2, 3, 4, 5, 6, 7,
-        8, 9, 10, 11, 12, 13, 14, 15,
-    );
-    let x: Tup = unsafe { D { vec: x }.data };
-
-    let e: [i16; 12] = if cfg!(target_endian = "little") {
-        [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854]
-    } else {
-        [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599]
-    };
-    assert_eq!(x.0 as i16, e[0]);
-    assert_eq!(x.1 as i16, e[1]);
-    assert_eq!(x.2 as i16, e[2]);
-    assert_eq!(x.3 as i16, e[3]);
-    assert_eq!(x.4 as i16, e[4]);
-    assert_eq!(x.5 as i16, e[5]);
-    assert_eq!(x.6 as i16, e[6]);
-    assert_eq!(x.7 as i16, e[7]);
-    assert_eq!(x.8 as i16, e[8]);
-    assert_eq!(x.9 as i16, e[9]);
-    assert_eq!(x.10 as i16, e[10]);
-    assert_eq!(x.11 as i16, e[11]);
-}
diff --git a/crates/coresimd/tests/reductions.rs b/crates/coresimd/tests/reductions.rs
deleted file mode 100644
index 123410b879..0000000000
--- a/crates/coresimd/tests/reductions.rs
+++ /dev/null
@@ -1,510 +0,0 @@
-#![feature(stdsimd, sse4a_target_feature, avx512_target_feature)]
-#![feature(arm_target_feature)]
-#![feature(aarch64_target_feature)]
-#![feature(powerpc_target_feature)]
-#![allow(unused_attributes, dead_code, unused_imports, unused_macros)]
-
-#[macro_use]
-extern crate stdsimd;
-
-use stdsimd::simd::*;
-
-#[cfg(target_arch = "powerpc")]
-macro_rules! is_powerpc_feature_detected {
-    ($t:tt) => {
-        false
-    };
-}
-
-macro_rules! invoke_arch {
-    ($macro:ident, $feature_macro:ident, $id:ident, $elem_ty:ident,
-     [$($feature:tt),*]) => {
-        $($macro!($feature, $feature_macro, $id, $elem_ty);)*
-    }
-}
-
-macro_rules! invoke_vectors {
-    ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => {
-        $(
-            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-            invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty,
-                        ["sse", "sse2", "sse3", "ssse3", "sse4.1",
-                         "sse4.2", "sse4a", "avx2", "avx2", "avx512f"]);
-            #[cfg(target_arch = "aarch64")]
-            invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty,
-                        ["neon"]);
-            #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
-            invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty,
-                         ["neon"]);
-            #[cfg(target_arch = "powerpc")]
-            invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]);
-            #[cfg(target_arch = "powerpc64")]
-            invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]);
-        )*
-    }
-}
-
-macro_rules! finvoke {
-    ($macro:ident) => {
-        invoke_vectors!(
-            $macro,
-            [
-                (f32x2, f32),
-                (f32x4, f32),
-                (f32x8, f32),
-                (f32x16, f32),
-                (f64x2, f64),
-                (f64x4, f64),
-                (f64x8, f64)
-            ]
-        );
-    };
-}
-
-macro_rules! iinvoke {
-    ($macro:ident) => {
-        invoke_vectors!(
-            $macro,
-            [
-                (i8x2, i8),
-                (i8x4, i8),
-                (i8x8, i8),
-                (i8x16, i8),
-                (i8x32, i8),
-                (i8x64, i8),
-                (i16x2, i16),
-                (i16x4, i16),
-                (i16x8, i16),
-                (i16x16, i16),
-                (i16x32, i16),
-                (i32x2, i32),
-                (i32x4, i32),
-                (i32x8, i32),
-                (i32x16, i32),
-                (i64x2, i64),
-                (i64x4, i64),
-                (i64x8, i64),
-                (u8x2, u8),
-                (u8x4, u8),
-                (u8x8, u8),
-                (u8x16, u8),
-                (u8x32, u8),
-                (u8x64, u8),
-                (u16x2, u16),
-                (u16x4, u16),
-                (u16x8, u16),
-                (u16x16, u16),
-                (u16x32, u16),
-                (u32x2, u32),
-                (u32x4, u32),
-                (u32x8, u32),
-                (u32x16, u32),
-                (u64x2, u64),
-                (u64x4, u64),
-                (u64x8, u64)
-            ]
-        );
-    };
-}
-
-macro_rules! min_nan_test {
-    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-        if $feature_macro!($feature) {
-            #[target_feature(enable = $feature)]
-            unsafe fn test_fn() {
-                let n0 = ::std::$elem_ty::NAN;
-
-                assert_eq!(n0.min(-3.0), -3.0);
-                assert_eq!((-3.0 as $elem_ty).min(n0), -3.0);
-
-                let v0 = $id::splat(-3.0);
-
-                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
-                // When the last element is NaN the current implementation produces incorrect results.
-                let bugbug = 1;
-                for i in 0..$id::lanes() - bugbug {
-                    let mut v = v0.replace(i, n0);
-                    // If there is a NaN, the result is always the smallest element:
-                    assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
-                    for j in 0..i {
-                        v = v.replace(j, n0);
-                        assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
-                    }
-                }
-                // If the vector contains all NaNs the result is NaN:
-                let vn = $id::splat(n0);
-                assert!(vn.min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}",
-                        vn, vn.min_element(), vn.min_element().is_nan());
-            }
-            unsafe { test_fn() };
-        }
-    }
-}
-
-#[test]
-fn min_nan() {
-    finvoke!(min_nan_test);
-}
-
-macro_rules! max_nan_test {
-    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-        if $feature_macro!($feature) {
-            #[target_feature(enable = $feature)]
-            unsafe fn test_fn() {
-                let n0 = ::std::$elem_ty::NAN;
-
-                assert_eq!(n0.max(-3.0), -3.0);
-                assert_eq!((-3.0 as $elem_ty).max(n0), -3.0);
-
-                let v0 = $id::splat(-3.0);
-
-                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
-                // When the last element is NaN the current implementation produces incorrect results.
-                let bugbug = 1;
-                for i in 0..$id::lanes() - bugbug {
-                    let mut v = v0.replace(i, n0);
-                    // If there is a NaN the result is always the largest element:
-                    assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
-                    for j in 0..i {
-                        v = v.replace(j, n0);
-                        assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
-                    }
-                }
-
-                // If the vector contains all NaNs the result is NaN:
-                let vn = $id::splat(n0);
-                assert!(vn.max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}",
-                        vn, vn.max_element(), vn.max_element().is_nan());
-            }
-            unsafe { test_fn() };
-        }
-    }
-}
-
-#[test]
-fn max_nan() {
-    finvoke!(max_nan_test);
-}
-
-macro_rules! sum_nan_test {
-    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-        if $feature_macro!($feature) {
-            #[target_feature(enable = $feature)]
-            #[allow(unreachable_code)]
-            unsafe fn test_fn() {
-                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
-                // https://github.com/rust-lang-nursery/stdsimd/issues/409
-                return;
-
-                let n0 = ::std::$elem_ty::NAN;
-                let v0 = $id::splat(-3.0);
-                for i in 0..$id::lanes() {
-                    let mut v = v0.replace(i, n0);
-                    // If the vector contains a NaN the result is NaN:
-                    assert!(
-                        v.sum().is_nan(),
-                        "nan at {} => {} | {:?}",
-                        i,
-                        v.sum(),
-                        v
-                    );
-                    for j in 0..i {
-                        v = v.replace(j, n0);
-                        assert!(v.sum().is_nan());
-                    }
-                }
-                let v = $id::splat(n0);
-                assert!(v.sum().is_nan(), "all nans | {:?}", v);
-            }
-            unsafe { test_fn() };
-        }
-    };
-}
-
-#[test]
-fn sum_nan() {
-    finvoke!(sum_nan_test);
-}
-
-macro_rules! product_nan_test {
-    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-        if $feature_macro!($feature) {
-            #[target_feature(enable = $feature)]
-            #[allow(unreachable_code)]
-            unsafe fn test_fn() {
-                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
-                // https://github.com/rust-lang-nursery/stdsimd/issues/409
-                return;
-
-                let n0 = ::std::$elem_ty::NAN;
-                let v0 = $id::splat(-3.0);
-                for i in 0..$id::lanes() {
-                    let mut v = v0.replace(i, n0);
-                    // If the vector contains a NaN the result is NaN:
-                    assert!(
-                        v.product().is_nan(),
-                        "nan at {} | {:?}",
-                        i,
-                        v
-                    );
-                    for j in 0..i {
-                        v = v.replace(j, n0);
-                        assert!(v.product().is_nan());
-                    }
-                }
-                let v = $id::splat(n0);
-                assert!(v.product().is_nan(), "all nans | {:?}", v);
-            }
-            unsafe { test_fn() };
-        }
-    };
-}
-
-#[test]
-fn product_nan() {
-    finvoke!(product_nan_test);
-}
-
-trait AsInt {
-    type Int;
-    fn as_int(self) -> Self::Int;
-    fn from_int(Self::Int) -> Self;
-}
-
-macro_rules! as_int {
-    ($float:ident, $int:ident) => {
-        impl AsInt for $float {
-            type Int = $int;
-            fn as_int(self) -> $int {
-                unsafe { ::std::mem::transmute(self) }
-            }
-            fn from_int(x: $int) -> $float {
-                unsafe { ::std::mem::transmute(x) }
-            }
-        }
-    };
-}
-
-as_int!(f32, u32);
-as_int!(f64, u64);
-as_int!(f32x2, i32x2);
-as_int!(f32x4, i32x4);
-as_int!(f32x8, i32x8);
-as_int!(f32x16, i32x16);
-as_int!(f64x2, i64x2);
-as_int!(f64x4, i64x4);
-as_int!(f64x8, i64x8);
-
-// FIXME: these fail on i586 for some reason
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
-mod offset {
-    use super::*;
-
-    trait TreeSum {
-        type R;
-        fn tree_sum(self) -> Self::R;
-    }
-
-    macro_rules! tree_sum_f {
-        ($elem_ty:ident) => {
-            impl<'a> TreeSum for &'a [$elem_ty] {
-                type R = $elem_ty;
-                fn tree_sum(self) -> $elem_ty {
-                    if self.len() == 2 {
-                        self[0] + self[1]
-                    } else {
-                        let mid = self.len() / 2;
-                        let (left, right) = self.split_at(mid);
-                        Self::tree_sum(left) + Self::tree_sum(right)
-                    }
-                }
-            }
-        };
-    }
-    tree_sum_f!(f32);
-    tree_sum_f!(f64);
-
-    macro_rules! sum_roundoff_test {
-        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-            if $feature_macro!($feature) {
-                #[target_feature(enable = $feature)]
-                unsafe fn test_fn() {
-                    let mut start = std::$elem_ty::EPSILON;
-                    let mut sum = 0. as $elem_ty;
-
-                    let mut v = $id::splat(0. as $elem_ty);
-                    for i in 0..$id::lanes() {
-                        let c = if i % 2 == 0 { 1e3 } else { -1. };
-                        start *= 3.14 * c;
-                        sum += start;
-                        v = v.replace(i, start);
-                    }
-                    let vsum = v.sum();
-                    let _r = vsum.as_int() == sum.as_int();
-                    // This is false in general; the intrinsic performs a
-                    // tree-reduce:
-                    let mut a = [0. as $elem_ty; $id::lanes()];
-                    v.store_unaligned(&mut a);
-
-                    let tsum = a.tree_sum();
-
-                    // tolerate 1 ULP difference:
-                    if vsum.as_int() > tsum.as_int() {
-                        assert!(
-                            vsum.as_int() - tsum.as_int() < 2,
-                            "v: {:?} | vsum: {} | tsum: {}",
-                            v,
-                            vsum,
-                            tsum
-                        );
-                    } else {
-                        assert!(
-                            tsum.as_int() - vsum.as_int() < 2,
-                            "v: {:?} | vsum: {} | tsum: {}",
-                            v,
-                            vsum,
-                            tsum
-                        );
-                    }
-                }
-                unsafe { test_fn() };
-            }
-        };
-    }
-
-    #[test]
-    fn sum_roundoff_test() {
-        finvoke!(sum_roundoff_test);
-    }
-
-    trait TreeProduct {
-        type R;
-        fn tree_product(self) -> Self::R;
-    }
-
-    macro_rules! tree_product_f {
-        ($elem_ty:ident) => {
-            impl<'a> TreeProduct for &'a [$elem_ty] {
-                type R = $elem_ty;
-                fn tree_product(self) -> $elem_ty {
-                    if self.len() == 2 {
-                        self[0] * self[1]
-                    } else {
-                        let mid = self.len() / 2;
-                        let (left, right) = self.split_at(mid);
-                        Self::tree_product(left) * Self::tree_product(right)
-                    }
-                }
-            }
-        };
-    }
-
-    tree_product_f!(f32);
-    tree_product_f!(f64);
-
-    macro_rules! product_roundoff_test {
-        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-            if $feature_macro!($feature) {
-                #[target_feature(enable = $feature)]
-                unsafe fn test_fn() {
-                    let mut start = std::$elem_ty::EPSILON;
-                    let mut mul = 1. as $elem_ty;
-
-                    let mut v = $id::splat(1. as $elem_ty);
-                    for i in 0..$id::lanes() {
-                        let c = if i % 2 == 0 { 1e3 } else { -1. };
-                        start *= 3.14 * c;
-                        mul *= start;
-                        v = v.replace(i, start);
-                    }
-                    let vmul = v.product();
-                    let _r = vmul.as_int() == mul.as_int();
-                    // This is false in general; the intrinsic performs a
-                    // tree-reduce:
-                    let mut a = [0. as $elem_ty; $id::lanes()];
-                    v.store_unaligned(&mut a);
-
-                    let tmul = a.tree_product();
-                    // tolerate 1 ULP difference:
-                    if vmul.as_int() > tmul.as_int() {
-                        assert!(
-                            vmul.as_int() - tmul.as_int() < 2,
-                            "v: {:?} | vmul: {} | tmul: {}",
-                            v,
-                            vmul,
-                            tmul
-                        );
-                    } else {
-                        assert!(
-                            tmul.as_int() - vmul.as_int() < 2,
-                            "v: {:?} | vmul: {} | tmul: {}",
-                            v,
-                            vmul,
-                            tmul
-                        );
-                    }
-                }
-                unsafe { test_fn() };
-            }
-        };
-    }
-
-    #[test]
-    fn product_roundoff_test() {
-        finvoke!(product_roundoff_test);
-    }
-
-    macro_rules! wrapping_sum_overflow_test {
-        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-            if $feature_macro!($feature) {
-                #[target_feature(enable = $feature)]
-                unsafe fn test_fn() {
-                    let start = $elem_ty::max_value()
-                        - ($id::lanes() as $elem_ty / 2);
-
-                    let v = $id::splat(start as $elem_ty);
-                    let vwrapping_sum = v.wrapping_sum();
-
-                    let mut wrapping_sum = start;
-                    for _ in 1..$id::lanes() {
-                        wrapping_sum = wrapping_sum.wrapping_add(start);
-                    }
-                    assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
-                }
-                unsafe { test_fn() };
-            }
-        };
-    }
-
-    #[test]
-    fn wrapping_sum_overflow_test() {
-        iinvoke!(wrapping_sum_overflow_test);
-    }
-
-    macro_rules! product_overflow_test {
-        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
-            if $feature_macro!($feature) {
-                #[target_feature(enable = $feature)]
-                unsafe fn test_fn() {
-                    let start = $elem_ty::max_value()
-                        - ($id::lanes() as $elem_ty / 2);
-
-                    let v = $id::splat(start as $elem_ty);
-                    let vmul = v.wrapping_product();
-
-                    let mut mul = start;
-                    for _ in 1..$id::lanes() {
-                        mul = mul.wrapping_mul(start);
-                    }
-                    assert_eq!(mul, vmul, "v = {:?}", v);
-                }
-                unsafe { test_fn() };
-            }
-        };
-    }
-
-    #[test]
-    fn product_overflow_test() {
-        iinvoke!(product_overflow_test);
-    }
-}
diff --git a/crates/coresimd/tests/v128.rs b/crates/coresimd/tests/v128.rs
deleted file mode 100644
index 3cba8811cb..0000000000
--- a/crates/coresimd/tests/v128.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 128-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports, dead_code)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {
-        $item
-    };
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/coresimd/tests/v16.rs b/crates/coresimd/tests/v16.rs
deleted file mode 100644
index 77c78323fe..0000000000
--- a/crates/coresimd/tests/v16.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 16-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports, dead_code)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {
-        $item
-    };
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/coresimd/tests/v256.rs b/crates/coresimd/tests/v256.rs
deleted file mode 100644
index fac31c316f..0000000000
--- a/crates/coresimd/tests/v256.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 256-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {
-        $item
-    };
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/coresimd/tests/v32.rs b/crates/coresimd/tests/v32.rs
deleted file mode 100644
index 3f89acc9a0..0000000000
--- a/crates/coresimd/tests/v32.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 32-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports, dead_code)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {
-        $item
-    };
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/coresimd/tests/v512.rs b/crates/coresimd/tests/v512.rs
deleted file mode 100644
index cda0661370..0000000000
--- a/crates/coresimd/tests/v512.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 512-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {
-        $item
-    };
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/coresimd/tests/v64.rs b/crates/coresimd/tests/v64.rs
deleted file mode 100644
index 65b91219ca..0000000000
--- a/crates/coresimd/tests/v64.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! coresimd 64-bit wide vector tests
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
-#![allow(unused_imports, dead_code)]
-
-#[cfg(test)]
-extern crate coresimd;
-
-#[cfg(test)]
-macro_rules! test_v16 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v32 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v64 {
-    ($item:item) => {
-        $item
-    };
-}
-#[cfg(test)]
-macro_rules! test_v128 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v256 {
-    ($item:item) => {};
-}
-#[cfg(test)]
-macro_rules! test_v512 {
-    ($item:item) => {};
-}
-
-#[cfg(test)]
-macro_rules! vector_impl {
-    ($([$f:ident, $($args:tt)*]),*) => {};
-}
-
-#[cfg(test)]
-#[path = "../../../coresimd/ppsv/mod.rs"]
-mod ppsv;
-
-#[cfg(test)]
-use std::{marker, mem};
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use std::cmp;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-extern crate core as _core;
-
-#[cfg(all(test, target_arch = "aarch64"))]
-use _core::num;
diff --git a/crates/stdsimd/Cargo.toml b/crates/stdsimd/Cargo.toml
index 0371f38403..4ab553db48 100644
--- a/crates/stdsimd/Cargo.toml
+++ b/crates/stdsimd/Cargo.toml
@@ -33,10 +33,6 @@ cupid = "0.6.0"
 name = "hex"
 path = "../../examples/hex.rs"
 
-[[example]]
-name = "nbody"
-path = "../../examples/nbody.rs"
-
 [[example]]
 name = "wasm"
 crate-type = ["cdylib"]
diff --git a/examples/nbody.rs b/examples/nbody.rs
deleted file mode 100644
index 63281e78e8..0000000000
--- a/examples/nbody.rs
+++ /dev/null
@@ -1,243 +0,0 @@
-//! n-body benchmark from the [benchmarks game][bg].
-//!
-//! [bg]: https://benchmarksgame.alioth.debian.org/u64q/nbody-description.
-//! html#nbody
-
-#![cfg_attr(stdsimd_strict, deny(warnings))]
-#![feature(stdsimd)]
-#![cfg_attr(
-    feature = "cargo-clippy",
-    allow(
-        similar_names, missing_docs_in_private_items, shadow_reuse,
-        print_stdout
-    )
-)]
-
-extern crate stdsimd;
-#[macro_use]
-extern crate cfg_if;
-
-use stdsimd::simd::*;
-
-const PI: f64 = std::f64::consts::PI;
-const SOLAR_MASS: f64 = 4.0 * PI * PI;
-const DAYS_PER_YEAR: f64 = 365.24;
-
-pub trait Frsqrt {
-    fn frsqrt(&self) -> Self;
-}
-
-cfg_if! {
-    if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
-                  target_feature = "sse"))] {
-        fn frsqrt(s: &f64x2) -> f64x2 {
-            #[cfg(target_arch = "x86")]
-            use stdsimd::arch::x86::*;
-            #[cfg(target_arch = "x86_64")]
-            use stdsimd::arch::x86_64::*;
-            let t: f32x2 = (*s).into();
-
-            let u: f64x4 = unsafe {
-                let res = _mm_rsqrt_ps(_mm_setr_ps(
-                    t.extract(0),
-                    t.extract(1),
-                    0.,
-                    0.,
-                ));
-                f32x4::from_bits(res).into()
-            };
-            f64x2::new(u.extract(0), u.extract(1))
-        }
-    } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
-        fn frsqrt(s: &f64x2) -> f64x2 {
-            #[cfg(target_arch = "aarch64")]
-            use stdsimd::arch::aarch64::*;
-            #[cfg(target_arch = "arm")]
-            use stdsimd::arch::arm::*;
-
-            let t: f32x2 = (*s).into();
-            let t: f32x2 = unsafe { vrsqrte_f32(t.into_bits()).into_bits() };
-            t.into()
-        }
-    } else {
-        fn frsqrt(s: &f64x2) -> f64x2 {
-            let r = s.replace(0, 1. / s.extract(0).sqrt());
-            let r = r.replace(1, 1. / s.extract(1).sqrt());
-            r
-        }
-    }
-}
-
-impl Frsqrt for f64x2 {
-    fn frsqrt(&self) -> Self {
-        frsqrt(self)
-    }
-}
-
-struct Body {
-    x: [f64; 3],
-    _fill: f64,
-    v: [f64; 3],
-    mass: f64,
-}
-
-impl Body {
-    fn new(
-        x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64,
-    ) -> Self {
-        Self {
-            x: [x0, x1, x2],
-            _fill: 0.0,
-            v: [v0, v1, v2],
-            mass,
-        }
-    }
-}
-
-const N_BODIES: usize = 5;
-const N: usize = N_BODIES * (N_BODIES - 1) / 2;
-fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
-    let (sun, rest) = bodies.split_at_mut(1);
-    let sun = &mut sun[0];
-    for body in rest {
-        for k in 0..3 {
-            sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS;
-        }
-    }
-}
-fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
-    let mut r = [[0.0; 4]; N];
-    let mut mag = [0.0; N];
-
-    let mut dx = [f64x2::splat(0.0); 3];
-    let mut dsquared;
-    let mut distance;
-    let mut dmag;
-
-    let mut i = 0;
-    for j in 0..N_BODIES {
-        for k in j + 1..N_BODIES {
-            for m in 0..3 {
-                r[i][m] = bodies[j].x[m] - bodies[k].x[m];
-            }
-            i += 1;
-        }
-    }
-
-    i = 0;
-    while i < N {
-        for (m, dx) in dx.iter_mut().enumerate() {
-            *dx = f64x2::new(r[i][m], r[i + 1][m]);
-        }
-
-        dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-        distance = dsquared.frsqrt();
-        for _ in 0..2 {
-            distance = distance * f64x2::splat(1.5)
-                - ((f64x2::splat(0.5) * dsquared) * distance)
-                    * (distance * distance)
-        }
-        dmag = f64x2::splat(dt) / dsquared * distance;
-        dmag.store_unaligned(&mut mag[i..]);
-
-        i += 2;
-    }
-
-    i = 0;
-    for j in 0..N_BODIES {
-        for k in j + 1..N_BODIES {
-            for m in 0..3 {
-                bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i];
-                bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i];
-            }
-            i += 1
-        }
-    }
-    for body in bodies {
-        for m in 0..3 {
-            body.x[m] += dt * body.v[m]
-        }
-    }
-}
-
-fn energy(bodies: &[Body; N_BODIES]) -> f64 {
-    let mut e = 0.0;
-    for i in 0..N_BODIES {
-        let bi = &bodies[i];
-        e += bi.mass
-            * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2])
-            / 2.0;
-        for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
-            let mut dx = [0.0; 3];
-            for (k, dx) in dx.iter_mut().enumerate() {
-                *dx = bi.x[k] - bj.x[k];
-            }
-            let mut distance = 0.0;
-            for &d in &dx {
-                distance += d * d
-            }
-            e -= bi.mass * bj.mass / distance.sqrt()
-        }
-    }
-    e
-}
-
-fn main() {
-    let mut bodies: [Body; N_BODIES] = [
-        /* sun */
-        Body::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SOLAR_MASS),
-        /* jupiter */
-        Body::new(
-            4.84143144246472090e+00,
-            -1.16032004402742839e+00,
-            -1.03622044471123109e-01,
-            1.66007664274403694e-03 * DAYS_PER_YEAR,
-            7.69901118419740425e-03 * DAYS_PER_YEAR,
-            -6.90460016972063023e-05 * DAYS_PER_YEAR,
-            9.54791938424326609e-04 * SOLAR_MASS,
-        ),
-        /* saturn */
-        Body::new(
-            8.34336671824457987e+00,
-            4.12479856412430479e+00,
-            -4.03523417114321381e-01,
-            -2.76742510726862411e-03 * DAYS_PER_YEAR,
-            4.99852801234917238e-03 * DAYS_PER_YEAR,
-            2.30417297573763929e-05 * DAYS_PER_YEAR,
-            2.85885980666130812e-04 * SOLAR_MASS,
-        ),
-        /* uranus */
-        Body::new(
-            1.28943695621391310e+01,
-            -1.51111514016986312e+01,
-            -2.23307578892655734e-01,
-            2.96460137564761618e-03 * DAYS_PER_YEAR,
-            2.37847173959480950e-03 * DAYS_PER_YEAR,
-            -2.96589568540237556e-05 * DAYS_PER_YEAR,
-            4.36624404335156298e-05 * SOLAR_MASS,
-        ),
-        /* neptune */
-        Body::new(
-            1.53796971148509165e+01,
-            -2.59193146099879641e+01,
-            1.79258772950371181e-01,
-            2.68067772490389322e-03 * DAYS_PER_YEAR,
-            1.62824170038242295e-03 * DAYS_PER_YEAR,
-            -9.51592254519715870e-05 * DAYS_PER_YEAR,
-            5.15138902046611451e-05 * SOLAR_MASS,
-        ),
-    ];
-
-    let n: usize = std::env::args()
-        .nth(1)
-        .expect("need one arg")
-        .parse()
-        .expect("argument should be a usize");
-
-    offset_momentum(&mut bodies);
-    println!("{:.9}", energy(&bodies));
-    for _ in 0..n {
-        advance(&mut bodies, 0.01);
-    }
-    println!("{:.9}", energy(&bodies));
-}
diff --git a/stdsimd/mod.rs b/stdsimd/mod.rs
index b76deb520e..d6a7c2cba6 100644
--- a/stdsimd/mod.rs
+++ b/stdsimd/mod.rs
@@ -480,6 +480,3 @@ pub mod arch {
     #[unstable(feature = "stdsimd", issue = "27731")]
     pub mod powerpc64 {}
 }
-
-#[unstable(feature = "stdsimd", issue = "27731")]
-pub use coresimd::simd;

From 3bf763f8bf0c81ffd515768c96fc9fa903370a22 Mon Sep 17 00:00:00 2001
From: gnzlbg <gonzalobg88@gmail.com>
Date: Wed, 18 Jul 2018 16:38:36 +0200
Subject: [PATCH 2/2] LLVM7 generates different machine code than LLVM6 for
 x86/x86_64 targets for some intrinsics. These are new optimizations

---
 coresimd/x86/avx.rs   |  5 ++++-
 coresimd/x86/sse41.rs | 10 ++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs
index 58d9482ff1..c1c2fff846 100644
--- a/coresimd/x86/avx.rs
+++ b/coresimd/x86/avx.rs
@@ -524,7 +524,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd)
 #[inline]
 #[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
+// Note: LLVM7 prefers single-precision blend instructions when
+// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
+// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
+#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
diff --git a/coresimd/x86/sse41.rs b/coresimd/x86/sse41.rs
index 198bb16ba0..91722507da 100644
--- a/coresimd/x86/sse41.rs
+++ b/coresimd/x86/sse41.rs
@@ -80,7 +80,10 @@ pub unsafe fn _mm_blendv_epi8(
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
 #[inline]
 #[target_feature(enable = "sse4.1")]
-#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
+// Note: LLVM7 prefers the single-precision floating-point domain when
+// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38195
+// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
+#[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
@@ -124,7 +127,10 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
 #[inline]
 #[target_feature(enable = "sse4.1")]
-#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
+// Note: LLVM7 prefers the single-precision floating-point domain when
+// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38195
+// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
+#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {