From 9971391025f50848f792d215a8794c9dbd1287c4 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 17 Jul 2018 15:34:21 +0200 Subject: [PATCH 1/2] remove portable vector types --- coresimd/aarch64/crypto.rs | 111 ++-- coresimd/aarch64/neon.rs | 133 ++--- coresimd/arm/neon.rs | 395 ++----------- coresimd/macros.rs | 41 -- coresimd/mips/msa.rs | 20 +- coresimd/mod.rs | 13 +- coresimd/powerpc/altivec.rs | 379 ++---------- coresimd/powerpc64/vsx.rs | 204 +------ coresimd/ppsv/api/arithmetic_ops.rs | 147 ----- coresimd/ppsv/api/arithmetic_reductions.rs | 261 --------- coresimd/ppsv/api/arithmetic_scalar_ops.rs | 202 ------- coresimd/ppsv/api/bitwise_ops.rs | 179 ------ coresimd/ppsv/api/bitwise_reductions.rs | 194 ------ coresimd/ppsv/api/bitwise_scalar_ops.rs | 222 ------- coresimd/ppsv/api/cmp.rs | 142 ----- coresimd/ppsv/api/default.rs | 27 - coresimd/ppsv/api/eq.rs | 8 - coresimd/ppsv/api/float_math.rs | 182 ------ coresimd/ppsv/api/fmt.rs | 152 ----- coresimd/ppsv/api/from.rs | 48 -- coresimd/ppsv/api/from_bits.rs | 47 -- coresimd/ppsv/api/hash.rs | 40 -- coresimd/ppsv/api/load_store.rs | 312 ---------- coresimd/ppsv/api/masks.rs | 144 ----- coresimd/ppsv/api/masks_reductions.rs | 84 --- coresimd/ppsv/api/masks_select.rs | 59 -- coresimd/ppsv/api/minimal.rs | 141 ----- coresimd/ppsv/api/minmax.rs | 148 ----- coresimd/ppsv/api/minmax_reductions.rs | 85 --- coresimd/ppsv/api/mod.rs | 266 --------- coresimd/ppsv/api/neg.rs | 43 -- coresimd/ppsv/api/partial_eq.rs | 47 -- coresimd/ppsv/api/scalar_shifts.rs | 120 ---- coresimd/ppsv/api/shifts.rs | 95 --- coresimd/ppsv/api/swap_bytes.rs | 130 ---- coresimd/ppsv/codegen/abs.rs | 77 --- coresimd/ppsv/codegen/cos.rs | 78 --- coresimd/ppsv/codegen/fma.rs | 51 -- coresimd/ppsv/codegen/masks_reductions.rs | 651 --------------------- coresimd/ppsv/codegen/mod.rs | 13 - coresimd/ppsv/codegen/sin.rs | 78 --- coresimd/ppsv/codegen/sqrt.rs | 77 --- coresimd/ppsv/codegen/swap_bytes.rs | 140 ----- coresimd/ppsv/codegen/wrapping.rs | 42 -- 
coresimd/ppsv/mod.rs | 96 --- coresimd/ppsv/v128.rs | 550 ----------------- coresimd/ppsv/v16.rs | 60 -- coresimd/ppsv/v256.rs | 472 --------------- coresimd/ppsv/v32.rs | 156 ----- coresimd/ppsv/v512.rs | 451 -------------- coresimd/ppsv/v64.rs | 388 ------------ coresimd/simd.rs | 161 +++++ coresimd/x86/avx.rs | 16 +- coresimd/x86/mod.rs | 117 ---- crates/coresimd/src/lib.rs | 24 - crates/coresimd/tests/endian_tests.rs | 278 --------- crates/coresimd/tests/reductions.rs | 510 ---------------- crates/coresimd/tests/v128.rs | 56 -- crates/coresimd/tests/v16.rs | 56 -- crates/coresimd/tests/v256.rs | 56 -- crates/coresimd/tests/v32.rs | 56 -- crates/coresimd/tests/v512.rs | 56 -- crates/coresimd/tests/v64.rs | 56 -- crates/stdsimd/Cargo.toml | 4 - examples/nbody.rs | 243 -------- stdsimd/mod.rs | 3 - 66 files changed, 387 insertions(+), 9506 deletions(-) delete mode 100644 coresimd/ppsv/api/arithmetic_ops.rs delete mode 100644 coresimd/ppsv/api/arithmetic_reductions.rs delete mode 100644 coresimd/ppsv/api/arithmetic_scalar_ops.rs delete mode 100644 coresimd/ppsv/api/bitwise_ops.rs delete mode 100644 coresimd/ppsv/api/bitwise_reductions.rs delete mode 100644 coresimd/ppsv/api/bitwise_scalar_ops.rs delete mode 100644 coresimd/ppsv/api/cmp.rs delete mode 100644 coresimd/ppsv/api/default.rs delete mode 100644 coresimd/ppsv/api/eq.rs delete mode 100644 coresimd/ppsv/api/float_math.rs delete mode 100644 coresimd/ppsv/api/fmt.rs delete mode 100644 coresimd/ppsv/api/from.rs delete mode 100644 coresimd/ppsv/api/from_bits.rs delete mode 100644 coresimd/ppsv/api/hash.rs delete mode 100644 coresimd/ppsv/api/load_store.rs delete mode 100644 coresimd/ppsv/api/masks.rs delete mode 100644 coresimd/ppsv/api/masks_reductions.rs delete mode 100644 coresimd/ppsv/api/masks_select.rs delete mode 100644 coresimd/ppsv/api/minimal.rs delete mode 100755 coresimd/ppsv/api/minmax.rs delete mode 100644 coresimd/ppsv/api/minmax_reductions.rs delete mode 100644 coresimd/ppsv/api/mod.rs delete 
mode 100644 coresimd/ppsv/api/neg.rs delete mode 100644 coresimd/ppsv/api/partial_eq.rs delete mode 100644 coresimd/ppsv/api/scalar_shifts.rs delete mode 100644 coresimd/ppsv/api/shifts.rs delete mode 100644 coresimd/ppsv/api/swap_bytes.rs delete mode 100644 coresimd/ppsv/codegen/abs.rs delete mode 100644 coresimd/ppsv/codegen/cos.rs delete mode 100644 coresimd/ppsv/codegen/fma.rs delete mode 100644 coresimd/ppsv/codegen/masks_reductions.rs delete mode 100644 coresimd/ppsv/codegen/mod.rs delete mode 100644 coresimd/ppsv/codegen/sin.rs delete mode 100644 coresimd/ppsv/codegen/sqrt.rs delete mode 100644 coresimd/ppsv/codegen/swap_bytes.rs delete mode 100644 coresimd/ppsv/codegen/wrapping.rs delete mode 100644 coresimd/ppsv/mod.rs delete mode 100644 coresimd/ppsv/v128.rs delete mode 100644 coresimd/ppsv/v16.rs delete mode 100644 coresimd/ppsv/v256.rs delete mode 100644 coresimd/ppsv/v32.rs delete mode 100644 coresimd/ppsv/v512.rs delete mode 100644 coresimd/ppsv/v64.rs create mode 100644 coresimd/simd.rs delete mode 100644 crates/coresimd/tests/endian_tests.rs delete mode 100644 crates/coresimd/tests/reductions.rs delete mode 100644 crates/coresimd/tests/v128.rs delete mode 100644 crates/coresimd/tests/v16.rs delete mode 100644 crates/coresimd/tests/v256.rs delete mode 100644 crates/coresimd/tests/v32.rs delete mode 100644 crates/coresimd/tests/v512.rs delete mode 100644 crates/coresimd/tests/v64.rs delete mode 100644 examples/nbody.rs diff --git a/coresimd/aarch64/crypto.rs b/coresimd/aarch64/crypto.rs index 75f247585c..8752945146 100644 --- a/coresimd/aarch64/crypto.rs +++ b/coresimd/aarch64/crypto.rs @@ -185,17 +185,15 @@ pub unsafe fn vsha256su1q_u32( #[cfg(test)] mod tests { use coresimd::aarch64::*; - use simd::*; + use coresimd::simd::*; use std::mem; use stdsimd_test::simd_test; #[simd_test(enable = "crypto")] unsafe fn test_vaeseq_u8() { - let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) - .into_bits(); - let key = u8x16::new(0, 1, 2, 3, 
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) - .into_bits(); - let r: u8x16 = vaeseq_u8(data, key).into_bits(); + let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); + let key = ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let r: u8x16 = ::mem::transmute(vaeseq_u8(data, key)); assert_eq!( r, u8x16::new( @@ -207,11 +205,9 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vaesdq_u8() { - let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) - .into_bits(); - let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) - .into_bits(); - let r: u8x16 = vaesdq_u8(data, key).into_bits(); + let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); + let key = ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let r: u8x16 = ::mem::transmute(vaesdq_u8(data, key)); assert_eq!( r, u8x16::new( @@ -222,9 +218,8 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vaesmcq_u8() { - let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) - .into_bits(); - let r: u8x16 = vaesmcq_u8(data).into_bits(); + let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); + let r: u8x16 = ::mem::transmute(vaesmcq_u8(data)); assert_eq!( r, u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30) @@ -233,9 +228,8 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vaesimcq_u8() { - let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) - .into_bits(); - let r: u8x16 = vaesimcq_u8(data).into_bits(); + let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); + let r: u8x16 = ::mem::transmute(vaesimcq_u8(data)); assert_eq!( r, u8x16::new( @@ -253,23 +247,20 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha1su0q_u32() { - let r: u32x4 = vsha1su0q_u32( - u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 
0xdef0_u32) - .into_bits(), - u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) - .into_bits(), - u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) - .into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha1su0q_u32( + ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), + ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), + ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), + )); assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678)); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1su1q_u32() { - let r: u32x4 = vsha1su1q_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha1su1q_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0) @@ -278,11 +269,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha1cq_u32() { - let r: u32x4 = vsha1cq_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + let r: u32x4 = ::mem::transmute(vsha1cq_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168) @@ -291,11 +282,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha1pq_u32() { - let r: u32x4 = vsha1pq_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + let r: u32x4 = ::mem::transmute(vsha1pq_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 
0xdef0)), + )); assert_eq!( r, u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47) @@ -304,11 +295,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha1mq_u32() { - let r: u32x4 = vsha1mq_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + let r: u32x4 = ::mem::transmute(vsha1mq_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278) @@ -317,11 +308,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha256hq_u32() { - let r: u32x4 = vsha256hq_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha256hq_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef) @@ -330,11 +321,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha256h2q_u32() { - let r: u32x4 = vsha256h2q_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha256h2q_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516) @@ -343,10 +334,10 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn 
test_vsha256su0q_u32() { - let r: u32x4 = vsha256su0q_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha256su0q_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152) @@ -355,11 +346,11 @@ mod tests { #[simd_test(enable = "crypto")] unsafe fn test_vsha256su1q_u32() { - let r: u32x4 = vsha256su1q_u32( - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), - ).into_bits(); + let r: u32x4 = ::mem::transmute(vsha256su1q_u32( + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), + )); assert_eq!( r, u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f) diff --git a/coresimd/aarch64/neon.rs b/coresimd/aarch64/neon.rs index 9656c36302..cc43530bbc 100644 --- a/coresimd/aarch64/neon.rs +++ b/coresimd/aarch64/neon.rs @@ -3,7 +3,6 @@ // FIXME: replace neon with asimd use coresimd::arm::*; -use coresimd::simd::*; use coresimd::simd_llvm::simd_add; #[cfg(test)] use stdsimd_test::assert_instr; @@ -14,34 +13,6 @@ types! { /// ARM-specific 128-bit wide vector of two packed `f64`. 
pub struct float64x2_t(f64, f64); } -impl_from_bits_!( - float64x1_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - float64x2_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); #[allow(improper_ctypes)] extern "C" { @@ -549,7 +520,7 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg(test)] mod tests { use coresimd::aarch64::*; - use simd::*; + use coresimd::simd::*; use std::mem; use stdsimd_test::simd_test; @@ -568,7 +539,7 @@ mod tests { let a = f64x2::new(1., 2.); let b = f64x2::new(8., 7.); let e = f64x2::new(9., 9.); - let r: f64x2 = vaddq_f64(a.into_bits(), b.into_bits()).into_bits(); + let r: f64x2 = ::mem::transmute(vaddq_f64(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -594,205 +565,205 @@ mod tests { #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s8() { - let r = vmaxv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits()); + let r = vmaxv_s8(::mem::transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5))); assert_eq!(r, 7_i8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = vmaxvq_s8(i8x16::new( + let r = vmaxvq_s8(::mem::transmute(i8x16::new( 1, 2, 3, 4, -16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, - ).into_bits()); + ))); assert_eq!(r, 8_i8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s16() { - let r = vmaxv_s16(i16x4::new(1, 2, -4, 3).into_bits()); + let r = vmaxv_s16(::mem::transmute(i16x4::new(1, 2, -4, 3))); assert_eq!(r, 3_i16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s16() { - let r = vmaxvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits()); + let r = vmaxvq_s16(::mem::transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5))); assert_eq!(r, 7_i16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s32() { - let r = vmaxv_s32(i32x2::new(1, -4).into_bits()); + let r = 
vmaxv_s32(::mem::transmute(i32x2::new(1, -4))); assert_eq!(r, 1_i32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s32() { - let r = vmaxvq_s32(i32x4::new(1, 2, -32, 4).into_bits()); + let r = vmaxvq_s32(::mem::transmute(i32x4::new(1, 2, -32, 4))); assert_eq!(r, 4_i32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u8() { - let r = vmaxv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits()); + let r = vmaxv_u8(::mem::transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5))); assert_eq!(r, 8_u8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = vmaxvq_u8(u8x16::new( + let r = vmaxvq_u8(::mem::transmute(u8x16::new( 1, 2, 3, 4, 16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, - ).into_bits()); + ))); assert_eq!(r, 16_u8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u16() { - let r = vmaxv_u16(u16x4::new(1, 2, 4, 3).into_bits()); + let r = vmaxv_u16(::mem::transmute(u16x4::new(1, 2, 4, 3))); assert_eq!(r, 4_u16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u16() { - let r = vmaxvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits()); + let r = vmaxvq_u16(::mem::transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5))); assert_eq!(r, 16_u16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u32() { - let r = vmaxv_u32(u32x2::new(1, 4).into_bits()); + let r = vmaxv_u32(::mem::transmute(u32x2::new(1, 4))); assert_eq!(r, 4_u32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u32() { - let r = vmaxvq_u32(u32x4::new(1, 2, 32, 4).into_bits()); + let r = vmaxvq_u32(::mem::transmute(u32x4::new(1, 2, 32, 4))); assert_eq!(r, 32_u32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_f32() { - let r = vmaxv_f32(f32x2::new(1., 4.).into_bits()); + let r = vmaxv_f32(::mem::transmute(f32x2::new(1., 4.))); assert_eq!(r, 4_f32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_f32() { - let r = vmaxvq_f32(f32x4::new(1., 2., 32., 4.).into_bits()); + let r = vmaxvq_f32(::mem::transmute(f32x4::new(1., 2., 32., 4.))); 
assert_eq!(r, 32_f32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_f64() { - let r = vmaxvq_f64(f64x2::new(1., 4.).into_bits()); + let r = vmaxvq_f64(::mem::transmute(f64x2::new(1., 4.))); assert_eq!(r, 4_f64); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s8() { - let r = vminv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits()); + let r = vminv_s8(::mem::transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5))); assert_eq!(r, -8_i8); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = vminvq_s8(i8x16::new( + let r = vminvq_s8(::mem::transmute(i8x16::new( 1, 2, 3, 4, -16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, - ).into_bits()); + ))); assert_eq!(r, -16_i8); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s16() { - let r = vminv_s16(i16x4::new(1, 2, -4, 3).into_bits()); + let r = vminv_s16(::mem::transmute(i16x4::new(1, 2, -4, 3))); assert_eq!(r, -4_i16); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s16() { - let r = vminvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits()); + let r = vminvq_s16(::mem::transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5))); assert_eq!(r, -16_i16); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s32() { - let r = vminv_s32(i32x2::new(1, -4).into_bits()); + let r = vminv_s32(::mem::transmute(i32x2::new(1, -4))); assert_eq!(r, -4_i32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s32() { - let r = vminvq_s32(i32x4::new(1, 2, -32, 4).into_bits()); + let r = vminvq_s32(::mem::transmute(i32x4::new(1, 2, -32, 4))); assert_eq!(r, -32_i32); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u8() { - let r = vminv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits()); + let r = vminv_u8(::mem::transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5))); assert_eq!(r, 1_u8); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_u8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = vminvq_u8(u8x16::new( + let r = vminvq_u8(::mem::transmute(u8x16::new( 1, 2, 3, 4, 16, 6, 7, 5, 8, 1, 1, 
1, 1, 1, 1, 1, - ).into_bits()); + ))); assert_eq!(r, 1_u8); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u16() { - let r = vminv_u16(u16x4::new(1, 2, 4, 3).into_bits()); + let r = vminv_u16(::mem::transmute(u16x4::new(1, 2, 4, 3))); assert_eq!(r, 1_u16); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_u16() { - let r = vminvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits()); + let r = vminvq_u16(::mem::transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5))); assert_eq!(r, 1_u16); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u32() { - let r = vminv_u32(u32x2::new(1, 4).into_bits()); + let r = vminv_u32(::mem::transmute(u32x2::new(1, 4))); assert_eq!(r, 1_u32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_u32() { - let r = vminvq_u32(u32x4::new(1, 2, 32, 4).into_bits()); + let r = vminvq_u32(::mem::transmute(u32x4::new(1, 2, 32, 4))); assert_eq!(r, 1_u32); } #[simd_test(enable = "neon")] unsafe fn test_vminv_f32() { - let r = vminv_f32(f32x2::new(1., 4.).into_bits()); + let r = vminv_f32(::mem::transmute(f32x2::new(1., 4.))); assert_eq!(r, 1_f32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_f32() { - let r = vminvq_f32(f32x4::new(1., 2., 32., 4.).into_bits()); + let r = vminvq_f32(::mem::transmute(f32x4::new(1., 2., 32., 4.))); assert_eq!(r, 1_f32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_f64() { - let r = vminvq_f64(f64x2::new(1., 4.).into_bits()); + let r = vminvq_f64(::mem::transmute(f64x2::new(1., 4.))); assert_eq!(r, 1_f64); } @@ -804,7 +775,7 @@ mod tests { let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6); - let r: i8x16 = vpminq_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x16 = ::mem::transmute(vpminq_s8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -813,7 +784,7 @@ mod tests { let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 
9); let e = i16x8::new(-2, 3, 5, 7, 0, 2, 4, 6); - let r: i16x8 = vpminq_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x8 = ::mem::transmute(vpminq_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -822,7 +793,7 @@ mod tests { let a = i32x4::new(1, -2, 3, 4); let b = i32x4::new(0, 3, 2, 5); let e = i32x4::new(-2, 3, 0, 2); - let r: i32x4 = vpminq_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x4 = ::mem::transmute(vpminq_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -834,7 +805,7 @@ mod tests { let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6); - let r: u8x16 = vpminq_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x16 = ::mem::transmute(vpminq_u8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -843,7 +814,7 @@ mod tests { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u16x8::new(1, 3, 5, 7, 0, 2, 4, 6); - let r: u16x8 = vpminq_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x8 = ::mem::transmute(vpminq_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -852,7 +823,7 @@ mod tests { let a = u32x4::new(1, 2, 3, 4); let b = u32x4::new(0, 3, 2, 5); let e = u32x4::new(1, 3, 0, 2); - let r: u32x4 = vpminq_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x4 = ::mem::transmute(vpminq_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -861,7 +832,7 @@ mod tests { let a = f32x4::new(1., -2., 3., 4.); let b = f32x4::new(0., 3., 2., 5.); let e = f32x4::new(-2., 3., 0., 2.); - let r: f32x4 = vpminq_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x4 = ::mem::transmute(vpminq_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -870,7 +841,7 @@ mod tests { let a = f64x2::new(1., -2.); let b = f64x2::new(0., 3.); let e = f64x2::new(-2., 
0.); - let r: f64x2 = vpminq_f64(a.into_bits(), b.into_bits()).into_bits(); + let r: f64x2 = ::mem::transmute(vpminq_f64(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -882,7 +853,7 @@ mod tests { let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9); - let r: i8x16 = vpmaxq_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x16 = ::mem::transmute(vpmaxq_s8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -891,7 +862,7 @@ mod tests { let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i16x8::new(1, 4, 6, 8, 3, 5, 7, 9); - let r: i16x8 = vpmaxq_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x8 = ::mem::transmute(vpmaxq_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -900,7 +871,7 @@ mod tests { let a = i32x4::new(1, -2, 3, 4); let b = i32x4::new(0, 3, 2, 5); let e = i32x4::new(1, 4, 3, 5); - let r: i32x4 = vpmaxq_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x4 = ::mem::transmute(vpmaxq_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -912,7 +883,7 @@ mod tests { let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9); - let r: u8x16 = vpmaxq_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x16 = ::mem::transmute(vpmaxq_u8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -921,7 +892,7 @@ mod tests { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u16x8::new(2, 4, 6, 8, 3, 5, 7, 9); - let r: u16x8 = vpmaxq_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x8 = ::mem::transmute(vpmaxq_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -930,7 +901,7 @@ mod tests { let a = u32x4::new(1, 2, 3, 4); let b 
= u32x4::new(0, 3, 2, 5); let e = u32x4::new(2, 4, 3, 5); - let r: u32x4 = vpmaxq_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x4 = ::mem::transmute(vpmaxq_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -939,7 +910,7 @@ mod tests { let a = f32x4::new(1., -2., 3., 4.); let b = f32x4::new(0., 3., 2., 5.); let e = f32x4::new(1., 4., 3., 5.); - let r: f32x4 = vpmaxq_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x4 = ::mem::transmute(vpmaxq_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -948,7 +919,7 @@ mod tests { let a = f64x2::new(1., -2.); let b = f64x2::new(0., 3.); let e = f64x2::new(1., 3.); - let r: f64x2 = vpmaxq_f64(a.into_bits(), b.into_bits()).into_bits(); + let r: f64x2 = ::mem::transmute(vpmaxq_f64(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } } diff --git a/coresimd/arm/neon.rs b/coresimd/arm/neon.rs index f00096505a..1734de1efb 100644 --- a/coresimd/arm/neon.rs +++ b/coresimd/arm/neon.rs @@ -1,6 +1,5 @@ //! ARMv7 NEON intrinsics -use coresimd::simd::*; use coresimd::simd_llvm::*; #[cfg(test)] use stdsimd_test::assert_instr; @@ -66,304 +65,6 @@ types! 
{ pub struct uint64x2_t(u64, u64); } -impl_from_bits_!( - int8x8_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - uint8x8_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - int16x4_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - uint16x4_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - int32x2_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - uint32x2_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - int64x1_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - float32x2_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - poly8x8_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -impl_from_bits_!( - poly16x4_t: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); - -impl_from_bits_!( - int8x16_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - uint8x16_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - poly8x16_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - int16x8_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - uint16x8_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - 
i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - poly16x8_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - int32x4_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - uint32x4_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - float32x4_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - int64x2_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - uint64x2_t: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); - #[allow(improper_ctypes)] extern "C" { #[cfg_attr( @@ -949,7 +650,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[cfg(test)] mod tests { use coresimd::arm::*; - use simd::*; + use coresimd::simd::*; use std::mem; use stdsimd_test::simd_test; @@ -958,7 +659,7 @@ mod tests { let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: i8x8 = vadd_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x8 = ::mem::transmute(vadd_s8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -967,7 +668,7 @@ mod tests { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); - let r: i8x16 = vaddq_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x16 = ::mem::transmute(vaddq_s8(::mem::transmute(a), 
::mem::transmute(b))); assert_eq!(r, e); } @@ -976,7 +677,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(8, 7, 6, 5); let e = i16x4::new(9, 9, 9, 9); - let r: i16x4 = vadd_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x4 = ::mem::transmute(vadd_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -985,7 +686,7 @@ mod tests { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: i16x8 = vaddq_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x8 = ::mem::transmute(vaddq_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -994,7 +695,7 @@ mod tests { let a = i32x2::new(1, 2); let b = i32x2::new(8, 7); let e = i32x2::new(9, 9); - let r: i32x2 = vadd_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x2 = ::mem::transmute(vadd_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1003,7 +704,7 @@ mod tests { let a = i32x4::new(1, 2, 3, 4); let b = i32x4::new(8, 7, 6, 5); let e = i32x4::new(9, 9, 9, 9); - let r: i32x4 = vaddq_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x4 = ::mem::transmute(vaddq_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1012,7 +713,7 @@ mod tests { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: u8x8 = vadd_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x8 = ::mem::transmute(vadd_u8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1021,7 +722,7 @@ mod tests { let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); - let r: u8x16 = vaddq_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x16 = ::mem::transmute(vaddq_u8(::mem::transmute(a), 
::mem::transmute(b))); assert_eq!(r, e); } @@ -1030,7 +731,7 @@ mod tests { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(8, 7, 6, 5); let e = u16x4::new(9, 9, 9, 9); - let r: u16x4 = vadd_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x4 = ::mem::transmute(vadd_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1039,7 +740,7 @@ mod tests { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: u16x8 = vaddq_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x8 = ::mem::transmute(vaddq_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1048,7 +749,7 @@ mod tests { let a = u32x2::new(1, 2); let b = u32x2::new(8, 7); let e = u32x2::new(9, 9); - let r: u32x2 = vadd_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x2 = ::mem::transmute(vadd_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1057,7 +758,7 @@ mod tests { let a = u32x4::new(1, 2, 3, 4); let b = u32x4::new(8, 7, 6, 5); let e = u32x4::new(9, 9, 9, 9); - let r: u32x4 = vaddq_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x4 = ::mem::transmute(vaddq_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1066,7 +767,7 @@ mod tests { let a = f32x2::new(1., 2.); let b = f32x2::new(8., 7.); let e = f32x2::new(9., 9.); - let r: f32x2 = vadd_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x2 = ::mem::transmute(vadd_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1075,7 +776,7 @@ mod tests { let a = f32x4::new(1., 2., 3., 4.); let b = f32x4::new(8., 7., 6., 5.); let e = f32x4::new(9., 9., 9., 9.); - let r: f32x4 = vaddq_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x4 = ::mem::transmute(vaddq_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1085,7 +786,7 @@ mod tests { let a = i8x8::new(v, v, v, v, v, v, v, v); let v = 2 * (v as i16); 
let e = i16x8::new(v, v, v, v, v, v, v, v); - let r: i16x8 = vaddl_s8(a.into_bits(), a.into_bits()).into_bits(); + let r: i16x8 = ::mem::transmute(vaddl_s8(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1095,7 +796,7 @@ mod tests { let a = i16x4::new(v, v, v, v); let v = 2 * (v as i32); let e = i32x4::new(v, v, v, v); - let r: i32x4 = vaddl_s16(a.into_bits(), a.into_bits()).into_bits(); + let r: i32x4 = ::mem::transmute(vaddl_s16(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1105,7 +806,7 @@ mod tests { let a = i32x2::new(v, v); let v = 2 * (v as i64); let e = i64x2::new(v, v); - let r: i64x2 = vaddl_s32(a.into_bits(), a.into_bits()).into_bits(); + let r: i64x2 = ::mem::transmute(vaddl_s32(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1115,7 +816,7 @@ mod tests { let a = u8x8::new(v, v, v, v, v, v, v, v); let v = 2 * (v as u16); let e = u16x8::new(v, v, v, v, v, v, v, v); - let r: u16x8 = vaddl_u8(a.into_bits(), a.into_bits()).into_bits(); + let r: u16x8 = ::mem::transmute(vaddl_u8(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1125,7 +826,7 @@ mod tests { let a = u16x4::new(v, v, v, v); let v = 2 * (v as u32); let e = u32x4::new(v, v, v, v); - let r: u32x4 = vaddl_u16(a.into_bits(), a.into_bits()).into_bits(); + let r: u32x4 = ::mem::transmute(vaddl_u16(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1135,7 +836,7 @@ mod tests { let a = u32x2::new(v, v); let v = 2 * (v as u64); let e = u64x2::new(v, v); - let r: u64x2 = vaddl_u32(a.into_bits(), a.into_bits()).into_bits(); + let r: u64x2 = ::mem::transmute(vaddl_u32(::mem::transmute(a), ::mem::transmute(a))); assert_eq!(r, e); } @@ -1143,7 +844,7 @@ mod tests { unsafe fn test_vmovn_s16() { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = vmovn_s16(a.into_bits()).into_bits(); + let r: i8x8 = ::mem::transmute(vmovn_s16(::mem::transmute(a))); assert_eq!(r, e); 
} @@ -1151,7 +852,7 @@ mod tests { unsafe fn test_vmovn_s32() { let a = i32x4::new(1, 2, 3, 4); let e = i16x4::new(1, 2, 3, 4); - let r: i16x4 = vmovn_s32(a.into_bits()).into_bits(); + let r: i16x4 = ::mem::transmute(vmovn_s32(::mem::transmute(a))); assert_eq!(r, e); } @@ -1159,7 +860,7 @@ mod tests { unsafe fn test_vmovn_s64() { let a = i64x2::new(1, 2); let e = i32x2::new(1, 2); - let r: i32x2 = vmovn_s64(a.into_bits()).into_bits(); + let r: i32x2 = ::mem::transmute(vmovn_s64(::mem::transmute(a))); assert_eq!(r, e); } @@ -1167,7 +868,7 @@ mod tests { unsafe fn test_vmovn_u16() { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = vmovn_u16(a.into_bits()).into_bits(); + let r: u8x8 = ::mem::transmute(vmovn_u16(::mem::transmute(a))); assert_eq!(r, e); } @@ -1175,7 +876,7 @@ mod tests { unsafe fn test_vmovn_u32() { let a = u32x4::new(1, 2, 3, 4); let e = u16x4::new(1, 2, 3, 4); - let r: u16x4 = vmovn_u32(a.into_bits()).into_bits(); + let r: u16x4 = ::mem::transmute(vmovn_u32(::mem::transmute(a))); assert_eq!(r, e); } @@ -1183,7 +884,7 @@ mod tests { unsafe fn test_vmovn_u64() { let a = u64x2::new(1, 2); let e = u32x2::new(1, 2); - let r: u32x2 = vmovn_u64(a.into_bits()).into_bits(); + let r: u32x2 = ::mem::transmute(vmovn_u64(::mem::transmute(a))); assert_eq!(r, e); } @@ -1191,7 +892,7 @@ mod tests { unsafe fn test_vmovl_s8() { let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = vmovl_s8(a.into_bits()).into_bits(); + let r: i16x8 = ::mem::transmute(vmovl_s8(::mem::transmute(a))); assert_eq!(r, e); } @@ -1199,7 +900,7 @@ mod tests { unsafe fn test_vmovl_s16() { let e = i32x4::new(1, 2, 3, 4); let a = i16x4::new(1, 2, 3, 4); - let r: i32x4 = vmovl_s16(a.into_bits()).into_bits(); + let r: i32x4 = ::mem::transmute(vmovl_s16(::mem::transmute(a))); assert_eq!(r, e); } @@ -1207,7 +908,7 @@ mod tests { unsafe fn test_vmovl_s32() { let e = i64x2::new(1, 2); let a = 
i32x2::new(1, 2); - let r: i64x2 = vmovl_s32(a.into_bits()).into_bits(); + let r: i64x2 = ::mem::transmute(vmovl_s32(::mem::transmute(a))); assert_eq!(r, e); } @@ -1215,7 +916,7 @@ mod tests { unsafe fn test_vmovl_u8() { let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = vmovl_u8(a.into_bits()).into_bits(); + let r: u16x8 = ::mem::transmute(vmovl_u8(::mem::transmute(a))); assert_eq!(r, e); } @@ -1223,7 +924,7 @@ mod tests { unsafe fn test_vmovl_u16() { let e = u32x4::new(1, 2, 3, 4); let a = u16x4::new(1, 2, 3, 4); - let r: u32x4 = vmovl_u16(a.into_bits()).into_bits(); + let r: u32x4 = ::mem::transmute(vmovl_u16(::mem::transmute(a))); assert_eq!(r, e); } @@ -1231,7 +932,7 @@ mod tests { unsafe fn test_vmovl_u32() { let e = u64x2::new(1, 2); let a = u32x2::new(1, 2); - let r: u64x2 = vmovl_u32(a.into_bits()).into_bits(); + let r: u64x2 = ::mem::transmute(vmovl_u32(::mem::transmute(a))); assert_eq!(r, e); } @@ -1239,7 +940,7 @@ mod tests { unsafe fn test_vrsqrt_f32() { let a = f32x2::new(1.0, 2.0); let e = f32x2::new(0.9980469, 0.7050781); - let r: f32x2 = vrsqrte_f32(a.into_bits()).into_bits(); + let r: f32x2 = ::mem::transmute(vrsqrte_f32(::mem::transmute(a))); assert_eq!(r, e); } @@ -1248,7 +949,7 @@ mod tests { let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6); - let r: i8x8 = vpmin_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x8 = ::mem::transmute(vpmin_s8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1257,7 +958,7 @@ mod tests { let a = i16x4::new(1, 2, 3, -4); let b = i16x4::new(0, 3, 2, 5); let e = i16x4::new(1, -4, 0, 2); - let r: i16x4 = vpmin_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x4 = ::mem::transmute(vpmin_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1266,7 +967,7 @@ mod tests { let a = i32x2::new(1, -2); let b = i32x2::new(0, 3); let e = 
i32x2::new(-2, 0); - let r: i32x2 = vpmin_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x2 = ::mem::transmute(vpmin_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1275,7 +976,7 @@ mod tests { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6); - let r: u8x8 = vpmin_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x8 = ::mem::transmute(vpmin_u8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1284,7 +985,7 @@ mod tests { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(0, 3, 2, 5); let e = u16x4::new(1, 3, 0, 2); - let r: u16x4 = vpmin_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x4 = ::mem::transmute(vpmin_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1293,7 +994,7 @@ mod tests { let a = u32x2::new(1, 2); let b = u32x2::new(0, 3); let e = u32x2::new(1, 0); - let r: u32x2 = vpmin_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x2 = ::mem::transmute(vpmin_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1302,7 +1003,7 @@ mod tests { let a = f32x2::new(1., -2.); let b = f32x2::new(0., 3.); let e = f32x2::new(-2., 0.); - let r: f32x2 = vpmin_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x2 = ::mem::transmute(vpmin_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1311,7 +1012,7 @@ mod tests { let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9); - let r: i8x8 = vpmax_s8(a.into_bits(), b.into_bits()).into_bits(); + let r: i8x8 = ::mem::transmute(vpmax_s8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1320,7 +1021,7 @@ mod tests { let a = i16x4::new(1, 2, 3, -4); let b = i16x4::new(0, 3, 2, 5); let e = i16x4::new(2, 3, 3, 5); - let r: i16x4 = vpmax_s16(a.into_bits(), b.into_bits()).into_bits(); + let r: i16x4 = 
::mem::transmute(vpmax_s16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1329,7 +1030,7 @@ mod tests { let a = i32x2::new(1, -2); let b = i32x2::new(0, 3); let e = i32x2::new(1, 3); - let r: i32x2 = vpmax_s32(a.into_bits(), b.into_bits()).into_bits(); + let r: i32x2 = ::mem::transmute(vpmax_s32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1338,7 +1039,7 @@ mod tests { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9); - let r: u8x8 = vpmax_u8(a.into_bits(), b.into_bits()).into_bits(); + let r: u8x8 = ::mem::transmute(vpmax_u8(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1347,7 +1048,7 @@ mod tests { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(0, 3, 2, 5); let e = u16x4::new(2, 4, 3, 5); - let r: u16x4 = vpmax_u16(a.into_bits(), b.into_bits()).into_bits(); + let r: u16x4 = ::mem::transmute(vpmax_u16(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1356,7 +1057,7 @@ mod tests { let a = u32x2::new(1, 2); let b = u32x2::new(0, 3); let e = u32x2::new(2, 3); - let r: u32x2 = vpmax_u32(a.into_bits(), b.into_bits()).into_bits(); + let r: u32x2 = ::mem::transmute(vpmax_u32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } @@ -1365,7 +1066,7 @@ mod tests { let a = f32x2::new(1., -2.); let b = f32x2::new(0., 3.); let e = f32x2::new(1., 3.); - let r: f32x2 = vpmax_f32(a.into_bits(), b.into_bits()).into_bits(); + let r: f32x2 = ::mem::transmute(vpmax_f32(::mem::transmute(a), ::mem::transmute(b))); assert_eq!(r, e); } } diff --git a/coresimd/macros.rs b/coresimd/macros.rs index fa96f50c81..343f425c1a 100644 --- a/coresimd/macros.rs +++ b/coresimd/macros.rs @@ -13,44 +13,3 @@ macro_rules! types { pub struct $name($($fields)*); )*) } - -macro_rules! cfg_if { - ($( - if #[cfg($($meta:meta),*)] { $($it:item)* } - ) else * else { - $($it2:item)* - }) => { - __cfg_if_items! 
{ - () ; - $( ( ($($meta),*) ($($it)*) ), )* - ( () ($($it2)*) ), - } - }; - ( - if #[cfg($($i_met:meta),*)] { $($i_it:item)* } - $( - else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } - )* - ) => { - __cfg_if_items! { - () ; - ( ($($i_met),*) ($($i_it)*) ), - $( ( ($($e_met),*) ($($e_it)*) ), )* - ( () () ), - } - } -} - -macro_rules! __cfg_if_items { - (($($not:meta,)*) ; ) => {}; - (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { - __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* } - __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* } - } -} - -macro_rules! __cfg_if_apply { - ($m:meta, $($it:item)*) => { - $(#[$m] $it)* - } -} diff --git a/coresimd/mips/msa.rs b/coresimd/mips/msa.rs index d26ad305d6..cad533dd82 100644 --- a/coresimd/mips/msa.rs +++ b/coresimd/mips/msa.rs @@ -5,10 +5,17 @@ //! //! [msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf -use coresimd::simd::*; #[cfg(test)] use stdsimd_test::assert_instr; +types! { + /// MIPS-specific 128-bit wide vector of 16 packed `i8`. 
+ pub struct i8x16( + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + ); +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.mips.add.a.b"] @@ -35,20 +42,25 @@ mod tests { #[simd_test(enable = "msa")] unsafe fn __msa_add_a_b() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = i8x16( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let b = i8x16::new( + let b = i8x16( -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, ); - let r = i8x16::splat(5); + let r = i8x16( + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5, + ); assert_eq!(r, msa::__msa_add_a_b(a, b)); } diff --git a/coresimd/mod.rs b/coresimd/mod.rs index 6fc312f420..67bdbf3114 100644 --- a/coresimd/mod.rs +++ b/coresimd/mod.rs @@ -3,18 +3,7 @@ #[macro_use] mod macros; -#[macro_use] -mod ppsv; - -/// Platform independent SIMD vector types and operations. -/// -/// This is an **unstable** module for portable SIMD operations. This module -/// has not yet gone through an RFC and is likely to change, but feedback is -/// always welcome! -#[unstable(feature = "stdsimd", issue = "27731")] -pub mod simd { - pub use coresimd::ppsv::*; -} +mod simd; /// Platform dependent vendor intrinsics. /// diff --git a/coresimd/powerpc/altivec.rs b/coresimd/powerpc/altivec.rs index 62371a6a87..1049a023b4 100644 --- a/coresimd/powerpc/altivec.rs +++ b/coresimd/powerpc/altivec.rs @@ -13,8 +13,8 @@ #![allow(non_camel_case_types)] -use coresimd::simd::*; use coresimd::simd_llvm::*; +use coresimd::simd::*; use mem; @@ -49,304 +49,6 @@ types! 
{ pub struct vector_float(f32, f32, f32, f32); } -impl_from_bits_!( - vector_signed_char: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - i8x16: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_unsigned_char: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - u8x16: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_bool_char: m64x2, - m32x4, - m16x8, - m8x16, - vector_bool_short, - vector_bool_int -); -impl_from_bits_!(m8x16: vector_bool_char, vector_bool_short, vector_bool_int); - -impl_from_bits_!( - vector_signed_short: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - i16x8: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - 
vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_unsigned_short: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_signed_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - u16x8: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_bool_short: m64x2, - m32x4, - m16x8, - m8x16, - vector_bool_int -); -impl_from_bits_!(m16x8: vector_bool_short, vector_bool_int); - -impl_from_bits_!( - vector_signed_int: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_unsigned_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - i32x4: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_unsigned_int: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_float, - vector_bool_int -); -impl_from_bits_!( - u32x4: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - -impl_from_bits_!( - vector_bool_int: u64x2, - i64x2, - 
f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!(m32x4: vector_bool_int); - -impl_from_bits_!( - vector_float: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_signed_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_bool_int -); -impl_from_bits_!( - f32x4: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int -); - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.ppc.altivec.vperm"] @@ -455,7 +157,7 @@ mod sealed { pub unsafe fn vec_add_bc_sc( a: vector_bool_char, b: vector_signed_char, ) -> vector_signed_char { - simd_add(a.into_bits(), b) + simd_add(::mem::transmute(a), b) } impl VectorAdd for vector_bool_char { type Result = vector_signed_char; @@ -497,7 +199,7 @@ mod sealed { pub unsafe fn vec_add_bc_uc( a: vector_bool_char, b: vector_unsigned_char, ) -> vector_unsigned_char { - simd_add(a.into_bits(), b) + simd_add(::mem::transmute(a), b) } impl VectorAdd for vector_bool_char { type Result = vector_unsigned_char; @@ -539,7 +241,7 @@ mod sealed { pub unsafe fn vec_add_bs_ss( a: vector_bool_short, b: vector_signed_short, ) -> vector_signed_short { - let a: i16x8 = a.into_bits(); + let a: i16x8 = ::mem::transmute(a); let a: vector_signed_short = simd_cast(a); simd_add(a, b) } @@ -584,7 +286,7 @@ mod sealed { pub unsafe fn vec_add_bs_us( a: vector_bool_short, b: vector_unsigned_short, ) -> vector_unsigned_short { - let a: i16x8 = a.into_bits(); + let a: i16x8 = ::mem::transmute(a); let a: vector_unsigned_short = simd_cast(a); simd_add(a, b) } @@ -629,7 +331,7 @@ mod sealed { pub unsafe fn vec_add_bi_si( a: vector_bool_int, b: vector_signed_int, 
) -> vector_signed_int { - let a: i32x4 = a.into_bits(); + let a: i32x4 = ::mem::transmute(a); let a: vector_signed_int = simd_cast(a); simd_add(a, b) } @@ -673,7 +375,7 @@ mod sealed { pub unsafe fn vec_add_bi_ui( a: vector_bool_int, b: vector_unsigned_int, ) -> vector_unsigned_int { - let a: i32x4 = a.into_bits(); + let a: i32x4 = ::mem::transmute(a); let a: vector_unsigned_int = simd_cast(a); simd_add(a, b) } @@ -754,10 +456,10 @@ mod endian { // vperm has big-endian bias // // Xor the mask and flip the arguments - let d = u8x16::new( + let d = ::mem::transmute(u8x16::new( 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ).into_bits(); + )); let c = simd_xor(c, d); b.vec_vperm(a, c) @@ -816,19 +518,22 @@ mod tests { #[cfg(target_arch = "powerpc64")] use coresimd::arch::powerpc64::*; - use simd::*; + use coresimd::simd::*; use stdsimd_test::simd_test; macro_rules! test_vec_perm { - {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { + {$name:ident, + $shorttype:ident, $longtype:ident, + [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { #[simd_test(enable = "altivec")] unsafe fn $name() { - let a: $longtype = $shorttype::new($($a),+).into_bits(); - let b = $shorttype::new($($b),+).into_bits(); - let c = u8x16::new($($c),+).into_bits(); + let a: $longtype = ::mem::transmute($shorttype::new($($a),+)); + let b: $longtype = ::mem::transmute($shorttype::new($($b),+)); + let c: vector_unsigned_char = ::mem::transmute(u8x16::new($($c),+)); let d = $shorttype::new($($d),+); - assert_eq!(d, vec_perm(a, b, c).into_bits()); + let r: $shorttype = ::mem::transmute(vec_perm(a, b, c)); + assert_eq!(d, r); } } } @@ -847,6 +552,7 @@ mod tests { [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} + test_vec_perm!{test_vec_perm_m8x16, m8x16, 
vector_bool_char, [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], @@ -854,7 +560,6 @@ mod tests { [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]} - test_vec_perm!{test_vec_perm_u16x8, u16x8, vector_unsigned_short, [0, 1, 2, 3, 4, 5, 6, 7], @@ -908,7 +613,7 @@ mod tests { #[simd_test(enable = "altivec")] unsafe fn test_vec_madds() { - let a: vector_signed_short = i16x8::new( + let a: vector_signed_short = ::mem::transmute(i16x8::new( 0 * 256, 1 * 256, 2 * 256, @@ -917,20 +622,20 @@ mod tests { 5 * 256, 6 * 256, 7 * 256, - ).into_bits(); + )); let b: vector_signed_short = - i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits(); + ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_short = - i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits(); + ::mem::transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21); - assert_eq!(d, vec_madds(a, b, c).into_bits()); + assert_eq!(d, ::mem::transmute(vec_madds(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mradds() { - let a: vector_signed_short = i16x8::new( + let a: vector_signed_short = ::mem::transmute(i16x8::new( 0 * 256, 1 * 256, 2 * 256, @@ -939,20 +644,20 @@ mod tests { 5 * 256, 6 * 256, 7 * 256, - ).into_bits(); + )); let b: vector_signed_short = - i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits(); + ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_short = - i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::max_value() - 1).into_bits(); + ::mem::transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::max_value() - 1)); let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, i16::max_value()); - assert_eq!(d, vec_mradds(a, b, c).into_bits()); + assert_eq!(d, 
::mem::transmute(vec_mradds(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msums_unsigned() { - let a: vector_unsigned_short = u16x8::new( + let a: vector_unsigned_short = ::mem::transmute(u16x8::new( 0 * 256, 1 * 256, 2 * 256, @@ -961,10 +666,10 @@ mod tests { 5 * 256, 6 * 256, 7 * 256, - ).into_bits(); + )); let b: vector_unsigned_short = - u16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits(); - let c: vector_unsigned_int = u32x4::new(0, 1, 2, 3).into_bits(); + ::mem::transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3)); let d = u32x4::new( (0 + 1) * 256 * 256 + 0, (2 + 3) * 256 * 256 + 1, @@ -972,12 +677,12 @@ mod tests { (6 + 7) * 256 * 256 + 3, ); - assert_eq!(d, vec_msums(a, b, c).into_bits()); + assert_eq!(d, ::mem::transmute(vec_msums(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msums_signed() { - let a: vector_signed_short = i16x8::new( + let a: vector_signed_short = ::mem::transmute(i16x8::new( 0 * 256, -1 * 256, 2 * 256, @@ -986,10 +691,10 @@ mod tests { -5 * 256, 6 * 256, -7 * 256, - ).into_bits(); + )); let b: vector_signed_short = - i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits(); - let c: vector_signed_int = i32x4::new(0, 1, 2, 3).into_bits(); + ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new( (0 - 1) * 256 * 256 + 0, (2 - 3) * 256 * 256 + 1, @@ -997,16 +702,16 @@ mod tests { (6 - 7) * 256 * 256 + 3, ); - assert_eq!(d, vec_msums(a, b, c).into_bits()); + assert_eq!(d, ::mem::transmute(vec_msums(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn vec_add_i32x4_i32x4() { let x = i32x4::new(1, 2, 3, 4); let y = i32x4::new(4, 3, 2, 1); - let x: vector_signed_int = x.into_bits(); - let y: vector_signed_int = y.into_bits(); + let x: vector_signed_int = ::mem::transmute(x); + let y: 
vector_signed_int = ::mem::transmute(y); let z = vec_add(x, y); - assert_eq!(i32x4::splat(5), z.into_bits()); + assert_eq!(i32x4::splat(5), ::mem::transmute(z)); } } diff --git a/coresimd/powerpc64/vsx.rs b/coresimd/powerpc64/vsx.rs index 51a8e824c1..b5363f5ad6 100644 --- a/coresimd/powerpc64/vsx.rs +++ b/coresimd/powerpc64/vsx.rs @@ -8,8 +8,6 @@ #![allow(non_camel_case_types)] -use coresimd::powerpc::*; -use coresimd::simd::*; use coresimd::simd_llvm::*; #[cfg(test)] @@ -34,194 +32,8 @@ types! { // pub struct vector_unsigned___int128 = i128x1; } -impl_from_bits_!( - vector_signed_long: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_unsigned_long, - vector_bool_long, - vector_double -); -impl_from_bits_!( - i64x2: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_unsigned_long: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_signed_long, - vector_bool_long, - vector_double -); -impl_from_bits_!( - u64x2: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_signed_long, - 
vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_double: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_signed_long, - vector_unsigned_long, - vector_bool_long -); -impl_from_bits_!( - f64x2: vector_signed_char, - vector_unsigned_char, - vector_bool_char, - vector_signed_short, - vector_unsigned_short, - vector_bool_short, - vector_signed_int, - vector_unsigned_int, - vector_float, - vector_bool_int, - vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!(vector_bool_long: m64x2); -impl_from_bits_!(m64x2: vector_bool_long); -impl_from_bits_!(m32x4: vector_bool_long); -impl_from_bits_!(m16x8: vector_bool_long); -impl_from_bits_!(m8x16: vector_bool_long); -impl_from_bits_!(vector_bool_char: vector_bool_long); -impl_from_bits_!(vector_bool_short: vector_bool_long); -impl_from_bits_!(vector_bool_int: vector_bool_long); - -impl_from_bits_!( - vector_signed_char: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_unsigned_char: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_signed_short: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_unsigned_short: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_signed_int: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - -impl_from_bits_!( - vector_unsigned_int: vector_signed_long, - vector_unsigned_long, - vector_bool_long, - vector_double -); - mod sealed { - + use 
coresimd::simd::*; use super::*; pub trait VectorPermDI { @@ -283,20 +95,20 @@ mod tests { #[cfg(target_arch = "powerpc64")] use coresimd::arch::powerpc64::*; - use simd::*; + use coresimd::simd::*; use stdsimd_test::simd_test; macro_rules! test_vec_xxpermdi { {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { #[simd_test(enable = "vsx")] unsafe fn $name() { - let a: $longtype = $shorttype::new($($a),+, $($b),+).into_bits(); - let b = $shorttype::new($($c),+, $($d),+).into_bits(); + let a: $longtype = ::mem::transmute($shorttype::new($($a),+, $($b),+)); + let b = ::mem::transmute($shorttype::new($($c),+, $($d),+)); - assert_eq!($shorttype::new($($a),+, $($c),+), vec_xxpermdi(a, b, 0).into_bits()); - assert_eq!($shorttype::new($($b),+, $($c),+), vec_xxpermdi(a, b, 1).into_bits()); - assert_eq!($shorttype::new($($a),+, $($d),+), vec_xxpermdi(a, b, 2).into_bits()); - assert_eq!($shorttype::new($($b),+, $($d),+), vec_xxpermdi(a, b, 3).into_bits()); + assert_eq!($shorttype::new($($a),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 0))); + assert_eq!($shorttype::new($($b),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 1))); + assert_eq!($shorttype::new($($a),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 2))); + assert_eq!($shorttype::new($($b),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 3))); } } } diff --git a/coresimd/ppsv/api/arithmetic_ops.rs b/coresimd/ppsv/api/arithmetic_ops.rs deleted file mode 100644 index 28c97c1740..0000000000 --- a/coresimd/ppsv/api/arithmetic_ops.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! Lane-wise arithmetic operations. -#![allow(unused)] - -macro_rules! 
impl_arithmetic_ops { - ($id:ident) => { - impl ::ops::Add for $id { - type Output = Self; - #[inline] - fn add(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_add; - unsafe { simd_add(self, other) } - } - } - - impl ::ops::Sub for $id { - type Output = Self; - #[inline] - fn sub(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_sub; - unsafe { simd_sub(self, other) } - } - } - - impl ::ops::Mul for $id { - type Output = Self; - #[inline] - fn mul(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_mul; - unsafe { simd_mul(self, other) } - } - } - - impl ::ops::Div for $id { - type Output = Self; - #[inline] - fn div(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_div; - unsafe { simd_div(self, other) } - } - } - - impl ::ops::Rem for $id { - type Output = Self; - #[inline] - fn rem(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_rem; - unsafe { simd_rem(self, other) } - } - } - - impl ::ops::AddAssign for $id { - #[inline] - fn add_assign(&mut self, other: Self) { - *self = *self + other; - } - } - - impl ::ops::SubAssign for $id { - #[inline] - fn sub_assign(&mut self, other: Self) { - *self = *self - other; - } - } - - impl ::ops::MulAssign for $id { - #[inline] - fn mul_assign(&mut self, other: Self) { - *self = *self * other; - } - } - - impl ::ops::DivAssign for $id { - #[inline] - fn div_assign(&mut self, other: Self) { - *self = *self / other; - } - } - - impl ::ops::RemAssign for $id { - #[inline] - fn rem_assign(&mut self, other: Self) { - *self = *self % other; - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_arithmetic_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn arithmetic() { - use coresimd::simd::$id; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let f = $id::splat(4 as $elem_ty); - - // add - assert_eq!(z + z, z); - assert_eq!(o + z, o); - assert_eq!(t + z, t); - assert_eq!(t + t, f); - // sub - assert_eq!(z - z, z); - assert_eq!(o - z, o); - assert_eq!(t - z, t); - assert_eq!(f - t, t); - assert_eq!(f - o - o, t); - // mul - assert_eq!(z * z, z); - assert_eq!(z * o, z); - assert_eq!(z * t, z); - assert_eq!(o * t, t); - assert_eq!(t * t, f); - // div - assert_eq!(z / o, z); - assert_eq!(t / o, t); - assert_eq!(f / o, f); - assert_eq!(t / t, o); - assert_eq!(f / t, t); - // rem - assert_eq!(o % o, z); - assert_eq!(f % t, z); - - { - let mut v = z; - assert_eq!(v, z); - v += o; // add_assign - assert_eq!(v, o); - v -= o; // sub_assign - assert_eq!(v, z); - v = t; - v *= o; // mul_assign - assert_eq!(v, t); - v *= t; - assert_eq!(v, f); - v /= o; // div_assign - assert_eq!(v, f); - v /= t; - assert_eq!(v, t); - v %= t; // rem_assign - assert_eq!(v, z); - } - } - }; -} diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs deleted file mode 100644 index 7b324a7bab..0000000000 --- a/coresimd/ppsv/api/arithmetic_reductions.rs +++ /dev/null @@ -1,261 +0,0 @@ -//! Implements portable arithmetic vector reductions. -#![allow(unused)] - -macro_rules! impl_int_arithmetic_reductions { - ($id:ident, $elem_ty:ident) => { - impl $id { - /// Horizontal sum of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) - /// - /// If an operation overflows it returns the mathematical result - /// modulo `2^n` where `n` is the number of times it overflows. 
- #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn wrapping_sum(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_add_ordered; - unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) } - } - /// Horizontal sum of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) - /// - /// If an operation overflows it returns the mathematical result - /// modulo `2^n` where `n` is the number of times it overflows. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn wrapping_sum(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use super::codegen::wrapping::Wrapping; - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x = Wrapping::add(x, self.extract(i) as $elem_ty); - } - x - } - - /// Horizontal product of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) - /// - /// If an operation overflows it returns the mathematical result - /// modulo `2^n` where `n` is the number of times it overflows. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn wrapping_product(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_mul_ordered; - unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) } - } - /// Horizontal product of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) - /// - /// If an operation overflows it returns the mathematical result - /// modulo `2^n` where `n` is the number of times it overflows. 
- #[cfg(target_arch = "aarch64")] - #[inline] - pub fn wrapping_product(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use super::codegen::wrapping::Wrapping; - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x = Wrapping::mul(x, self.extract(i) as $elem_ty); - } - x - } - } - }; -} - -macro_rules! impl_float_arithmetic_reductions { - ($id:ident, $elem_ty:ident) => { - impl $id { - /// Horizontal sum of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) - /// - /// If one of the vector element is `NaN` the reduction returns - /// `NaN`. The resulting `NaN` is not required to be equal to any - /// of the `NaN`s in the vector. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn sum(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_add_ordered; - unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) } - } - /// Horizontal sum of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) - /// - /// If one of the vector element is `NaN` the reduction returns - /// `NaN`. The resulting `NaN` is not required to be equal to any - /// of the `NaN`s in the vector. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn sum(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use super::codegen::wrapping::Wrapping; - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x = Wrapping::add(x, self.extract(i) as $elem_ty); - } - x - } - - /// Horizontal product of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. 
- /// That is, for an 8 element vector: - /// - /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) - /// - /// If one of the vector element is `NaN` the reduction returns - /// `NaN`. The resulting `NaN` is not required to be equal to any - /// of the `NaN`s in the vector. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn product(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_mul_ordered; - unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) } - } - /// Horizontal product of the vector elements. - /// - /// The intrinsic performs a tree-reduction of the vector elements. - /// That is, for an 8 element vector: - /// - /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) - /// - /// If one of the vector element is `NaN` the reduction returns - /// `NaN`. The resulting `NaN` is not required to be equal to any - /// of the `NaN`s in the vector. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn product(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use super::codegen::wrapping::Wrapping; - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x = Wrapping::mul(x, self.extract(i) as $elem_ty); - } - x - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_int_arithmetic_reductions { - ($id:ident, $elem_ty:ident) => { - fn alternating(x: usize) -> ::coresimd::simd::$id { - use coresimd::simd::$id; - let mut v = $id::splat(1 as $elem_ty); - for i in 0..$id::lanes() { - if i % x == 0 { - v = v.replace(i, 2 as $elem_ty); - } - } - v - } - - #[test] - fn wrapping_sum() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.wrapping_sum(), 0 as $elem_ty); - let v = $id::splat(1 as $elem_ty); - assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); - let v = alternating(2); - assert_eq!( - v.wrapping_sum(), - ($id::lanes() / 2 + $id::lanes()) as $elem_ty - ); - } - #[test] - fn wrapping_product() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.wrapping_product(), 0 as $elem_ty); - let v = $id::splat(1 as $elem_ty); - assert_eq!(v.wrapping_product(), 1 as $elem_ty); - let f = match $id::lanes() { - 64 => 16, - 32 => 8, - 16 => 4, - _ => 2, - }; - let v = alternating(f); - assert_eq!( - v.wrapping_product(), - (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty) - ); - } - }; -} - -#[cfg(test)] -macro_rules! 
test_float_arithmetic_reductions { - ($id:ident, $elem_ty:ident) => { - fn alternating(x: usize) -> ::coresimd::simd::$id { - use coresimd::simd::$id; - let mut v = $id::splat(1 as $elem_ty); - for i in 0..$id::lanes() { - if i % x == 0 { - v = v.replace(i, 2 as $elem_ty); - } - } - v - } - - #[test] - fn sum() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.sum(), 0 as $elem_ty); - let v = $id::splat(1 as $elem_ty); - assert_eq!(v.sum(), $id::lanes() as $elem_ty); - let v = alternating(2); - assert_eq!(v.sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty); - } - #[test] - fn product() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.product(), 0 as $elem_ty); - let v = $id::splat(1 as $elem_ty); - assert_eq!(v.product(), 1 as $elem_ty); - let f = match $id::lanes() { - 64 => 16, - 32 => 8, - 16 => 4, - _ => 2, - }; - let v = alternating(f); - assert_eq!( - v.product(), - (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty) - ); - } - }; -} diff --git a/coresimd/ppsv/api/arithmetic_scalar_ops.rs b/coresimd/ppsv/api/arithmetic_scalar_ops.rs deleted file mode 100644 index 6498801673..0000000000 --- a/coresimd/ppsv/api/arithmetic_scalar_ops.rs +++ /dev/null @@ -1,202 +0,0 @@ -//! Lane-wise arithmetic operations. -#![allow(unused)] - -macro_rules! 
impl_arithmetic_scalar_ops { - ($id:ident, $elem_ty:ident) => { - impl ::ops::Add<$elem_ty> for $id { - type Output = Self; - #[inline] - fn add(self, other: $elem_ty) -> Self { - self + $id::splat(other) - } - } - impl ::ops::Add<$id> for $elem_ty { - type Output = $id; - #[inline] - fn add(self, other: $id) -> $id { - $id::splat(self) + other - } - } - - impl ::ops::Sub<$elem_ty> for $id { - type Output = Self; - #[inline] - fn sub(self, other: $elem_ty) -> Self { - self - $id::splat(other) - } - } - impl ::ops::Sub<$id> for $elem_ty { - type Output = $id; - #[inline] - fn sub(self, other: $id) -> $id { - $id::splat(self) - other - } - } - - impl ::ops::Mul<$elem_ty> for $id { - type Output = Self; - #[inline] - fn mul(self, other: $elem_ty) -> Self { - self * $id::splat(other) - } - } - impl ::ops::Mul<$id> for $elem_ty { - type Output = $id; - #[inline] - fn mul(self, other: $id) -> $id { - $id::splat(self) * other - } - } - - impl ::ops::Div<$elem_ty> for $id { - type Output = Self; - #[inline] - fn div(self, other: $elem_ty) -> Self { - self / $id::splat(other) - } - } - impl ::ops::Div<$id> for $elem_ty { - type Output = $id; - #[inline] - fn div(self, other: $id) -> $id { - $id::splat(self) / other - } - } - - impl ::ops::Rem<$elem_ty> for $id { - type Output = Self; - #[inline] - fn rem(self, other: $elem_ty) -> Self { - self % $id::splat(other) - } - } - impl ::ops::Rem<$id> for $elem_ty { - type Output = $id; - #[inline] - fn rem(self, other: $id) -> $id { - $id::splat(self) % other - } - } - - impl ::ops::AddAssign<$elem_ty> for $id { - #[inline] - fn add_assign(&mut self, other: $elem_ty) { - *self = *self + other; - } - } - - impl ::ops::SubAssign<$elem_ty> for $id { - #[inline] - fn sub_assign(&mut self, other: $elem_ty) { - *self = *self - other; - } - } - - impl ::ops::MulAssign<$elem_ty> for $id { - #[inline] - fn mul_assign(&mut self, other: $elem_ty) { - *self = *self * other; - } - } - - impl ::ops::DivAssign<$elem_ty> for $id { - #[inline] - 
fn div_assign(&mut self, other: $elem_ty) { - *self = *self / other; - } - } - - impl ::ops::RemAssign<$elem_ty> for $id { - #[inline] - fn rem_assign(&mut self, other: $elem_ty) { - *self = *self % other; - } - } - }; -} - -#[cfg(test)] -macro_rules! test_arithmetic_scalar_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn arithmetic_scalar() { - use coresimd::simd::$id; - let zi = 0 as $elem_ty; - let oi = 1 as $elem_ty; - let ti = 2 as $elem_ty; - let fi = 4 as $elem_ty; - let z = $id::splat(zi); - let o = $id::splat(oi); - let t = $id::splat(ti); - let f = $id::splat(fi); - - // add - assert_eq!(zi + z, z); - assert_eq!(z + zi, z); - assert_eq!(oi + z, o); - assert_eq!(o + zi, o); - assert_eq!(ti + z, t); - assert_eq!(t + zi, t); - assert_eq!(ti + t, f); - assert_eq!(t + ti, f); - // sub - assert_eq!(zi - z, z); - assert_eq!(z - zi, z); - assert_eq!(oi - z, o); - assert_eq!(o - zi, o); - assert_eq!(ti - z, t); - assert_eq!(t - zi, t); - assert_eq!(fi - t, t); - assert_eq!(f - ti, t); - assert_eq!(f - o - o, t); - assert_eq!(f - oi - oi, t); - // mul - assert_eq!(zi * z, z); - assert_eq!(z * zi, z); - assert_eq!(zi * o, z); - assert_eq!(z * oi, z); - assert_eq!(zi * t, z); - assert_eq!(z * ti, z); - assert_eq!(oi * t, t); - assert_eq!(o * ti, t); - assert_eq!(ti * t, f); - assert_eq!(t * ti, f); - // div - assert_eq!(zi / o, z); - assert_eq!(z / oi, z); - assert_eq!(ti / o, t); - assert_eq!(t / oi, t); - assert_eq!(fi / o, f); - assert_eq!(f / oi, f); - assert_eq!(ti / t, o); - assert_eq!(t / ti, o); - assert_eq!(fi / t, t); - assert_eq!(f / ti, t); - // rem - assert_eq!(oi % o, z); - assert_eq!(o % oi, z); - assert_eq!(fi % t, z); - assert_eq!(f % ti, z); - - { - let mut v = z; - assert_eq!(v, z); - v += oi; // add_assign - assert_eq!(v, o); - v -= oi; // sub_assign - assert_eq!(v, z); - v = t; - v *= oi; // mul_assign - assert_eq!(v, t); - v *= ti; - assert_eq!(v, f); - v /= oi; // div_assign - assert_eq!(v, f); - v /= ti; - assert_eq!(v, t); - v %= ti; 
// rem_assign - assert_eq!(v, z); - } - } - }; -} diff --git a/coresimd/ppsv/api/bitwise_ops.rs b/coresimd/ppsv/api/bitwise_ops.rs deleted file mode 100644 index 67b4a1909f..0000000000 --- a/coresimd/ppsv/api/bitwise_ops.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! Lane-wise bitwise operations for integer and boolean vectors. -#![allow(unused)] - -macro_rules! impl_bitwise_ops { - ($id:ident, $true_val:expr) => { - impl ::ops::Not for $id { - type Output = Self; - #[inline] - fn not(self) -> Self { - Self::splat($true_val) ^ self - } - } - impl ::ops::BitXor for $id { - type Output = Self; - #[inline] - fn bitxor(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_xor; - unsafe { simd_xor(self, other) } - } - } - impl ::ops::BitAnd for $id { - type Output = Self; - #[inline] - fn bitand(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_and; - unsafe { simd_and(self, other) } - } - } - impl ::ops::BitOr for $id { - type Output = Self; - #[inline] - fn bitor(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_or; - unsafe { simd_or(self, other) } - } - } - impl ::ops::BitAndAssign for $id { - #[inline] - fn bitand_assign(&mut self, other: Self) { - *self = *self & other; - } - } - impl ::ops::BitOrAssign for $id { - #[inline] - fn bitor_assign(&mut self, other: Self) { - *self = *self | other; - } - } - impl ::ops::BitXorAssign for $id { - #[inline] - fn bitxor_assign(&mut self, other: Self) { - *self = *self ^ other; - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_int_bitwise_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn bitwise_ops() { - use coresimd::simd::$id; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let m = $id::splat(!z.extract(0)); - - // Not: - assert_eq!(!z, m); - assert_eq!(!m, z); - - // BitAnd: - assert_eq!(o & o, o); - assert_eq!(o & z, z); - assert_eq!(z & o, z); - assert_eq!(z & z, z); - - assert_eq!(t & t, t); - assert_eq!(t & o, z); - assert_eq!(o & t, z); - - // BitOr: - assert_eq!(o | o, o); - assert_eq!(o | z, o); - assert_eq!(z | o, o); - assert_eq!(z | z, z); - - assert_eq!(t | t, t); - assert_eq!(z | t, t); - assert_eq!(t | z, t); - - // BitXOR: - assert_eq!(o ^ o, z); - assert_eq!(z ^ z, z); - assert_eq!(z ^ o, o); - assert_eq!(o ^ z, o); - - assert_eq!(t ^ t, z); - assert_eq!(t ^ z, t); - assert_eq!(z ^ t, t); - - { - // AndAssign: - let mut v = o; - v &= t; - assert_eq!(v, z); - } - { - // OrAssign: - let mut v = z; - v |= o; - assert_eq!(v, o); - } - { - // XORAssign: - let mut v = z; - v ^= o; - assert_eq!(v, o); - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_mask_bitwise_ops { - ($id:ident) => { - #[test] - fn mask_bitwise_ops() { - use coresimd::simd::*; - - let t = $id::splat(true); - let f = $id::splat(false); - assert!(t != f); - assert!(!(t == f)); - - // Not: - assert_eq!(!t, f); - assert_eq!(t, !f); - - // BitAnd: - assert_eq!(t & f, f); - assert_eq!(f & t, f); - assert_eq!(t & t, t); - assert_eq!(f & f, f); - - // BitOr: - assert_eq!(t | f, t); - assert_eq!(f | t, t); - assert_eq!(t | t, t); - assert_eq!(f | f, f); - - // BitXOR: - assert_eq!(t ^ f, t); - assert_eq!(f ^ t, t); - assert_eq!(t ^ t, f); - assert_eq!(f ^ f, f); - - { - // AndAssign: - let mut v = f; - v &= t; - assert_eq!(v, f); - } - { - // OrAssign: - let mut v = f; - v |= t; - assert_eq!(v, t); - } - { - // XORAssign: - let mut v = f; - v ^= t; - assert_eq!(v, t); - } - } - }; -} diff --git a/coresimd/ppsv/api/bitwise_reductions.rs b/coresimd/ppsv/api/bitwise_reductions.rs deleted file mode 100644 index 840746ab7a..0000000000 --- a/coresimd/ppsv/api/bitwise_reductions.rs +++ /dev/null @@ -1,194 +0,0 @@ -//! Implements portable horizontal bitwise vector reductions. -#![allow(unused)] - -macro_rules! impl_bitwise_reductions { - ($id:ident, $elem_ty:ident) => { - impl $id { - /// Lane-wise bitwise `and` of the vector elements. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn and(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_and; - unsafe { simd_reduce_and(self) } - } - /// Lane-wise bitwise `and` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn and(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x &= self.extract(i) as $elem_ty; - } - x - } - - /// Lane-wise bitwise `or` of the vector elements. 
- #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn or(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_or; - unsafe { simd_reduce_or(self) } - } - /// Lane-wise bitwise `or` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn or(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x |= self.extract(i) as $elem_ty; - } - x - } - - /// Lane-wise bitwise `xor` of the vector elements. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn xor(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_xor; - unsafe { simd_reduce_xor(self) } - } - /// Lane-wise bitwise `xor` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn xor(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x ^= self.extract(i) as $elem_ty; - } - x - } - } - }; -} - -macro_rules! impl_mask_bitwise_reductions { - ($id:ident, $elem_ty:ident, $internal_ty:ident) => { - impl $id { - /// Lane-wise bitwise `and` of the vector elements. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn and(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_and; - unsafe { - let r: $internal_ty = simd_reduce_and(self); - r != 0 - } - } - /// Lane-wise bitwise `and` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn and(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x &= self.extract(i) as $elem_ty; - } - x - } - - /// Lane-wise bitwise `or` of the vector elements. 
- #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn or(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_or; - unsafe { - let r: $internal_ty = simd_reduce_or(self); - r != 0 - } - } - /// Lane-wise bitwise `or` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn or(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x |= self.extract(i) as $elem_ty; - } - x - } - - /// Lane-wise bitwise `xor` of the vector elements. - #[cfg(not(target_arch = "aarch64"))] - #[inline] - pub fn xor(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_xor; - unsafe { - let r: $internal_ty = simd_reduce_xor(self); - r != 0 - } - } - /// Lane-wise bitwise `xor` of the vector elements. - #[cfg(target_arch = "aarch64")] - #[inline] - pub fn xor(self) -> $elem_ty { - // FIXME: broken on aarch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - let mut x = self.extract(0) as $elem_ty; - for i in 1..$id::lanes() { - x ^= self.extract(i) as $elem_ty; - } - x - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_bitwise_reductions { - ($id:ident, $true:expr) => { - #[test] - fn and() { - let false_ = !$true; - use coresimd::simd::$id; - let v = $id::splat(false_); - assert_eq!(v.and(), false_); - let v = $id::splat($true); - assert_eq!(v.and(), $true); - let v = $id::splat(false_); - let v = v.replace(0, $true); - assert_eq!(v.and(), false_); - let v = $id::splat($true); - let v = v.replace(0, false_); - assert_eq!(v.and(), false_); - } - #[test] - fn or() { - let false_ = !$true; - use coresimd::simd::$id; - let v = $id::splat(false_); - assert_eq!(v.or(), false_); - let v = $id::splat($true); - assert_eq!(v.or(), $true); - let v = $id::splat(false_); - let v = v.replace(0, $true); - assert_eq!(v.or(), $true); - let v = $id::splat($true); - let v = v.replace(0, false_); - assert_eq!(v.or(), $true); - } - #[test] - fn xor() { - let false_ = !$true; - use coresimd::simd::$id; - let v = $id::splat(false_); - assert_eq!(v.xor(), false_); - let v = $id::splat($true); - assert_eq!(v.xor(), false_); - let v = $id::splat(false_); - let v = v.replace(0, $true); - assert_eq!(v.xor(), $true); - let v = $id::splat($true); - let v = v.replace(0, false_); - assert_eq!(v.xor(), $true); - } - }; -} diff --git a/coresimd/ppsv/api/bitwise_scalar_ops.rs b/coresimd/ppsv/api/bitwise_scalar_ops.rs deleted file mode 100644 index 55efa752da..0000000000 --- a/coresimd/ppsv/api/bitwise_scalar_ops.rs +++ /dev/null @@ -1,222 +0,0 @@ -//! Lane-wise bitwise operations for integer vectors and vector masks. -#![allow(unused)] - -macro_rules! 
impl_bitwise_scalar_ops { - ($id:ident, $elem_ty:ident) => { - impl ::ops::BitXor<$elem_ty> for $id { - type Output = Self; - #[inline] - fn bitxor(self, other: $elem_ty) -> Self { - self ^ $id::splat(other) - } - } - impl ::ops::BitXor<$id> for $elem_ty { - type Output = $id; - #[inline] - fn bitxor(self, other: $id) -> $id { - $id::splat(self) ^ other - } - } - - impl ::ops::BitAnd<$elem_ty> for $id { - type Output = Self; - #[inline] - fn bitand(self, other: $elem_ty) -> Self { - self & $id::splat(other) - } - } - impl ::ops::BitAnd<$id> for $elem_ty { - type Output = $id; - #[inline] - fn bitand(self, other: $id) -> $id { - $id::splat(self) & other - } - } - - impl ::ops::BitOr<$elem_ty> for $id { - type Output = Self; - #[inline] - fn bitor(self, other: $elem_ty) -> Self { - self | $id::splat(other) - } - } - impl ::ops::BitOr<$id> for $elem_ty { - type Output = $id; - #[inline] - fn bitor(self, other: $id) -> $id { - $id::splat(self) | other - } - } - - impl ::ops::BitAndAssign<$elem_ty> for $id { - #[inline] - fn bitand_assign(&mut self, other: $elem_ty) { - *self = *self & other; - } - } - impl ::ops::BitOrAssign<$elem_ty> for $id { - #[inline] - fn bitor_assign(&mut self, other: $elem_ty) { - *self = *self | other; - } - } - impl ::ops::BitXorAssign<$elem_ty> for $id { - #[inline] - fn bitxor_assign(&mut self, other: $elem_ty) { - *self = *self ^ other; - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_int_bitwise_scalar_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn bitwise_scalar_ops() { - use coresimd::simd::$id; - let zi = 0 as $elem_ty; - let oi = 1 as $elem_ty; - let ti = 2 as $elem_ty; - let z = $id::splat(zi); - let o = $id::splat(oi); - let t = $id::splat(ti); - - // BitAnd: - assert_eq!(oi & o, o); - assert_eq!(o & oi, o); - assert_eq!(oi & z, z); - assert_eq!(o & zi, z); - assert_eq!(zi & o, z); - assert_eq!(z & oi, z); - assert_eq!(zi & z, z); - assert_eq!(z & zi, z); - - assert_eq!(ti & t, t); - assert_eq!(t & ti, t); - assert_eq!(ti & o, z); - assert_eq!(t & oi, z); - assert_eq!(oi & t, z); - assert_eq!(o & ti, z); - - // BitOr: - assert_eq!(oi | o, o); - assert_eq!(o | oi, o); - assert_eq!(oi | z, o); - assert_eq!(o | zi, o); - assert_eq!(zi | o, o); - assert_eq!(z | oi, o); - assert_eq!(zi | z, z); - assert_eq!(z | zi, z); - - assert_eq!(ti | t, t); - assert_eq!(t | ti, t); - assert_eq!(zi | t, t); - assert_eq!(z | ti, t); - assert_eq!(ti | z, t); - assert_eq!(t | zi, t); - - // BitXOR: - assert_eq!(oi ^ o, z); - assert_eq!(o ^ oi, z); - assert_eq!(zi ^ z, z); - assert_eq!(z ^ zi, z); - assert_eq!(zi ^ o, o); - assert_eq!(z ^ oi, o); - assert_eq!(oi ^ z, o); - assert_eq!(o ^ zi, o); - - assert_eq!(ti ^ t, z); - assert_eq!(t ^ ti, z); - assert_eq!(ti ^ z, t); - assert_eq!(t ^ zi, t); - assert_eq!(zi ^ t, t); - assert_eq!(z ^ ti, t); - - { - // AndAssign: - let mut v = o; - v &= ti; - assert_eq!(v, z); - } - { - // OrAssign: - let mut v = z; - v |= oi; - assert_eq!(v, o); - } - { - // XORAssign: - let mut v = z; - v ^= oi; - assert_eq!(v, o); - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_mask_bitwise_scalar_ops { - ($id:ident) => { - #[test] - fn bool_scalar_arithmetic() { - use coresimd::simd::*; - - let ti = true; - let fi = false; - let t = $id::splat(ti); - let f = $id::splat(fi); - assert!(t != f); - assert!(!(t == f)); - - // BitAnd: - assert_eq!(ti & f, f); - assert_eq!(t & fi, f); - assert_eq!(fi & t, f); - assert_eq!(f & ti, f); - assert_eq!(ti & t, t); - assert_eq!(t & ti, t); - assert_eq!(fi & f, f); - assert_eq!(f & fi, f); - - // BitOr: - assert_eq!(ti | f, t); - assert_eq!(t | fi, t); - assert_eq!(fi | t, t); - assert_eq!(f | ti, t); - assert_eq!(ti | t, t); - assert_eq!(t | ti, t); - assert_eq!(fi | f, f); - assert_eq!(f | fi, f); - - // BitXOR: - assert_eq!(ti ^ f, t); - assert_eq!(t ^ fi, t); - assert_eq!(fi ^ t, t); - assert_eq!(f ^ ti, t); - assert_eq!(ti ^ t, f); - assert_eq!(t ^ ti, f); - assert_eq!(fi ^ f, f); - assert_eq!(f ^ fi, f); - - { - // AndAssign: - let mut v = f; - v &= ti; - assert_eq!(v, f); - } - { - // OrAssign: - let mut v = f; - v |= ti; - assert_eq!(v, t); - } - { - // XORAssign: - let mut v = f; - v ^= ti; - assert_eq!(v, t); - } - } - }; -} diff --git a/coresimd/ppsv/api/cmp.rs b/coresimd/ppsv/api/cmp.rs deleted file mode 100644 index f6b42d5fa0..0000000000 --- a/coresimd/ppsv/api/cmp.rs +++ /dev/null @@ -1,142 +0,0 @@ -//! Lane-wise vector comparisons returning vector masks. -#![allow(unused)] - -macro_rules! impl_cmp { - ($id:ident, $bool_ty:ident) => { - impl $id { - /// Lane-wise equality comparison. - #[inline] - pub fn eq(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_eq; - unsafe { simd_eq(self, other) } - } - - /// Lane-wise inequality comparison. - #[inline] - pub fn ne(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_ne; - unsafe { simd_ne(self, other) } - } - - /// Lane-wise less-than comparison. 
- #[inline] - pub fn lt(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_lt; - unsafe { simd_lt(self, other) } - } - - /// Lane-wise less-than-or-equals comparison. - #[inline] - pub fn le(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_le; - unsafe { simd_le(self, other) } - } - - /// Lane-wise greater-than comparison. - #[inline] - pub fn gt(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_gt; - unsafe { simd_gt(self, other) } - } - - /// Lane-wise greater-than-or-equals comparison. - #[inline] - pub fn ge(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_ge; - unsafe { simd_ge(self, other) } - } - } - }; -} - -macro_rules! impl_mask_cmp { - ($id:ident, $bool_ty:ident) => { - impl $id { - /// Lane-wise equality comparison. - #[inline] - pub fn eq(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_eq; - unsafe { simd_eq(self, other) } - } - - /// Lane-wise inequality comparison. - #[inline] - pub fn ne(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_ne; - unsafe { simd_ne(self, other) } - } - - /// Lane-wise less-than comparison. - #[inline] - pub fn lt(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_gt; - unsafe { simd_gt(self, other) } - } - - /// Lane-wise less-than-or-equals comparison. - #[inline] - pub fn le(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_ge; - unsafe { simd_ge(self, other) } - } - - /// Lane-wise greater-than comparison. - #[inline] - pub fn gt(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_lt; - unsafe { simd_lt(self, other) } - } - - /// Lane-wise greater-than-or-equals comparison. - #[inline] - pub fn ge(self, other: $id) -> $bool_ty { - use coresimd::simd_llvm::simd_le; - unsafe { simd_le(self, other) } - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_cmp { - ($id:ident, $elem_ty:ident, $bool_ty:ident, $true:expr, $false:expr) => { - #[test] - fn cmp() { - use coresimd::simd::*; - - let a = $id::splat($false); - let b = $id::splat($true); - - let r = a.lt(b); - let e = $bool_ty::splat(true); - assert!(r == e); - let r = a.le(b); - assert!(r == e); - - let e = $bool_ty::splat(false); - let r = a.gt(b); - assert!(r == e); - let r = a.ge(b); - assert!(r == e); - let r = a.eq(b); - assert!(r == e); - - let mut a = a; - let mut b = b; - let mut e = e; - for i in 0..$id::lanes() { - if i % 2 == 0 { - a = a.replace(i, $false); - b = b.replace(i, $true); - e = e.replace(i, true); - } else { - a = a.replace(i, $true); - b = b.replace(i, $false); - e = e.replace(i, false); - } - } - let r = a.lt(b); - assert!(r == e); - } - }; -} diff --git a/coresimd/ppsv/api/default.rs b/coresimd/ppsv/api/default.rs deleted file mode 100644 index 3e655e26cb..0000000000 --- a/coresimd/ppsv/api/default.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Implements `Default` for vector types. -#![allow(unused)] - -macro_rules! impl_default { - ($id:ident, $elem_ty:ident) => { - impl ::default::Default for $id { - #[inline] - fn default() -> Self { - Self::splat($elem_ty::default()) - } - } - }; -} - -#[cfg(test)] -macro_rules! test_default { - ($id:ident, $elem_ty:ident) => { - #[test] - fn default() { - use coresimd::simd::$id; - let a = $id::default(); - for i in 0..$id::lanes() { - assert_eq!(a.extract(i), $elem_ty::default()); - } - } - }; -} diff --git a/coresimd/ppsv/api/eq.rs b/coresimd/ppsv/api/eq.rs deleted file mode 100644 index 3d14e34031..0000000000 --- a/coresimd/ppsv/api/eq.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! Implements `Eq` for vector types. -#![allow(unused)] - -macro_rules! 
impl_eq { - ($id:ident) => { - impl ::cmp::Eq for $id {} - }; -} diff --git a/coresimd/ppsv/api/float_math.rs b/coresimd/ppsv/api/float_math.rs deleted file mode 100644 index 9092460a76..0000000000 --- a/coresimd/ppsv/api/float_math.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Float math - -macro_rules! impl_float_math { - ($id:ident) => { - impl $id { - /// Absolute-value - #[inline] - pub fn abs(self) -> Self { - use coresimd::ppsv::codegen::abs::FloatAbs; - FloatAbs::abs(self) - } - - /// Square-root - #[inline] - pub fn sqrt(self) -> Self { - use coresimd::ppsv::codegen::sqrt::FloatSqrt; - FloatSqrt::sqrt(self) - } - - /// Square-root estimate - #[inline] - pub fn sqrte(self) -> Self { - use coresimd::simd_llvm::simd_fsqrt; - unsafe { simd_fsqrt(self) } - } - - /// Reciprocal square-root estimate - #[inline] - pub fn rsqrte(self) -> Self { - unsafe { - use coresimd::simd_llvm::simd_fsqrt; - $id::splat(1.) / simd_fsqrt(self) - } - } - - /// Fused multiply add: `self * y + z` - #[inline] - pub fn fma(self, y: Self, z: Self) -> Self { - use coresimd::ppsv::codegen::fma::FloatFma; - FloatFma::fma(self, y, z) - } - - /// Sin - #[inline(always)] - pub fn sin(self) -> Self { - use coresimd::ppsv::codegen::sin::FloatSin; - FloatSin::sin(self) - } - - /// Cos - #[inline] - pub fn cos(self) -> Self { - use coresimd::ppsv::codegen::cos::FloatCos; - FloatCos::cos(self) - } - } - }; -} - -macro_rules! 
test_float_math { - ($id:ident, $elem_ty:ident) => { - fn sqrt2() -> $elem_ty { - match ::mem::size_of::<$elem_ty>() { - 4 => 1.4142135 as $elem_ty, - 8 => 1.4142135623730951 as $elem_ty, - _ => unreachable!(), - } - } - - fn pi() -> $elem_ty { - match ::mem::size_of::<$elem_ty>() { - 4 => ::std::f32::consts::PI as $elem_ty, - 8 => ::std::f64::consts::PI as $elem_ty, - _ => unreachable!(), - } - } - - #[test] - fn abs() { - use coresimd::simd::*; - let o = $id::splat(1 as $elem_ty); - assert_eq!(o, o.abs()); - - let mo = $id::splat(-1 as $elem_ty); - assert_eq!(o, mo.abs()); - } - - #[test] - fn sqrt() { - use coresimd::simd::*; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - assert_eq!(z, z.sqrt()); - assert_eq!(o, o.sqrt()); - - let t = $id::splat(2 as $elem_ty); - let e = $id::splat(sqrt2() as $elem_ty); - assert_eq!(e, t.sqrt()); - } - - #[test] - fn sqrte() { - use coresimd::simd::*; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - assert_eq!(z, z.sqrte()); - assert_eq!(o, o.sqrte()); - - let t = $id::splat(2 as $elem_ty); - let e = $id::splat(sqrt2() as $elem_ty); - let error = (e - t.sqrte()).abs(); - let tol = $id::splat(2.4e-4 as $elem_ty); - - assert!(error.le(tol).all()); - } - - #[test] - fn rsqrte() { - use coresimd::simd::*; - let o = $id::splat(1 as $elem_ty); - assert_eq!(o, o.rsqrte()); - - let t = $id::splat(2 as $elem_ty); - let e = 1. 
/ sqrt2(); - let error = (e - t.rsqrte()).abs(); - let tol = $id::splat(2.4e-4 as $elem_ty); - assert!(error.le(tol).all()); - } - - #[test] - fn fma() { - use coresimd::simd::*; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let t3 = $id::splat(3 as $elem_ty); - let f = $id::splat(4 as $elem_ty); - - assert_eq!(z, z.fma(z, z)); - assert_eq!(o, o.fma(o, z)); - assert_eq!(o, o.fma(z, o)); - assert_eq!(o, z.fma(o, o)); - - assert_eq!(t, o.fma(o, o)); - assert_eq!(t, o.fma(t, z)); - assert_eq!(t, t.fma(o, z)); - - assert_eq!(f, t.fma(t, z)); - assert_eq!(f, t.fma(o, t)); - assert_eq!(t3, t.fma(o, o)); - } - - #[test] - fn sin() { - use coresimd::simd::*; - let z = $id::splat(0 as $elem_ty); - let p = $id::splat(pi() as $elem_ty); - let ph = $id::splat(pi() as $elem_ty / 2.); - let o_r = $id::splat((pi() as $elem_ty / 2.).sin()); - let z_r = $id::splat((pi() as $elem_ty).sin()); - - assert_eq!(z, z.sin()); - assert_eq!(o_r, ph.sin()); - assert_eq!(z_r, p.sin()); - } - - #[test] - fn cos() { - use coresimd::simd::*; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let p = $id::splat(pi() as $elem_ty); - let ph = $id::splat(pi() as $elem_ty / 2.); - let z_r = $id::splat((pi() as $elem_ty / 2.).cos()); - let o_r = $id::splat((pi() as $elem_ty).cos()); - - assert_eq!(o, z.cos()); - assert_eq!(z_r, ph.cos()); - assert_eq!(o_r, p.cos()); - } - }; -} diff --git a/coresimd/ppsv/api/fmt.rs b/coresimd/ppsv/api/fmt.rs deleted file mode 100644 index 3005042309..0000000000 --- a/coresimd/ppsv/api/fmt.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Implements formating traits. -#![allow(unused)] - -macro_rules! 
impl_hex_fmt { - ($id:ident, $elem_ty:ident) => { - impl ::fmt::LowerHex for $id { - fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result { - use mem; - write!(f, "{}(", stringify!($id))?; - let n = mem::size_of_val(self) / mem::size_of::<$elem_ty>(); - for i in 0..n { - if i > 0 { - write!(f, ", ")?; - } - self.extract(i).fmt(f)?; - } - write!(f, ")") - } - } - impl ::fmt::UpperHex for $id { - fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result { - write!(f, "{}(", stringify!($id))?; - for i in 0..$id::lanes() { - if i > 0 { - write!(f, ", ")?; - } - self.extract(i).fmt(f)?; - } - write!(f, ")") - } - } - impl ::fmt::Octal for $id { - fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result { - write!(f, "{}(", stringify!($id))?; - for i in 0..$id::lanes() { - if i > 0 { - write!(f, ", ")?; - } - self.extract(i).fmt(f)?; - } - write!(f, ")") - } - } - impl ::fmt::Binary for $id { - fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result { - write!(f, "{}(", stringify!($id))?; - for i in 0..$id::lanes() { - if i > 0 { - write!(f, ", ")?; - } - self.extract(i).fmt(f)?; - } - write!(f, ")") - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_hex_fmt_impl { - ($id:ident, $elem_ty:ident, $($values:expr),+) => { - #[test] - fn hex_fmt() { - use ::std::prelude::v1::*; - use ::coresimd::simd::$id; - for &i in [$($values),+].iter() { - let vec = $id::splat(i as $elem_ty); - - let s = format!("{:#x}", vec); - let beg = format!("{}(", stringify!($id)); - assert!(s.starts_with(&beg)); - assert!(s.ends_with(")")); - let s: Vec = s.replace(&beg, "").replace(")", "").split(",") - .map(|v| v.trim().to_string()).collect(); - assert_eq!(s.len(), $id::lanes()); - for (index, ss) in s.into_iter().enumerate() { - assert_eq!(ss, format!("{:#x}", vec.extract(index))); - } - } - } - #[test] - fn upper_hex_fmt() { - use ::std::prelude::v1::*; - use ::coresimd::simd::$id; - for &i in [$($values),+].iter() { - let vec = $id::splat(i as $elem_ty); - - let s = format!("{:#X}", vec); - let beg = format!("{}(", stringify!($id)); - assert!(s.starts_with(&beg)); - assert!(s.ends_with(")")); - let s: Vec = s.replace(&beg, "").replace(")", "").split(",") - .map(|v| v.trim().to_string()).collect(); - assert_eq!(s.len(), $id::lanes()); - for (index, ss) in s.into_iter().enumerate() { - assert_eq!(ss, format!("{:#X}", vec.extract(index))); - } - } - } - #[test] - fn octal_fmt() { - use ::std::prelude::v1::*; - use ::coresimd::simd::$id; - for &i in [$($values),+].iter() { - let vec = $id::splat(i as $elem_ty); - - let s = format!("{:#o}", vec); - let beg = format!("{}(", stringify!($id)); - assert!(s.starts_with(&beg)); - assert!(s.ends_with(")")); - let s: Vec = s.replace(&beg, "").replace(")", "").split(",") - .map(|v| v.trim().to_string()).collect(); - assert_eq!(s.len(), $id::lanes()); - for (index, ss) in s.into_iter().enumerate() { - assert_eq!(ss, format!("{:#o}", vec.extract(index))); - } - } - } - #[test] - fn binary_fmt() { - use ::std::prelude::v1::*; - use ::coresimd::simd::$id; - for &i in [$($values),+].iter() { - let vec = $id::splat(i as $elem_ty); - - let s = format!("{:#b}", vec); - let beg = format!("{}(", 
stringify!($id)); - assert!(s.starts_with(&beg)); - assert!(s.ends_with(")")); - let s: Vec = s.replace(&beg, "").replace(")", "").split(",") - .map(|v| v.trim().to_string()).collect(); - assert_eq!(s.len(), $id::lanes()); - for (index, ss) in s.into_iter().enumerate() { - assert_eq!(ss, format!("{:#b}", vec.extract(index))); - } - } - } - } -} - -#[cfg(test)] -macro_rules! test_hex_fmt { - ($id:ident, $elem_ty:ident) => { - test_hex_fmt_impl!( - $id, - $elem_ty, - 0 as $elem_ty, - !(0 as $elem_ty), - (1 as $elem_ty) - ); - }; -} diff --git a/coresimd/ppsv/api/from.rs b/coresimd/ppsv/api/from.rs deleted file mode 100644 index dff93e8547..0000000000 --- a/coresimd/ppsv/api/from.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Implements the From trait for vector types, which performs a lane-wise -//! cast vector types with the same number of lanes. -#![allow(unused)] - -macro_rules! impl_from_impl { - ($from:ident, $to:ident) => { - impl ::convert::From<::simd::$from> for $to { - #[inline] - fn from(f: ::simd::$from) -> $to { - use coresimd::simd_llvm::simd_cast; - unsafe { simd_cast(f) } - } - } - }; -} - -macro_rules! impl_from_ { - ($to:ident, $from:ident) => { - vector_impl!([impl_from_impl, $to, $from]); - }; -} - -macro_rules! impl_from { - ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => { - $( - impl_from_!($from, $to); - )+ - - $test_macro!( - #[cfg(test)] - mod $test_mod { - $( - #[test] - fn $from() { - use std::convert::{From, Into}; - use ::coresimd::simd::{$from, $to}; - use ::std::default::Default; - assert_eq!($to::lanes(), $from::lanes()); - let a: $from = $from::default(); - let b_0: $to = From::from(a); - let b_1: $to = a.into(); - assert_eq!(b_0, b_1); - } - )+ - } - ); - } -} diff --git a/coresimd/ppsv/api/from_bits.rs b/coresimd/ppsv/api/from_bits.rs deleted file mode 100644 index 2658b61fe8..0000000000 --- a/coresimd/ppsv/api/from_bits.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! 
Implements the `FromBits` trait for vector types, which performs bitwise -//! lossless transmutes between equally-sized vector types. -#![allow(unused)] - -macro_rules! impl_from_bits__ { - ($to:ident: $($from:ident),+) => { - $( - impl ::simd::FromBits<$from> for $to { - #[inline] - fn from_bits(f: $from) -> $to { - unsafe { ::mem::transmute(f) } - } - } - )+ - } -} - -macro_rules! impl_from_bits_ { - ($to:ident: $($from:ident),+) => { - vector_impl!([impl_from_bits__, $to: $($from),+]); - } -} - -macro_rules! impl_from_bits { - ($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => { - impl_from_bits_!($to: $($from),+); - - $test_macro!( - #[cfg(test)] - mod $test_mod { - $( - #[test] - fn $from() { - use ::coresimd::simd::*; - use ::std::mem; - assert_eq!(mem::size_of::<$from>(), - mem::size_of::<$to>()); - let a: $from = $from::default(); - let b_0: $to = FromBits::from_bits(a); - let b_1: $to = a.into_bits(); - assert_eq!(b_0, b_1); - } - )+ - } - ); - } -} diff --git a/coresimd/ppsv/api/hash.rs b/coresimd/ppsv/api/hash.rs deleted file mode 100644 index 0dd8e05388..0000000000 --- a/coresimd/ppsv/api/hash.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! Implements `Hash`. -#![allow(unused)] - -macro_rules! impl_hash { - ($id:ident, $elem_ty:ident) => { - impl ::hash::Hash for $id { - #[inline] - fn hash(&self, state: &mut H) { - union A { - data: [$elem_ty; $id::lanes()], - vec: $id, - } - unsafe { A { vec: *self }.data.hash(state) } - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_hash { - ($id:ident, $elem_ty:ident) => { - #[test] - fn hash() { - use coresimd::simd::$id; - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - use std::mem; - type A = [$elem_ty; $id::lanes()]; - let a: A = [42 as $elem_ty; $id::lanes()]; - assert!(mem::size_of::() == mem::size_of::<$id>()); - let mut a_hash = DefaultHasher::new(); - let mut v_hash = a_hash.clone(); - a.hash(&mut a_hash); - - let v = $id::splat(42 as $elem_ty); - v.hash(&mut v_hash); - assert_eq!(a_hash.finish(), v_hash.finish()); - } - }; -} diff --git a/coresimd/ppsv/api/load_store.rs b/coresimd/ppsv/api/load_store.rs deleted file mode 100644 index 59749da0e1..0000000000 --- a/coresimd/ppsv/api/load_store.rs +++ /dev/null @@ -1,312 +0,0 @@ -//! Implements the load/store API. -#![allow(unused)] - -macro_rules! impl_load_store { - ($id:ident, $elem_ty:ident, $elem_count:expr) => { - impl $id { - /// Writes the values of the vector to the `slice`. - /// - /// # Panics - /// - /// If `slice.len() < Self::lanes()` or `&slice[0]` is not - /// aligned to an `align_of::()` boundary. - #[inline] - pub fn store_aligned(self, slice: &mut [$elem_ty]) { - unsafe { - assert!(slice.len() >= $elem_count); - let target_ptr = - slice.get_unchecked_mut(0) as *mut $elem_ty; - assert!( - target_ptr.align_offset(::mem::align_of::()) - == 0 - ); - self.store_aligned_unchecked(slice); - } - } - - /// Writes the values of the vector to the `slice`. - /// - /// # Panics - /// - /// If `slice.len() < Self::lanes()`. - #[inline] - pub fn store_unaligned(self, slice: &mut [$elem_ty]) { - unsafe { - assert!(slice.len() >= $elem_count); - self.store_unaligned_unchecked(slice); - } - } - - /// Writes the values of the vector to the `slice`. - /// - /// # Precondition - /// - /// If `slice.len() < Self::lanes()` or `&slice[0]` is not - /// aligned to an `align_of::()` boundary, the behavior is - /// undefined. 
- #[inline] - pub unsafe fn store_aligned_unchecked( - self, slice: &mut [$elem_ty], - ) { - *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = - self; - } - - /// Writes the values of the vector to the `slice`. - /// - /// # Precondition - /// - /// If `slice.len() < Self::lanes()` the behavior is undefined. - #[inline] - pub unsafe fn store_unaligned_unchecked( - self, slice: &mut [$elem_ty], - ) { - let target_ptr = - slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; - let self_ptr = &self as *const Self as *const u8; - ::ptr::copy_nonoverlapping( - self_ptr, - target_ptr, - ::mem::size_of::(), - ); - } - - /// Instantiates a new vector with the values of the `slice`. - /// - /// # Panics - /// - /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned - /// to an `align_of::()` boundary. - #[inline] - pub fn load_aligned(slice: &[$elem_ty]) -> Self { - unsafe { - assert!(slice.len() >= $elem_count); - let target_ptr = slice.get_unchecked(0) as *const $elem_ty; - assert!( - target_ptr.align_offset(::mem::align_of::()) - == 0 - ); - Self::load_aligned_unchecked(slice) - } - } - - /// Instantiates a new vector with the values of the `slice`. - /// - /// # Panics - /// - /// If `slice.len() < Self::lanes()`. - #[inline] - pub fn load_unaligned(slice: &[$elem_ty]) -> Self { - unsafe { - assert!(slice.len() >= $elem_count); - Self::load_unaligned_unchecked(slice) - } - } - - /// Instantiates a new vector with the values of the `slice`. - /// - /// # Precondition - /// - /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned - /// to an `align_of::()` boundary, the behavior is undefined. - #[inline] - pub unsafe fn load_aligned_unchecked(slice: &[$elem_ty]) -> Self { - *(slice.get_unchecked(0) as *const $elem_ty as *const Self) - } - - /// Instantiates a new vector with the values of the `slice`. - /// - /// # Precondition - /// - /// If `slice.len() < Self::lanes()` the behavior is undefined. 
- #[inline] - pub unsafe fn load_unaligned_unchecked( - slice: &[$elem_ty], - ) -> Self { - use mem::size_of; - let target_ptr = - slice.get_unchecked(0) as *const $elem_ty as *const u8; - let mut x = Self::splat(0 as $elem_ty); - let self_ptr = &mut x as *mut Self as *mut u8; - ::ptr::copy_nonoverlapping( - target_ptr, - self_ptr, - size_of::(), - ); - x - } - } - }; -} - -#[cfg(test)] -macro_rules! test_load_store { - ($id:ident, $elem_ty:ident) => { - #[test] - fn store_unaligned() { - use coresimd::simd::$id; - use std::iter::Iterator; - let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; - let vec = $id::splat(42 as $elem_ty); - vec.store_unaligned(&mut unaligned[1..]); - for (index, &b) in unaligned.iter().enumerate() { - if index == 0 { - assert_eq!(b, 0 as $elem_ty); - } else { - assert_eq!(b, vec.extract(index - 1)); - } - } - } - - #[test] - #[should_panic] - fn store_unaligned_fail() { - use coresimd::simd::$id; - let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; - let vec = $id::splat(42 as $elem_ty); - vec.store_unaligned(&mut unaligned[2..]); - } - - #[test] - fn load_unaligned() { - use coresimd::simd::$id; - use std::iter::Iterator; - let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; - unaligned[0] = 0 as $elem_ty; - let vec = $id::load_unaligned(&unaligned[1..]); - for (index, &b) in unaligned.iter().enumerate() { - if index == 0 { - assert_eq!(b, 0 as $elem_ty); - } else { - assert_eq!(b, vec.extract(index - 1)); - } - } - } - - #[test] - #[should_panic] - fn load_unaligned_fail() { - use coresimd::simd::$id; - let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; - unaligned[0] = 0 as $elem_ty; - let _vec = $id::load_unaligned(&unaligned[2..]); - } - - union A { - data: [$elem_ty; 2 * ::coresimd::simd::$id::lanes()], - _vec: ::coresimd::simd::$id, - } - - #[test] - fn store_aligned() { - use coresimd::simd::$id; - use std::iter::Iterator; - let mut aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - let vec = 
$id::splat(42 as $elem_ty); - unsafe { vec.store_aligned(&mut aligned.data[$id::lanes()..]) }; - for (index, &b) in unsafe { aligned.data.iter().enumerate() } { - if index < $id::lanes() { - assert_eq!(b, 0 as $elem_ty); - } else { - assert_eq!(b, vec.extract(index - $id::lanes())); - } - } - } - - #[test] - #[should_panic] - fn store_aligned_fail_lanes() { - use coresimd::simd::$id; - let mut aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - let vec = $id::splat(42 as $elem_ty); - unsafe { - vec.store_aligned(&mut aligned.data[2 * $id::lanes()..]) - }; - } - - #[test] - #[should_panic] - fn store_aligned_fail_align() { - unsafe { - use coresimd::simd::$id; - use std::{mem, slice}; - let mut aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - // offset the aligned data by one byte: - let s: &mut [u8; 2 - * $id::lanes() - * mem::size_of::<$elem_ty>()] = - mem::transmute(&mut aligned.data); - let s: &mut [$elem_ty] = slice::from_raw_parts_mut( - s.get_unchecked_mut(1) as *mut u8 as *mut $elem_ty, - $id::lanes(), - ); - let vec = $id::splat(42 as $elem_ty); - vec.store_aligned(s); - } - } - - #[test] - fn load_aligned() { - use coresimd::simd::$id; - use std::iter::Iterator; - let mut aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - for i in $id::lanes()..(2 * $id::lanes()) { - unsafe { - aligned.data[i] = 42 as $elem_ty; - } - } - - let vec = - unsafe { $id::load_aligned(&aligned.data[$id::lanes()..]) }; - for (index, &b) in unsafe { aligned.data.iter().enumerate() } { - if index < $id::lanes() { - assert_eq!(b, 0 as $elem_ty); - } else { - assert_eq!(b, vec.extract(index - $id::lanes())); - } - } - } - - #[test] - #[should_panic] - fn load_aligned_fail_lanes() { - use coresimd::simd::$id; - let aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - let _vec = unsafe { - $id::load_aligned(&aligned.data[2 * $id::lanes()..]) - }; - } - - #[test] - #[should_panic] - fn load_aligned_fail_align() { - unsafe { - use 
coresimd::simd::$id; - use std::{mem, slice}; - let aligned = A { - data: [0 as $elem_ty; 2 * $id::lanes()], - }; - // offset the aligned data by one byte: - let s: &[u8; 2 - * $id::lanes() - * mem::size_of::<$elem_ty>()] = - mem::transmute(&aligned.data); - let s: &[$elem_ty] = slice::from_raw_parts( - s.get_unchecked(1) as *const u8 as *const $elem_ty, - $id::lanes(), - ); - let _vec = $id::load_aligned(s); - } - } - }; -} diff --git a/coresimd/ppsv/api/masks.rs b/coresimd/ppsv/api/masks.rs deleted file mode 100644 index a287e0feeb..0000000000 --- a/coresimd/ppsv/api/masks.rs +++ /dev/null @@ -1,144 +0,0 @@ -//! Minimal boolean vector implementation -#![allow(unused)] - -/// Minimal interface: all packed SIMD mask types implement this. -macro_rules! impl_mask_minimal { - ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { - - impl super::api::Lanes<[u32; $elem_count]> for $id {} - - impl $id { - /// Creates a new instance with each vector elements initialized - /// with the provided values. - #[inline] - pub const fn new($($elem_name: bool),*) -> Self { - $id($(Self::bool_to_internal($elem_name)),*) - } - - /// Converts a boolean type into the type of the vector lanes. - #[inline] - const fn bool_to_internal(x: bool) -> $elem_ty { - [0 as $elem_ty, !(0 as $elem_ty)][x as usize] - } - - /// Returns the number of vector lanes. - #[inline] - pub const fn lanes() -> usize { - $elem_count - } - - /// Constructs a new instance with each element initialized to - /// `value`. - #[inline] - pub const fn splat(value: bool) -> Self { - $id($({ - #[allow(non_camel_case_types, dead_code)] - struct $elem_name; - Self::bool_to_internal(value) - }),*) - } - - /// Extracts the value at `index`. - /// - /// # Panics - /// - /// If `index >= Self::lanes()`. - #[inline] - pub fn extract(self, index: usize) -> bool { - assert!(index < $elem_count); - unsafe { self.extract_unchecked(index) } - } - - /// Extracts the value at `index`. 
- /// - /// If `index >= Self::lanes()` the behavior is undefined. - #[inline] - pub unsafe fn extract_unchecked(self, index: usize) -> bool { - use coresimd::simd_llvm::simd_extract; - let x: $elem_ty = simd_extract(self, index as u32); - x != 0 - } - - /// Returns a new vector where the value at `index` is replaced by `new_value`. - /// - /// # Panics - /// - /// If `index >= Self::lanes()`. - #[inline] - #[must_use = "replace does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"] - pub fn replace(self, index: usize, new_value: bool) -> Self { - assert!(index < $elem_count); - unsafe { self.replace_unchecked(index, new_value) } - } - - /// Returns a new vector where the value at `index` is replaced by `new_value`. - /// - /// # Panics - /// - /// If `index >= Self::lanes()`. - #[inline] - #[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"] - pub unsafe fn replace_unchecked( - self, - index: usize, - new_value: bool, - ) -> Self { - use coresimd::simd_llvm::simd_insert; - simd_insert(self, index as u32, Self::bool_to_internal(new_value)) - } - } - } -} - -#[cfg(test)] -macro_rules! 
test_mask_minimal { - ($id:ident, $elem_count:expr) => { - #[test] - fn minimal() { - use coresimd::simd::$id; - // TODO: test new - - // lanes: - assert_eq!($elem_count, $id::lanes()); - - // splat and extract / extract_unchecked: - let vec = $id::splat(true); - for i in 0..$id::lanes() { - assert_eq!(true, vec.extract(i)); - assert_eq!(true, unsafe { vec.extract_unchecked(i) }); - } - - // replace / replace_unchecked - let new_vec = vec.replace(1, false); - for i in 0..$id::lanes() { - if i == 1 { - assert_eq!(false, new_vec.extract(i)); - } else { - assert_eq!(true, new_vec.extract(i)); - } - } - let new_vec = unsafe { vec.replace_unchecked(1, false) }; - for i in 0..$id::lanes() { - if i == 1 { - assert_eq!(false, new_vec.extract(i)); - } else { - assert_eq!(true, new_vec.extract(i)); - } - } - } - #[test] - #[should_panic] - fn minimal_extract_panic_on_out_of_bounds() { - use coresimd::simd::$id; - let vec = $id::splat(false); - let _ = vec.extract($id::lanes()); - } - #[test] - #[should_panic] - fn minimal_replace_panic_on_out_of_bounds() { - use coresimd::simd::$id; - let vec = $id::splat(false); - let _ = vec.replace($id::lanes(), true); - } - }; -} diff --git a/coresimd/ppsv/api/masks_reductions.rs b/coresimd/ppsv/api/masks_reductions.rs deleted file mode 100644 index 85ba11c4a7..0000000000 --- a/coresimd/ppsv/api/masks_reductions.rs +++ /dev/null @@ -1,84 +0,0 @@ -//! Horizontal mask reductions. -#![allow(unused)] - -macro_rules! impl_mask_reductions { - ($id:ident) => { - impl $id { - /// Are `all` vector lanes `true`? - #[inline] - pub fn all(self) -> bool { - unsafe { super::codegen::masks_reductions::All::all(self) } - } - /// Is `any` vector lane `true`? - #[inline] - pub fn any(self) -> bool { - unsafe { super::codegen::masks_reductions::Any::any(self) } - } - /// Are `all` vector lanes `false`? - #[inline] - pub fn none(self) -> bool { - !self.any() - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_mask_reductions { - ($id:ident) => { - #[test] - fn all() { - use coresimd::simd::$id; - - let a = $id::splat(true); - assert!(a.all()); - let a = $id::splat(false); - assert!(!a.all()); - - for i in 0..$id::lanes() { - let mut a = $id::splat(true); - a = a.replace(i, false); - assert!(!a.all()); - let mut a = $id::splat(false); - a = a.replace(i, true); - assert!(!a.all()); - } - } - #[test] - fn any() { - use coresimd::simd::$id; - - let a = $id::splat(true); - assert!(a.any()); - let a = $id::splat(false); - assert!(!a.any()); - - for i in 0..$id::lanes() { - let mut a = $id::splat(true); - a = a.replace(i, false); - assert!(a.any()); - let mut a = $id::splat(false); - a = a.replace(i, true); - assert!(a.any()); - } - } - #[test] - fn none() { - use coresimd::simd::$id; - - let a = $id::splat(true); - assert!(!a.none()); - let a = $id::splat(false); - assert!(a.none()); - - for i in 0..$id::lanes() { - let mut a = $id::splat(true); - a = a.replace(i, false); - assert!(!a.none()); - let mut a = $id::splat(false); - a = a.replace(i, true); - assert!(!a.none()); - } - } - }; -} diff --git a/coresimd/ppsv/api/masks_select.rs b/coresimd/ppsv/api/masks_select.rs deleted file mode 100644 index 517fd997c5..0000000000 --- a/coresimd/ppsv/api/masks_select.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Mask select method -#![allow(unused)] - -/// Implements mask select method -macro_rules! impl_mask_select { - ($id:ident, $elem_ty:ident, $elem_count:expr) => { - impl $id { - /// Selects elements of `a` and `b` using mask. - /// - /// For each lane, the result contains the element of `a` if the - /// mask is true, and the element of `b` otherwise. - #[inline] - pub fn select(self, a: T, b: T) -> T - where - T: super::api::Lanes<[u32; $elem_count]>, - { - use coresimd::simd_llvm::simd_select; - unsafe { simd_select(self, a, b) } - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_mask_select { - ($mask_id:ident, $vec_id:ident, $elem_ty:ident) => { - #[test] - fn select() { - use coresimd::simd::{$mask_id, $vec_id}; - let o = 1 as $elem_ty; - let t = 2 as $elem_ty; - - let a = $vec_id::splat(o); - let b = $vec_id::splat(t); - let m = a.lt(b); - assert_eq!(m.select(a, b), a); - - let m = b.lt(a); - assert_eq!(m.select(b, a), a); - - let mut c = a; - let mut d = b; - let mut m_e = $mask_id::splat(false); - for i in 0..$vec_id::lanes() { - if i % 2 == 0 { - let c_tmp = c.extract(i); - c = c.replace(i, d.extract(i)); - d = d.replace(i, c_tmp); - } else { - m_e = m_e.replace(i, true); - } - } - - let m = c.lt(d); - assert_eq!(m_e, m); - assert_eq!(m.select(c, d), a); - } - }; -} diff --git a/coresimd/ppsv/api/minimal.rs b/coresimd/ppsv/api/minimal.rs deleted file mode 100644 index 4470bd6c31..0000000000 --- a/coresimd/ppsv/api/minimal.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! Minimal portable vector types API. -#![allow(unused)] - -/// Minimal interface: all packed SIMD vector types implement this. -macro_rules! impl_minimal { - ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { - impl super::api::Lanes<[u32; $elem_count]> for $id {} - - impl $id { - /// Creates a new instance with each vector elements initialized - /// with the provided values. - #[inline] - pub const fn new($($elem_name: $elem_ty),*) -> Self { - $id($($elem_name),*) - } - - /// Returns the number of vector lanes. - #[inline] - pub const fn lanes() -> usize { - $elem_count - } - - /// Constructs a new instance with each element initialized to - /// `value`. - #[inline] - pub const fn splat(value: $elem_ty) -> Self { - $id($({ - #[allow(non_camel_case_types, dead_code)] - struct $elem_name; - value - }),*) - } - - /// Extracts the value at `index`. - /// - /// # Panics - /// - /// If `index >= Self::lanes()`. 
- #[inline] - pub fn extract(self, index: usize) -> $elem_ty { - assert!(index < $elem_count); - unsafe { self.extract_unchecked(index) } - } - - /// Extracts the value at `index`. - /// - /// # Precondition - /// - /// If `index >= Self::lanes()` the behavior is undefined. - #[inline] - pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { - use coresimd::simd_llvm::simd_extract; - simd_extract(self, index as u32) - } - - /// Returns a new vector where the value at `index` is replaced by `new_value`. - /// - /// # Panics - /// - /// If `index >= Self::lanes()`. - #[inline] - #[must_use = "replace does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"] - pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { - assert!(index < $elem_count); - unsafe { self.replace_unchecked(index, new_value) } - } - - /// Returns a new vector where the value at `index` is replaced by `new_value`. - /// - /// # Precondition - /// - /// If `index >= Self::lanes()` the behavior is undefined. - #[inline] - #[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"] - pub unsafe fn replace_unchecked( - self, - index: usize, - new_value: $elem_ty, - ) -> Self { - use coresimd::simd_llvm::simd_insert; - simd_insert(self, index as u32, new_value) - } - } - } -} - -#[cfg(test)] -macro_rules! 
test_minimal { - ($id:ident, $elem_ty:ident, $elem_count:expr) => { - #[test] - fn minimal() { - use coresimd::simd::$id; - // TODO: test new - - // lanes: - assert_eq!($elem_count, $id::lanes()); - - // splat and extract / extract_unchecked: - const VAL: $elem_ty = 7 as $elem_ty; - const VEC: $id = $id::splat(VAL); - for i in 0..$id::lanes() { - assert_eq!(VAL, VEC.extract(i)); - assert_eq!(VAL, unsafe { VEC.extract_unchecked(i) }); - } - - // replace / replace_unchecked - let new_vec = VEC.replace(1, 42 as $elem_ty); - for i in 0..$id::lanes() { - if i == 1 { - assert_eq!(42 as $elem_ty, new_vec.extract(i)); - } else { - assert_eq!(VAL, new_vec.extract(i)); - } - } - let new_vec = unsafe { VEC.replace_unchecked(1, 42 as $elem_ty) }; - for i in 0..$id::lanes() { - if i == 1 { - assert_eq!(42 as $elem_ty, new_vec.extract(i)); - } else { - assert_eq!(VAL, new_vec.extract(i)); - } - } - } - #[test] - #[should_panic] - fn minimal_extract_panic_on_out_of_bounds() { - use coresimd::simd::$id; - const VAL: $elem_ty = 7 as $elem_ty; - const VEC: $id = $id::splat(VAL); - let _ = VEC.extract($id::lanes()); - } - #[test] - #[should_panic] - fn minimal_replace_panic_on_out_of_bounds() { - use coresimd::simd::$id; - const VAL: $elem_ty = 7 as $elem_ty; - const VEC: $id = $id::splat(VAL); - let _ = VEC.replace($id::lanes(), 42 as $elem_ty); - } - }; -} diff --git a/coresimd/ppsv/api/minmax.rs b/coresimd/ppsv/api/minmax.rs deleted file mode 100755 index c1c7499c06..0000000000 --- a/coresimd/ppsv/api/minmax.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Lane-wise arithmetic operations. -#![allow(unused)] - -macro_rules! 
impl_int_minmax_ops { - ($id:ident) => { - impl $id { - // Note: - // - // * if two elements are equal min returns - // always the second element - // * if two elements are equal max returns - // always the second element - // - // Since we are dealing with integers here, and `min` and `max` - // construct a new integer vector, whether the first or the - // second element is returned when two elements compare equal - // does not matter. - - /// Minimum of two vectors. - /// - /// Returns a new vector containing the minimum value of each of - /// the input vector lanes. - #[inline] - pub fn min(self, x: Self) -> Self { - self.lt(x).select(self, x) - } - - /// Maximum of two vectors. - /// - /// Returns a new vector containing the maximum value of each of - /// the input vector lanes. - #[inline] - pub fn max(self, x: Self) -> Self { - self.gt(x).select(self, x) - } - } - }; -} - -#[cfg(test)] -macro_rules! test_int_minmax_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn minmax() { - use coresimd::simd::$id; - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - - let mut m = o; - for i in 0..$id::lanes() { - if i % 2 == 0 { - m = m.replace(i, 2 as $elem_ty); - } - } - - assert_eq!(o.min(t), o); - assert_eq!(t.min(o), o); - assert_eq!(m.min(o), o); - assert_eq!(o.min(m), o); - assert_eq!(m.min(t), m); - assert_eq!(t.min(m), m); - - assert_eq!(o.max(t), t); - assert_eq!(t.max(o), t); - assert_eq!(m.max(o), m); - assert_eq!(o.max(m), m); - assert_eq!(m.max(t), t); - assert_eq!(t.max(m), t); - } - }; -} - -macro_rules! impl_float_minmax_ops { - ($id:ident) => { - impl $id { - /// Minimum of two vectors. - /// - /// Returns a new vector containing the minimum value of each of the - /// input vector lanes. The lane-wise semantics are the same as that - /// of `min` for the primitive floating-point types. 
- #[inline] - pub fn min(self, x: Self) -> Self { - use coresimd::simd_llvm::simd_fmin; - unsafe { simd_fmin(self, x) } - } - - /// Maximum of two vectors. - /// - /// Returns a new vector containing the maximum value of each of the - /// input vector lanes. The lane-wise semantics are the same as that - /// of `max` for the primitive floating-point types. - #[inline] - pub fn max(self, x: Self) -> Self { - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416 - // use coresimd::simd_llvm::simd_fmax; - // unsafe { simd_fmax(self, x) } - let mut r = self; - for i in 0..$id::lanes() { - let a = self.extract(i); - let b = x.extract(i); - r = r.replace(i, a.max(b)) - } - r - } - } - } -} - -#[cfg(test)] -macro_rules! test_float_minmax_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn minmax() { - use coresimd::simd::$id; - let n = ::std::$elem_ty::NAN; - let o = $id::splat(1. as $elem_ty); - let t = $id::splat(2. as $elem_ty); - - let mut m = o; - let mut on = o; - for i in 0..$id::lanes() { - if i % 2 == 0 { - m = m.replace(i, 2. as $elem_ty); - on = on.replace(i, n); - } - } - - assert_eq!(o.min(t), o); - assert_eq!(t.min(o), o); - assert_eq!(m.min(o), o); - assert_eq!(o.min(m), o); - assert_eq!(m.min(t), m); - assert_eq!(t.min(m), m); - - assert_eq!(o.max(t), t); - assert_eq!(t.max(o), t); - assert_eq!(m.max(o), m); - assert_eq!(o.max(m), m); - assert_eq!(m.max(t), t); - assert_eq!(t.max(m), t); - - assert_eq!(on.min(o), o); - assert_eq!(o.min(on), o); - assert_eq!(on.max(o), o); - assert_eq!(o.max(on), o); - } - }; -} diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs deleted file mode 100644 index dd461b30dd..0000000000 --- a/coresimd/ppsv/api/minmax_reductions.rs +++ /dev/null @@ -1,85 +0,0 @@ -//! Implements portable horizontal arithmetic reductions. -#![allow(unused)] - -macro_rules! impl_minmax_reductions { - ($id:ident, $elem_ty:ident) => { - impl $id { - /// Largest vector element value. 
- #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))] - #[inline] - pub fn max_element(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_max; - unsafe { simd_reduce_max(self) } - } - - /// Largest vector element value. - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - #[allow(unused_imports)] - #[inline] - pub fn max_element(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use cmp::Ord; - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x = x.max(self.extract(i)); - } - x - } - - /// Smallest vector element value. - #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))] - #[inline] - pub fn min_element(self) -> $elem_ty { - use coresimd::simd_llvm::simd_reduce_min; - unsafe { simd_reduce_min(self) } - } - - /// Smallest vector element value. - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - #[allow(unused_imports)] - #[inline] - pub fn min_element(self) -> $elem_ty { - // FIXME: broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - use cmp::Ord; - let mut x = self.extract(0); - for i in 1..$id::lanes() { - x = x.min(self.extract(i)); - } - x - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_minmax_reductions { - ($id:ident, $elem_ty:ident) => { - #[test] - fn max_element() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.max_element(), 0 as $elem_ty); - let v = v.replace(1, 1 as $elem_ty); - assert_eq!(v.max_element(), 1 as $elem_ty); - let v = v.replace(0, 2 as $elem_ty); - assert_eq!(v.max_element(), 2 as $elem_ty); - } - - #[test] - fn min_element() { - use coresimd::simd::$id; - let v = $id::splat(0 as $elem_ty); - assert_eq!(v.min_element(), 0 as $elem_ty); - let v = v.replace(1, 1 as $elem_ty); - assert_eq!(v.min_element(), 0 as $elem_ty); - let v = $id::splat(1 as $elem_ty); - let v = v.replace(0, 2 as $elem_ty); - assert_eq!(v.min_element(), 1 as $elem_ty); - let v = $id::splat(2 as $elem_ty); - let v = v.replace(1, 1 as $elem_ty); - assert_eq!(v.min_element(), 1 as $elem_ty); - } - }; -} diff --git a/coresimd/ppsv/api/mod.rs b/coresimd/ppsv/api/mod.rs deleted file mode 100644 index 1c38926a65..0000000000 --- a/coresimd/ppsv/api/mod.rs +++ /dev/null @@ -1,266 +0,0 @@ -//! This module defines the API of portable vector types. -#![allow(unused)] - -/// Adds the vector type `$id`, with elements of types `$elem_tys`. -macro_rules! 
define_ty { - ($id:ident, $($elem_tys:ident),+ | $(#[$doc:meta])*) => { - $(#[$doc])* - #[repr(simd)] - #[derive(Copy, Clone, Debug, - /*FIXME: manually implement and add tests*/ PartialOrd)] - #[allow(non_camel_case_types)] - pub struct $id($($elem_tys),*); - } -} - -#[macro_use] -mod arithmetic_ops; -#[macro_use] -mod arithmetic_scalar_ops; -#[macro_use] -mod arithmetic_reductions; -#[macro_use] -mod bitwise_ops; -#[macro_use] -mod bitwise_scalar_ops; -#[macro_use] -mod bitwise_reductions; -#[macro_use] -mod cmp; -#[macro_use] -mod default; -#[macro_use] -mod eq; -#[macro_use] -mod float_math; -#[macro_use] -mod fmt; -#[macro_use] -mod from; -#[macro_use] -mod from_bits; -#[macro_use] -mod hash; -#[macro_use] -mod load_store; -#[macro_use] -mod masks; -#[macro_use] -mod masks_reductions; -#[macro_use] -mod minimal; -#[macro_use] -mod minmax; -#[macro_use] -mod minmax_reductions; -#[macro_use] -mod neg; -#[macro_use] -mod partial_eq; -// TODO: -//#[macro_use] -//mod partial_ord; -// TODO: -//#[macro_use] -//mod shuffles; -// TODO: -//#[macro_use] -//mod gather_scatter; -#[macro_use] -mod masks_select; -#[macro_use] -mod scalar_shifts; -#[macro_use] -mod shifts; -#[macro_use] -mod swap_bytes; - -/// Sealed trait used for constraining select implementations. -pub trait Lanes {} - -/// Defines a portable packed SIMD floating-point vector type. -macro_rules! 
simd_f_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, - $test_macro:ident | - $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - vector_impl!( - [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], - [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], - [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $mask_ty], - [impl_arithmetic_ops, $id], - [impl_arithmetic_scalar_ops, $id, $elem_ty], - [impl_float_arithmetic_reductions, $id, $elem_ty], - [impl_minmax_reductions, $id, $elem_ty], - [impl_neg_op, $id, $elem_ty], - [impl_partial_eq, $id], - [impl_default, $id, $elem_ty], - [impl_float_minmax_ops, $id], - [impl_float_math, $id] - ); - - $test_macro!( - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_scalar_ops!($id, $elem_ty); - test_float_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_neg_op!($id, $elem_ty); - test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty); - test_default!($id, $elem_ty); - test_mask_select!($mask_ty, $id, $elem_ty); - test_float_minmax_ops!($id, $elem_ty); - test_float_math!($id, $elem_ty); - } - ); - } -} - -/// Defines a portable packed SIMD signed-integer vector type. -macro_rules! 
simd_i_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, - $test_macro:ident | - $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - vector_impl!( - [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], - [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], - [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $mask_ty], - [impl_hash, $id, $elem_ty], - [impl_arithmetic_ops, $id], - [impl_arithmetic_scalar_ops, $id, $elem_ty], - [impl_int_arithmetic_reductions, $id, $elem_ty], - [impl_minmax_reductions, $id, $elem_ty], - [impl_neg_op, $id, $elem_ty], - [impl_bitwise_ops, $id, !(0 as $elem_ty)], - [impl_bitwise_scalar_ops, $id, $elem_ty], - [impl_bitwise_reductions, $id, $elem_ty], - [impl_all_scalar_shifts, $id, $elem_ty], - [impl_vector_shifts, $id, $elem_ty], - [impl_hex_fmt, $id, $elem_ty], - [impl_eq, $id], - [impl_partial_eq, $id], - [impl_default, $id, $elem_ty], - [impl_int_minmax_ops, $id], - [impl_swap_bytes, $id] - ); - - $test_macro!( - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty); - test_hash!($id, $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_scalar_ops!($id, $elem_ty); - test_int_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_neg_op!($id, $elem_ty); - test_int_bitwise_ops!($id, $elem_ty); - test_int_bitwise_scalar_ops!($id, $elem_ty); - test_bitwise_reductions!($id, !(0 as $elem_ty)); - test_all_scalar_shift_ops!($id, $elem_ty); - test_vector_shift_ops!($id, $elem_ty); - test_hex_fmt!($id, $elem_ty); - test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); - test_default!($id, $elem_ty); - test_mask_select!($mask_ty, $id, $elem_ty); - test_int_minmax_ops!($id, $elem_ty); - } - ); - } -} - -/// Defines a portable packed SIMD unsigned-integer vector type. -macro_rules! 
simd_u_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, - $test_macro:ident | - $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - vector_impl!( - [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], - [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], - [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $mask_ty], - [impl_hash, $id, $elem_ty], - [impl_arithmetic_ops, $id], - [impl_arithmetic_scalar_ops, $id, $elem_ty], - [impl_int_arithmetic_reductions, $id, $elem_ty], - [impl_minmax_reductions, $id, $elem_ty], - [impl_bitwise_scalar_ops, $id, $elem_ty], - [impl_bitwise_ops, $id, !(0 as $elem_ty)], - [impl_bitwise_reductions, $id, $elem_ty], - [impl_all_scalar_shifts, $id, $elem_ty], - [impl_vector_shifts, $id, $elem_ty], - [impl_hex_fmt, $id, $elem_ty], - [impl_eq, $id], - [impl_partial_eq, $id], - [impl_default, $id, $elem_ty], - [impl_int_minmax_ops, $id], - [impl_swap_bytes, $id] - ); - - $test_macro!( - #[cfg(test)] - mod $test_mod { - test_minimal!($id, $elem_ty, $elem_count); - test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty); - test_hash!($id, $elem_ty); - test_arithmetic_ops!($id, $elem_ty); - test_arithmetic_scalar_ops!($id, $elem_ty); - test_int_arithmetic_reductions!($id, $elem_ty); - test_minmax_reductions!($id, $elem_ty); - test_int_bitwise_ops!($id, $elem_ty); - test_int_bitwise_scalar_ops!($id, $elem_ty); - test_bitwise_reductions!($id, !(0 as $elem_ty)); - test_all_scalar_shift_ops!($id, $elem_ty); - test_vector_shift_ops!($id, $elem_ty); - test_hex_fmt!($id, $elem_ty); - test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); - test_default!($id, $elem_ty); - test_mask_select!($mask_ty, $id, $elem_ty); - test_int_minmax_ops!($id, $elem_ty); - test_swap_bytes!($id, $elem_ty); - } - ); - } -} - -/// Defines a portable packed SIMD mask type. -macro_rules! 
simd_m_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident, $test_macro:ident | - $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { - vector_impl!( - [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], - [impl_mask_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], - [impl_bitwise_ops, $id, true], - [impl_bitwise_scalar_ops, $id, bool], - [impl_mask_bitwise_reductions, $id, bool, $elem_ty], - [impl_mask_reductions, $id], - [impl_mask_select, $id, $elem_ty, $elem_count], - [impl_mask_cmp, $id, $id], - [impl_eq, $id], - [impl_partial_eq, $id], - [impl_default, $id, bool] - ); - - $test_macro!( - #[cfg(test)] - mod $test_mod { - test_mask_minimal!($id, $elem_count); - test_mask_bitwise_ops!($id); - test_mask_bitwise_scalar_ops!($id); - test_mask_reductions!($id); - test_bitwise_reductions!($id, true); - test_cmp!($id, $elem_ty, $id, true, false); - test_partial_eq!($id, true, false); - test_default!($id, bool); - } - ); - } -} diff --git a/coresimd/ppsv/api/neg.rs b/coresimd/ppsv/api/neg.rs deleted file mode 100644 index 55dd4d65bd..0000000000 --- a/coresimd/ppsv/api/neg.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! Implements `std::ops::Neg` for signed vector types. -#![allow(unused)] - -macro_rules! impl_neg_op { - ($id:ident, $elem_ty:ident) => { - impl ::ops::Neg for $id { - type Output = Self; - #[inline] - fn neg(self) -> Self { - Self::splat(-1 as $elem_ty) * self - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_neg_op { - ($id:ident, $elem_ty:ident) => { - #[test] - fn neg() { - use coresimd::simd::$id; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let f = $id::splat(4 as $elem_ty); - - let nz = $id::splat(-(0 as $elem_ty)); - let no = $id::splat(-(1 as $elem_ty)); - let nt = $id::splat(-(2 as $elem_ty)); - let nf = $id::splat(-(4 as $elem_ty)); - - assert_eq!(-z, nz); - assert_eq!(-o, no); - assert_eq!(-t, nt); - assert_eq!(-f, nf); - - assert_eq!(z, -nz); - assert_eq!(o, -no); - assert_eq!(t, -nt); - assert_eq!(f, -nf); - } - }; -} diff --git a/coresimd/ppsv/api/partial_eq.rs b/coresimd/ppsv/api/partial_eq.rs deleted file mode 100644 index ab4997b72a..0000000000 --- a/coresimd/ppsv/api/partial_eq.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Implements `PartialEq` for vector types. -#![allow(unused)] - -macro_rules! impl_partial_eq { - ($id:ident) => { - impl ::cmp::PartialEq<$id> for $id { - #[inline] - fn eq(&self, other: &Self) -> bool { - $id::eq(*self, *other).all() - } - #[inline] - fn ne(&self, other: &Self) -> bool { - $id::ne(*self, *other).any() - } - } - }; -} - -#[cfg(test)] -macro_rules! test_partial_eq { - ($id:ident, $true:expr, $false:expr) => { - #[test] - fn partial_eq() { - use coresimd::simd::*; - - let a = $id::splat($false); - let b = $id::splat($true); - - assert!(a != b); - assert!(!(a == b)); - assert!(a == a); - assert!(!(a != a)); - - // Test further to make sure comparisons work with non-splatted - // values. - // This is to test the fix for #511 - - let a = $id::splat($false).replace(0, $true); - let b = $id::splat($true); - - assert!(a != b); - assert!(!(a == b)); - assert!(a == a); - assert!(!(a != a)); - } - }; -} diff --git a/coresimd/ppsv/api/scalar_shifts.rs b/coresimd/ppsv/api/scalar_shifts.rs deleted file mode 100644 index 586d909c32..0000000000 --- a/coresimd/ppsv/api/scalar_shifts.rs +++ /dev/null @@ -1,120 +0,0 @@ -//! Implements integer shifts. 
-#![allow(unused)] - -macro_rules! impl_shifts { - ($id:ident, $elem_ty:ident, $($by:ident),+) => { - $( - impl ::ops::Shl<$by> for $id { - type Output = Self; - #[inline] - fn shl(self, other: $by) -> Self { - use coresimd::simd_llvm::simd_shl; - unsafe { simd_shl(self, $id::splat(other as $elem_ty)) } - } - } - impl ::ops::Shr<$by> for $id { - type Output = Self; - #[inline] - fn shr(self, other: $by) -> Self { - use coresimd::simd_llvm::simd_shr; - unsafe { simd_shr(self, $id::splat(other as $elem_ty)) } - } - } - - impl ::ops::ShlAssign<$by> for $id { - #[inline] - fn shl_assign(&mut self, other: $by) { - *self = *self << other; - } - } - impl ::ops::ShrAssign<$by> for $id { - #[inline] - fn shr_assign(&mut self, other: $by) { - *self = *self >> other; - } - } - - )+ - } -} - -macro_rules! impl_all_scalar_shifts { - ($id:ident, $elem_ty:ident) => { - impl_shifts!( - $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize - ); - }; -} - -#[cfg(test)] -macro_rules! test_shift_ops { - ($id:ident, $elem_ty:ident, $($index_ty:ident),+) => { - #[test] - fn scalar_shift_ops() { - use ::coresimd::simd::$id; - use ::std::mem; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let f = $id::splat(4 as $elem_ty); - - $( - { - let zi = 0 as $index_ty; - let oi = 1 as $index_ty; - let ti = 2 as $index_ty; - let maxi = (mem::size_of::<$elem_ty>() * 8 - 1) as $index_ty; - - // shr - assert_eq!(z >> zi, z); - assert_eq!(z >> oi, z); - assert_eq!(z >> ti, z); - assert_eq!(z >> ti, z); - - assert_eq!(o >> zi, o); - assert_eq!(t >> zi, t); - assert_eq!(f >> zi, f); - assert_eq!(f >> maxi, z); - - assert_eq!(o >> oi, z); - assert_eq!(t >> oi, o); - assert_eq!(t >> ti, z); - assert_eq!(f >> oi, t); - assert_eq!(f >> ti, o); - assert_eq!(f >> maxi, z); - - // shl - assert_eq!(z << zi, z); - assert_eq!(o << zi, o); - assert_eq!(t << zi, t); - assert_eq!(f << zi, f); - assert_eq!(f << maxi, z); - - assert_eq!(o << 
oi, t); - assert_eq!(o << ti, f); - assert_eq!(t << oi, f); - - { // shr_assign - let mut v = o; - v >>= oi; - assert_eq!(v, z); - } - { // shl_assign - let mut v = o; - v <<= oi; - assert_eq!(v, t); - } - } - )+ - } - }; -} - -#[cfg(test)] -macro_rules! test_all_scalar_shift_ops { - ($id:ident, $elem_ty:ident) => { - test_shift_ops!( - $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize - ); - }; -} diff --git a/coresimd/ppsv/api/shifts.rs b/coresimd/ppsv/api/shifts.rs deleted file mode 100644 index 70850d8b74..0000000000 --- a/coresimd/ppsv/api/shifts.rs +++ /dev/null @@ -1,95 +0,0 @@ -//! Implements integer shifts. -#![allow(unused)] - -macro_rules! impl_vector_shifts { - ($id:ident, $elem_ty:ident) => { - impl ::ops::Shl<$id> for $id { - type Output = Self; - #[inline] - fn shl(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_shl; - unsafe { simd_shl(self, other) } - } - } - impl ::ops::Shr<$id> for $id { - type Output = Self; - #[inline] - fn shr(self, other: Self) -> Self { - use coresimd::simd_llvm::simd_shr; - unsafe { simd_shr(self, other) } - } - } - impl ::ops::ShlAssign<$id> for $id { - #[inline] - fn shl_assign(&mut self, other: Self) { - *self = *self << other; - } - } - impl ::ops::ShrAssign<$id> for $id { - #[inline] - fn shr_assign(&mut self, other: Self) { - *self = *self >> other; - } - } - }; -} - -#[cfg(test)] -macro_rules! 
test_vector_shift_ops { - ($id:ident, $elem_ty:ident) => { - #[test] - fn shift_ops() { - use coresimd::simd::$id; - use std::mem; - let z = $id::splat(0 as $elem_ty); - let o = $id::splat(1 as $elem_ty); - let t = $id::splat(2 as $elem_ty); - let f = $id::splat(4 as $elem_ty); - - let max = - $id::splat((mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); - - // shr - assert_eq!(z >> z, z); - assert_eq!(z >> o, z); - assert_eq!(z >> t, z); - assert_eq!(z >> t, z); - - assert_eq!(o >> z, o); - assert_eq!(t >> z, t); - assert_eq!(f >> z, f); - assert_eq!(f >> max, z); - - assert_eq!(o >> o, z); - assert_eq!(t >> o, o); - assert_eq!(t >> t, z); - assert_eq!(f >> o, t); - assert_eq!(f >> t, o); - assert_eq!(f >> max, z); - - // shl - assert_eq!(z << z, z); - assert_eq!(o << z, o); - assert_eq!(t << z, t); - assert_eq!(f << z, f); - assert_eq!(f << max, z); - - assert_eq!(o << o, t); - assert_eq!(o << t, f); - assert_eq!(t << o, f); - - { - // shr_assign - let mut v = o; - v >>= o; - assert_eq!(v, z); - } - { - // shl_assign - let mut v = o; - v <<= o; - assert_eq!(v, t); - } - } - }; -} diff --git a/coresimd/ppsv/api/swap_bytes.rs b/coresimd/ppsv/api/swap_bytes.rs deleted file mode 100644 index d94dbb592f..0000000000 --- a/coresimd/ppsv/api/swap_bytes.rs +++ /dev/null @@ -1,130 +0,0 @@ -//! Horizontal swap bytes. - -macro_rules! impl_swap_bytes { - ($id:ident) => { - impl $id { - /// Reverses the byte order of the vector. - #[inline] - pub fn swap_bytes(self) -> Self { - unsafe { - super::codegen::swap_bytes::SwapBytes::swap_bytes(self) - } - } - - /// Converts self to little endian from the target's endianness. - /// - /// On little endian this is a no-op. On big endian the bytes are - /// swapped. - #[inline] - pub fn to_le(self) -> Self { - #[cfg(target_endian = "little")] - { - self - } - #[cfg(not(target_endian = "little"))] - { - self.swap_bytes() - } - } - - /// Converts self to big endian from the target's endianness. - /// - /// On big endian this is a no-op. 
On little endian the bytes are - /// swapped. - #[inline] - pub fn to_be(self) -> Self { - #[cfg(target_endian = "big")] - { - self - } - #[cfg(not(target_endian = "big"))] - { - self.swap_bytes() - } - } - } - }; -} - -#[cfg(test)] -macro_rules! test_swap_bytes { - ($id:ident, $elem_ty:ty) => { - use coresimd::simd::$id; - use std::{mem, slice}; - - const BYTES: [u8; 64] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - ]; - - macro_rules! swap { - ($func: ident) => {{ - // catch possible future >512 vectors - assert!(mem::size_of::<$id>() <= 64); - - let mut actual = BYTES; - let elems: &mut [$elem_ty] = unsafe { - slice::from_raw_parts_mut( - actual.as_mut_ptr() as *mut $elem_ty, - $id::lanes(), - ) - }; - - let vec = $id::load_unaligned(elems); - vec.$func().store_unaligned(elems); - - actual - }}; - } - - macro_rules! test_swap { - ($func: ident) => {{ - let actual = swap!($func); - let expected = - BYTES.iter().rev().skip(64 - mem::size_of::<$id>()); - - assert!(actual.iter().zip(expected).all(|(x, y)| x == y)); - }}; - } - - macro_rules! 
test_no_swap { - ($func: ident) => {{ - let actual = swap!($func); - let expected = BYTES.iter().take(mem::size_of::<$id>()); - - assert!(actual.iter().zip(expected).all(|(x, y)| x == y)); - }}; - } - - #[test] - fn swap_bytes() { - test_swap!(swap_bytes); - } - - #[test] - fn to_le() { - #[cfg(target_endian = "little")] - { - test_no_swap!(to_le); - } - #[cfg(not(target_endian = "little"))] - { - test_swap!(to_le); - } - } - - #[test] - fn to_be() { - #[cfg(target_endian = "big")] - { - test_no_swap!(to_be); - } - #[cfg(not(target_endian = "big"))] - { - test_swap!(to_be); - } - } - }; -} diff --git a/coresimd/ppsv/codegen/abs.rs b/coresimd/ppsv/codegen/abs.rs deleted file mode 100644 index c829ff8c5b..0000000000 --- a/coresimd/ppsv/codegen/abs.rs +++ /dev/null @@ -1,77 +0,0 @@ -//! Vector absolute value -#![allow(dead_code)] -use coresimd::simd::*; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.fabs.f32"] - fn abs_f32(x: f32) -> f32; - #[link_name = "llvm.fabs.f64"] - fn abs_f64(x: f64) -> f64; - - #[link_name = "llvm.fabs.v2f32"] - fn abs_v2f32(x: f32x2) -> f32x2; - #[link_name = "llvm.fabs.v4f32"] - fn abs_v4f32(x: f32x4) -> f32x4; - #[link_name = "llvm.fabs.v8f32"] - fn abs_v8f32(x: f32x8) -> f32x8; - #[link_name = "llvm.fabs.v16f32"] - fn abs_v16f32(x: f32x16) -> f32x16; - #[link_name = "llvm.fabs.v2f64"] - fn abs_v2f64(x: f64x2) -> f64x2; - #[link_name = "llvm.fabs.v4f64"] - fn abs_v4f64(x: f64x4) -> f64x4; - #[link_name = "llvm.fabs.v8f64"] - fn abs_v8f64(x: f64x8) -> f64x8; -} - -pub(crate) trait FloatAbs { - fn abs(self) -> Self; -} - -trait RawAbs { - fn raw_abs(self) -> Self; -} - -impl RawAbs for f32 { - fn raw_abs(self) -> Self { - unsafe { abs_f32(self) } - } -} - -impl RawAbs for f64 { - fn raw_abs(self) -> Self { - unsafe { abs_f64(self) } - } -} - - -macro_rules! 
impl_fabs { - ($id:ident : $fn:ident) => { - #[cfg(not(target_arch = "s390x"))] - impl FloatAbs for $id { - fn abs(self) -> Self { - unsafe { $fn(self) } - } - } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501 - #[cfg(target_arch = "s390x")] - impl FloatAbs for $id { - fn abs(self) -> Self { - let mut v = $id::splat(0.); - for i in 0..$id::lanes() { - v = v.replace(i, self.extract(i).raw_abs()) - } - v - } - } - }; -} - -impl_fabs!(f32x2: abs_v2f32); -impl_fabs!(f32x4: abs_v4f32); -impl_fabs!(f32x8: abs_v8f32); -impl_fabs!(f32x16: abs_v16f32); -impl_fabs!(f64x2: abs_v2f64); -impl_fabs!(f64x4: abs_v4f64); -impl_fabs!(f64x8: abs_v8f64); diff --git a/coresimd/ppsv/codegen/cos.rs b/coresimd/ppsv/codegen/cos.rs deleted file mode 100644 index 38dce584f8..0000000000 --- a/coresimd/ppsv/codegen/cos.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! Exact vector cos -#![allow(dead_code)] -use coresimd::simd::*; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.cos.f32"] - fn cos_f32(x: f32) -> f32; - #[link_name = "llvm.cos.f64"] - fn cos_f64(x: f64) -> f64; - - #[link_name = "llvm.cos.v2f32"] - fn cos_v2f32(x: f32x2) -> f32x2; - #[link_name = "llvm.cos.v4f32"] - fn cos_v4f32(x: f32x4) -> f32x4; - #[link_name = "llvm.cos.v8f32"] - fn cos_v8f32(x: f32x8) -> f32x8; - #[link_name = "llvm.cos.v16f32"] - fn cos_v16f32(x: f32x16) -> f32x16; - #[link_name = "llvm.cos.v2f64"] - fn cos_v2f64(x: f64x2) -> f64x2; - #[link_name = "llvm.cos.v4f64"] - fn cos_v4f64(x: f64x4) -> f64x4; - #[link_name = "llvm.cos.v8f64"] - fn cos_v8f64(x: f64x8) -> f64x8; -} - -pub(crate) trait FloatCos { - fn cos(self) -> Self; -} - -trait RawCos { - fn raw_cos(self) -> Self; -} - -impl RawCos for f32 { - fn raw_cos(self) -> Self { - unsafe { cos_f32(self) } - } -} - -impl RawCos for f64 { - fn raw_cos(self) -> Self { - unsafe { cos_f64(self) } - } -} - - -macro_rules! 
impl_fcos { - ($id:ident : $fn:ident) => { - #[cfg(not(target_arch = "s390x"))] - impl FloatCos for $id { - fn cos(self) -> Self { - unsafe { $fn(self) } - } - } - - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501 - #[cfg(target_arch = "s390x")] - impl FloatCos for $id { - fn cos(self) -> Self { - let mut v = $id::splat(0.); - for i in 0..$id::lanes() { - v = v.replace(i, self.extract(i).raw_cos()) - } - v - } - } - }; -} - -impl_fcos!(f32x2: cos_v2f32); -impl_fcos!(f32x4: cos_v4f32); -impl_fcos!(f32x8: cos_v8f32); -impl_fcos!(f32x16: cos_v16f32); -impl_fcos!(f64x2: cos_v2f64); -impl_fcos!(f64x4: cos_v4f64); -impl_fcos!(f64x8: cos_v8f64); diff --git a/coresimd/ppsv/codegen/fma.rs b/coresimd/ppsv/codegen/fma.rs deleted file mode 100644 index a0f0e8f729..0000000000 --- a/coresimd/ppsv/codegen/fma.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Vector fused multiply add -#![allow(dead_code)] -use coresimd::simd::*; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.fma.v2f32"] - fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; - #[link_name = "llvm.fma.v4f32"] - fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; - #[link_name = "llvm.fma.v8f32"] - fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; - #[link_name = "llvm.fma.v16f32"] - fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; - #[link_name = "llvm.fma.v2f64"] - fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; - #[link_name = "llvm.fma.v4f64"] - fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; - #[link_name = "llvm.fma.v8f64"] - fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; -} - -pub(crate) trait FloatFma { - fn fma(self, y: Self, z: Self) -> Self; -} - -macro_rules! 
impl_fma { - ($id:ident : $fn:ident) => { - #[cfg(not(target_arch = "s390x"))] - impl FloatFma for $id { - fn fma(self, y: Self, z: Self) -> Self { - unsafe { $fn(self, y, z) } - } - } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501 - #[cfg(target_arch = "s390x")] - impl FloatFma for $id { - fn fma(self, y: Self, z: Self) -> Self { - self * y + z - } - } - }; -} - -impl_fma!(f32x2: fma_v2f32); -impl_fma!(f32x4: fma_v4f32); -impl_fma!(f32x8: fma_v8f32); -impl_fma!(f32x16: fma_v16f32); -impl_fma!(f64x2: fma_v2f64); -impl_fma!(f64x4: fma_v4f64); -impl_fma!(f64x8: fma_v8f64); diff --git a/coresimd/ppsv/codegen/masks_reductions.rs b/coresimd/ppsv/codegen/masks_reductions.rs deleted file mode 100644 index 617f1fd300..0000000000 --- a/coresimd/ppsv/codegen/masks_reductions.rs +++ /dev/null @@ -1,651 +0,0 @@ -//! LLVM6 currently generates sub-optimal code for the `all` mask reductions. -//! -//! See https://github.com/rust-lang-nursery/stdsimd/issues/362#issuecomment-372774371 -//! and the associated LLVM bug: -//! https://bugs.llvm.org/show_bug.cgi?id=36702 - -#![allow(unused)] - -use coresimd::simd::*; - -pub trait All: ::marker::Sized { - unsafe fn all(self) -> bool; -} - -pub trait Any: ::marker::Sized { - unsafe fn any(self) -> bool; -} - -// By default we use the simd_reduce_{all,any} intrinsics, which produces -// sub-optimal code, except on aarch64 where that intrinsic is broken -// due to https://bugs.llvm.org/show_bug.cgi?id=36796 so we just use -// full-blown bitwise and/or reduction there. -macro_rules! 
default_impl { - ($id:ident) => { - impl All for $id { - #[inline] - unsafe fn all(self) -> bool { - #[cfg(not(target_arch = "aarch64"))] - { - use coresimd::simd_llvm::simd_reduce_all; - simd_reduce_all(self) - } - #[cfg(target_arch = "aarch64")] - { - // FIXME: Broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - self.and() - } - } - } - - impl Any for $id { - #[inline] - unsafe fn any(self) -> bool { - #[cfg(not(target_arch = "aarch64"))] - { - use coresimd::simd_llvm::simd_reduce_any; - simd_reduce_any(self) - } - #[cfg(target_arch = "aarch64")] - { - // FIXME: Broken on AArch64 - // https://bugs.llvm.org/show_bug.cgi?id=36796 - self.or() - } - } - } - }; -} - -// On x86 both SSE2 and AVX2 provide movemask instructions that can be used -// here. The AVX2 instructions aren't necessarily better than the AVX -// instructions below, so they aren't implemented here. -// -// FIXME: for mask generated from f32x4 LLVM6 emits pmovmskb but should emit -// movmskps. Since the masks don't track whether they were produced by integer -// or floating point vectors, we can't currently work around this yet. The -// performance impact for this shouldn't be large, but this is filled as: -// https://bugs.llvm.org/show_bug.cgi?id=37087 -#[cfg( - all( - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "sse2" - ) -)] -macro_rules! x86_128_sse2_movemask_impl { - ($id:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn all(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm_movemask_epi8; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm_movemask_epi8; - // _mm_movemask_epi8(a) creates a 16bit mask containing the - // most significant bit of each byte of `a`. If all - // bits are set, then all 16 lanes of the mask are - // true. 
- _mm_movemask_epi8(::mem::transmute(self)) - == u16::max_value() as i32 - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn any(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm_movemask_epi8; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm_movemask_epi8; - - _mm_movemask_epi8(::mem::transmute(self)) != 0 - } - } - }; -} - -// On x86 with AVX we use _mm256_testc_si256 and _mm256_testz_si256. -// -// FIXME: for masks generated from floating point vectors one should use -// x86_mm256_testc_ps, x86_mm256_testz_ps, x86_mm256_testc_pd, -// x86_mm256_testz_pd.Since the masks don't track whether they were produced by -// integer or floating point vectors, we can't currently work around this yet. -// -// TODO: investigate perf impact and fill LLVM bugs as necessary. -#[cfg( - all( - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "avx" - ) -)] -macro_rules! x86_256_avx_test_impl { - ($id:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "avx")] - unsafe fn all(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm256_testc_si256; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm256_testc_si256; - _mm256_testc_si256( - ::mem::transmute(self), - ::mem::transmute($id::splat(true)), - ) != 0 - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "avx")] - unsafe fn any(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm256_testz_si256; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm256_testz_si256; - _mm256_testz_si256( - ::mem::transmute(self), - ::mem::transmute(self), - ) == 0 - } - } - }; -} - -// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by -// executing the algorithm for 128-bit on the higher and lower elements of the -// vector independently. 
-#[cfg( - all( - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "sse2" - ) -)] -macro_rules! x86_256_sse2_impl { - ($id:ident, $v128:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn all(self) -> bool { - unsafe { - union U { - halves: ($v128, $v128), - vec: $id, - } - let halves = U { vec: self }.halves; - halves.0.all() && halves.1.all() - } - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn any(self) -> bool { - unsafe { - union U { - halves: ($v128, $v128), - vec: $id, - } - let halves = U { vec: self }.halves; - halves.0.any() || halves.1.any() - } - } - } - }; -} - -// Implementation for 64-bit wide masks on x86. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -macro_rules! x86_64_mmx_movemask_impl { - ($id:ident, $vec128:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "mmx")] - unsafe fn all(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm_movemask_pi8; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm_movemask_pi8; - // _mm_movemask_pi8(a) creates an 8bit mask containing the most - // significant bit of each byte of `a`. If all bits are set, - // then all 8 lanes of the mask are true. - _mm_movemask_pi8(::mem::transmute(self)) - == u8::max_value() as i32 - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "mmx")] - unsafe fn any(self) -> bool { - #[cfg(target_arch = "x86")] - use coresimd::arch::x86::_mm_movemask_pi8; - #[cfg(target_arch = "x86_64")] - use coresimd::arch::x86_64::_mm_movemask_pi8; - - _mm_movemask_pi8(::mem::transmute(self)) != 0 - } - } - }; -} - -// Implementation for 128-bit wide masks on x86 -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -macro_rules! x86_128_impl { - ($id:ident) => { - cfg_if! 
{ - if #[cfg(target_feature = "sse2")] { - x86_128_sse2_movemask_impl!($id); - } else { - default_impl!($id); - } - } - }; -} - -// Implementation for 256-bit wide masks on x86 -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -macro_rules! x86_256_impl { - ($id:ident, $half_id:ident) => { - cfg_if! { - if #[cfg(target_feature = "avx")] { - x86_256_avx_test_impl!($id); - } else if #[cfg(target_feature = "sse2")] { - x86_256_sse2_impl!($id, $half_id); - } else { - default_impl!($id); - } - } - }; -} - -// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding -// minimum/maximum of adjacent pairs) for 64-bit wide two-element vectors. -#[cfg( - all(target_arch = "arm", target_feature = "v7", target_feature = "neon") -)] -macro_rules! arm_64_x2_v7_neon_impl { - ($id:ident, $vpmin:ident, $vpmax:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn all(self) -> bool { - use coresimd::arch::arm::$vpmin; - use mem::transmute; - // pmin((a, b), (-,-)) => (b, -).0 => b - let tmp: $id = - transmute($vpmin(transmute(self), ::mem::uninitialized())); - tmp.extract(0) - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn any(self) -> bool { - use coresimd::arch::arm::$vpmax; - use mem::transmute; - // pmax((a, b), (-,-)) => (b, -).0 => b - let tmp: $id = - transmute($vpmax(transmute(self), ::mem::uninitialized())); - tmp.extract(0) - } - } - }; -} - -// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding -// minimum/maximum of adjacent pairs) for 64-bit wide four-element vectors. -#[cfg( - all(target_arch = "arm", target_feature = "v7", target_feature = "neon") -)] -macro_rules! 
arm_64_x4_v7_neon_impl { - ($id:ident, $vpmin:ident, $vpmax:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn all(self) -> bool { - use coresimd::arch::arm::$vpmin; - use mem::transmute; - // tmp = pmin((a, b, c, d), (-,-,-,-)) => (a, c, -, -) - let tmp = $vpmin(transmute(self), ::mem::uninitialized()); - // tmp = pmin((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c - let tmp: $id = transmute($vpmin(tmp, ::mem::uninitialized())); - tmp.extract(0) - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn any(self) -> bool { - use coresimd::arch::arm::$vpmax; - use mem::transmute; - // tmp = pmax((a, b, c, d), (-,-,-,-)) => (a, c, -, -) - let tmp = $vpmax(transmute(self), ::mem::uninitialized()); - // tmp = pmax((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c - let tmp: $id = transmute($vpmax(tmp, ::mem::uninitialized())); - tmp.extract(0) - } - } - }; -} - -// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding -// minimum/maximum of adjacent pairs) for 64-bit wide eight-element vectors. -#[cfg( - all(target_arch = "arm", target_feature = "v7", target_feature = "neon") -)] -macro_rules! 
arm_64_x8_v7_neon_impl { - ($id:ident, $vpmin:ident, $vpmax:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn all(self) -> bool { - use coresimd::arch::arm::$vpmin; - use mem::transmute; - // tmp = pmin( - // (a, b, c, d, e, f, g, h), - // (-, -, -, -, -, -, -, -) - // ) => (a, c, e, g, -, -, -, -) - let tmp = $vpmin(transmute(self), ::mem::uninitialized()); - // tmp = pmin( - // (a, c, e, g, -, -, -, -), - // (-, -, -, -, -, -, -, -) - // ) => (c, g, -, -, -, -, -, -) - let tmp = $vpmin(tmp, ::mem::uninitialized()); - // tmp = pmin( - // (c, g, -, -, -, -, -, -), - // (-, -, -, -, -, -, -, -) - // ) => (g, -, -, -, -, -, -, -).0 => g - let tmp: $id = transmute($vpmin(tmp, ::mem::uninitialized())); - tmp.extract(0) - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn any(self) -> bool { - use coresimd::arch::arm::$vpmax; - use mem::transmute; - // tmp = pmax( - // (a, b, c, d, e, f, g, h), - // (-, -, -, -, -, -, -, -) - // ) => (a, c, e, g, -, -, -, -) - let tmp = $vpmax(transmute(self), ::mem::uninitialized()); - // tmp = pmax( - // (a, c, e, g, -, -, -, -), - // (-, -, -, -, -, -, -, -) - // ) => (c, g, -, -, -, -, -, -) - let tmp = $vpmax(tmp, ::mem::uninitialized()); - // tmp = pmax( - // (c, g, -, -, -, -, -, -), - // (-, -, -, -, -, -, -, -) - // ) => (g, -, -, -, -, -, -, -).0 => g - let tmp: $id = transmute($vpmax(tmp, ::mem::uninitialized())); - tmp.extract(0) - } - } - }; -} - -// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding -// minimum/maximum of adjacent pairs) for 64-bit or 128-bit wide vectors with -// more than two elements. -#[cfg( - all(target_arch = "arm", target_feature = "v7", target_feature = "neon") -)] -macro_rules! 
arm_128_v7_neon_impl { - ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn all(self) -> bool { - use coresimd::arch::arm::$vpmin; - use mem::transmute; - union U { - halves: ($half, $half), - vec: $id, - } - let halves = U { vec: self }.halves; - let h: $half = transmute($vpmin( - transmute(halves.0), - transmute(halves.1), - )); - h.all() - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "v7,neon")] - unsafe fn any(self) -> bool { - use coresimd::arch::arm::$vpmax; - use mem::transmute; - union U { - halves: ($half, $half), - vec: $id, - } - let halves = U { vec: self }.halves; - let h: $half = transmute($vpmax( - transmute(halves.0), - transmute(halves.1), - )); - h.any() - } - } - }; -} - -// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector -// min/max) for 128-bit wide vectors. -#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] -macro_rules! aarch64_128_neon_impl { - ($id:ident, $vmin:ident, $vmax:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "neon")] - unsafe fn all(self) -> bool { - use coresimd::arch::aarch64::$vmin; - $vmin(::mem::transmute(self)) != 0 - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "neon")] - unsafe fn any(self) -> bool { - use coresimd::arch::aarch64::$vmax; - $vmax(::mem::transmute(self)) != 0 - } - } - }; -} - -// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector -// min/max) for 64-bit wide vectors. -// -// This impl duplicates the 64-bit vector into a 128-bit one and calls -// all/any on that. -#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] -macro_rules! 
aarch64_64_neon_impl { - ($id:ident, $vec128:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "neon")] - unsafe fn all(self) -> bool { - union U { - halves: ($id, $id), - vec: $vec128, - } - U { - halves: (self, self), - }.vec - .all() - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "neon")] - unsafe fn any(self) -> bool { - union U { - halves: ($id, $id), - vec: $vec128, - } - U { - halves: (self, self), - }.vec - .any() - } - } - }; -} - -macro_rules! impl_mask_all_any { - // 64-bit wide masks - (m8x8) => { - cfg_if! { - if #[cfg(target_arch = "x86_64")] { - x86_64_mmx_movemask_impl!(m8x8, m8x16); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_64_x8_v7_neon_impl!(m8x8, vpmin_u8, vpmax_u8); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_64_neon_impl!(m8x8, m8x16); - } else { - default_impl!(m8x8); - } - } - }; - (m16x4) => { - cfg_if! { - if #[cfg(target_arch = "x86_64")] { - x86_64_mmx_movemask_impl!(m16x4, m16x8); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_64_x4_v7_neon_impl!(m16x4, vpmin_u16, vpmax_u16); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_64_neon_impl!(m16x4, m16x8); - } else { - default_impl!(m16x4); - } - } - }; - (m32x2) => { - cfg_if! { - if #[cfg(all(target_arch = "x86_64", not(target_os = "macos")))] { - // FIXME: this fails on travis-ci osx build bots. - x86_64_mmx_movemask_impl!(m32x2, m32x4); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_64_x2_v7_neon_impl!(m32x2, vpmin_u32, vpmax_u32); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_64_neon_impl!(m32x2, m32x4); - } else { - default_impl!(m32x2); - } - } - }; - // 128-bit wide masks - (m8x16) => { - cfg_if! 
{ - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_128_impl!(m8x16); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); - } else { - default_impl!(m8x16); - } - } - }; - (m16x8) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_128_impl!(m16x8); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); - } else { - default_impl!(m16x8); - } - } - }; - (m32x4) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_128_impl!(m32x4); - } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] { - arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); - } else { - default_impl!(m32x4); - } - } - }; - (m64x2) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_128_impl!(m64x2); - } else { - default_impl!(m64x2); - } - } - }; - // 256-bit wide masks: - (m8x32) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_256_impl!(m8x32, m8x16); - } else { - default_impl!(m8x32); - } - } - }; - (m16x16) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_256_impl!(m16x16, m16x8); - } else { - default_impl!(m16x16); - } - } - }; - (m32x8) => { - cfg_if! 
{ - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_256_impl!(m32x8, m32x4); - } else { - default_impl!(m32x8); - } - } - }; - (m64x4) => { - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - x86_256_impl!(m64x4, m64x2); - } else { - default_impl!(m64x4); - } - } - }; - // Fallback to LLVM's default code-generation: - ($id:ident) => { default_impl!($id); }; -} - -vector_impl!( - [impl_mask_all_any, m1x8], - [impl_mask_all_any, m1x16], - [impl_mask_all_any, m1x32], - [impl_mask_all_any, m1x64], - [impl_mask_all_any, m8x2], - [impl_mask_all_any, m8x4], - [impl_mask_all_any, m8x8], - [impl_mask_all_any, m8x16], - [impl_mask_all_any, m8x32], - [impl_mask_all_any, m16x2], - [impl_mask_all_any, m16x4], - [impl_mask_all_any, m16x8], - [impl_mask_all_any, m16x16], - [impl_mask_all_any, m32x2], - [impl_mask_all_any, m32x4], - [impl_mask_all_any, m32x8], - [impl_mask_all_any, m64x2], - [impl_mask_all_any, m64x4] -); diff --git a/coresimd/ppsv/codegen/mod.rs b/coresimd/ppsv/codegen/mod.rs deleted file mode 100644 index 6e9a73fe5c..0000000000 --- a/coresimd/ppsv/codegen/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Work arounds for code generation issues - -#[cfg(target_arch = "aarch64")] -pub mod wrapping; - -pub mod masks_reductions; -pub mod swap_bytes; - -pub mod abs; -pub mod cos; -pub mod fma; -pub mod sin; -pub mod sqrt; diff --git a/coresimd/ppsv/codegen/sin.rs b/coresimd/ppsv/codegen/sin.rs deleted file mode 100644 index c13ae31d34..0000000000 --- a/coresimd/ppsv/codegen/sin.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! 
Exact vector sin -#![allow(dead_code)] -use coresimd::simd::*; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.sin.f32"] - fn sin_f32(x: f32) -> f32; - #[link_name = "llvm.sin.f64"] - fn sin_f64(x: f64) -> f64; - - #[link_name = "llvm.sin.v2f32"] - fn sin_v2f32(x: f32x2) -> f32x2; - #[link_name = "llvm.sin.v4f32"] - fn sin_v4f32(x: f32x4) -> f32x4; - #[link_name = "llvm.sin.v8f32"] - fn sin_v8f32(x: f32x8) -> f32x8; - #[link_name = "llvm.sin.v16f32"] - fn sin_v16f32(x: f32x16) -> f32x16; - #[link_name = "llvm.sin.v2f64"] - fn sin_v2f64(x: f64x2) -> f64x2; - #[link_name = "llvm.sin.v4f64"] - fn sin_v4f64(x: f64x4) -> f64x4; - #[link_name = "llvm.sin.v8f64"] - fn sin_v8f64(x: f64x8) -> f64x8; -} - -pub(crate) trait FloatSin { - fn sin(self) -> Self; -} - -trait RawSin { - fn raw_sin(self) -> Self; -} - -impl RawSin for f32 { - fn raw_sin(self) -> Self { - unsafe { sin_f32(self) } - } -} - -impl RawSin for f64 { - fn raw_sin(self) -> Self { - unsafe { sin_f64(self) } - } -} - -macro_rules! impl_fsin { - ($id:ident : $fn:ident) => { - #[cfg(not(target_arch = "s390x"))] - impl FloatSin for $id { - fn sin(self) -> Self { - unsafe { $fn(self) } - } - } - - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501 - #[cfg(target_arch = "s390x")] - impl FloatSin for $id { - fn sin(self) -> Self { - let mut v = $id::splat(0.); - for i in 0..$id::lanes() { - v = v.replace(i, self.extract(i).raw_sin()) - } - v - } - } - - }; -} - -impl_fsin!(f32x2: sin_v2f32); -impl_fsin!(f32x4: sin_v4f32); -impl_fsin!(f32x8: sin_v8f32); -impl_fsin!(f32x16: sin_v16f32); -impl_fsin!(f64x2: sin_v2f64); -impl_fsin!(f64x4: sin_v4f64); -impl_fsin!(f64x8: sin_v8f64); diff --git a/coresimd/ppsv/codegen/sqrt.rs b/coresimd/ppsv/codegen/sqrt.rs deleted file mode 100644 index 6a18589e71..0000000000 --- a/coresimd/ppsv/codegen/sqrt.rs +++ /dev/null @@ -1,77 +0,0 @@ -//! 
Exact vector square-root -#![allow(dead_code)] -use coresimd::simd::*; - -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.sqrt.f32"] - fn sqrt_f32(x: f32) -> f32; - #[link_name = "llvm.sqrt.f64"] - fn sqrt_f64(x: f64) -> f64; - - #[link_name = "llvm.sqrt.v2f32"] - fn sqrt_v2f32(x: f32x2) -> f32x2; - #[link_name = "llvm.sqrt.v4f32"] - fn sqrt_v4f32(x: f32x4) -> f32x4; - #[link_name = "llvm.sqrt.v8f32"] - fn sqrt_v8f32(x: f32x8) -> f32x8; - #[link_name = "llvm.sqrt.v16f32"] - fn sqrt_v16f32(x: f32x16) -> f32x16; - #[link_name = "llvm.sqrt.v2f64"] - fn sqrt_v2f64(x: f64x2) -> f64x2; - #[link_name = "llvm.sqrt.v4f64"] - fn sqrt_v4f64(x: f64x4) -> f64x4; - #[link_name = "llvm.sqrt.v8f64"] - fn sqrt_v8f64(x: f64x8) -> f64x8; -} - -pub(crate) trait FloatSqrt { - fn sqrt(self) -> Self; -} - -trait RawSqrt { - fn raw_sqrt(self) -> Self; -} - -impl RawSqrt for f32 { - fn raw_sqrt(self) -> Self { - unsafe { sqrt_f32(self) } - } -} - -impl RawSqrt for f64 { - fn raw_sqrt(self) -> Self { - unsafe { sqrt_f64(self) } - } -} - -macro_rules! impl_fsqrt { - ($id:ident : $fn:ident) => { - #[cfg(not(target_arch = "s390x"))] - impl FloatSqrt for $id { - fn sqrt(self) -> Self { - unsafe { $fn(self) } - } - } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/501 - #[cfg(target_arch = "s390x")] - impl FloatSqrt for $id { - fn sqrt(self) -> Self { - let mut v = $id::splat(0.); - for i in 0..$id::lanes() { - v = v.replace(i, self.extract(i).raw_sqrt()); - } - v - } - } - - }; -} - -impl_fsqrt!(f32x2: sqrt_v2f32); -impl_fsqrt!(f32x4: sqrt_v4f32); -impl_fsqrt!(f32x8: sqrt_v8f32); -impl_fsqrt!(f32x16: sqrt_v16f32); -impl_fsqrt!(f64x2: sqrt_v2f64); -impl_fsqrt!(f64x4: sqrt_v4f64); -impl_fsqrt!(f64x8: sqrt_v8f64); diff --git a/coresimd/ppsv/codegen/swap_bytes.rs b/coresimd/ppsv/codegen/swap_bytes.rs deleted file mode 100644 index e9d2918737..0000000000 --- a/coresimd/ppsv/codegen/swap_bytes.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Horizontal mask reductions. 
- -#![allow(unused)] - -use coresimd::simd::*; - -pub trait SwapBytes { - unsafe fn swap_bytes(self) -> Self; -} - -// TODO: switch to shuffle API once it lands -// TODO: investigate `llvm.bswap` -macro_rules! impl_swap_bytes { - (v16, $($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle2; - - const INDICES: [u32; 2] = [1, 0]; - simd_shuffle2(self, self, INDICES) - } - } - )+}; - (v32, $($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle4; - - const INDICES: [u32; 4] = [3, 2, 1, 0]; - let vec8 = u8x4::from_bits(self); - let shuffled: u8x4 = simd_shuffle4(vec8, vec8, INDICES); - $id::from_bits(shuffled) - } - } - )+}; - (v64, $($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle8; - - const INDICES: [u32; 8] = [7, 6, 5, 4, 3, 2, 1, 0]; - let vec8 = u8x8::from_bits(self); - let shuffled: u8x8 = simd_shuffle8(vec8, vec8, INDICES); - $id::from_bits(shuffled) - } - } - )+}; - (v128, $($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle16; - - const INDICES: [u32; 16] = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]; - let vec8 = u8x16::from_bits(self); - let shuffled: u8x16 = simd_shuffle16(vec8, vec8, INDICES); - $id::from_bits(shuffled) - } - } - )+}; - (v256, $($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle32; - - const INDICES: [u32; 32] = [ - 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, - ]; - let vec8 = u8x32::from_bits(self); - let shuffled: u8x32 = simd_shuffle32(vec8, vec8, INDICES); - $id::from_bits(shuffled) - } - } - )+}; - (v512, 
$($id:ident,)+) => {$( - impl SwapBytes for $id { - #[inline] - unsafe fn swap_bytes(self) -> Self { - use coresimd::simd_llvm::simd_shuffle64; - - const INDICES: [u32; 64] = [ - 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, - 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, - 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, - ]; - let vec8 = u8x64::from_bits(self); - let shuffled: u8x64 = simd_shuffle64(vec8, vec8, INDICES); - $id::from_bits(shuffled) - } - } - )+}; -} - -vector_impl!( - [impl_swap_bytes, v16, u8x2, i8x2,], - [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,], - [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,], - [ - impl_swap_bytes, - v128, - u8x16, - i8x16, - u16x8, - i16x8, - u32x4, - i32x4, - u64x2, - i64x2, - ], - [ - impl_swap_bytes, - v256, - u8x32, - i8x32, - u16x16, - i16x16, - u32x8, - i32x8, - u64x4, - i64x4, - ], - [ - impl_swap_bytes, - v512, - u8x64, - i8x64, - u16x32, - i16x32, - u32x16, - i32x16, - u64x8, - i64x8, - ] -); diff --git a/coresimd/ppsv/codegen/wrapping.rs b/coresimd/ppsv/codegen/wrapping.rs deleted file mode 100644 index 0e2f306eb0..0000000000 --- a/coresimd/ppsv/codegen/wrapping.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Used by the wrapping_sum and wrapping_product algorithms for AArch64. - -pub(crate) trait Wrapping { - fn add(self, other: Self) -> Self; - fn mul(self, other: Self) -> Self; -} - -macro_rules! int_impl { - ($id:ident) => { - impl Wrapping for $id { - fn add(self, other: Self) -> Self { - self.wrapping_add(other) - } - fn mul(self, other: Self) -> Self { - self.wrapping_mul(other) - } - } - }; -} -int_impl!(i8); -int_impl!(i16); -int_impl!(i32); -int_impl!(i64); -int_impl!(u8); -int_impl!(u16); -int_impl!(u32); -int_impl!(u64); - -macro_rules! 
float_impl { - ($id:ident) => { - impl Wrapping for $id { - fn add(self, other: Self) -> Self { - self + other - } - fn mul(self, other: Self) -> Self { - self * other - } - } - }; -} -float_impl!(f32); -float_impl!(f64); diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs deleted file mode 100644 index eb2ba49541..0000000000 --- a/coresimd/ppsv/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! Portable Packed-SIMD Vectors. -//! -//! These types are: -//! -//! * portable: work correctly on all architectures, -//! * packed: have a size fixed at compile-time. -//! -//! These two terms are the opposites of: -//! -//! * architecture-specific: only available in a particular architecture, -//! * scalable: the vector's size is dynamic. -//! -//! This module is structured as follows: -//! -//! * `api`: defines the API of the portable packed vector types. -//! * `v{width}`: defines the portable vector types for a particular `width`. -//! -//! The portable packed vector types are named using the following schema: -//! `{t}{l_w}x{l_n}`: -//! -//! * `t`: type - single letter corresponding to the following Rust literal -//! types: -//! * `i`: signed integer -//! * `u`: unsigned integer -//! * `f`: floating point -//! * `m`: vector mask -//! * `l_w`: lane width in bits -//! * `l_n`: number of lanes -//! -//! For example, `f32x4` is a vector type containing four 32-bit wide -//! floating-point numbers. The total width of this type is 32 bit times 4 -//! lanes, that is, 128 bits, and is thus defined in the `v128` module. - -#[macro_use] -mod api; - -mod v128; -mod v16; -mod v256; -mod v32; -mod v512; -mod v64; - -pub use self::v128::*; -pub use self::v16::*; -pub use self::v256::*; -pub use self::v32::*; -pub use self::v512::*; -pub use self::v64::*; - -/// Safe lossless bitwise conversion from `T` to `Self`. -pub trait FromBits: ::marker::Sized { - /// Safe lossless bitwise from `T` to `Self`. - fn from_bits(T) -> Self; -} - -/// Safe lossless bitwise conversion from `Self` to `T`. 
-pub trait IntoBits: ::marker::Sized { - /// Safe lossless bitwise transmute from `self` to `T`. - fn into_bits(self) -> T; -} - -// FromBits implies IntoBits. -impl IntoBits for T -where - U: FromBits, -{ - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449 - #[cfg_attr( - any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always) - )] - #[cfg_attr( - not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline - )] - fn into_bits(self) -> U { - debug_assert!(::mem::size_of::() == ::mem::size_of::()); - U::from_bits(self) - } -} - -// FromBits (and thus IntoBits) is reflexive. -impl FromBits for T { - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449 - #[cfg_attr( - any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always) - )] - #[cfg_attr( - not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline - )] - fn from_bits(t: Self) -> Self { - t - } -} - -/// Work arounds code generation issues. -mod codegen; diff --git a/coresimd/ppsv/v128.rs b/coresimd/ppsv/v128.rs deleted file mode 100644 index 08593bcef7..0000000000 --- a/coresimd/ppsv/v128.rs +++ /dev/null @@ -1,550 +0,0 @@ -//! 128-bit wide portable packed vector types. - -simd_i_ty! { - i8x16: 16, i8, m8x16, i8x16_tests, test_v128 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 128-bit vector with 16 `i8` lanes. -} - -simd_u_ty! { - u8x16: 16, u8, m8x16, u8x16_tests, test_v128 | - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | - x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 128-bit vector with 16 `u8` lanes. -} - -simd_m_ty! { - m8x16: 16, i8, m8x16_tests, test_v128 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 128-bit vector mask with 16 lanes. -} - -simd_i_ty! 
{ - i16x8: 8, i16, m16x8, i16x8_tests, test_v128 | - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 128-bit vector with 8 `i16` lanes. -} - -simd_u_ty! { - u16x8: 8, u16, m16x8, u16x8_tests, test_v128 | - u16, u16, u16, u16, u16, u16, u16, u16 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 128-bit vector with 8 `u16` lanes. -} - -simd_m_ty! { - m16x8: 8, i16, m16x8_tests, test_v128 | - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 128-bit vector mask with 8 lanes. -} - -simd_i_ty! { - i32x4: 4, i32, m32x4, i32x4_tests, test_v128 | - i32, i32, i32, i32 | - x0, x1, x2, x3 | - /// A 128-bit vector with 4 `i32` lanes. -} - -simd_u_ty! { - u32x4: 4, u32, m32x4, u32x4_tests, test_v128 | - u32, u32, u32, u32 | - x0, x1, x2, x3 | - /// A 128-bit vector with 4 `u32` lanes. -} - -simd_f_ty! { - f32x4: 4, f32, m32x4, f32x4_tests, test_v128 | - f32, f32, f32, f32 | - x0, x1, x2, x3 | - /// A 128-bit vector with 4 `f32` lanes. -} - -simd_m_ty! { - m32x4: 4, i32, m32x4_tests, test_v128 | - i32, i32, i32, i32 | - x0, x1, x2, x3 | - /// A 128-bit vector mask with 4 lanes. -} - -simd_i_ty! { - i64x2: 2, i64, m64x2, i64x2_tests, test_v128 | - i64, i64 | - x0, x1 | - /// A 128-bit vector with 2 `u64` lanes. -} - -simd_u_ty! { - u64x2: 2, u64, m64x2, u64x2_tests, test_v128 | - u64, u64 | - x0, x1 | - /// A 128-bit vector with 2 `u64` lanes. -} - -simd_f_ty! { - f64x2: 2, f64, m64x2, f64x2_tests, test_v128 | - f64, f64 | - x0, x1 | - /// A 128-bit vector with 2 `f64` lanes. -} - -simd_m_ty! { - m64x2: 2, i64, m64x4_tests, test_v128 | - i64, i64 | - x0, x1 | - /// A 128-bit vector mask with 2 lanes. -} - -#[cfg(target_arch = "x86")] -use coresimd::arch::x86::{__m128, __m128d, __m128i}; -#[cfg(target_arch = "x86_64")] -use coresimd::arch::x86_64::{__m128, __m128d, __m128i}; - -macro_rules! 
from_bits_x86 { - ($id:ident, $elem_ty:ident, $test_mod:ident) => { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - impl_from_bits_!($id: __m128, __m128i, __m128d); - }; -} - -#[cfg( - all(target_arch = "arm", target_feature = "neon", target_feature = "v7") -)] -use coresimd::arch::arm::{ - // FIXME: float16x8_t, - float32x4_t, - int16x8_t, - int32x4_t, - int64x2_t, - int8x16_t, - poly16x8_t, - poly8x16_t, - uint16x8_t, - uint32x4_t, - uint64x2_t, - uint8x16_t, -}; - -#[cfg(target_arch = "aarch64")] -use coresimd::arch::aarch64::{ - // FIXME: float16x8_t, - float32x4_t, - float64x2_t, - int16x8_t, - int32x4_t, - int64x2_t, - int8x16_t, - poly16x8_t, - poly8x16_t, - uint16x8_t, - uint32x4_t, - uint64x2_t, - uint8x16_t, -}; - -macro_rules! from_bits_arm { - ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => { - #[cfg( - any( - all( - target_arch = "arm", - target_feature = "neon", - target_feature = "v7" - ), - target_arch = "aarch64" - ) - )] - impl_from_bits_!( - $id: int8x16_t, - uint8x16_t, - int16x8_t, - uint16x8_t, - int32x4_t, - uint32x4_t, - int64x2_t, - uint64x2_t, - // FIXME: float16x8_t, - float32x4_t, - poly8x16_t, - poly16x8_t - ); - #[cfg(target_arch = "aarch64")] - impl_from_bits_!($id: float64x2_t); - }; -} - -impl_from_bits!( - u64x2: u64, - u64x2_from_bits, - test_v128 | i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(u64x2, u64, u64x2_from_bits_x86); -from_bits_arm!(u64x2, u64, u64x2_from_bits_arm, u64x2_from_bits_aarch64); - -impl_from_bits!( - i64x2: i64, - i64x2_from_bits, - test_v128 | u64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(i64x2, i64, i64x2_from_bits_x86); -from_bits_arm!(i64x2, i64, i64x2_from_bits_arm, i64x2_from_bits_aarch64); - -impl_from_bits!( - f64x2: f64, - f64x2_from_bits, - test_v128 | i64x2, - u64x2, - m64x2, - u32x4, - 
i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(f64x2, f64, f64x2_from_bits_x86); -from_bits_arm!(f64x2, f64, f64x2_from_bits_arm, f64x2_from_bits_aarch64); - -impl_from_bits!( - u32x4: u32, - u32x4_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(u32x4, u32, u32x4_from_bits_x86); -from_bits_arm!(u32x4, u32, u32x4_from_bits_arm, u32x4_from_bits_aarch64); - -impl_from_bits!( - i32x4: i32, - i32x4_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(i32x4, i32, i32x4_from_bits_x86); -from_bits_arm!(i32x4, i32, i32x4_from_bits_arm, i32x4_from_bits_aarch64); - -impl_from_bits!( - f32x4: f32, - f32x4_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - i32x4, - u32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(f32x4, f32, f32x4_from_bits_x86); -from_bits_arm!(f32x4, f32, f32x4_from_bits_arm, f32x4_from_bits_aarch64); - -impl_from_bits!( - u16x8: u16, - u16x8_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(u16x8, u16, u16x8_from_bits_x86); -from_bits_arm!(u16x8, u16, u16x8_from_bits_arm, u16x8_from_bits_aarch64); - -impl_from_bits!( - i16x8: i16, - i16x8_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -from_bits_x86!(i16x8, i16, i16x8_from_bits_x86); -from_bits_arm!(i16x8, i16, i16x8_from_bits_arm, i16x8_from_bits_aarch64); - -impl_from_bits!( - u8x16: u8, - u8x16_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - i8x16, - m8x16 -); -from_bits_x86!(u8x16, u8, u8x16_from_bits_x86); 
-from_bits_arm!(u8x16, u8, u8x16_from_bits_arm, u8x16_from_bits_aarch64); - -impl_from_bits!( - i8x16: i8, - i8x16_from_bits, - test_v128 | u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - m8x16 -); -from_bits_x86!(i8x16, i8, i8x16_from_bits_x86); -from_bits_arm!(i8x16, i8, i8x16_from_bits_arm, i8x16_from_bits_aarch64); - -impl_from!( - f64x2: f64, - f64x2_from, - test_v128 | f32x2, - u64x2, - i64x2, - m64x2, - u32x2, - i32x2, - m32x2, - u16x2, - i16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); -impl_from!( - u64x2: u64, - u64x2_from, - test_v128 | f64x2, - i64x2, - m64x2, - f32x2, - i32x2, - u32x2, - m32x2, - i16x2, - u16x2, - m16x2, - i8x2, - u8x2, - m8x2 -); -impl_from!( - i64x2: i64, - i64x2_from, - test_v128 | f64x2, - u64x2, - m64x2, - i32x2, - u32x2, - f32x2, - m32x2, - i16x2, - u16x2, - m16x2, - i8x2, - u8x2, - m8x2 -); -impl_from!( - u32x4: u32, - u32x4_from, - test_v128 | f64x4, - u64x4, - i64x4, - m64x4, - f32x4, - i32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - i32x4: i32, - i32x4_from, - test_v128 | f64x4, - u64x4, - i64x4, - m64x4, - f32x4, - u32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - f32x4: f32, - f32x4_from, - test_v128 | f64x4, - u64x4, - i64x4, - m64x4, - u32x4, - i32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); - -impl_from!( - i16x8: i16, - i16x8_from, - test_v128 | f64x8, - u64x8, - i64x8, - m1x8, - f32x8, - u32x8, - i32x8, - m32x8, - u16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - u16x8: u16, - u16x8_from, - test_v128 | f64x8, - u64x8, - i64x8, - m1x8, - f32x8, - u32x8, - i32x8, - m32x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); - -impl_from!( - u8x16: u8, - u8x16_from, - test_v128 | i32x16, - u32x16, - f32x16, - m1x16, - i16x16, - u16x16, - m16x16, - i8x16, - m8x16 -); -impl_from!( - i8x16: i8, - i8x16_from, - test_v128 | i32x16, - u32x16, - f32x16, - m1x16, - i16x16, - u16x16, - 
m16x16, - u8x16, - m8x16 -); - -impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16); - -impl_from!(m16x8: i16, m16x8_from, test_v128 | m1x8, m32x8, m8x8); - -impl_from!(m32x4: i32, m32x4_from, test_v128 | m64x4, m16x4, m8x4); - -impl_from!(m64x2: i64, m64x2_from, test_v128 | m32x2, m16x2, m8x2); diff --git a/coresimd/ppsv/v16.rs b/coresimd/ppsv/v16.rs deleted file mode 100644 index a2baf8dfc5..0000000000 --- a/coresimd/ppsv/v16.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! 16-bit wide portable packed vector types. - -simd_i_ty! { - i8x2: 2, i8, m8x2, i8x2_tests, test_v16 | - i8, i8 | - x0, x1 | - /// A 16-bit wide vector with 2 `i8` lanes. -} - -simd_u_ty! { - u8x2: 2, u8, m8x2, u8x2_tests, test_v16 | - u8, u8 | - x0, x1 | - /// A 16-bit wide vector with 2 `u8` lanes. -} - -simd_m_ty! { - m8x2: 2, i8, m8x2_tests, test_v16 | - i8, i8 | - x0, x1 | - /// A 16-bit wide vector mask with 2 lanes. -} - -impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, m8x2); -impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, m8x2); - -impl_from!( - i8x2: i8, - i8x2_from, - test_v16 | f64x2, - u64x2, - m64x2, - i64x2, - f32x2, - u32x2, - i32x2, - m32x2, - u16x2, - m16x2, - u8x2, - m8x2 -); -impl_from!( - u8x2: u8, - u8x2_from, - test_v16 | f64x2, - u64x2, - i64x2, - m64x2, - f32x2, - u32x2, - i32x2, - m32x2, - u16x2, - m16x2, - i8x2, - m8x2 -); - -impl_from!(m8x2: i8, m8x2_from, test_v16 | m64x2, m32x2, m16x2); diff --git a/coresimd/ppsv/v256.rs b/coresimd/ppsv/v256.rs deleted file mode 100644 index c68ec9118e..0000000000 --- a/coresimd/ppsv/v256.rs +++ /dev/null @@ -1,472 +0,0 @@ -//! 256-bit wide portable packed vector types. - -simd_i_ty! 
{ - i8x32: 32, i8, m8x32, i8x32_tests, test_v256 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 256-bit vector with 32 `i8` lanes. -} - -simd_u_ty! { - u8x32: 32, u8, m8x32, u8x32_tests, test_v256 | - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 256-bit vector with 32 `u8` lanes. -} - -simd_m_ty! { - m8x32: 32, i8, m8x32_tests, test_v256 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 256-bit vector mask with 32 lanes. -} - -simd_i_ty! { - i16x16: 16, i16, m16x16, i16x16_tests, test_v256 | - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 256-bit vector with 16 `i16` lanes. -} - -simd_u_ty! { - u16x16: 16, u16, m16x16, u16x16_tests, test_v256 | - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 256-bit vector with 16 `u16` lanes. -} - -simd_m_ty! 
{ - m16x16: 16, i16, m16x16_tests, test_v256 | - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 256-bit vector mask with 16 lanes. -} - -simd_i_ty! { - i32x8: 8, i32, m32x8, i32x8_tests, test_v256 | - i32, i32, i32, i32, i32, i32, i32, i32 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 256-bit vector with 8 `i32` lanes. -} - -simd_u_ty! { - u32x8: 8, u32, m32x8, u32x8_tests, test_v256 | - u32, u32, u32, u32, u32, u32, u32, u32 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 256-bit vector with 8 `u32` lanes. -} - -simd_f_ty! { - f32x8: 8, f32, m32x8, f32x8_tests, test_v256 | - f32, f32, f32, f32, f32, f32, f32, f32 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 256-bit vector with 8 `f32` lanes. -} - -simd_m_ty! { - m32x8: 8, i32, m32x8_tests, test_v256 | - i32, i32, i32, i32, i32, i32, i32, i32 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 256-bit vector mask with 8 lanes. -} - -simd_i_ty! { - i64x4: 4, i64, m64x4, i64x4_tests, test_v256 | - i64, i64, i64, i64 | - x0, x1, x2, x3 | - /// A 256-bit vector with 4 `i64` lanes. -} - -simd_u_ty! { - u64x4: 4, u64, m64x4, u64x4_tests, test_v256 | - u64, u64, u64, u64 | - x0, x1, x2, x3 | - /// A 256-bit vector with 4 `u64` lanes. -} - -simd_f_ty! { - f64x4: 4, f64, m64x4, f64x4_tests, test_v256 | - f64, f64, f64, f64 | - x0, x1, x2, x3 | - /// A 256-bit vector with 4 `f64` lanes. -} - -simd_m_ty! { - m64x4: 4, i64, m64x4_tests, test_v256 | - i64, i64, i64, i64 | - x0, x1, x2, x3 | - /// A 256-bit vector mask with 4 lanes. -} - -#[cfg(target_arch = "x86")] -use coresimd::arch::x86::{__m256, __m256d, __m256i}; -#[cfg(target_arch = "x86_64")] -use coresimd::arch::x86_64::{__m256, __m256d, __m256i}; - -macro_rules! 
from_bits_x86 { - ($id:ident, $elem_ty:ident, $test_mod:ident) => { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - impl_from_bits_!($id: __m256, __m256i, __m256d); - }; -} - -impl_from_bits!( - i8x32: i8, - i8x32_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - m8x32 -); -from_bits_x86!(i8x32, i8, i8x32_from_bits_x86); - -impl_from_bits!( - u8x32: u8, - u8x32_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - i8x32, - m8x32 -); -from_bits_x86!(u8x32, u8, u8x32_from_bits_x86); - -impl_from_bits!( - i16x16: i16, - i16x16_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(i16x16, i16, i16x16_from_bits_x86); - -impl_from_bits!( - u16x16: u16, - u16x16_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(u16x16, u16, u16x16_from_bits_x86); - -impl_from_bits!( - i32x8: i32, - i32x8_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(i32x8, i32, i32x8_from_bits_x86); - -impl_from_bits!( - u32x8: u32, - u32x8_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(u32x8, u32, u32x8_from_bits_x86); - -impl_from_bits!( - f32x8: f32, - f32x8_from_bits, - test_v256 | u64x4, - i64x4, - f64x4, - m64x4, - i32x8, - u32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(f32x8, f32, f32x8_from_bits_x86); - -impl_from_bits!( - i64x4: i64, - i64x4_from_bits, - test_v256 | u64x4, - f64x4, - m64x4, - i32x8, - u32x8, - f32x8, - m32x8, - u16x16, - 
i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(i64x4, i64, i64x4_from_bits_x86); - -impl_from_bits!( - u64x4: u64, - u64x4_from_bits, - test_v256 | i64x4, - f64x4, - m64x4, - i32x8, - u32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(u64x4, u64, u64x4_from_bits_x86); - -impl_from_bits!( - f64x4: f64, - f64x4_from_bits, - test_v256 | i64x4, - u64x4, - m64x4, - i32x8, - u32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -from_bits_x86!(f64x4, f64, f64x4_from_bits_x86); - -impl_from!( - f64x4: f64, - f64x4_from, - test_v256 | u64x4, - i64x4, - m64x4, - u32x4, - i32x4, - f32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - i64x4: i64, - i64x4_from, - test_v256 | u64x4, - f64x4, - m64x4, - u32x4, - i32x4, - f32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - u64x4: u64, - u64x4_from, - test_v256 | i64x4, - f64x4, - m64x4, - u32x4, - i32x4, - f32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - f32x8: f32, - f32x8_from, - test_v256 | u64x8, - i64x8, - f64x8, - m1x8, - u32x8, - i32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - i32x8: i32, - i32x8_from, - test_v256 | u64x8, - i64x8, - f64x8, - m1x8, - u32x8, - f32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - u32x8: u32, - u32x8_from, - test_v256 | u64x8, - i64x8, - f64x8, - m1x8, - i32x8, - f32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - i16x16: i16, - i16x16_from, - test_v256 | u32x16, - i32x16, - f32x16, - m1x16, - u16x16, - m16x16, - u8x16, - i8x16, - m8x16 -); -impl_from!( - u16x16: u16, - u16x16_from, - test_v256 | u32x16, - i32x16, - f32x16, - m1x16, - i16x16, - m16x16, - u8x16, - i8x16, - m8x16 -); -impl_from!( - i8x32: i8, - i8x32_from, - test_v256 | u16x32, - i16x32, - u8x32, - m8x32 -); -impl_from!( - u8x32: u8, 
- u8x32_from, - test_v256 | u16x32, - i16x32, - i8x32, - m8x32 -); - -impl_from!(m8x32: i8, m8x32_from, test_v256 | m1x32); - -impl_from!(m16x16: i16, m16x16_from, test_v256 | m1x16, m8x16); - -impl_from!(m32x8: i32, m32x8_from, test_v256 | m1x8, m16x8, m8x8); - -impl_from!(m64x4: i64, m64x4_from, test_v256 | m32x4, m16x4, m8x4); diff --git a/coresimd/ppsv/v32.rs b/coresimd/ppsv/v32.rs deleted file mode 100644 index ab56b5ad80..0000000000 --- a/coresimd/ppsv/v32.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! 32-bit wide portable packed vector types. - -simd_i_ty! { - i16x2: 2, i16, m16x2, i16x2_tests, test_v32 | - i16, i16 | - x0, x1 | - /// A 32-bit wide vector with 2 `i16` lanes. -} - -simd_u_ty! { - u16x2: 2, u16, m16x2, u16x2_tests, test_v32 | - u16, u16 | - x0, x1 | - /// A 32-bit wide vector with 2 `u16` lanes. -} - -simd_m_ty! { - m16x2: 2, i16, m16x2_tests, test_v32 | - i16, i16 | - x0, x1 | - /// A 32-bit wide vector mask with 2 lanes. -} - -simd_i_ty! { - i8x4: 4, i8, m8x4, i8x4_tests, test_v32 | - i8, i8, i8, i8 | - x0, x1, x2, x3 | - /// A 32-bit wide vector with 4 `i8` lanes. -} - -simd_u_ty! { - u8x4: 4, u8, m8x4, u8x4_tests, test_v32 | - u8, u8, u8, u8 | - x0, x1, x2, x3 | - /// A 32-bit wide vector with 4 `u8` lanes. -} - -simd_m_ty! { - m8x4: 4, i8, m8x4_tests, test_v32 | - i8, i8, i8, i8 | - x0, x1, x2, x3 | - /// A 32-bit wide vector mask 4 lanes. 
-} - -impl_from_bits!( - i16x2: i16, - i16x2_from_bits, - test_v32 | u16x2, - m16x2, - i8x4, - u8x4, - m8x4 -); -impl_from_bits!( - u16x2: u16, - u16x2_from_bits, - test_v32 | i16x2, - m16x2, - i8x4, - u8x4, - m8x4 -); -impl_from_bits!( - i8x4: i8, - i8x2_from_bits, - test_v32 | i16x2, - u16x2, - m16x2, - u8x4, - m8x4 -); -impl_from_bits!( - u8x4: u8, - u8x2_from_bits, - test_v32 | i16x2, - u16x2, - m16x2, - i8x4, - m8x4 -); - -impl_from!( - i16x2: i16, - i16x2_from, - test_v32 | f64x2, - u64x2, - i64x2, - m64x2, - f32x2, - u32x2, - i32x2, - m32x2, - u16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); - -impl_from!( - u16x2: u16, - u16x2_from, - test_v32 | f64x2, - u64x2, - i64x2, - m64x2, - f32x2, - u32x2, - i32x2, - m32x2, - i16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); - -impl_from!( - i8x4: i8, - i8x4_from, - test_v32 | f64x4, - u64x4, - i64x4, - m64x4, - u32x4, - i32x4, - f32x4, - m32x4, - u16x4, - i16x4, - m16x4, - u8x4, - m8x4 -); - -impl_from!( - u8x4: u8, - u8x4_from, - test_v32 | f64x4, - u64x4, - i64x4, - m64x4, - u32x4, - i32x4, - f32x4, - m32x4, - u16x4, - i16x4, - m16x4, - i8x4, - m8x4 -); - -impl_from!(m8x4: i8, m8x4_from, test_v32 | m64x4, m32x4, m16x4); - -impl_from!(m16x2: i16, m16x2_from, test_v32 | m64x2, m32x2, m8x2); diff --git a/coresimd/ppsv/v512.rs b/coresimd/ppsv/v512.rs deleted file mode 100644 index 6bea72c73b..0000000000 --- a/coresimd/ppsv/v512.rs +++ /dev/null @@ -1,451 +0,0 @@ -//! 512-bit wide portable packed vector types. - -// FIXME: Here the m1xN masks should map to AVX-512 m1xN registers, -// but due to lack of rustc support (shouldn't be hard to add) these masks -// are currently implemented as being 512-bit wide. - -simd_i_ty! 
{ - i8x64: 64, i8, m1x64, i8x64_tests, test_v512 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31, - x32, x33, x34, x35, x36, x37, x38, x39, - x40, x41, x42, x43, x44, x45, x46, x47, - x48, x49, x50, x51, x52, x53, x54, x55, - x56, x57, x58, x59, x60, x61, x62, x63 | - /// A 512-bit vector with 64 `i8` lanes. -} - -simd_u_ty! { - u8x64: 64, u8, m1x64, u8x64_tests, test_v512 | - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31, - x32, x33, x34, x35, x36, x37, x38, x39, - x40, x41, x42, x43, x44, x45, x46, x47, - x48, x49, x50, x51, x52, x53, x54, x55, - x56, x57, x58, x59, x60, x61, x62, x63 | - /// A 512-bit vector with 64 `u8` lanes. -} - -simd_m_ty! 
{ - m1x64: 64, i8, m1x64_tests, test_v512 | - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31, - x32, x33, x34, x35, x36, x37, x38, x39, - x40, x41, x42, x43, x44, x45, x46, x47, - x48, x49, x50, x51, x52, x53, x54, x55, - x56, x57, x58, x59, x60, x61, x62, x63 | - /// A 64-bit vector mask with 64 lanes (FIXME: 512-bit wide). -} - -simd_i_ty! { - i16x32: 32, i16, m1x32, i16x32_tests, test_v512 | - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 512-bit vector with 32 `i16` lanes. -} - -simd_u_ty! { - u16x32: 32, u16, m1x32, u16x32_tests, test_v512 | - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 512-bit vector with 32 `u16` lanes. -} - -simd_m_ty! 
{ - m1x32: 32, i16, m1x32_tests, test_v512 | - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 32-bit vector mask with 32 lanes (FIXME: 512-bit wide). -} - -simd_i_ty! { - i32x16: 16, i32, m1x16, i32x16_tests, test_v512 | - i32, i32, i32, i32, i32, i32, i32, i32, - i32, i32, i32, i32, i32, i32, i32, i32 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 512-bit vector with 16 `i32` lanes. -} - -simd_u_ty! { - u32x16: 16, u32, m1x16, u32x16_tests, test_v512 | - u32, u32, u32, u32, u32, u32, u32, u32, - u32, u32, u32, u32, u32, u32, u32, u32 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 512-bit vector with 16 `u32` lanes. -} - -simd_f_ty! { - f32x16: 16, f32, m1x16, f32x16_tests, test_v512 | - f32, f32, f32, f32, f32, f32, f32, f32, - f32, f32, f32, f32, f32, f32, f32, f32 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 512-bit vector with 16 `f32` lanes. -} - -simd_m_ty! { - m1x16: 16, i32, m1x16_tests, test_v512 | - i32, i32, i32, i32, i32, i32, i32, i32, - i32, i32, i32, i32, i32, i32, i32, i32 | - x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 16-bit vector mask with 16 lanes (FIXME: 512-bit wide). -} - -simd_i_ty! { - i64x8: 8, i64, m1x8, i64x8_tests, test_v512 | - i64, i64, i64, i64, i64, i64, i64, i64 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 512-bit vector with 8 `i64` lanes. -} - -simd_u_ty! { - u64x8: 8, u64, m1x8, u64x8_tests, test_v512 | - u64, u64, u64, u64, u64, u64, u64, u64 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 512-bit vector with 8 `u64` lanes. -} - -simd_f_ty! 
{ - f64x8: 8, f64, m1x8, f64x8_tests, test_v512 | - f64, f64, f64, f64, f64, f64, f64, f64 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 512-bit vector with 8 `f64` lanes. -} - -simd_m_ty! { - m1x8: 8, i64, m1x8_tests, test_v512 | - i64, i64, i64, i64, - i64, i64, i64, i64 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 8-bit vector mask with 8 lanes (FIXME: 512-bit wide). -} - -impl_from_bits!( - i8x64: i8, - i8x64_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - u8x64: u8, - u8x64_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - m1x64 // FIXME -); -impl_from_bits!( - i16x32: i16, - i16x32_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - u16x32: u16, - u16x32_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - i32x16: i32, - i32x16_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - u32x16: u32, - u32x16_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - f32x16: f32, - f32x16_from_bits, - test_v512 | u64x8, - i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - i64x8: i64, - i64x8_from_bits, - 
test_v512 | u64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - u64x8: u64, - u64x8_from_bits, - test_v512 | i64x8, - f64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); -impl_from_bits!( - f64x8: f64, - f64x8_from_bits, - test_v512 | u64x8, - i64x8, - m1x8, // FIXME - u32x16, - i32x16, - f32x16, - m1x16, // FIXME - u16x32, - i16x32, - m1x32, // FIXME - i8x64, - u8x64, - m1x64 // FIXME -); - -impl_from!( - f64x8: f64, - f64x8_from, - test_v512 | u64x8, - i64x8, - m1x8, - u32x8, - i32x8, - f32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - i64x8: i64, - i64x8_from, - test_v512 | u64x8, - f64x8, - m1x8, - u32x8, - i32x8, - f32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); -impl_from!( - u64x8: u64, - u64x8_from, - test_v512 | i64x8, - f64x8, - m1x8, - u32x8, - i32x8, - f32x8, - m32x8, - u16x8, - i16x8, - m16x8, - u8x8, - i8x8, - m8x8 -); - -impl_from!( - f32x16: f32, - f32x16_from, - test_v512 | u32x16, - i32x16, - m1x16, - u16x16, - i16x16, - m16x16, - u8x16, - i8x16, - m8x16 -); -impl_from!( - i32x16: i32, - i32x16_from, - test_v512 | u32x16, - f32x16, - m1x16, - u16x16, - i16x16, - m16x16, - u8x16, - i8x16, - m8x16 -); -impl_from!( - u32x16: u32, - u32x16_from, - test_v512 | i32x16, - f32x16, - m1x16, - u16x16, - i16x16, - m16x16, - u8x16, - i8x16, - m8x16 -); - -impl_from!( - i16x32: i16, - i16x32_from, - test_v512 | u16x32, - u8x32, - i8x32, - m1x32, - m8x32 -); -impl_from!( - u16x32: u16, - u16x32_from, - test_v512 | i16x32, - u8x32, - i8x32, - m1x32, - m8x32 -); - -impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64, m1x64); -impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64, m1x64); - -impl_from!(m1x32: i16, m1x32_from, test_v512 | m8x32); - -impl_from!(m1x16: i32, m1x16_from, 
test_v512 | m16x16, m8x16); - -impl_from!(m1x8: i64, m1x8_from, test_v512 | m32x8, m16x8, m8x8); diff --git a/coresimd/ppsv/v64.rs b/coresimd/ppsv/v64.rs deleted file mode 100644 index 64a86b601d..0000000000 --- a/coresimd/ppsv/v64.rs +++ /dev/null @@ -1,388 +0,0 @@ -//! 64-bit wide portable packed vector types. - -simd_i_ty! { - i8x8: 8, i8, m8x8, i8x8_tests, test_v64 | - i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 64-bit vector with 8 `i8` lanes. -} - -simd_u_ty! { - u8x8: 8, u8, m8x8, u8x8_tests, test_v64 | - u8, u8, u8, u8, u8, u8, u8, u8 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 64-bit vector with 8 `u8` lanes. -} - -simd_m_ty! { - m8x8: 8, i8, m8x8_tests, test_v64 | - i8, i8, i8, i8, i8, i8, i8, i8 | - x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 64-bit vector mask with 8 lanes. -} - -simd_i_ty! { - i16x4: 4, i16, m16x4, i16x4_tests, test_v64 | - i16, i16, i16, i16 | - x0, x1, x2, x3 | - /// A 64-bit vector with 4 `i16` lanes. -} - -simd_u_ty! { - u16x4: 4, u16, m16x4, u16x4_tests, test_v64 | - u16, u16, u16, u16 | - x0, x1, x2, x3 | - /// A 64-bit vector with 4 `u16` lanes. -} - -simd_m_ty! { - m16x4: 4, i16, m16x4_tests, test_v64 | - i16, i16, i16, i16 | - x0, x1, x2, x3 | - /// A 64-bit vector mask with 4 lanes. -} - -simd_i_ty! { - i32x2: 2, i32, m32x2, i32x2_tests, test_v64 | - i32, i32 | - x0, x1 | - /// A 64-bit vector with 2 `i32` lanes. -} - -simd_u_ty! { - u32x2: 2, u32, m32x2, u32x2_tests, test_v64 | - u32, u32 | - x0, x1 | - /// A 64-bit vector with 2 `u32` lanes. -} - -simd_m_ty! { - m32x2: 2, i32, m32x2_tests, test_v64 | - i32, i32 | - x0, x1 | - /// A 64-bit vector mask with 2 lanes. -} - -simd_f_ty! { - f32x2: 2, f32, m32x2, f32x2_tests, test_v64 | - f32, f32 | - x0, x1 | - /// A 64-bit vector with 2 `f32` lanes. -} - -#[cfg(target_arch = "x86")] -use coresimd::arch::x86::__m64; - -#[cfg(target_arch = "x86_64")] -use coresimd::arch::x86_64::__m64; - -macro_rules! 
from_bits_x86 { - ($id:ident, $elem_ty:ident, $test_mod:ident) => { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - impl_from_bits_!($id: __m64); - }; -} - -#[cfg( - all(target_arch = "arm", target_feature = "neon", target_feature = "v7") -)] -use coresimd::arch::arm::{ - // FIXME: float16x4_t, - float32x2_t, - int16x4_t, - int32x2_t, - int64x1_t, - int8x8_t, - poly16x4_t, - poly8x8_t, - uint16x4_t, - uint32x2_t, - uint64x1_t, - uint8x8_t, -}; - -#[cfg(target_arch = "aarch64")] -use coresimd::arch::aarch64::{ - // FIXME: float16x4_t, - float32x2_t, - float64x1_t, - int16x4_t, - int32x2_t, - int64x1_t, - int8x8_t, - poly16x4_t, - poly8x8_t, - uint16x4_t, - uint32x2_t, - uint64x1_t, - uint8x8_t, -}; - -macro_rules! from_bits_arm { - ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => { - #[cfg( - any( - all( - target_arch = "arm", - target_feature = "neon", - target_feature = "v7" - ), - target_arch = "aarch64" - ) - )] - impl_from_bits_!( - $id: int64x1_t, - uint64x1_t, - uint32x2_t, - int32x2_t, - float32x2_t, - uint16x4_t, - int16x4_t, - // FIXME: float16x4_t - poly16x4_t, - uint8x8_t, - int8x8_t, - poly8x8_t - ); - #[cfg(target_arch = "aarch64")] - impl_from_bits_!($id: float64x1_t); - }; -} - -impl_from_bits!( - u32x2: u32, - u32x2_from_bits, - test_v64 | i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -from_bits_x86!(u32x2, u32, u32x2_from_bits_x86); -from_bits_arm!(u32x2, u32, u32x2_from_bits_arm, u32x2_from_bits_aarch64); - -impl_from_bits!( - i32x2: i32, - i32x2_from_bits, - test_v64 | u32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -from_bits_x86!(i32x2, i32, i32x2_from_bits_x86); -from_bits_arm!(i32x2, i32, i32x2_from_bits_arm, i32x2_from_bits_aarch64); - -impl_from_bits!( - f32x2: f32, - f32x2_from_bits, - test_v64 | i32x2, - u32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -from_bits_x86!(f32x2, f32, f32x2_from_bits_x86); 
-from_bits_arm!(f32x2, f32, f32x2_from_bits_arm, f32x2_from_bits_aarch64); - -impl_from_bits!( - u16x4: u16, - u16x4_from_bits, - test_v64 | u32x2, - i32x2, - m32x2, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -from_bits_x86!(u16x4, u16, u16x4_from_bits_x86); -from_bits_arm!(u16x4, u16, u16x4_from_bits_arm, u16x4_from_bits_aarch64); - -impl_from_bits!( - i16x4: i16, - i16x4_from_bits, - test_v64 | u32x2, - i32x2, - m32x2, - u16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); -from_bits_x86!(i16x4, i16, i16x4_from_bits_x86); -from_bits_arm!(i16x4, i16, i16x4_from_bits_arm, i16x4_from_bits_aarch64); - -impl_from_bits!( - u8x8: u8, - u8x8_from_bits, - test_v64 | u32x2, - i32x2, - m32x2, - u16x4, - i16x4, - m16x4, - i8x8, - m8x8 -); -from_bits_x86!(u8x8, u8, u8x8_from_bits_x86); -from_bits_arm!(u8x8, u8, u8x8_from_bits_arm, u8x8_from_bits_aarch64); - -impl_from_bits!( - i8x8: i8, - i8x8_from_bits, - test_v64 | u32x2, - i32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - m8x8 -); -from_bits_x86!(i8x8, i8, i8x8_from_bits_x86); -from_bits_arm!(i8x8, i8, i8x8_from_bits_arm, i8x8_from_bits_aarch64); - -impl_from!( - f32x2: f32, - f32x2_from, - test_v64 | f64x2, - u64x2, - i64x2, - m64x2, - u32x2, - i32x2, - m32x2, - u16x2, - i16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); - -impl_from!( - u32x2: u32, - u32x2_from, - test_v64 | f64x2, - u64x2, - i64x2, - m64x2, - f32x2, - i32x2, - m32x2, - u16x2, - i16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); - -impl_from!( - i32x2: i32, - i32x2_from, - test_v64 | f64x2, - u64x2, - i64x2, - m64x2, - f32x2, - u32x2, - m32x2, - u16x2, - i16x2, - m16x2, - u8x2, - i8x2, - m8x2 -); - -impl_from!( - u16x4: u16, - u16x4_from, - test_v64 | f64x4, - u64x4, - i64x4, - m64x4, - f32x4, - i32x4, - u32x4, - m32x4, - i16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); - -impl_from!( - i16x4: i16, - i16x4_from, - test_v64 | f64x4, - u64x4, - i64x4, - m64x4, - f32x4, - i32x4, - u32x4, - m32x4, - u16x4, - m16x4, - u8x4, - i8x4, - m8x4 -); -impl_from!( - i8x8: i8, - i8x8_from, - 
test_v64 | f64x8, - u64x8, - i64x8, - m1x8, - f32x8, - u32x8, - i32x8, - m32x8, - i16x8, - u16x8, - m16x8, - u8x8, - m8x8 -); -impl_from!( - u8x8: u8, - u8x8_from, - test_v64 | f64x8, - u64x8, - i64x8, - m1x8, - f32x8, - u32x8, - i32x8, - m32x8, - i16x8, - u16x8, - m16x8, - i8x8, - m8x8 -); - -impl_from!(m8x8: i8, m8x8_from, test_v64 | m1x8, m32x8, m16x8); - -impl_from!(m16x4: i16, m16x4_from, test_v64 | m64x4, m32x4, m8x4); - -impl_from!(m32x2: i32, m32x2_from, test_v64 | m64x2, m16x2, m8x2); diff --git a/coresimd/simd.rs b/coresimd/simd.rs new file mode 100644 index 0000000000..6bdac0bfd6 --- /dev/null +++ b/coresimd/simd.rs @@ -0,0 +1,161 @@ +//! Internal `#[repr(simd)]` types + +#![allow(non_camel_case_types)] + +macro_rules! simd_ty { + ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone, Debug, PartialEq)] + pub(crate) struct $id($(pub $elem_ty),*); + + impl $id { + #[inline] + pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { + $id($($elem_name),*) + } + + #[inline] + pub(crate) const fn splat(value: $ety) -> Self { + $id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value + }),*) + } + + #[inline] + pub(crate) fn extract(self, index: usize) -> $ety { + unsafe { ::coresimd::simd_llvm::simd_extract(self, index as u32) } + } + } + } +} + +macro_rules! 
simd_m_ty { + ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone, Debug, PartialEq)] + pub(crate) struct $id($(pub $elem_ty),*); + + impl $id { + #[inline] + const fn bool_to_internal(x: bool) -> $ety { + [0 as $ety, !(0 as $ety)][x as usize] + } + + #[inline] + pub(crate) const fn new($($elem_name: bool),*) -> Self { + $id($(Self::bool_to_internal($elem_name)),*) + } + + #[inline] + pub(crate) const fn splat(value: bool) -> Self { + $id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + Self::bool_to_internal(value) + }),*) + } + + #[inline] + pub(crate) fn extract(self, index: usize) -> bool { + let r: $ety = unsafe { ::coresimd::simd_llvm::simd_extract(self, index as u32) }; + r != 0 + } + } + } +} + +// 16-bit wide types: + +simd_ty!(u8x2[u8]: u8, u8 | x0, x1); +simd_ty!(i8x2[i8]: i8, i8 | x0, x1); + +// 32-bit wide types: + +simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3); +simd_ty!(u16x2[u16]: u16, u16 | x0, x1); + +simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3); +simd_ty!(i16x2[i16]: i16, i16 | x0, x1); + +// 64-bit wide types: + +simd_ty!(u8x8[u8]: u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); +simd_ty!(u32x2[u32]: u32, u32 | x0, x1); +simd_ty!(u64x1[u64]: u64 | x1); + +simd_ty!(i8x8[i8]: i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); +simd_ty!(i32x2[i32]: i32, i32 | x0, x1); +simd_ty!(i64x1[i64]: i64 | x1); + +simd_ty!(f32x2[f32]: f32, f32 | x0, x1); + +// 128-bit wide types: + +simd_ty!(u8x16[u8]: + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(u16x8[u16]: u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); 
+simd_ty!(u64x2[u64]: u64, u64 | x0, x1); + +simd_ty!(i8x16[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(i16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); +simd_ty!(i64x2[i64]: i64, i64 | x0, x1); + +simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); +simd_ty!(f64x2[f64]: f64, f64 | x0, x1); + +simd_m_ty!(m8x16[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_m_ty!(m16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); +simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); + +// 256-bit wide types: + +simd_ty!(u8x32[u8]: + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 +); +simd_ty!(u16x16[u16]: + u16, u16, u16, u16, u16, u16, u16, u16, + u16, u16, u16, u16, u16, u16, u16, u16 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(u32x8[u32]: u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); + +simd_ty!(i8x32[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 +); +simd_ty!(i16x16[i16]: + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16 + | x0, 
x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(i32x8[i32]: i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); + diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs index f41ebb8974..58d9482ff1 100644 --- a/coresimd/x86/avx.rs +++ b/coresimd/x86/avx.rs @@ -61,7 +61,7 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); - mem::transmute(a & b) + mem::transmute(simd_and(a, b)) } /// Compute the bitwise AND of packed single-precision (32-bit) floating-point @@ -75,7 +75,7 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); - mem::transmute(a & b) + mem::transmute(simd_and(a, b)) } /// Compute the bitwise OR packed double-precision (64-bit) floating-point @@ -91,7 +91,7 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); - mem::transmute(a | b) + mem::transmute(simd_or(a, b)) } /// Compute the bitwise OR packed single-precision (32-bit) floating-point @@ -105,7 +105,7 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); - mem::transmute(a | b) + mem::transmute(simd_or(a, b)) } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit @@ -230,7 +230,7 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); - 
mem::transmute((!a) & b) + mem::transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) } /// Compute the bitwise NOT of packed single-precision (32-bit) floating-point @@ -245,7 +245,7 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); - mem::transmute((!a) & b) + mem::transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) } /// Compare packed double-precision (64-bit) floating-point elements @@ -741,7 +741,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); - mem::transmute(a ^ b) + mem::transmute(simd_xor(a, b)) } /// Compute the bitwise XOR of packed single-precision (32-bit) floating-point @@ -755,7 +755,7 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); - mem::transmute(a ^ b) + mem::transmute(simd_xor(a, b)) } /// Equal (ordered, non-signaling) diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs index 31d950a2c6..83aea858f4 100644 --- a/coresimd/x86/mod.rs +++ b/coresimd/x86/mod.rs @@ -444,123 +444,6 @@ impl m256iExt for __m256i { } } -use coresimd::simd::{ - f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8, i32x2, i32x4, - i32x8, i64x2, i64x4, i8x16, i8x32, i8x8, m16x16, m16x4, m16x8, m32x2, - m32x4, m32x8, m64x2, m64x4, m8x16, m8x32, m8x8, u16x16, u16x4, u16x8, - u32x2, u32x4, u32x8, u64x2, u64x4, u8x16, u8x32, u8x8, -}; - -impl_from_bits_!( - __m64: u32x2, - i32x2, - f32x2, - m32x2, - u16x4, - i16x4, - m16x4, - u8x8, - i8x8, - m8x8 -); - -impl_from_bits_!( - __m128: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); 
-impl_from_bits_!( - __m128i: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - __m128d: u64x2, - i64x2, - f64x2, - m64x2, - u32x4, - i32x4, - f32x4, - m32x4, - u16x8, - i16x8, - m16x8, - u8x16, - i8x16, - m8x16 -); -impl_from_bits_!( - __m256: u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -impl_from_bits_!( - __m256i: u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); -impl_from_bits_!( - __m256d: u64x4, - i64x4, - f64x4, - m64x4, - u32x8, - i32x8, - f32x8, - m32x8, - u16x16, - i16x16, - m16x16, - u8x32, - i8x32, - m8x32 -); - mod eflags; pub use self::eflags::*; diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs index e31cf5fbed..c45a1dd1ff 100644 --- a/crates/coresimd/src/lib.rs +++ b/crates/coresimd/src/lib.rs @@ -39,7 +39,6 @@ test(attr(allow(dead_code, deprecated, unused_variables, unused_mut))) )] -#[cfg_attr(not(test), macro_use)] extern crate core as _core; #[cfg(test)] #[macro_use] @@ -52,33 +51,10 @@ extern crate stdsimd_test; #[cfg(test)] extern crate test; -macro_rules! test_v16 { - ($item:item) => {}; -} -macro_rules! test_v32 { - ($item:item) => {}; -} -macro_rules! test_v64 { - ($item:item) => {}; -} -macro_rules! test_v128 { - ($item:item) => {}; -} -macro_rules! test_v256 { - ($item:item) => {}; -} -macro_rules! test_v512 { - ($item:item) => {}; -} -macro_rules! 
vector_impl { - ($([$f:ident, $($args:tt)*]),*) => { $($f!($($args)*);)* } -} - #[path = "../../../coresimd/mod.rs"] mod coresimd; pub use coresimd::arch; -pub use coresimd::simd; #[allow(unused_imports)] use _core::clone; diff --git a/crates/coresimd/tests/endian_tests.rs b/crates/coresimd/tests/endian_tests.rs deleted file mode 100644 index 8bb9c49051..0000000000 --- a/crates/coresimd/tests/endian_tests.rs +++ /dev/null @@ -1,278 +0,0 @@ -#![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] - -extern crate core; -extern crate coresimd; - -use core::{mem, slice}; -use coresimd::simd::*; - -#[test] -fn endian_indexing() { - let v = i32x4::new(0, 1, 2, 3); - assert_eq!(v.extract(0), 0); - assert_eq!(v.extract(1), 1); - assert_eq!(v.extract(2), 2); - assert_eq!(v.extract(3), 3); -} - -#[test] -fn endian_bitcasts() { - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let t: i16x8 = unsafe { mem::transmute(x) }; - let e: i16x8 = if cfg!(target_endian = "little") { - i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) - } else { - i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) - }; - assert_eq!(t, e); -} - -#[test] -fn endian_casts() { - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let t: i16x16 = x.into(); // simd_cast - #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i16x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - assert_eq!(t, e); -} - -#[test] -fn endian_load_and_stores() { - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let mut y: [i16; 8] = [0; 8]; - x.store_unaligned(unsafe { - slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) - }); - - let e: [i16; 8] = if cfg!(target_endian = "little") { - [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] - } else { - [1, 515, 1029, 1543, 2057, 
2571, 3085, 3599] - }; - assert_eq!(y, e); - - let z = i8x16::load_unaligned(unsafe { - slice::from_raw_parts(&y as *const _ as *const i8, 16) - }); - assert_eq!(z, x); -} - -#[test] -fn endian_array_union() { - union A { - data: [f32; 4], - vec: f32x4, - } - let x: [f32; 4] = unsafe { - A { - vec: f32x4::new(0., 1., 2., 3.), - }.data - }; - assert_eq!(x[0], 0_f32); - assert_eq!(x[1], 1_f32); - assert_eq!(x[2], 2_f32); - assert_eq!(x[3], 3_f32); - let y: f32x4 = unsafe { - A { - data: [3., 2., 1., 0.], - }.vec - }; - assert_eq!(y, f32x4::new(3., 2., 1., 0.)); - - union B { - data: [i8; 16], - vec: i8x16, - } - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let x: [i8; 16] = unsafe { B { vec: x }.data }; - - for i in 0..16 { - assert_eq!(x[i], i as i8); - } - - #[cfg_attr(rustfmt, rustfmt_skip)] - let y = [ - 15, 14, 13, 12, 11, 19, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 - ]; - #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x16::new( - 15, 14, 13, 12, 11, 19, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 - ); - let z = unsafe { B { data: y }.vec }; - assert_eq!(z, e); - - union C { - data: [i16; 8], - vec: i8x16, - } - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let x: [i16; 8] = unsafe { C { vec: x }.data }; - - let e: [i16; 8] = if cfg!(target_endian = "little") { - [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] - } else { - [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] - }; - assert_eq!(x, e); -} - -#[test] -fn endian_tuple_access() { - type F32x4T = (f32, f32, f32, f32); - union A { - data: F32x4T, - vec: f32x4, - } - let x: F32x4T = unsafe { - A { - vec: f32x4::new(0., 1., 2., 3.), - }.data - }; - assert_eq!(x.0, 0_f32); - assert_eq!(x.1, 1_f32); - assert_eq!(x.2, 2_f32); - assert_eq!(x.3, 3_f32); - let y: f32x4 = unsafe { - A { - data: (3., 2., 1., 0.), - }.vec - }; - assert_eq!(y, f32x4::new(3., 2., 1., 0.)); - - #[cfg_attr(rustfmt, 
rustfmt_skip)] - type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); - union B { - data: I8x16T, - vec: i8x16, - } - - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let x: I8x16T = unsafe { B { vec: x }.data }; - - assert_eq!(x.0, 0); - assert_eq!(x.1, 1); - assert_eq!(x.2, 2); - assert_eq!(x.3, 3); - assert_eq!(x.4, 4); - assert_eq!(x.5, 5); - assert_eq!(x.6, 6); - assert_eq!(x.7, 7); - assert_eq!(x.8, 8); - assert_eq!(x.9, 9); - assert_eq!(x.10, 10); - assert_eq!(x.11, 11); - assert_eq!(x.12, 12); - assert_eq!(x.13, 13); - assert_eq!(x.14, 14); - assert_eq!(x.15, 15); - - #[cfg_attr(rustfmt, rustfmt_skip)] - let y = ( - 15, 14, 13, 12, 11, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 - ); - let z: i8x16 = unsafe { B { data: y }.vec }; - #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x16::new( - 15, 14, 13, 12, 11, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 - ); - assert_eq!(e, z); - - #[cfg_attr(rustfmt, rustfmt_skip)] - type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); - union C { - data: I16x8T, - vec: i8x16, - } - - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let x: I16x8T = unsafe { C { vec: x }.data }; - - let e: [i16; 8] = if cfg!(target_endian = "little") { - [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] - } else { - [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] - }; - assert_eq!(x.0, e[0]); - assert_eq!(x.1, e[1]); - assert_eq!(x.2, e[2]); - assert_eq!(x.3, e[3]); - assert_eq!(x.4, e[4]); - assert_eq!(x.5, e[5]); - assert_eq!(x.6, e[6]); - assert_eq!(x.7, e[7]); - - // Without repr(C) this produces total garbage. - // FIXME: investigate more, this is maybe due to - // to tuple field reordering to minimize padding. 
- #[cfg_attr(rustfmt, rustfmt_skip)] - #[repr(C)] - #[derive(Copy ,Clone)] - pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, - pub i8, pub i8, pub i16, pub i8, pub i8, pub i16); - - union D { - data: Tup, - vec: i8x16, - } - - #[cfg_attr(rustfmt, rustfmt_skip)] - let x = i8x16::new( - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - ); - let x: Tup = unsafe { D { vec: x }.data }; - - let e: [i16; 12] = if cfg!(target_endian = "little") { - [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] - } else { - [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] - }; - assert_eq!(x.0 as i16, e[0]); - assert_eq!(x.1 as i16, e[1]); - assert_eq!(x.2 as i16, e[2]); - assert_eq!(x.3 as i16, e[3]); - assert_eq!(x.4 as i16, e[4]); - assert_eq!(x.5 as i16, e[5]); - assert_eq!(x.6 as i16, e[6]); - assert_eq!(x.7 as i16, e[7]); - assert_eq!(x.8 as i16, e[8]); - assert_eq!(x.9 as i16, e[9]); - assert_eq!(x.10 as i16, e[10]); - assert_eq!(x.11 as i16, e[11]); -} diff --git a/crates/coresimd/tests/reductions.rs b/crates/coresimd/tests/reductions.rs deleted file mode 100644 index 123410b879..0000000000 --- a/crates/coresimd/tests/reductions.rs +++ /dev/null @@ -1,510 +0,0 @@ -#![feature(stdsimd, sse4a_target_feature, avx512_target_feature)] -#![feature(arm_target_feature)] -#![feature(aarch64_target_feature)] -#![feature(powerpc_target_feature)] -#![allow(unused_attributes, dead_code, unused_imports, unused_macros)] - -#[macro_use] -extern crate stdsimd; - -use stdsimd::simd::*; - -#[cfg(target_arch = "powerpc")] -macro_rules! is_powerpc_feature_detected { - ($t:tt) => { - false - }; -} - -macro_rules! invoke_arch { - ($macro:ident, $feature_macro:ident, $id:ident, $elem_ty:ident, - [$($feature:tt),*]) => { - $($macro!($feature, $feature_macro, $id, $elem_ty);)* - } -} - -macro_rules! 
invoke_vectors { - ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => { - $( - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty, - ["sse", "sse2", "sse3", "ssse3", "sse4.1", - "sse4.2", "sse4a", "avx2", "avx2", "avx512f"]); - #[cfg(target_arch = "aarch64")] - invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty, - ["neon"]); - #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))] - invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty, - ["neon"]); - #[cfg(target_arch = "powerpc")] - invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]); - #[cfg(target_arch = "powerpc64")] - invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]); - )* - } -} - -macro_rules! finvoke { - ($macro:ident) => { - invoke_vectors!( - $macro, - [ - (f32x2, f32), - (f32x4, f32), - (f32x8, f32), - (f32x16, f32), - (f64x2, f64), - (f64x4, f64), - (f64x8, f64) - ] - ); - }; -} - -macro_rules! iinvoke { - ($macro:ident) => { - invoke_vectors!( - $macro, - [ - (i8x2, i8), - (i8x4, i8), - (i8x8, i8), - (i8x16, i8), - (i8x32, i8), - (i8x64, i8), - (i16x2, i16), - (i16x4, i16), - (i16x8, i16), - (i16x16, i16), - (i16x32, i16), - (i32x2, i32), - (i32x4, i32), - (i32x8, i32), - (i32x16, i32), - (i64x2, i64), - (i64x4, i64), - (i64x8, i64), - (u8x2, u8), - (u8x4, u8), - (u8x8, u8), - (u8x16, u8), - (u8x32, u8), - (u8x64, u8), - (u16x2, u16), - (u16x4, u16), - (u16x8, u16), - (u16x16, u16), - (u16x32, u16), - (u32x2, u32), - (u32x4, u32), - (u32x8, u32), - (u32x16, u32), - (u64x2, u64), - (u64x4, u64), - (u64x8, u64) - ] - ); - }; -} - -macro_rules! 
min_nan_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let n0 = ::std::$elem_ty::NAN; - - assert_eq!(n0.min(-3.0), -3.0); - assert_eq!((-3.0 as $elem_ty).min(n0), -3.0); - - let v0 = $id::splat(-3.0); - - // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408): - // When the last element is NaN the current implementation produces incorrect results. - let bugbug = 1; - for i in 0..$id::lanes() - bugbug { - let mut v = v0.replace(i, n0); - // If there is a NaN, the result is always the smallest element: - assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int()); - for j in 0..i { - v = v.replace(j, n0); - assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int()); - } - } - // If the vector contains all NaNs the result is NaN: - let vn = $id::splat(n0); - assert!(vn.min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}", - vn, vn.min_element(), vn.min_element().is_nan()); - } - unsafe { test_fn() }; - } - } -} - -#[test] -fn min_nan() { - finvoke!(min_nan_test); -} - -macro_rules! max_nan_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let n0 = ::std::$elem_ty::NAN; - - assert_eq!(n0.max(-3.0), -3.0); - assert_eq!((-3.0 as $elem_ty).max(n0), -3.0); - - let v0 = $id::splat(-3.0); - - // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408): - // When the last element is NaN the current implementation produces incorrect results. 
- let bugbug = 1; - for i in 0..$id::lanes() - bugbug { - let mut v = v0.replace(i, n0); - // If there is a NaN the result is always the largest element: - assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int()); - for j in 0..i { - v = v.replace(j, n0); - assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int()); - } - } - - // If the vector contains all NaNs the result is NaN: - let vn = $id::splat(n0); - assert!(vn.max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}", - vn, vn.max_element(), vn.max_element().is_nan()); - } - unsafe { test_fn() }; - } - } -} - -#[test] -fn max_nan() { - finvoke!(max_nan_test); -} - -macro_rules! sum_nan_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - #[allow(unreachable_code)] - unsafe fn test_fn() { - // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 - // https://github.com/rust-lang-nursery/stdsimd/issues/409 - return; - - let n0 = ::std::$elem_ty::NAN; - let v0 = $id::splat(-3.0); - for i in 0..$id::lanes() { - let mut v = v0.replace(i, n0); - // If the vector contains a NaN the result is NaN: - assert!( - v.sum().is_nan(), - "nan at {} => {} | {:?}", - i, - v.sum(), - v - ); - for j in 0..i { - v = v.replace(j, n0); - assert!(v.sum().is_nan()); - } - } - let v = $id::splat(n0); - assert!(v.sum().is_nan(), "all nans | {:?}", v); - } - unsafe { test_fn() }; - } - }; -} - -#[test] -fn sum_nan() { - finvoke!(sum_nan_test); -} - -macro_rules! 
product_nan_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - #[allow(unreachable_code)] - unsafe fn test_fn() { - // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 - // https://github.com/rust-lang-nursery/stdsimd/issues/409 - return; - - let n0 = ::std::$elem_ty::NAN; - let v0 = $id::splat(-3.0); - for i in 0..$id::lanes() { - let mut v = v0.replace(i, n0); - // If the vector contains a NaN the result is NaN: - assert!( - v.product().is_nan(), - "nan at {} | {:?}", - i, - v - ); - for j in 0..i { - v = v.replace(j, n0); - assert!(v.product().is_nan()); - } - } - let v = $id::splat(n0); - assert!(v.product().is_nan(), "all nans | {:?}", v); - } - unsafe { test_fn() }; - } - }; -} - -#[test] -fn product_nan() { - finvoke!(product_nan_test); -} - -trait AsInt { - type Int; - fn as_int(self) -> Self::Int; - fn from_int(Self::Int) -> Self; -} - -macro_rules! as_int { - ($float:ident, $int:ident) => { - impl AsInt for $float { - type Int = $int; - fn as_int(self) -> $int { - unsafe { ::std::mem::transmute(self) } - } - fn from_int(x: $int) -> $float { - unsafe { ::std::mem::transmute(x) } - } - } - }; -} - -as_int!(f32, u32); -as_int!(f64, u64); -as_int!(f32x2, i32x2); -as_int!(f32x4, i32x4); -as_int!(f32x8, i32x8); -as_int!(f32x16, i32x16); -as_int!(f64x2, i64x2); -as_int!(f64x4, i64x4); -as_int!(f64x8, i64x8); - -// FIXME: these fail on i586 for some reason -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] -mod offset { - use super::*; - - trait TreeSum { - type R; - fn tree_sum(self) -> Self::R; - } - - macro_rules! 
tree_sum_f { - ($elem_ty:ident) => { - impl<'a> TreeSum for &'a [$elem_ty] { - type R = $elem_ty; - fn tree_sum(self) -> $elem_ty { - if self.len() == 2 { - self[0] + self[1] - } else { - let mid = self.len() / 2; - let (left, right) = self.split_at(mid); - Self::tree_sum(left) + Self::tree_sum(right) - } - } - } - }; - } - tree_sum_f!(f32); - tree_sum_f!(f64); - - macro_rules! sum_roundoff_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let mut start = std::$elem_ty::EPSILON; - let mut sum = 0. as $elem_ty; - - let mut v = $id::splat(0. as $elem_ty); - for i in 0..$id::lanes() { - let c = if i % 2 == 0 { 1e3 } else { -1. }; - start *= 3.14 * c; - sum += start; - v = v.replace(i, start); - } - let vsum = v.sum(); - let _r = vsum.as_int() == sum.as_int(); - // This is false in general; the intrinsic performs a - // tree-reduce: - let mut a = [0. as $elem_ty; $id::lanes()]; - v.store_unaligned(&mut a); - - let tsum = a.tree_sum(); - - // tolerate 1 ULP difference: - if vsum.as_int() > tsum.as_int() { - assert!( - vsum.as_int() - tsum.as_int() < 2, - "v: {:?} | vsum: {} | tsum: {}", - v, - vsum, - tsum - ); - } else { - assert!( - tsum.as_int() - vsum.as_int() < 2, - "v: {:?} | vsum: {} | tsum: {}", - v, - vsum, - tsum - ); - } - } - unsafe { test_fn() }; - } - }; - } - - #[test] - fn sum_roundoff_test() { - finvoke!(sum_roundoff_test); - } - - trait TreeProduct { - type R; - fn tree_product(self) -> Self::R; - } - - macro_rules! tree_product_f { - ($elem_ty:ident) => { - impl<'a> TreeProduct for &'a [$elem_ty] { - type R = $elem_ty; - fn tree_product(self) -> $elem_ty { - if self.len() == 2 { - self[0] * self[1] - } else { - let mid = self.len() / 2; - let (left, right) = self.split_at(mid); - Self::tree_product(left) * Self::tree_product(right) - } - } - } - }; - } - - tree_product_f!(f32); - tree_product_f!(f64); - - macro_rules! 
product_roundoff_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let mut start = std::$elem_ty::EPSILON; - let mut mul = 1. as $elem_ty; - - let mut v = $id::splat(1. as $elem_ty); - for i in 0..$id::lanes() { - let c = if i % 2 == 0 { 1e3 } else { -1. }; - start *= 3.14 * c; - mul *= start; - v = v.replace(i, start); - } - let vmul = v.product(); - let _r = vmul.as_int() == mul.as_int(); - // This is false in general; the intrinsic performs a - // tree-reduce: - let mut a = [0. as $elem_ty; $id::lanes()]; - v.store_unaligned(&mut a); - - let tmul = a.tree_product(); - // tolerate 1 ULP difference: - if vmul.as_int() > tmul.as_int() { - assert!( - vmul.as_int() - tmul.as_int() < 2, - "v: {:?} | vmul: {} | tmul: {}", - v, - vmul, - tmul - ); - } else { - assert!( - tmul.as_int() - vmul.as_int() < 2, - "v: {:?} | vmul: {} | tmul: {}", - v, - vmul, - tmul - ); - } - } - unsafe { test_fn() }; - } - }; - } - - #[test] - fn product_roundoff_test() { - finvoke!(product_roundoff_test); - } - - macro_rules! wrapping_sum_overflow_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let start = $elem_ty::max_value() - - ($id::lanes() as $elem_ty / 2); - - let v = $id::splat(start as $elem_ty); - let vwrapping_sum = v.wrapping_sum(); - - let mut wrapping_sum = start; - for _ in 1..$id::lanes() { - wrapping_sum = wrapping_sum.wrapping_add(start); - } - assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); - } - unsafe { test_fn() }; - } - }; - } - - #[test] - fn wrapping_sum_overflow_test() { - iinvoke!(wrapping_sum_overflow_test); - } - - macro_rules! 
product_overflow_test { - ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => { - if $feature_macro!($feature) { - #[target_feature(enable = $feature)] - unsafe fn test_fn() { - let start = $elem_ty::max_value() - - ($id::lanes() as $elem_ty / 2); - - let v = $id::splat(start as $elem_ty); - let vmul = v.wrapping_product(); - - let mut mul = start; - for _ in 1..$id::lanes() { - mul = mul.wrapping_mul(start); - } - assert_eq!(mul, vmul, "v = {:?}", v); - } - unsafe { test_fn() }; - } - }; - } - - #[test] - fn product_overflow_test() { - iinvoke!(product_overflow_test); - } -} diff --git a/crates/coresimd/tests/v128.rs b/crates/coresimd/tests/v128.rs deleted file mode 100644 index 3cba8811cb..0000000000 --- a/crates/coresimd/tests/v128.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! coresimd 128-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports, dead_code)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => { - $item - }; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => {}; -} - -#[cfg(test)] -macro_rules! 
vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/coresimd/tests/v16.rs b/crates/coresimd/tests/v16.rs deleted file mode 100644 index 77c78323fe..0000000000 --- a/crates/coresimd/tests/v16.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! coresimd 16-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports, dead_code)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => { - $item - }; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => {}; -} - -#[cfg(test)] -macro_rules! vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/coresimd/tests/v256.rs b/crates/coresimd/tests/v256.rs deleted file mode 100644 index fac31c316f..0000000000 --- a/crates/coresimd/tests/v256.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! 
coresimd 256-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => { - $item - }; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => {}; -} - -#[cfg(test)] -macro_rules! vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/coresimd/tests/v32.rs b/crates/coresimd/tests/v32.rs deleted file mode 100644 index 3f89acc9a0..0000000000 --- a/crates/coresimd/tests/v32.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! coresimd 32-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports, dead_code)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => { - $item - }; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => {}; -} - -#[cfg(test)] -macro_rules! 
vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/coresimd/tests/v512.rs b/crates/coresimd/tests/v512.rs deleted file mode 100644 index cda0661370..0000000000 --- a/crates/coresimd/tests/v512.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! coresimd 512-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => { - $item - }; -} - -#[cfg(test)] -macro_rules! vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/coresimd/tests/v64.rs b/crates/coresimd/tests/v64.rs deleted file mode 100644 index 65b91219ca..0000000000 --- a/crates/coresimd/tests/v64.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! 
coresimd 64-bit wide vector tests - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)] -#![allow(unused_imports, dead_code)] - -#[cfg(test)] -extern crate coresimd; - -#[cfg(test)] -macro_rules! test_v16 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v32 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v64 { - ($item:item) => { - $item - }; -} -#[cfg(test)] -macro_rules! test_v128 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v256 { - ($item:item) => {}; -} -#[cfg(test)] -macro_rules! test_v512 { - ($item:item) => {}; -} - -#[cfg(test)] -macro_rules! vector_impl { - ($([$f:ident, $($args:tt)*]),*) => {}; -} - -#[cfg(test)] -#[path = "../../../coresimd/ppsv/mod.rs"] -mod ppsv; - -#[cfg(test)] -use std::{marker, mem}; - -#[cfg(all(test, target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(test, target_arch = "aarch64"))] -extern crate core as _core; - -#[cfg(all(test, target_arch = "aarch64"))] -use _core::num; diff --git a/crates/stdsimd/Cargo.toml b/crates/stdsimd/Cargo.toml index 0371f38403..4ab553db48 100644 --- a/crates/stdsimd/Cargo.toml +++ b/crates/stdsimd/Cargo.toml @@ -33,10 +33,6 @@ cupid = "0.6.0" name = "hex" path = "../../examples/hex.rs" -[[example]] -name = "nbody" -path = "../../examples/nbody.rs" - [[example]] name = "wasm" crate-type = ["cdylib"] diff --git a/examples/nbody.rs b/examples/nbody.rs deleted file mode 100644 index 63281e78e8..0000000000 --- a/examples/nbody.rs +++ /dev/null @@ -1,243 +0,0 @@ -//! n-body benchmark from the [benchmarks game][bg]. -//! -//! [bg]: https://benchmarksgame.alioth.debian.org/u64q/nbody-description. -//! 
html#nbody - -#![cfg_attr(stdsimd_strict, deny(warnings))] -#![feature(stdsimd)] -#![cfg_attr( - feature = "cargo-clippy", - allow( - similar_names, missing_docs_in_private_items, shadow_reuse, - print_stdout - ) -)] - -extern crate stdsimd; -#[macro_use] -extern crate cfg_if; - -use stdsimd::simd::*; - -const PI: f64 = std::f64::consts::PI; -const SOLAR_MASS: f64 = 4.0 * PI * PI; -const DAYS_PER_YEAR: f64 = 365.24; - -pub trait Frsqrt { - fn frsqrt(&self) -> Self; -} - -cfg_if! { - if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "sse"))] { - fn frsqrt(s: &f64x2) -> f64x2 { - #[cfg(target_arch = "x86")] - use stdsimd::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use stdsimd::arch::x86_64::*; - let t: f32x2 = (*s).into(); - - let u: f64x4 = unsafe { - let res = _mm_rsqrt_ps(_mm_setr_ps( - t.extract(0), - t.extract(1), - 0., - 0., - )); - f32x4::from_bits(res).into() - }; - f64x2::new(u.extract(0), u.extract(1)) - } - } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - fn frsqrt(s: &f64x2) -> f64x2 { - #[cfg(target_arch = "aarch64")] - use stdsimd::arch::aarch64::*; - #[cfg(target_arch = "arm")] - use stdsimd::arch::arm::*; - - let t: f32x2 = (*s).into(); - let t: f32x2 = unsafe { vrsqrte_f32(t.into_bits()).into_bits() }; - t.into() - } - } else { - fn frsqrt(s: &f64x2) -> f64x2 { - let r = s.replace(0, 1. / s.extract(0).sqrt()); - let r = r.replace(1, 1. 
/ s.extract(1).sqrt()); - r - } - } -} - -impl Frsqrt for f64x2 { - fn frsqrt(&self) -> Self { - frsqrt(self) - } -} - -struct Body { - x: [f64; 3], - _fill: f64, - v: [f64; 3], - mass: f64, -} - -impl Body { - fn new( - x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64, - ) -> Self { - Self { - x: [x0, x1, x2], - _fill: 0.0, - v: [v0, v1, v2], - mass, - } - } -} - -const N_BODIES: usize = 5; -const N: usize = N_BODIES * (N_BODIES - 1) / 2; -fn offset_momentum(bodies: &mut [Body; N_BODIES]) { - let (sun, rest) = bodies.split_at_mut(1); - let sun = &mut sun[0]; - for body in rest { - for k in 0..3 { - sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS; - } - } -} -fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { - let mut r = [[0.0; 4]; N]; - let mut mag = [0.0; N]; - - let mut dx = [f64x2::splat(0.0); 3]; - let mut dsquared; - let mut distance; - let mut dmag; - - let mut i = 0; - for j in 0..N_BODIES { - for k in j + 1..N_BODIES { - for m in 0..3 { - r[i][m] = bodies[j].x[m] - bodies[k].x[m]; - } - i += 1; - } - } - - i = 0; - while i < N { - for (m, dx) in dx.iter_mut().enumerate() { - *dx = f64x2::new(r[i][m], r[i + 1][m]); - } - - dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - distance = dsquared.frsqrt(); - for _ in 0..2 { - distance = distance * f64x2::splat(1.5) - - ((f64x2::splat(0.5) * dsquared) * distance) - * (distance * distance) - } - dmag = f64x2::splat(dt) / dsquared * distance; - dmag.store_unaligned(&mut mag[i..]); - - i += 2; - } - - i = 0; - for j in 0..N_BODIES { - for k in j + 1..N_BODIES { - for m in 0..3 { - bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i]; - bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i]; - } - i += 1 - } - } - for body in bodies { - for m in 0..3 { - body.x[m] += dt * body.v[m] - } - } -} - -fn energy(bodies: &[Body; N_BODIES]) -> f64 { - let mut e = 0.0; - for i in 0..N_BODIES { - let bi = &bodies[i]; - e += bi.mass - * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) - / 2.0; 
- for bj in bodies.iter().take(N_BODIES).skip(i + 1) { - let mut dx = [0.0; 3]; - for (k, dx) in dx.iter_mut().enumerate() { - *dx = bi.x[k] - bj.x[k]; - } - let mut distance = 0.0; - for &d in &dx { - distance += d * d - } - e -= bi.mass * bj.mass / distance.sqrt() - } - } - e -} - -fn main() { - let mut bodies: [Body; N_BODIES] = [ - /* sun */ - Body::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SOLAR_MASS), - /* jupiter */ - Body::new( - 4.84143144246472090e+00, - -1.16032004402742839e+00, - -1.03622044471123109e-01, - 1.66007664274403694e-03 * DAYS_PER_YEAR, - 7.69901118419740425e-03 * DAYS_PER_YEAR, - -6.90460016972063023e-05 * DAYS_PER_YEAR, - 9.54791938424326609e-04 * SOLAR_MASS, - ), - /* saturn */ - Body::new( - 8.34336671824457987e+00, - 4.12479856412430479e+00, - -4.03523417114321381e-01, - -2.76742510726862411e-03 * DAYS_PER_YEAR, - 4.99852801234917238e-03 * DAYS_PER_YEAR, - 2.30417297573763929e-05 * DAYS_PER_YEAR, - 2.85885980666130812e-04 * SOLAR_MASS, - ), - /* uranus */ - Body::new( - 1.28943695621391310e+01, - -1.51111514016986312e+01, - -2.23307578892655734e-01, - 2.96460137564761618e-03 * DAYS_PER_YEAR, - 2.37847173959480950e-03 * DAYS_PER_YEAR, - -2.96589568540237556e-05 * DAYS_PER_YEAR, - 4.36624404335156298e-05 * SOLAR_MASS, - ), - /* neptune */ - Body::new( - 1.53796971148509165e+01, - -2.59193146099879641e+01, - 1.79258772950371181e-01, - 2.68067772490389322e-03 * DAYS_PER_YEAR, - 1.62824170038242295e-03 * DAYS_PER_YEAR, - -9.51592254519715870e-05 * DAYS_PER_YEAR, - 5.15138902046611451e-05 * SOLAR_MASS, - ), - ]; - - let n: usize = std::env::args() - .nth(1) - .expect("need one arg") - .parse() - .expect("argument should be a usize"); - - offset_momentum(&mut bodies); - println!("{:.9}", energy(&bodies)); - for _ in 0..n { - advance(&mut bodies, 0.01); - } - println!("{:.9}", energy(&bodies)); -} diff --git a/stdsimd/mod.rs b/stdsimd/mod.rs index b76deb520e..d6a7c2cba6 100644 --- a/stdsimd/mod.rs +++ b/stdsimd/mod.rs @@ -480,6 +480,3 @@ pub mod arch { 
#[unstable(feature = "stdsimd", issue = "27731")] pub mod powerpc64 {} } - -#[unstable(feature = "stdsimd", issue = "27731")] -pub use coresimd::simd; From 3bf763f8bf0c81ffd515768c96fc9fa903370a22 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 18 Jul 2018 16:38:36 +0200 Subject: [PATCH 2/2] LLVM7 generates different machine code than LLVM6 for x86/x86_64 targets for some intrinsics. These are new optimizations --- coresimd/x86/avx.rs | 5 ++++- coresimd/x86/sse41.rs | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/coresimd/x86/avx.rs b/coresimd/x86/avx.rs index 58d9482ff1..c1c2fff846 100644 --- a/coresimd/x86/avx.rs +++ b/coresimd/x86/avx.rs @@ -524,7 +524,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] +// Note: LLVM7 prefers single-precision blend instructions when +// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194 +// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] +#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { diff --git a/coresimd/x86/sse41.rs b/coresimd/x86/sse41.rs index 198bb16ba0..91722507da 100644 --- a/coresimd/x86/sse41.rs +++ b/coresimd/x86/sse41.rs @@ -80,7 +80,10 @@ pub unsafe fn _mm_blendv_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] +// Note: LLVM7 prefers the single-precision floating-point domain when possible +// see https://bugs.llvm.org/show_bug.cgi?id=38195 +// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] +#[cfg_attr(test, 
assert_instr(blendps, imm8 = 0xF0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { @@ -124,7 +127,10 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] +// Note: LLVM7 prefers the single-precision floating-point domain when possible +// see https://bugs.llvm.org/show_bug.cgi?id=38195 +// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] +#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {