diff --git a/.gitignore b/.gitignore index 04a4061d35..c16125ed70 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ Cargo.lock .*.swp -/target -tags +target +tags \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index f97609447c..95be95f59f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,8 @@ matrix: - os: osx env: TARGET=x86_64-apple-darwin NO_ADD=1 script: ci/run.sh - - install: true + - env: DOCUMENTATION + install: true script: ci/dox.sh - env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1 script: | diff --git a/Cargo.toml b/Cargo.toml index cd4560406b..b64b2f8e67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,15 +2,17 @@ name = "stdsimd" version = "0.0.3" authors = ["Andrew Gallant "] -description = "Experiments for adding SIMD support to Rust's standard library." +description = "SIMD support in Rust's standard library." documentation = "https://docs.rs/stdsimd" homepage = "https://github.com/BurntSushi/stdsimd" repository = "https://github.com/BurntSushi/stdsimd" readme = "README.md" keywords = ["std", "simd", "intrinsics"] -categories = ["hardware-support", "no-std"] +categories = ["hardware-support"] license = "MIT/Apache-2.0" +[workspace] + [badges] travis-ci = { repository = "BurntSushi/stdsimd" } appveyor = { repository = "BurntSushi/stdsimd" } @@ -18,6 +20,9 @@ is-it-maintained-issue-resolution = { repository = "BurntSushi/stdsimd" } is-it-maintained-open-issues = { repository = "BurntSushi/stdsimd" } maintenance = { status = "experimental" } +[dependencies] +coresimd = { version = "0.0.3", path = "coresimd/" } + [profile.release] debug = true opt-level = 3 @@ -26,15 +31,9 @@ opt-level = 3 debug = true opt-level = 3 -[dev-dependencies] -stdsimd-test = { version = "0.*", path = "stdsimd-test" } -cupid = "0.5.0" - [features] -std = [] - -# Internal-only: denies all warnings. 
-strict = [] -# Internal-only: enables only those intrinsics supported by Intel's +# Internal-usage only: denies all warnings. +strict = [ "coresimd/strict" ] +# Internal-usage only: enables only those intrinsics supported by Intel's # Software Development Environment (SDE). -intel_sde = [] \ No newline at end of file +intel_sde = [ "coresimd/intel_sde" ] diff --git a/ci/dox.sh b/ci/dox.sh index b8998bb15e..3bcb7bcf9f 100755 --- a/ci/dox.sh +++ b/ci/dox.sh @@ -22,7 +22,10 @@ dox() { rm -rf target/doc/$arch mkdir target/doc/$arch - rustdoc --target $target -o target/doc/$arch src/lib.rs --crate-name stdsimd + cargo clean + cargo build --target $target + + rustdoc --target $target -o target/doc/$arch src/lib.rs --crate-name stdsimd --library-path target/$target/debug/deps } dox i686 i686-unknown-linux-gnu diff --git a/ci/run.sh b/ci/run.sh index e2326b2d9b..51734e5fce 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -16,14 +16,15 @@ case ${TARGET} in esac FEATURES="strict,$FEATURES" -FEATURES_STD="$std,${FEATURES}" echo "RUSTFLAGS=${RUSTFLAGS}" echo "FEATURES=${FEATURES}" echo "OBJDUMP=${OBJDUMP}" -cargo test --target $TARGET --features $FEATURES --verbose -- --nocapture -cargo test --release --target $TARGET --features $FEATURES --verbose -- --nocapture +cargo_test() { + cmd="cargo test --all --target=$TARGET --features $FEATURES --verbose $1 -- --nocapture $2" + $cmd +} -cargo test --target $TARGET --features $FEATURES_STD --verbose -- --nocapture -cargo test --release --target $TARGET --features $FEATURES_STD --verbose -- --nocapture +cargo_test +cargo_test "--release" diff --git a/coresimd/Cargo.toml b/coresimd/Cargo.toml new file mode 100644 index 0000000000..52415468b9 --- /dev/null +++ b/coresimd/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "coresimd" +version = "0.0.3" +authors = ["Andrew Gallant "] +description = "SIMD support in Rust's core library." 
+documentation = "https://docs.rs/stdsimd" +homepage = "https://github.com/BurntSushi/stdsimd" +repository = "https://github.com/BurntSushi/stdsimd" +readme = "README.md" +keywords = ["core", "simd", "intrinsics"] +categories = ["hardware-support", "no-std"] +license = "MIT/Apache-2.0" + +[badges] +travis-ci = { repository = "BurntSushi/stdsimd" } +appveyor = { repository = "BurntSushi/stdsimd" } +is-it-maintained-issue-resolution = { repository = "BurntSushi/stdsimd" } +is-it-maintained-open-issues = { repository = "BurntSushi/stdsimd" } +maintenance = { status = "experimental" } + +[dev-dependencies] +cupid = "0.5.0" +stdsimd-test = { version = "0.*", path = "../stdsimd-test" } + +[features] +# Internal-usage only: denies all warnings. +strict = [] +# Internal-usage only: enables only those intrinsics supported by Intel's +# Software Development Environment (SDE). +intel_sde = [] \ No newline at end of file diff --git a/coresimd/LICENSE-APACHE b/coresimd/LICENSE-APACHE new file mode 120000 index 0000000000..965b606f33 --- /dev/null +++ b/coresimd/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/coresimd/LICENSE-MIT b/coresimd/LICENSE-MIT new file mode 120000 index 0000000000..76219eb72e --- /dev/null +++ b/coresimd/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/coresimd/README.md b/coresimd/README.md new file mode 120000 index 0000000000..32d46ee883 --- /dev/null +++ b/coresimd/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/coresimd/rustfmt.toml b/coresimd/rustfmt.toml new file mode 120000 index 0000000000..39f97b043b --- /dev/null +++ b/coresimd/rustfmt.toml @@ -0,0 +1 @@ +../rustfmt.toml \ No newline at end of file diff --git a/src/aarch64/mod.rs b/coresimd/src/aarch64/mod.rs similarity index 100% rename from src/aarch64/mod.rs rename to coresimd/src/aarch64/mod.rs diff --git a/src/aarch64/neon.rs 
b/coresimd/src/aarch64/neon.rs similarity index 100% rename from src/aarch64/neon.rs rename to coresimd/src/aarch64/neon.rs diff --git a/src/aarch64/v8.rs b/coresimd/src/aarch64/v8.rs similarity index 100% rename from src/aarch64/v8.rs rename to coresimd/src/aarch64/v8.rs diff --git a/src/arm/mod.rs b/coresimd/src/arm/mod.rs similarity index 100% rename from src/arm/mod.rs rename to coresimd/src/arm/mod.rs diff --git a/src/arm/neon.rs b/coresimd/src/arm/neon.rs similarity index 100% rename from src/arm/neon.rs rename to coresimd/src/arm/neon.rs diff --git a/src/arm/v6.rs b/coresimd/src/arm/v6.rs similarity index 100% rename from src/arm/v6.rs rename to coresimd/src/arm/v6.rs diff --git a/src/arm/v7.rs b/coresimd/src/arm/v7.rs similarity index 100% rename from src/arm/v7.rs rename to coresimd/src/arm/v7.rs diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs new file mode 100644 index 0000000000..f0ce9e8178 --- /dev/null +++ b/coresimd/src/lib.rs @@ -0,0 +1,231 @@ +//! SIMD support +//! +//! This crate provides the fundamentals of supporting SIMD in Rust. This crate +//! should compile on all platforms and provide `simd` and `vendor` modules at +//! the top-level. The `simd` module contains *portable vector types* which +//! should work across all platforms and be implemented in the most efficient +//! manner possible for the platform at hand. The `vendor` module contains +//! vendor intrinsics that operate over these SIMD types, typically +//! corresponding to a particular CPU instruction +//! +//! ```rust +//! extern crate coresimd as stdsimd; +//! use stdsimd::simd::u32x4; +//! +//! fn main() { +//! let a = u32x4::new(1, 2, 3, 4); +//! let b = u32x4::splat(10); +//! assert_eq!(a + b, u32x4::new(11, 12, 13, 14)); +//! } +//! ``` +//! +//! > **Note**: This crate is *nightly only* at the moment, and requires a +//! > nightly rust toolchain to compile. +//! +//! This documentation is only for one particular architecture, you can find +//! others at: +//! +//! 
* [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/) +//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/) +//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/) +//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/) +//! +//! ## Portability +//! +//! The `simd` module and its types should be portable to all platforms. The +//! runtime characteristics of these types may vary per platform and per CPU +//! feature enabled, but they should always have the most optimized +//! implementation for the target at hand. +//! +//! The `vendor` module provides no portability guarantees. The `vendor` module +//! is per CPU architecture currently and provides intrinsics corresponding to +//! functions for that particular CPU architecture. Note that the functions +//! provided in this module are intended to correspond to CPU instructions and +//! have no runtime support for whether you CPU actually supports the +//! instruction. +//! +//! CPU target feature detection is done via the `cfg_feature_enabled!` macro +//! at runtime. This macro will detect at runtime whether the specified feature +//! is available or not, returning true or false depending on the current CPU. +//! +//! ``` +//! #![feature(cfg_target_feature)] +//! +//! #[macro_use] +//! extern crate coresimd as stdsimd; +//! +//! fn main() { +//! if cfg_feature_enabled!("avx2") { +//! println!("avx2 intrinsics will work"); +//! } else { +//! println!("avx2 intrinsics will not work"); +//! // undefined behavior: may generate a `SIGILL`. +//! } +//! } +//! ``` +//! +//! After verifying that a specified feature is available, use `target_feature` +//! to enable a given feature and use the desired intrinsic. +//! +//! ```ignore +//! # #![feature(cfg_target_feature)] +//! # #![feature(target_feature)] +//! # #[macro_use] +//! # extern crate coresimd as stdsimd; +//! # fn main() { +//! # if cfg_feature_enabled!("avx2") { +//! 
// avx2 specific code may be used in this function +//! #[target_feature = "+avx2"] +//! fn and_256() { +//! // avx2 feature specific intrinsics will work here! +//! use stdsimd::vendor::{__m256i, _mm256_and_si256}; +//! +//! let a = __m256i::splat(5); +//! let b = __m256i::splat(3); +//! +//! let got = unsafe { _mm256_and_si256(a, b) }; +//! +//! assert_eq!(got, __m256i::splat(1)); +//! } +//! # and_256(); +//! # } +//! # } +//! ``` +//! +//! # Status +//! +//! This crate is intended for eventual inclusion into the standard library, +//! but some work and experimentation is needed to get there! First and +//! foremost you can help out by kicking the tires on this crate and seeing if +//! it works for your use case! Next up you can help us fill out the [vendor +//! intrinsics][vendor] to ensure that we've got all the SIMD support +//! necessary. +//! +//! The language support and status of SIMD is also still a little up in the +//! air right now, you may be interested in a few issues along these lines: +//! +//! * [Overal tracking issue for SIMD support][simd_tracking_issue] +//! * [`cfg_target_feature` tracking issue][cfg_target_feature_issue] +//! * [SIMD types currently not sound][simd_soundness_bug] +//! * [`#[target_feature]` improvements][target_feature_impr] +//! +//! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 +//! [simd_tracking_issue]: https://github.com/rust-lang/rust/issues/27731 +//! [cfg_target_feature_issue]: https://github.com/rust-lang/rust/issues/29717 +//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367 +//! 
[target_feature_impr]: https://github.com/rust-lang/rust/issues/44839 + +#![cfg_attr(feature = "strict", deny(warnings))] +#![allow(dead_code)] +#![allow(unused_features)] +#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, + simd_ffi, target_feature, cfg_target_feature, i128_type, asm, + const_atomic_usize_new, stmt_expr_attributes)] +#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))] +#![cfg_attr(feature = "cargo-clippy", + allow(inline_always, too_many_arguments, cast_sign_loss, + cast_lossless, cast_possible_wrap, + cast_possible_truncation, cast_precision_loss, + shadow_reuse, cyclomatic_complexity, similar_names, + many_single_char_names))] +#![no_std] + +#[cfg(test)] +#[macro_use] +extern crate std; + +#[cfg(test)] +extern crate stdsimd_test; + +#[cfg(test)] +extern crate test; + +/// Platform independent SIMD vector types and operations. +pub mod simd { + pub use v128::*; + pub use v256::*; + pub use v512::*; + pub use v64::*; +} + +/// Platform dependent vendor intrinsics. +pub mod vendor { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub use x86::*; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub use arm::*; + + #[cfg(target_arch = "aarch64")] + pub use aarch64::*; + + // FIXME: rust does not expose the nvptx and nvptx64 targets yet + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", + target_arch = "arm", target_arch = "aarch64")))] + pub use nvptx::*; + + #[cfg( + // x86/x86_64: + any(target_arch = "x86", target_arch = "x86_64") + )] + pub use runtime::{__unstable_detect_feature, __Feature}; +} + +#[cfg( + // x86/x86_64: + any(target_arch = "x86", target_arch = "x86_64") +)] +#[macro_use] +mod runtime; + +#[macro_use] +mod macros; +mod simd_llvm; +mod v128; +mod v256; +mod v512; +mod v64; + +/// 32-bit wide vector tpyes +mod v32 { + use simd_llvm::*; + + define_ty! { i16x2, i16, i16 } + define_impl! { i16x2, i16, 2, i16x2, x0, x1 } + define_ty! 
{ u16x2, u16, u16 } + define_impl! { u16x2, u16, 2, i16x2, x0, x1 } + + define_ty! { i8x4, i8, i8, i8, i8 } + define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 } + define_ty! { u8x4, u8, u8, u8, u8 } + define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 } + + define_casts!( + (i16x2, i64x2, as_i64x2), + (u16x2, i64x2, as_i64x2), + (i8x4, i32x4, as_i32x4), + (u8x4, i32x4, as_i32x4) + ); +} + +/// 16-bit wide vector tpyes +mod v16 { + use simd_llvm::*; + + define_ty! { i8x2, i8, i8 } + define_impl! { i8x2, i8, 2, i8x2, x0, x1 } + define_ty! { u8x2, u8, u8 } + define_impl! { u8x2, u8, 2, i8x2, x0, x1 } + + define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2)); +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86; + +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +mod arm; +#[cfg(target_arch = "aarch64")] +mod aarch64; + +mod nvptx; diff --git a/src/macros.rs b/coresimd/src/macros.rs similarity index 100% rename from src/macros.rs rename to coresimd/src/macros.rs diff --git a/src/nvptx/mod.rs b/coresimd/src/nvptx/mod.rs similarity index 100% rename from src/nvptx/mod.rs rename to coresimd/src/nvptx/mod.rs diff --git a/coresimd/src/runtime/bit.rs b/coresimd/src/runtime/bit.rs new file mode 100644 index 0000000000..42483e5225 --- /dev/null +++ b/coresimd/src/runtime/bit.rs @@ -0,0 +1,11 @@ +//! Bit manipulation utilities + +/// Sets the `bit` of `x`. +pub const fn set(x: usize, bit: u32) -> usize { + x | 1 << bit +} + +/// Tests the `bit` of `x`. +pub const fn test(x: usize, bit: u32) -> bool { + x & (1 << bit) != 0 +} diff --git a/coresimd/src/runtime/cache.rs b/coresimd/src/runtime/cache.rs new file mode 100644 index 0000000000..bb247fb531 --- /dev/null +++ b/coresimd/src/runtime/cache.rs @@ -0,0 +1,31 @@ +//! Cache of run-time feature detection + +use core::sync::atomic::{AtomicUsize, Ordering}; +use core::usize; + +use super::bit; + +/// This global variable is a bitset used to cache the features supported by +/// the +/// CPU. 
+static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX); + +/// Test the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `f()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `FEATURES` global variable as an `AtomicUsize`. +/// +/// It uses the `__Feature` variant to index into this variable as a bitset. If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +pub fn test(bit: u32, f: F) -> bool +where + F: FnOnce() -> usize, +{ + if CACHE.load(Ordering::Relaxed) == usize::MAX { + CACHE.store(f(), Ordering::Relaxed); + } + bit::test(CACHE.load(Ordering::Relaxed), bit) +} diff --git a/coresimd/src/runtime/macros.rs b/coresimd/src/runtime/macros.rs new file mode 100644 index 0000000000..e8278bb295 --- /dev/null +++ b/coresimd/src/runtime/macros.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection macros. + +/// Is a feature supported by the host CPU? +/// +/// This macro performs run-time feature detection. It returns true if the host +/// CPU in which the binary is running on supports a particular feature. +#[macro_export] +macro_rules! cfg_feature_enabled { + ($name:tt) => ( + { + #[cfg(target_feature = $name)] + { + true + } + #[cfg(not(target_feature = $name))] + { + __unstable_detect_feature!($name) + } + } + ) +} + +/// In all unsupported architectures using the macro is an error +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", + target_arch = "arm", target_arch = "aarch64")))] +#[macro_export] +#[doc(hidden)] +macro_rules! 
__unstable_detect_feature { + ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "x86_64")] + #[test] + fn test_macros() { + assert!(cfg_feature_enabled!("sse")); + } +} diff --git a/coresimd/src/runtime/mod.rs b/coresimd/src/runtime/mod.rs new file mode 100644 index 0000000000..6ad497f7df --- /dev/null +++ b/coresimd/src/runtime/mod.rs @@ -0,0 +1,17 @@ +//! Run-time feature detection +mod cache; +mod bit; + +#[macro_use] +mod macros; + +#[macro_use] +mod x86; +pub use self::x86::__Feature; +use self::x86::detect_features; + +/// Performs run-time feature detection. +#[doc(hidden)] +pub fn __unstable_detect_feature(x: __Feature) -> bool { + cache::test(x as u32, detect_features) +} diff --git a/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs similarity index 85% rename from src/runtime/x86.rs rename to coresimd/src/runtime/x86.rs index a9d88cf8c1..54dda2eeca 100644 --- a/src/runtime/x86.rs +++ b/coresimd/src/runtime/x86.rs @@ -364,8 +364,7 @@ pub fn detect_features() -> usize { enable(extended_features_ebx, 5, __Feature::avx2); // For AVX-512 the OS also needs to support saving/restoring - // the - // extended state, only then we enable AVX-512 support: + // the extended state, only then we enable AVX-512 support: if os_avx512_support { enable(extended_features_ebx, 16, __Feature::avx512f); enable(extended_features_ebx, 17, __Feature::avx512dq); @@ -384,8 +383,8 @@ pub fn detect_features() -> usize { } } - // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = - // 1) + // Processor Extended State Enumeration Sub-leaf + // (EAX = 0DH, ECX = 1) if max_basic_leaf >= 0xd { let CpuidResult { eax: proc_extended_state1_eax, @@ -417,9 +416,10 @@ pub fn detect_features() -> usize { #[cfg(test)] mod tests { - #[cfg(feature = "std")] + extern crate cupid; + #[test] - fn runtime_detection_x86_nocapture() { + fn dump() { println!("sse: {:?}", cfg_feature_enabled!("sse")); println!("sse2: 
{:?}", cfg_feature_enabled!("sse2")); println!("sse3: {:?}", cfg_feature_enabled!("sse3")); @@ -435,10 +435,10 @@ mod tests { println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw")); println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq")); println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl")); - println!("avx512ifma {:?}", cfg_feature_enabled!("avx512ifma")); - println!("avx512vbmi {:?}", cfg_feature_enabled!("avx512vbmi")); + println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma")); + println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi")); println!( - "avx512vpopcntdq {:?}", + "avx512_vpopcntdq {:?}", cfg_feature_enabled!("avx512vpopcntdq") ); println!("fma: {:?}", cfg_feature_enabled!("fma")); @@ -453,4 +453,54 @@ mod tests { println!("xsaves {:?}", cfg_feature_enabled!("xsaves")); println!("xsavec {:?}", cfg_feature_enabled!("xsavec")); } + + #[test] + fn compare_with_cupid() { + let information = cupid::master().unwrap(); + assert_eq!(cfg_feature_enabled!("sse"), information.sse()); + assert_eq!(cfg_feature_enabled!("sse2"), information.sse2()); + assert_eq!(cfg_feature_enabled!("sse3"), information.sse3()); + assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3()); + assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1()); + assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2()); + assert_eq!(cfg_feature_enabled!("avx"), information.avx()); + assert_eq!(cfg_feature_enabled!("avx2"), information.avx2()); + assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f()); + assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd()); + assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er()); + assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf()); + assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw()); + assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq()); + assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl()); + 
assert_eq!( + cfg_feature_enabled!("avx512ifma"), + information.avx512_ifma() + ); + assert_eq!( + cfg_feature_enabled!("avx512vbmi"), + information.avx512_vbmi() + ); + assert_eq!( + cfg_feature_enabled!("avx512vpopcntdq"), + information.avx512_vpopcntdq() + ); + assert_eq!(cfg_feature_enabled!("fma"), information.fma()); + assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1()); + assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2()); + assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt()); + assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a()); + assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt()); + assert_eq!(cfg_feature_enabled!("tbm"), information.tbm()); + assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt()); + assert_eq!(cfg_feature_enabled!("xsave"), information.xsave()); + assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt()); + assert_eq!( + cfg_feature_enabled!("xsavec"), + information.xsavec_and_xrstor() + ); + assert_eq!( + cfg_feature_enabled!("xsaves"), + information.xsaves_xrstors_and_ia32_xss() + ); + } } diff --git a/src/simd_llvm.rs b/coresimd/src/simd_llvm.rs similarity index 100% rename from src/simd_llvm.rs rename to coresimd/src/simd_llvm.rs diff --git a/src/v128.rs b/coresimd/src/v128.rs similarity index 100% rename from src/v128.rs rename to coresimd/src/v128.rs diff --git a/src/v256.rs b/coresimd/src/v256.rs similarity index 100% rename from src/v256.rs rename to coresimd/src/v256.rs diff --git a/src/v512.rs b/coresimd/src/v512.rs similarity index 100% rename from src/v512.rs rename to coresimd/src/v512.rs diff --git a/src/v64.rs b/coresimd/src/v64.rs similarity index 100% rename from src/v64.rs rename to coresimd/src/v64.rs diff --git a/src/x86/i386/eflags.rs b/coresimd/src/x86/i386/eflags.rs similarity index 100% rename from src/x86/i386/eflags.rs rename to coresimd/src/x86/i386/eflags.rs diff --git a/src/x86/i386/mod.rs b/coresimd/src/x86/i386/mod.rs similarity index 
100% rename from src/x86/i386/mod.rs rename to coresimd/src/x86/i386/mod.rs diff --git a/src/x86/i586/abm.rs b/coresimd/src/x86/i586/abm.rs similarity index 98% rename from src/x86/i586/abm.rs rename to coresimd/src/x86/i586/abm.rs index a912bdded1..52dc991a84 100644 --- a/src/x86/i586/abm.rs +++ b/coresimd/src/x86/i586/abm.rs @@ -15,8 +15,7 @@ //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wikipedia_bmi]: -//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_. -//! 28Advanced_Bit_Manipulation.29 +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs similarity index 99% rename from src/x86/i586/avx.rs rename to coresimd/src/x86/i586/avx.rs index aa1e6fe99b..7df5fd6a2e 100644 --- a/src/x86/i586/avx.rs +++ b/coresimd/src/x86/i586/avx.rs @@ -326,9 +326,9 @@ pub unsafe fn _mm256_div_pd(a: f64x4, b: f64x4) -> f64x4 { /// - `0x02`: Round up, toward positive infinity. /// - `0x03`: Truncate the values. /// -/// For a complete list of options, check the LLVM docs: +/// For a complete list of options, check [the LLVM docs][llvm_docs]. 
/// -/// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 +/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 #[inline(always)] #[target_feature = "+avx"] #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))] diff --git a/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs similarity index 92% rename from src/x86/i586/avx2.rs rename to coresimd/src/x86/i586/avx2.rs index f7a6fb894a..7bf376b95f 100644 --- a/src/x86/i586/avx2.rs +++ b/coresimd/src/x86/i586/avx2.rs @@ -695,400 +695,496 @@ pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 { phsubsw(a, b) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_i32gather_epi32( + slice: *const i32, offsets: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherdd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm_mask_i32gather_epi32(src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_mask_i32gather_epi32( + src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherdd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: i32x8, scale: i8) -> i32x8 { +pub unsafe fn _mm256_i32gather_epi32( + slice: *const i32, offsets: i32x8, scale: i8 +) -> i32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdd(i32x8::splat(0), slice as *const i8, offsets, i32x8::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_epi32(src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8 { +pub unsafe fn _mm256_mask_i32gather_epi32( + src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i8 +) -> i32x8 { macro_rules! 
call { ($imm8:expr) => (vpgatherdd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: i32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_i32gather_ps( + slice: *const f32, offsets: i32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherdps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm_mask_i32gather_ps(src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_mask_i32gather_ps( + src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherdps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: i32x8, scale: i8) -> f32x8 { +pub unsafe fn _mm256_i32gather_ps( + slice: *const f32, offsets: i32x8, scale: i8 +) -> f32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdps(f32x8::splat(0.0), slice as *const i8, offsets, f32x8::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_ps(src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i8) -> f32x8 { +pub unsafe fn _mm256_mask_i32gather_ps( + src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i8 +) -> f32x8 { macro_rules! call { ($imm8:expr) => (vpgatherdps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: i32x4, scale: i8) -> i64x2 { +pub unsafe fn _mm_i32gather_epi64( + slice: *const i64, offsets: i32x4, scale: i8 +) -> i64x2 { macro_rules! 
call { ($imm8:expr) => (pgatherdq(i64x2::splat(0), slice as *const i8, offsets, i64x2::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm_mask_i32gather_epi64(src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_mask_i32gather_epi64( + src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherdq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: i32x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_i32gather_epi64( + slice: *const i64, offsets: i32x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdq(i64x4::splat(0), slice as *const i8, offsets, i64x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_epi64(src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_mask_i32gather_epi64( + src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: i32x4, scale: i8) -> f64x2 { +pub unsafe fn _mm_i32gather_pd( + slice: *const f64, offsets: i32x4, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherdpd(f64x2::splat(0.0), slice as *const i8, offsets, f64x2::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm_mask_i32gather_pd(src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_mask_i32gather_pd( + src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i8 +) -> f64x2 { macro_rules! 
call { ($imm8:expr) => (pgatherdpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: i32x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_i32gather_pd( + slice: *const f64, offsets: i32x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdpd(f64x4::splat(0.0), slice as *const i8, offsets, f64x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] -pub unsafe fn _mm256_mask_i32gather_pd(src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_mask_i32gather_pd( + src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherdpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: i64x2, scale: i8) -> i32x4 { +pub unsafe fn _mm_i64gather_epi32( + slice: *const i32, offsets: i64x2, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherqd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm_mask_i64gather_epi32(src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm_mask_i64gather_epi32( + src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (pgatherqd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: i64x4, scale: i8) -> i32x4 { +pub unsafe fn _mm256_i64gather_epi32( + slice: *const i32, offsets: i64x4, scale: i8 +) -> i32x4 { macro_rules! 
call { ($imm8:expr) => (vpgatherqd(i32x4::splat(0), slice as *const i8, offsets, i32x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_epi32(src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4 { +pub unsafe fn _mm256_mask_i64gather_epi32( + src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i8 +) -> i32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: i64x2, scale: i8) -> f32x4 { +pub unsafe fn _mm_i64gather_ps( + slice: *const f32, offsets: i64x2, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherqps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm_mask_i64gather_ps(src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm_mask_i64gather_ps( + src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (pgatherqps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: i64x4, scale: i8) -> f32x4 { +pub unsafe fn _mm256_i64gather_ps( + slice: *const f32, offsets: i64x4, scale: i8 +) -> f32x4 { macro_rules! call { ($imm8:expr) => (vpgatherqps(f32x4::splat(0.0), slice as *const i8, offsets, f32x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_ps(src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i8) -> f32x4 { +pub unsafe fn _mm256_mask_i64gather_ps( + src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i8 +) -> f32x4 { macro_rules! 
call { ($imm8:expr) => (vpgatherqps(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_i64gather_epi64( + slice: *const i64, offsets: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherqq(i64x2::splat(0), slice as *const i8, offsets, i64x2::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm_mask_i64gather_epi64(src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2 { +pub unsafe fn _mm_mask_i64gather_epi64( + src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i8 +) -> i64x2 { macro_rules! call { ($imm8:expr) => (pgatherqq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_i64gather_epi64( + slice: *const i64, offsets: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqq(i64x4::splat(0), slice as *const i8, offsets, i64x4::splat(-1), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_epi64(src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4 { +pub unsafe fn _mm256_mask_i64gather_epi64( + src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i8 +) -> i64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqq(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: i64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_i64gather_pd( + slice: *const f64, offsets: i64x2, scale: i8 +) -> f64x2 { macro_rules! 
call { ($imm8:expr) => (pgatherqpd(f64x2::splat(0.0), slice as *const i8, offsets, f64x2::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm_mask_i64gather_pd(src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i8) -> f64x2 { +pub unsafe fn _mm_mask_i64gather_pd( + src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i8 +) -> f64x2 { macro_rules! call { ($imm8:expr) => (pgatherqpd(src, slice as *const i8, offsets, mask, $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. #[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: i64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_i64gather_pd( + slice: *const f64, offsets: i64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqpd(f64x4::splat(0.0), slice as *const i8, offsets, f64x4::splat(-1.0), $imm8)) } constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, where +/// Return values from `slice` at offsets determined by `offsets * scale`, +/// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
#[inline(always)] #[target_feature = "+avx2"] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] -pub unsafe fn _mm256_mask_i64gather_pd(src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i8) -> f64x4 { +pub unsafe fn _mm256_mask_i64gather_pd( + src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i8 +) -> f64x4 { macro_rules! call { ($imm8:expr) => (vpgatherqpd(src, slice as *const i8, offsets, mask, $imm8)) } @@ -1578,7 +1674,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2015,7 +2111,7 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2064,7 +2160,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2112,7 +2208,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2156,7 +2252,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate 
coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2200,7 +2296,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2239,7 +2335,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: i32x8, b: i32x8) -> i32x8 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2278,7 +2374,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: i32x8, b: i32x8) -> i32x8 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2317,7 +2413,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: i64x4, b: i64x4) -> i64x4 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2531,37 +2627,69 @@ extern "C" { #[link_name = "llvm.x86.avx2.permd"] fn permd(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.gather.d.d"] - fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; + fn pgatherdd( + src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.d.d.256"] - fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8; + fn vpgatherdd( + src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8 + ) -> i32x8; 
#[link_name = "llvm.x86.avx2.gather.d.q"] - fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2; + fn pgatherdq( + src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8 + ) -> i64x2; #[link_name = "llvm.x86.avx2.gather.d.q.256"] - fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4; + fn vpgatherdq( + src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8 + ) -> i64x4; #[link_name = "llvm.x86.avx2.gather.q.d"] - fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4; + fn pgatherqd( + src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.d.256"] - fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4; + fn vpgatherqd( + src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8 + ) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.q"] - fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2; + fn pgatherqq( + src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8 + ) -> i64x2; #[link_name = "llvm.x86.avx2.gather.q.q.256"] - fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4; + fn vpgatherqq( + src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8 + ) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.pd"] - fn pgatherdpd (src: f64x2, slice: *const i8, offsets: i32x4, mask: f64x2, scale: i8) -> f64x2; + fn pgatherdpd( + src: f64x2, slice: *const i8, offsets: i32x4, mask: f64x2, scale: i8 + ) -> f64x2; #[link_name = "llvm.x86.avx2.gather.d.pd.256"] - fn vpgatherdpd (src: f64x4, slice: *const i8, offsets: i32x4, mask: f64x4, scale: i8) -> f64x4; + fn vpgatherdpd( + src: f64x4, slice: *const i8, offsets: i32x4, mask: f64x4, scale: i8 + ) -> f64x4; #[link_name = "llvm.x86.avx2.gather.q.pd"] - fn pgatherqpd 
(src: f64x2, slice: *const i8, offsets: i64x2, mask: f64x2, scale: i8) -> f64x2; + fn pgatherqpd( + src: f64x2, slice: *const i8, offsets: i64x2, mask: f64x2, scale: i8 + ) -> f64x2; #[link_name = "llvm.x86.avx2.gather.q.pd.256"] - fn vpgatherqpd (src: f64x4, slice: *const i8, offsets: i64x4, mask: f64x4, scale: i8) -> f64x4; + fn vpgatherqpd( + src: f64x4, slice: *const i8, offsets: i64x4, mask: f64x4, scale: i8 + ) -> f64x4; #[link_name = "llvm.x86.avx2.gather.d.ps"] - fn pgatherdps (src: f32x4, slice: *const i8, offsets: i32x4, mask: f32x4, scale: i8) -> f32x4; + fn pgatherdps( + src: f32x4, slice: *const i8, offsets: i32x4, mask: f32x4, scale: i8 + ) -> f32x4; #[link_name = "llvm.x86.avx2.gather.d.ps.256"] - fn vpgatherdps (src: f32x8, slice: *const i8, offsets: i32x8, mask: f32x8, scale: i8) -> f32x8; + fn vpgatherdps( + src: f32x8, slice: *const i8, offsets: i32x8, mask: f32x8, scale: i8 + ) -> f32x8; #[link_name = "llvm.x86.avx2.gather.q.ps"] - fn pgatherqps (src: f32x4, slice: *const i8, offsets: i64x2, mask: f32x4, scale: i8) -> f32x4; + fn pgatherqps( + src: f32x4, slice: *const i8, offsets: i64x2, mask: f32x4, scale: i8 + ) -> f32x4; #[link_name = "llvm.x86.avx2.gather.q.ps.256"] - fn vpgatherqps (src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8) -> f32x4; + fn vpgatherqps( + src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8 + ) -> f32x4; } @@ -3998,7 +4126,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_i32gather_epi32(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm_i32gather_epi32( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 32, 48)); } @@ -4009,10 +4141,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_mask_i32gather_epi32(i32x4::splat(256), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - i32x4::new(-1, -1, -1, 0), - 4); + let r = 
avx2::_mm_mask_i32gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + i32x4::new(-1, -1, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 64, 256)); } @@ -4023,7 +4158,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_i32gather_epi32(arr.as_ptr(), i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), 4); + let r = avx2::_mm256_i32gather_epi32( + arr.as_ptr(), + i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), + 4, + ); assert_eq!(r, i32x8::new(0, 16, 32, 48, 1, 2, 3, 4)); } @@ -4034,10 +4173,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_mask_i32gather_epi32(i32x8::splat(256), arr.as_ptr(), - i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), - i32x8::new(-1, -1, -1, 0, 0, 0, 0, 0), - 4); + let r = avx2::_mm256_mask_i32gather_epi32( + i32x8::splat(256), + arr.as_ptr(), + i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), + i32x8::new(-1, -1, -1, 0, 0, 0, 0, 0), + 4, + ); assert_eq!(r, i32x8::new(0, 16, 64, 256, 256, 256, 256, 256)); } @@ -4050,7 +4192,8 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_i32gather_ps(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); + let r = + avx2::_mm_i32gather_ps(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 4); assert_eq!(r, f32x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4063,10 +4206,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_mask_i32gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - f32x4::new(-1.0, -1.0, -1.0, 0.0), - 4); + let r = avx2::_mm_mask_i32gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + f32x4::new(-1.0, -1.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4079,7 +4225,11 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_i32gather_ps(arr.as_ptr(), i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), 4); + let r = 
avx2::_mm256_i32gather_ps( + arr.as_ptr(), + i32x8::new(0, 16, 32, 48, 1, 2, 3, 4), + 4, + ); assert_eq!(r, f32x8::new(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); } @@ -4092,11 +4242,17 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_mask_i32gather_ps(f32x8::splat(256.0), arr.as_ptr(), - i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), - f32x8::new(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), - 4); - assert_eq!(r, f32x8::new(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0)); + let r = avx2::_mm256_mask_i32gather_ps( + f32x8::splat(256.0), + arr.as_ptr(), + i32x8::new(0, 16, 64, 96, 0, 0, 0, 0), + f32x8::new(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), + 4, + ); + assert_eq!( + r, + f32x8::new(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0) + ); } @@ -4107,7 +4263,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_i32gather_epi64(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); + let r = avx2::_mm_i32gather_epi64( + arr.as_ptr(), + i32x4::new(0, 16, 0, 0), + 8, + ); assert_eq!(r, i64x2::new(0, 16)); } @@ -4118,10 +4278,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_mask_i32gather_epi64(i64x2::splat(256), arr.as_ptr(), - i32x4::new(16, 16, 16, 16), - i64x2::new(-1, 0), - 8); + let r = avx2::_mm_mask_i32gather_epi64( + i64x2::splat(256), + arr.as_ptr(), + i32x4::new(16, 16, 16, 16), + i64x2::new(-1, 0), + 8, + ); assert_eq!(r, i64x2::new(16, 256)); } @@ -4132,7 +4295,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_i32gather_epi64(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i32gather_epi64( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 32, 48)); } @@ -4143,10 +4310,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = 
avx2::_mm256_mask_i32gather_epi64(i64x4::splat(256), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - i64x4::new(-1, -1, -1, 0), - 8); + let r = avx2::_mm256_mask_i32gather_epi64( + i64x4::splat(256), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + i64x4::new(-1, -1, -1, 0), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 64, 256)); } @@ -4159,7 +4329,8 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); + let r = + avx2::_mm_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 0, 0), 8); assert_eq!(r, f64x2::new(0.0, 16.0)); } @@ -4172,10 +4343,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_mask_i32gather_pd(f64x2::splat(256.0), arr.as_ptr(), - i32x4::new(16, 16, 16, 16), - f64x2::new(-1.0, 0.0), - 8); + let r = avx2::_mm_mask_i32gather_pd( + f64x2::splat(256.0), + arr.as_ptr(), + i32x4::new(16, 16, 16, 16), + f64x2::new(-1.0, 0.0), + 8, + ); assert_eq!(r, f64x2::new(16.0, 256.0)); } @@ -4188,7 +4362,11 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_i32gather_pd(arr.as_ptr(), i32x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i32gather_pd( + arr.as_ptr(), + i32x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4201,10 +4379,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_mask_i32gather_pd(f64x4::splat(256.0), arr.as_ptr(), - i32x4::new(0, 16, 64, 96), - f64x4::new(-1.0, -1.0, -1.0, 0.0), - 8); + let r = avx2::_mm256_mask_i32gather_pd( + f64x4::splat(256.0), + arr.as_ptr(), + i32x4::new(0, 16, 64, 96), + f64x4::new(-1.0, -1.0, -1.0, 0.0), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4226,10 +4407,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm_mask_i64gather_epi32(i32x4::splat(256), arr.as_ptr(), - i64x2::new(0, 
16), - i32x4::new(-1, 0, -1, 0), - 4); + let r = avx2::_mm_mask_i64gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i64x2::new(0, 16), + i32x4::new(-1, 0, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 256, 0, 0)); } @@ -4240,7 +4424,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_i64gather_epi32(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm256_i64gather_epi32( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 32, 48)); } @@ -4251,10 +4439,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 4 is word-addressing - let r = avx2::_mm256_mask_i64gather_epi32(i32x4::splat(256), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - i32x4::new(-1, -1, -1, 0), - 4); + let r = avx2::_mm256_mask_i64gather_epi32( + i32x4::splat(256), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + i32x4::new(-1, -1, -1, 0), + 4, + ); assert_eq!(r, i32x4::new(0, 16, 64, 256)); } @@ -4280,10 +4471,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm_mask_i64gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i64x2::new(0, 16), - f32x4::new(-1.0, 0.0, -1.0, 0.0), - 4); + let r = avx2::_mm_mask_i64gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i64x2::new(0, 16), + f32x4::new(-1.0, 0.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 256.0, 0.0, 0.0)); } @@ -4296,7 +4490,11 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_i64gather_ps(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 4); + let r = avx2::_mm256_i64gather_ps( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4309,10 +4507,13 @@ mod tests { j += 1.0; } // A multiplier of 4 is word-addressing for f32s - let r = avx2::_mm256_mask_i64gather_ps(f32x4::splat(256.0), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - f32x4::new(-1.0, -1.0, -1.0, 0.0), - 4); + let r = 
avx2::_mm256_mask_i64gather_ps( + f32x4::splat(256.0), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + f32x4::new(-1.0, -1.0, -1.0, 0.0), + 4, + ); assert_eq!(r, f32x4::new(0.0, 16.0, 64.0, 256.0)); } @@ -4335,10 +4536,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm_mask_i64gather_epi64(i64x2::splat(256), arr.as_ptr(), - i64x2::new(16, 16), - i64x2::new(-1, 0), - 8); + let r = avx2::_mm_mask_i64gather_epi64( + i64x2::splat(256), + arr.as_ptr(), + i64x2::new(16, 16), + i64x2::new(-1, 0), + 8, + ); assert_eq!(r, i64x2::new(16, 256)); } @@ -4349,7 +4553,11 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_i64gather_epi64(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i64gather_epi64( + arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 32, 48)); } @@ -4360,10 +4568,13 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = avx2::_mm256_mask_i64gather_epi64(i64x4::splat(256), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - i64x4::new(-1, -1, -1, 0), - 8); + let r = avx2::_mm256_mask_i64gather_epi64( + i64x4::splat(256), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + i64x4::new(-1, -1, -1, 0), + 8, + ); assert_eq!(r, i64x4::new(0, 16, 64, 256)); } @@ -4389,10 +4600,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm_mask_i64gather_pd(f64x2::splat(256.0), arr.as_ptr(), - i64x2::new(16, 16), - f64x2::new(-1.0, 0.0), - 8); + let r = avx2::_mm_mask_i64gather_pd( + f64x2::splat(256.0), + arr.as_ptr(), + i64x2::new(16, 16), + f64x2::new(-1.0, 0.0), + 8, + ); assert_eq!(r, f64x2::new(16.0, 256.0)); } @@ -4405,7 +4619,11 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_i64gather_pd(arr.as_ptr(), i64x4::new(0, 16, 32, 48), 8); + let r = avx2::_mm256_i64gather_pd( + 
arr.as_ptr(), + i64x4::new(0, 16, 32, 48), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 32.0, 48.0)); } @@ -4418,10 +4636,13 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = avx2::_mm256_mask_i64gather_pd(f64x4::splat(256.0), arr.as_ptr(), - i64x4::new(0, 16, 64, 96), - f64x4::new(-1.0, -1.0, -1.0, 0.0), - 8); + let r = avx2::_mm256_mask_i64gather_pd( + f64x4::splat(256.0), + arr.as_ptr(), + i64x4::new(0, 16, 64, 96), + f64x4::new(-1.0, -1.0, -1.0, 0.0), + 8, + ); assert_eq!(r, f64x4::new(0.0, 16.0, 64.0, 256.0)); } diff --git a/src/x86/i586/bmi.rs b/coresimd/src/x86/i586/bmi.rs similarity index 98% rename from src/x86/i586/bmi.rs rename to coresimd/src/x86/i586/bmi.rs index 1b71896dec..5f00a7c67f 100644 --- a/src/x86/i586/bmi.rs +++ b/coresimd/src/x86/i586/bmi.rs @@ -7,10 +7,7 @@ //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf -//! [wikipedia_bmi]: -//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_. -//! 28Advanced_Bit_Manipulation.29 - +//! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/src/x86/i586/bmi2.rs b/coresimd/src/x86/i586/bmi2.rs similarity index 99% rename from src/x86/i586/bmi2.rs rename to coresimd/src/x86/i586/bmi2.rs index 0e1c81a88a..f32778063a 100644 --- a/src/x86/i586/bmi2.rs +++ b/coresimd/src/x86/i586/bmi2.rs @@ -8,8 +8,7 @@ //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [wikipedia_bmi]: -//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_. -//! 28Advanced_Bit_Manipulation.29 +//! 
https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/src/x86/i586/cpuid.rs b/coresimd/src/x86/i586/cpuid.rs similarity index 100% rename from src/x86/i586/cpuid.rs rename to coresimd/src/x86/i586/cpuid.rs diff --git a/src/x86/i586/mod.rs b/coresimd/src/x86/i586/mod.rs similarity index 100% rename from src/x86/i586/mod.rs rename to coresimd/src/x86/i586/mod.rs diff --git a/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs similarity index 99% rename from src/x86/i586/sse.rs rename to coresimd/src/x86/i586/sse.rs index 6891d4ea64..f5533f5f78 100644 --- a/src/x86/i586/sse.rs +++ b/coresimd/src/x86/i586/sse.rs @@ -6,7 +6,6 @@ use core::ptr; use simd_llvm::simd_shuffle4; use v128::*; use v64::f32x2; -use x86::c_void; #[cfg(test)] use stdsimd_test::assert_instr; @@ -885,7 +884,7 @@ pub unsafe fn _mm_movemask_ps(a: f32x4) -> i32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # // The real main function /// # fn main() { @@ -937,7 +936,7 @@ pub unsafe fn _mm_loadh_pi(a: f32x4, p: *const f32) -> f32x4 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # // The real main function /// # fn main() { @@ -1572,7 +1571,7 @@ pub const _MM_HINT_NTA: i8 = 0; #[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))] #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))] #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))] -pub unsafe fn _mm_prefetch(p: *const c_void, strategy: i8) { +pub unsafe fn _mm_prefetch(p: *const u8, strategy: i8) { // The `strategy` must be a compile-time constant, so we use a short form // of `constify_imm8!` for now. 
// We use the `llvm.prefetch` instrinsic with `rw` = 0 (read), and @@ -1684,7 +1683,7 @@ extern "C" { #[link_name = "llvm.x86.sse.ldmxcsr"] fn ldmxcsr(p: *const i8); #[link_name = "llvm.prefetch"] - fn prefetch(p: *const c_void, rw: i32, loc: i32, ty: i32); + fn prefetch(p: *const u8, rw: i32, loc: i32, ty: i32); #[link_name = "llvm.x86.sse.cmp.ss"] fn cmpss(a: f32x4, b: f32x4, imm8: i8) -> f32x4; } diff --git a/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs similarity index 99% rename from src/x86/i586/sse2.rs rename to coresimd/src/x86/i586/sse2.rs index a31c88ed22..19978f5400 100644 --- a/src/x86/i586/sse2.rs +++ b/coresimd/src/x86/i586/sse2.rs @@ -8,7 +8,6 @@ use core::ptr; use simd_llvm::{simd_cast, simd_shuffle16, simd_shuffle2, simd_shuffle4, simd_shuffle8}; -use x86::c_void; use x86::__m128i; use v128::*; use v64::*; @@ -29,7 +28,7 @@ pub unsafe fn _mm_pause() { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(clflush))] -pub unsafe fn _mm_clflush(p: *mut c_void) { +pub unsafe fn _mm_clflush(p: *mut u8) { clflush(p) } @@ -1989,7 +1988,7 @@ extern "C" { #[link_name = "llvm.x86.sse2.pause"] fn pause(); #[link_name = "llvm.x86.sse2.clflush"] - fn clflush(p: *mut c_void); + fn clflush(p: *mut u8); #[link_name = "llvm.x86.sse2.lfence"] fn lfence(); #[link_name = "llvm.x86.sse2.mfence"] @@ -2142,7 +2141,6 @@ extern "C" { #[cfg(test)] mod tests { - use super::c_void; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
@@ -2158,7 +2156,7 @@ mod tests { #[simd_test = "sse2"] unsafe fn _mm_clflush() { let x = 0; - sse2::_mm_clflush(&x as *const _ as *mut c_void); + sse2::_mm_clflush(&x as *const _ as *mut u8); } #[simd_test = "sse2"] diff --git a/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs similarity index 100% rename from src/x86/i586/sse3.rs rename to coresimd/src/x86/i586/sse3.rs diff --git a/src/x86/i586/sse41.rs b/coresimd/src/x86/i586/sse41.rs similarity index 99% rename from src/x86/i586/sse41.rs rename to coresimd/src/x86/i586/sse41.rs index a87fc35708..7e51a9c3e9 100644 --- a/src/x86/i586/sse41.rs +++ b/coresimd/src/x86/i586/sse41.rs @@ -504,7 +504,7 @@ pub unsafe fn _mm_ceil_ss(a: f32x4, b: f32x4) -> f32x4 { /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` -/// use stdsimd::vendor; +/// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: /// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); @@ -533,7 +533,7 @@ pub unsafe fn _mm_round_pd(a: f64x2, rounding: i32) -> f64x2 { /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` -/// use stdsimd::vendor; +/// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: /// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); @@ -564,7 +564,7 @@ pub unsafe fn _mm_round_ps(a: f32x4, rounding: i32) -> f32x4 { /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` -/// use stdsimd::vendor; +/// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: /// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); @@ -595,7 +595,7 @@ pub unsafe fn _mm_round_sd(a: f64x2, b: f64x2, rounding: i32) -> f64x2 { /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` -/// use stdsimd::vendor; +/// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: /// 
(vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); diff --git a/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs similarity index 99% rename from src/x86/i586/sse42.rs rename to coresimd/src/x86/i586/sse42.rs index e3bc7b7757..ce3bcdbaaa 100644 --- a/src/x86/i586/sse42.rs +++ b/coresimd/src/x86/i586/sse42.rs @@ -96,7 +96,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -139,7 +139,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -180,7 +180,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -219,7 +219,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -392,7 +392,7 @@ pub unsafe fn _mm_cmpestrm( /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate coresimd as stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { diff --git a/src/x86/i586/ssse3.rs 
b/coresimd/src/x86/i586/ssse3.rs similarity index 100% rename from src/x86/i586/ssse3.rs rename to coresimd/src/x86/i586/ssse3.rs diff --git a/src/x86/i586/tbm.rs b/coresimd/src/x86/i586/tbm.rs similarity index 99% rename from src/x86/i586/tbm.rs rename to coresimd/src/x86/i586/tbm.rs index b0d6253296..f42a382abf 100644 --- a/src/x86/i586/tbm.rs +++ b/coresimd/src/x86/i586/tbm.rs @@ -8,8 +8,7 @@ //! //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wikipedia_bmi]: -//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_. -//! 28Advanced_Bit_Manipulation.29 +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/src/x86/i586/xsave.rs b/coresimd/src/x86/i586/xsave.rs similarity index 91% rename from src/x86/i586/xsave.rs rename to coresimd/src/x86/i586/xsave.rs index 45c567fba5..10003388a5 100644 --- a/src/x86/i586/xsave.rs +++ b/coresimd/src/x86/i586/xsave.rs @@ -5,14 +5,12 @@ #[cfg(test)] use stdsimd_test::assert_instr; -use x86::c_void; - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.xsave"] fn xsave(p: *mut i8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xrstor"] - fn xrstor(p: *const c_void, hi: i32, lo: i32) -> (); + fn xrstor(p: *const u8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xsetbv"] fn xsetbv(v: i32, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xgetbv"] @@ -20,7 +18,7 @@ extern "C" { #[link_name = "llvm.x86.xsave64"] fn xsave64(p: *mut i8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xrstor64"] - fn xrstor64(p: *const c_void, hi: i32, lo: i32) -> (); + fn xrstor64(p: *const u8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xsaveopt"] fn xsaveopt(p: *mut i8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xsaveopt64"] @@ -34,9 +32,9 @@ extern "C" { #[link_name = "llvm.x86.xsaves64"] fn xsaves64(p: *mut i8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xrstors"] - fn 
xrstors(p: *const c_void, hi: i32, lo: i32) -> (); + fn xrstors(p: *const u8, hi: i32, lo: i32) -> (); #[link_name = "llvm.x86.xrstors64"] - fn xrstors64(p: *const c_void, hi: i32, lo: i32) -> (); + fn xrstors64(p: *const u8, hi: i32, lo: i32) -> (); } /// Perform a full or partial save of the enabled processor states to memory at @@ -50,7 +48,7 @@ extern "C" { #[inline(always)] #[target_feature = "+xsave"] #[cfg_attr(test, assert_instr(xsave))] -pub unsafe fn _xsave(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) -> () { xsave(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -63,7 +61,7 @@ pub unsafe fn _xsave(mem_addr: *mut c_void, save_mask: u64) -> () { #[inline(always)] #[target_feature = "+xsave"] #[cfg_attr(test, assert_instr(xrstor))] -pub unsafe fn _xrstor(mem_addr: *const c_void, rs_mask: u64) -> () { +pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) -> () { xrstor(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); } @@ -104,7 +102,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { #[target_feature = "+xsave"] #[cfg_attr(test, assert_instr(xsave64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xsave64(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) -> () { xsave64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -118,7 +116,7 @@ pub unsafe fn _xsave64(mem_addr: *mut c_void, save_mask: u64) -> () { #[target_feature = "+xsave"] #[cfg_attr(test, assert_instr(xrstor64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xrstor64(mem_addr: *const c_void, rs_mask: u64) -> () { +pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) -> () { xrstor64(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); } @@ -132,7 +130,7 @@ pub unsafe fn _xrstor64(mem_addr: *const c_void, rs_mask: u64) -> () { #[inline(always)] #[target_feature = "+xsave,+xsaveopt"] #[cfg_attr(test, assert_instr(xsaveopt))] -pub 
unsafe fn _xsaveopt(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) -> () { xsaveopt(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -147,7 +145,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut c_void, save_mask: u64) -> () { #[target_feature = "+xsave,+xsaveopt"] #[cfg_attr(test, assert_instr(xsaveopt64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xsaveopt64(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) -> () { xsaveopt64( mem_addr as *mut i8, (save_mask >> 32) as i32, @@ -164,7 +162,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut c_void, save_mask: u64) -> () { #[inline(always)] #[target_feature = "+xsave,+xsavec"] #[cfg_attr(test, assert_instr(xsavec))] -pub unsafe fn _xsavec(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) -> () { xsavec(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -178,7 +176,7 @@ pub unsafe fn _xsavec(mem_addr: *mut c_void, save_mask: u64) -> () { #[target_feature = "+xsave,+xsavec"] #[cfg_attr(test, assert_instr(xsavec64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xsavec64(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) -> () { xsavec64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -192,7 +190,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut c_void, save_mask: u64) -> () { #[inline(always)] #[target_feature = "+xsave,+xsaves"] #[cfg_attr(test, assert_instr(xsaves))] -pub unsafe fn _xsaves(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) -> () { xsaves(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -207,7 +205,7 @@ pub unsafe fn _xsaves(mem_addr: *mut c_void, save_mask: u64) -> () { #[target_feature = "+xsave,+xsaves"] #[cfg_attr(test, 
assert_instr(xsaves64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xsaves64(mem_addr: *mut c_void, save_mask: u64) -> () { +pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) -> () { xsaves64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); } @@ -223,7 +221,7 @@ pub unsafe fn _xsaves64(mem_addr: *mut c_void, save_mask: u64) -> () { #[inline(always)] #[target_feature = "+xsave,+xsaves"] #[cfg_attr(test, assert_instr(xrstors))] -pub unsafe fn _xrstors(mem_addr: *const c_void, rs_mask: u64) -> () { +pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) -> () { xrstors(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); } @@ -240,13 +238,12 @@ pub unsafe fn _xrstors(mem_addr: *const c_void, rs_mask: u64) -> () { #[target_feature = "+xsave,+xsaves"] #[cfg_attr(test, assert_instr(xrstors64))] #[cfg(not(target_arch = "x86"))] -pub unsafe fn _xrstors64(mem_addr: *const c_void, rs_mask: u64) -> () { +pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) -> () { xrstors64(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); } #[cfg(test)] mod tests { - use x86::c_void; use x86::i586::xsave; use stdsimd_test::simd_test; use std::fmt; @@ -260,8 +257,8 @@ mod tests { fn new() -> Buffer { Buffer { data: [0; 1024] } } - fn ptr(&mut self) -> *mut c_void { - &mut self.data[0] as *mut _ as *mut c_void + fn ptr(&mut self) -> *mut u8 { + &mut self.data[0] as *mut _ as *mut u8 } } diff --git a/src/x86/i686/mod.rs b/coresimd/src/x86/i686/mod.rs similarity index 100% rename from src/x86/i686/mod.rs rename to coresimd/src/x86/i686/mod.rs diff --git a/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs similarity index 100% rename from src/x86/i686/sse2.rs rename to coresimd/src/x86/i686/sse2.rs diff --git a/src/x86/i686/sse41.rs b/coresimd/src/x86/i686/sse41.rs similarity index 100% rename from src/x86/i686/sse41.rs rename to coresimd/src/x86/i686/sse41.rs diff --git a/src/x86/i686/sse42.rs b/coresimd/src/x86/i686/sse42.rs similarity index 100% 
rename from src/x86/i686/sse42.rs rename to coresimd/src/x86/i686/sse42.rs diff --git a/src/x86/macros.rs b/coresimd/src/x86/macros.rs similarity index 100% rename from src/x86/macros.rs rename to coresimd/src/x86/macros.rs diff --git a/src/x86/mod.rs b/coresimd/src/x86/mod.rs similarity index 74% rename from src/x86/mod.rs rename to coresimd/src/x86/mod.rs index 11811b328f..fcbcdead03 100644 --- a/src/x86/mod.rs +++ b/coresimd/src/x86/mod.rs @@ -32,17 +32,3 @@ pub type __m128i = ::v128::i8x16; /// 256-bit wide signed integer vector type #[allow(non_camel_case_types)] pub type __m256i = ::v256::i8x32; - - -/// `C`'s `void` type. -#[cfg(not(feature = "std"))] -#[allow(non_camel_case_types)] -#[repr(u8)] -pub enum c_void { - #[doc(hidden)] __variant1, - #[doc(hidden)] __variant2, -} - -// FIXME: we should not depend on std for this -#[cfg(feature = "std")] -use std::os::raw::c_void; diff --git a/src/x86/x86_64/mod.rs b/coresimd/src/x86/x86_64/mod.rs similarity index 100% rename from src/x86/x86_64/mod.rs rename to coresimd/src/x86/x86_64/mod.rs diff --git a/src/x86/x86_64/sse.rs b/coresimd/src/x86/x86_64/sse.rs similarity index 100% rename from src/x86/x86_64/sse.rs rename to coresimd/src/x86/x86_64/sse.rs diff --git a/src/x86/x86_64/sse2.rs b/coresimd/src/x86/x86_64/sse2.rs similarity index 100% rename from src/x86/x86_64/sse2.rs rename to coresimd/src/x86/x86_64/sse2.rs diff --git a/src/x86/x86_64/sse42.rs b/coresimd/src/x86/x86_64/sse42.rs similarity index 100% rename from src/x86/x86_64/sse42.rs rename to coresimd/src/x86/x86_64/sse42.rs diff --git a/src/lib.rs b/src/lib.rs index 1d9345b995..37fe7b6f59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -104,139 +104,41 @@ //! The language support and status of SIMD is also still a little up in the //! air right now, you may be interested in a few issues along these lines: //! -//! * [Overal tracking issue for SIMD support] -//! (https://github.com/rust-lang/rust/issues/27731) -//! 
* [`cfg_target_feature` tracking issue] -//! (https://github.com/rust-lang/rust/issues/29717) -//! * [SIMD types currently not sound] -//! (https://github.com/rust-lang/rust/issues/44367) -//! * [`#[target_feature]` improvements] -//! (https://github.com/rust-lang/rust/issues/44839) +//! * [Overall tracking issue for SIMD support][simd_tracking_issue] +//! * [`cfg_target_feature` tracking issue][cfg_target_feature_issue] +//! * [SIMD types currently not sound][simd_soundness_bug] +//! * [`#[target_feature]` improvements][target_feature_impr] //! //! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 +//! [simd_tracking_issue]: https://github.com/rust-lang/rust/issues/27731 +//! [cfg_target_feature_issue]: https://github.com/rust-lang/rust/issues/29717 +//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367 +//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839 -#![cfg_attr(feature = "strict", deny(warnings))] -#![allow(dead_code)] -#![allow(unused_features)] -#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, - simd_ffi, target_feature, cfg_target_feature, i128_type, asm, - const_atomic_usize_new, stmt_expr_attributes)] -#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))] -#![cfg_attr(feature = "cargo-clippy", - allow(inline_always, too_many_arguments, cast_sign_loss, - cast_lossless, cast_possible_wrap, - cast_possible_truncation, cast_precision_loss, - shadow_reuse, cyclomatic_complexity, similar_names, - doc_markdown, many_single_char_names))] -#![no_std] +#![feature(macro_reexport, const_fn, const_atomic_usize_new)] -#[cfg(any(feature = "std", test))] -#[macro_use] -extern crate std; - -#[cfg(test)] -extern crate stdsimd_test; - -#[cfg(test)] -extern crate test; +/// We re-export run-time feature detection for those
architectures that have +/// support for it in `core`: +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[macro_reexport(cfg_feature_enabled, __unstable_detect_feature)] +extern crate coresimd; -/// Platform independent SIMD vector types and operations. -pub mod simd { - pub use v128::*; - pub use v256::*; - pub use v512::*; - pub use v64::*; -} +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +extern crate coresimd; /// Platform dependent vendor intrinsics. pub mod vendor { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub use x86::*; + pub use coresimd::vendor::*; - #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] - pub use arm::*; - - #[cfg(target_arch = "aarch64")] - pub use aarch64::*; - - // FIXME: rust does not expose the nvptx and nvptx64 targets yet - #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", - target_arch = "arm", target_arch = "aarch64")))] - pub use nvptx::*; + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + pub use super::runtime::{__unstable_detect_feature, __Feature}; +} - #[cfg(any( - // x86/x86_64: - any(target_arch = "x86", target_arch = "x86_64"), - // linux + std + (arm|aarch64): - all(target_os = "linux", - feature = "std", - any(target_arch = "arm", target_arch = "aarch64")) - ))] - pub use runtime::{__unstable_detect_feature, __Feature} +/// Platform independent SIMD vector types and operations. +pub mod simd { + pub use coresimd::simd::*; } -#[cfg(any( - // x86/x86_64: - any(target_arch = "x86", target_arch = "x86_64"), - // linux + std + (arm|aarch64): - all(target_os = "linux", - feature = "std", - any(target_arch = "arm", target_arch = "aarch64")) -))] +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] #[macro_use] mod runtime; - -#[macro_use] -mod macros; -mod simd_llvm; -mod v128; -mod v256; -mod v512; -mod v64; - -/// 32-bit wide vector tpyes -mod v32 { - use simd_llvm::*; - - define_ty! { i16x2, i16, i16 } - define_impl!
{ i16x2, i16, 2, i16x2, x0, x1 } - define_ty! { u16x2, u16, u16 } - define_impl! { u16x2, u16, 2, i16x2, x0, x1 } - - define_ty! { i8x4, i8, i8, i8, i8 } - define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 } - define_ty! { u8x4, u8, u8, u8, u8 } - define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 } - - define_casts!( - (i16x2, i64x2, as_i64x2), - (u16x2, i64x2, as_i64x2), - (i8x4, i32x4, as_i32x4), - (u8x4, i32x4, as_i32x4) - ); -} - -/// 16-bit wide vector tpyes -mod v16 { - use simd_llvm::*; - - define_ty! { i8x2, i8, i8 } - define_impl! { i8x2, i8, 2, i8x2, x0, x1 } - define_ty! { u8x2, u8, u8 } - define_impl! { u8x2, u8, 2, i8x2, x0, x1 } - - define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2)); -} - -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -mod x86; - -#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] -mod arm; -#[cfg(target_arch = "aarch64")] -mod aarch64; - -// FIXME: rust does not expose the nvptx and nvptx64 targets yet -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", - target_arch = "arm", target_arch = "aarch64")))] -mod nvptx; diff --git a/src/runtime/cache.rs b/src/runtime/cache.rs index bb247fb531..6aab8add41 100644 --- a/src/runtime/cache.rs +++ b/src/runtime/cache.rs @@ -1,7 +1,7 @@ //! Cache of run-time feature detection -use core::sync::atomic::{AtomicUsize, Ordering}; -use core::usize; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::usize; use super::bit; diff --git a/src/runtime/linux/cpuinfo.rs b/src/runtime/linux/cpuinfo.rs index 8e4c8e066b..777be3de56 100644 --- a/src/runtime/linux/cpuinfo.rs +++ b/src/runtime/linux/cpuinfo.rs @@ -28,6 +28,7 @@ impl<'a> CpuInfoField<'a> { } } /// Does the field exist? 
+ #[cfg(test)] pub fn exists(&self) -> bool { self.0.is_some() } @@ -61,17 +62,19 @@ impl CpuInfo { pub fn field(&self, field: &str) -> CpuInfoField { for l in self.raw.lines() { if l.trim().starts_with(field) { - return CpuInfoField(l.split(": ").skip(1).next()); + return CpuInfoField::new(l.split(": ").skip(1).next()); } } CpuInfoField(None) } /// Returns the `raw` contents of `/proc/cpuinfo` + #[cfg(test)] fn raw(&self) -> &String { &self.raw } + #[cfg(test)] fn from_str(other: &str) -> Result { Ok(CpuInfo { raw: String::from(other), @@ -148,7 +151,8 @@ power management: assert!(!cpuinfo.field("flags").has("avx")); } - const ARM_CORTEX_A53: &str = r"Processor : AArch64 Processor rev 3 (aarch64) + const ARM_CORTEX_A53: &str = + r"Processor : AArch64 Processor rev 3 (aarch64) processor : 0 processor : 1 processor : 2 diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index a48b7b20ce..2ab5e246c1 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -5,31 +5,23 @@ mod bit; #[macro_use] mod macros; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[macro_use] -mod x86; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub use self::x86::__Feature; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use self::x86::detect_features; - -#[cfg(all(target_arch = "arm", target_os = "linux", feature = "std"))] +#[cfg(all(target_arch = "arm", target_os = "linux"))] #[macro_use] mod arm; -#[cfg(all(target_arch = "arm", target_os = "linux", feature = "std"))] +#[cfg(all(target_arch = "arm", target_os = "linux"))] pub use self::arm::__Feature; -#[cfg(all(target_arch = "aarch64", target_os = "linux", feature = "std"))] +#[cfg(all(target_arch = "aarch64", target_os = "linux"))] #[macro_use] mod aarch64; -#[cfg(all(target_arch = "aarch64", target_os = "linux", feature = "std"))] +#[cfg(all(target_arch = "aarch64", target_os = "linux"))] pub use self::aarch64::__Feature; -#[cfg(all(feature = "std", target_os = "linux", +#[cfg(all(target_os = 
"linux", any(target_arch = "arm", target_arch = "aarch64")))] mod linux; -#[cfg(all(feature = "std", target_os = "linux", +#[cfg(all(target_os = "linux", any(target_arch = "arm", target_arch = "aarch64")))] pub use self::linux::detect_features; diff --git a/stdsimd-test/assert-instr-macro/Cargo.toml b/stdsimd-test/assert-instr-macro/Cargo.toml index ba66015381..6f224d1f40 100644 --- a/stdsimd-test/assert-instr-macro/Cargo.toml +++ b/stdsimd-test/assert-instr-macro/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Alex Crichton "] [lib] proc-macro = true +test = false [dependencies] proc-macro2 = { version = "0.1", features = ["unstable"] } diff --git a/stdsimd-test/simd-test-macro/Cargo.toml b/stdsimd-test/simd-test-macro/Cargo.toml index b4b860d3b1..0bb9be7e7a 100644 --- a/stdsimd-test/simd-test-macro/Cargo.toml +++ b/stdsimd-test/simd-test-macro/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Alex Crichton "] [lib] proc-macro = true +test = false [dependencies] proc-macro2 = { version = "0.1", features = ["unstable"] } diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs index ce52ea5d27..bd497b6cd5 100644 --- a/stdsimd-test/src/lib.rs +++ b/stdsimd-test/src/lib.rs @@ -5,6 +5,8 @@ //! assertions about the disassembly of a function. 
#![feature(proc_macro)] +#![cfg_attr(feature = "cargo-clippy", + allow(missing_docs_in_private_items, print_stdout))] extern crate assert_instr_macro; extern crate backtrace; @@ -71,9 +73,10 @@ fn disassemble_myself() -> HashMap> { ); assert!(output.status.success()); - parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8")) + parse_otool(str::from_utf8(&output.stdout).expect("stdout not utf8")) } else { - let objdump = env::var("OBJDUMP").unwrap_or("objdump".to_string()); + let objdump = + env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); let output = Command::new(objdump) .arg("--disassemble") .arg(&me) @@ -86,21 +89,18 @@ fn disassemble_myself() -> HashMap> { ); assert!(output.status.success()); - parse_objdump( - &str::from_utf8(&output.stdout).expect("stdout not utf8"), - ) + parse_objdump(str::from_utf8(&output.stdout).expect("stdout not utf8")) } } fn parse_objdump(output: &str) -> HashMap> { let mut lines = output.lines(); - let expected_len = if cfg!(target_arch = "arm") { - 8 - } else if cfg!(target_arch = "aarch64") { - 8 - } else { - 2 - }; + let expected_len = + if cfg!(target_arch = "arm") || cfg!(target_arch = "aarch64") { + 8 + } else { + 2 + }; for line in output.lines().take(100) { println!("{}", line); @@ -112,7 +112,8 @@ fn parse_objdump(output: &str) -> HashMap> { if !header.ends_with(">:") { continue; } - let start = header.find("<").unwrap(); + let start = header.find('<') + .expect(&format!("\"<\" not found in symbol pattern of the form \"$hex_addr <$name>\": {}", header)); let symbol = &header[start + 1..header.len() - 2]; let mut instructions = Vec::new(); @@ -136,13 +137,13 @@ fn parse_objdump(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn parse_otool(output: &str) -> HashMap> { @@ -154,13 +155,9 @@ fn parse_otool(output: &str) -> HashMap> { let mut ret = HashMap::new(); let mut 
cached_header = None; - loop { - let header = match cached_header.take().or_else(|| lines.next()) { - Some(header) => header, - None => break, - }; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { // symbols should start with `$symbol:` - if !header.ends_with(":") { + if !header.ends_with(':') { continue; } // strip the leading underscore and the trailing colon @@ -168,7 +165,7 @@ fn parse_otool(output: &str) -> HashMap> { let mut instructions = Vec::new(); while let Some(instruction) = lines.next() { - if instruction.ends_with(":") { + if instruction.ends_with(':') { cached_header = Some(instruction); break; } @@ -184,13 +181,13 @@ fn parse_otool(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn parse_dumpbin(output: &str) -> HashMap> { @@ -202,13 +199,9 @@ fn parse_dumpbin(output: &str) -> HashMap> { let mut ret = HashMap::new(); let mut cached_header = None; - loop { - let header = match cached_header.take().or_else(|| lines.next()) { - Some(header) => header, - None => break, - }; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { // symbols should start with `$symbol:` - if !header.ends_with(":") { + if !header.ends_with(':') { continue; } // strip the trailing colon @@ -239,13 +232,13 @@ fn parse_dumpbin(output: &str) -> HashMap> { } ret.entry(normalize(symbol)) - .or_insert(Vec::new()) + .or_insert_with(Vec::new) .push(Function { instrs: instructions, }); } - return ret; + ret } fn normalize(symbol: &str) -> String { @@ -268,9 +261,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let functions = match sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { - Some(s) => s, - None => { + let functions = + if let Some(s) = sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { + s + } else { if let Some(sym) = sym { 
println!("assumed symbol name: `{}`", sym); } @@ -279,8 +273,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { println!("\t- {}", f); } panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); - } - }; + }; assert_eq!(functions.len(), 1); let function = &functions[0]; @@ -288,7 +281,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Look for `expected` as the first part of any instruction in this // function, returning if we do indeed find it. let mut found = false; - for instr in function.instrs.iter() { + for instr in &function.instrs { // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax if let Some(part) = instr.parts.get(0) { // Truncates the instruction with the length of the expected @@ -308,13 +301,13 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. - println!("disassembly for {}: ", sym.as_ref().unwrap()); + println!("disassembly for {}: ", sym.as_ref().expect("symbol not found")); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); - for part in instr.parts.iter() { + for part in &instr.parts { print!("{} ", part); } - println!(""); + println!(); } if !found { diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs index 9bbb17500f..b272b1e442 100644 --- a/tests/cpu-detection.rs +++ b/tests/cpu-detection.rs @@ -2,53 +2,21 @@ #![cfg_attr(feature = "strict", deny(warnings))] #![cfg_attr(feature = "cargo-clippy", allow(option_unwrap_used))] -extern crate cupid; - +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] #[macro_use] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] extern crate stdsimd; #[test] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn works() { - let information = cupid::master().unwrap(); - assert_eq!(cfg_feature_enabled!("sse"), information.sse()); - assert_eq!(cfg_feature_enabled!("sse2"), information.sse2()); 
- assert_eq!(cfg_feature_enabled!("sse3"), information.sse3()); - assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3()); - assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1()); - assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2()); - assert_eq!(cfg_feature_enabled!("avx"), information.avx()); - assert_eq!(cfg_feature_enabled!("avx2"), information.avx2()); - assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f()); - assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd()); - assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er()); - assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf()); - assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw()); - assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq()); - assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl()); - assert_eq!(cfg_feature_enabled!("avx512ifma"), information.avx512_ifma()); - assert_eq!(cfg_feature_enabled!("avx512vbmi"), information.avx512_vbmi()); - assert_eq!( - cfg_feature_enabled!("avx512vpopcntdq"), - information.avx512_vpopcntdq() - ); - assert_eq!(cfg_feature_enabled!("fma"), information.fma()); - assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1()); - assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2()); - assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt()); - assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a()); - assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt()); - assert_eq!(cfg_feature_enabled!("tbm"), information.tbm()); - assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt()); - assert_eq!(cfg_feature_enabled!("xsave"), information.xsave()); - assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt()); - assert_eq!( - cfg_feature_enabled!("xsavec"), - information.xsavec_and_xrstor() - ); - assert_eq!( - cfg_feature_enabled!("xsavec"), - information.xsaves_xrstors_and_ia32_xss() - ); 
+#[cfg(all(target_arch = "arm", target_os = "linux"))] +fn arm_linux() { + println!("neon: {}", cfg_feature_enabled!("neon")); + println!("pmull: {}", cfg_feature_enabled!("pmull")); +} + +#[test] +#[cfg(all(target_arch = "aarch64", target_os = "linux"))] +fn aarch64_linux() { + println!("neon: {}", cfg_feature_enabled!("neon")); + println!("asimd: {}", cfg_feature_enabled!("asimd")); + println!("pmull: {}", cfg_feature_enabled!("pmull")); }