From d46b0ac44357e7d3aca94421f07c8cd9dd035cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 10 Feb 2018 21:00:35 +0300 Subject: [PATCH 1/9] added pclmul --- coresimd/src/x86/i686/clmul.rs | 58 ++++++++++++++++++++++++++++++++++ coresimd/x86/i686/mod.rs | 3 ++ 2 files changed, 61 insertions(+) create mode 100644 coresimd/src/x86/i686/clmul.rs diff --git a/coresimd/src/x86/i686/clmul.rs b/coresimd/src/x86/i686/clmul.rs new file mode 100644 index 0000000000..a84cfa95cf --- /dev/null +++ b/coresimd/src/x86/i686/clmul.rs @@ -0,0 +1,58 @@ +use x86::__m128i; + +#[cfg(test)] +use stdsimd_test::assert_instr; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.pclmulqdq"] + fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; +} + +/// Perform a carry-less multiplication of two 64-bit polynomials over the +/// finite field GF(2^k). +/// +/// The immediate byte is used for determining which halves of `a` and `b` +/// should be used. Immediate bits other than 0 and 4 are ignored. +#[inline] +#[target_feature(enable = "pclmul")] +#[cfg_attr(test, assert_instr(pclmulqdq, imm8 = 0))] +pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: u8) -> __m128i { + macro_rules! call { + ($imm8:expr) => (pclmulqdq(a, b, $imm8)) + } + constify_imm8!(imm8, call) +} + + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __m128i happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdsimd_test::simd_test; + + use x86::*; + + #[simd_test = "pclmul"] + unsafe fn test_mm_clmulepi64_si128() { + // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf + let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); + let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); + let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); + let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); + let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); + let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); + + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x00), r00); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x10), r01); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x01), r10); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x11), r11); + + let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); + let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); + assert_eq_m128i(_mm_clmulepi64_si128(a0, a0, 0x00), r); + } +} diff --git a/coresimd/x86/i686/mod.rs b/coresimd/x86/i686/mod.rs index ad40c73c6d..12513010ec 100644 --- a/coresimd/x86/i686/mod.rs +++ b/coresimd/x86/i686/mod.rs @@ -3,6 +3,9 @@ mod aes; pub use self::aes::*; +mod clmul; +pub use self::clmul::*; + mod mmx; pub use self::mmx::*; From ea70b97b70ffd5972979124c99be54a2d9dd2d65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 10 Feb 2018 21:14:09 +0300 Subject: [PATCH 2/9] added docs --- coresimd/src/x86/i686/clmul.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/coresimd/src/x86/i686/clmul.rs b/coresimd/src/x86/i686/clmul.rs index a84cfa95cf..d2f5b4729d 100644 --- a/coresimd/src/x86/i686/clmul.rs +++ b/coresimd/src/x86/i686/clmul.rs @@ -1,3 +1,9 @@ +//! Carry-less Multiplication (CLMUL) +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). +//! +//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use x86::__m128i; #[cfg(test)] From 5e08b163ca130978c0c50b2e3acd6bd87397c021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 11 Feb 2018 07:20:24 +0300 Subject: [PATCH 3/9] pclmul -> pclmulqdq --- coresimd/src/x86/i686/{clmul.rs => pclmulqdq.rs} | 4 ++-- coresimd/x86/i686/mod.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) rename coresimd/src/x86/i686/{clmul.rs => pclmulqdq.rs} (97%) diff --git a/coresimd/src/x86/i686/clmul.rs b/coresimd/src/x86/i686/pclmulqdq.rs similarity index 97% rename from coresimd/src/x86/i686/clmul.rs rename to coresimd/src/x86/i686/pclmulqdq.rs index d2f5b4729d..b30999fa0e 100644 --- a/coresimd/src/x86/i686/clmul.rs +++ b/coresimd/src/x86/i686/pclmulqdq.rs @@ -21,7 +21,7 @@ extern "C" { /// The immediate byte is used for determining which halves of `a` and `b` /// should be used. Immediate bits other than 0 and 4 are ignored. #[inline] -#[target_feature(enable = "pclmul")] +#[target_feature(enable = "pclmulqdq")] #[cfg_attr(test, assert_instr(pclmulqdq, imm8 = 0))] pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: u8) -> __m128i { macro_rules! call { @@ -42,7 +42,7 @@ mod tests { use x86::*; - #[simd_test = "pclmul"] + #[simd_test = "pclmulqdq"] unsafe fn test_mm_clmulepi64_si128() { // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); diff --git a/coresimd/x86/i686/mod.rs b/coresimd/x86/i686/mod.rs index 12513010ec..7dd55802e3 100644 --- a/coresimd/x86/i686/mod.rs +++ b/coresimd/x86/i686/mod.rs @@ -3,12 +3,12 @@ mod aes; pub use self::aes::*; -mod clmul; -pub use self::clmul::*; - mod mmx; pub use self::mmx::*; +mod pclmulqdq; +pub use self::pclmulqdq::*; + mod sse; pub use self::sse::*; From aaeeb50428c42de0a639ebd3191547ceab4cf080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 04:20:34 +0300 Subject: [PATCH 4/9] imm8: u8 -> imm8: i32 --- coresimd/src/x86/i686/pclmulqdq.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coresimd/src/x86/i686/pclmulqdq.rs b/coresimd/src/x86/i686/pclmulqdq.rs index b30999fa0e..0937c2091e 100644 --- a/coresimd/src/x86/i686/pclmulqdq.rs +++ b/coresimd/src/x86/i686/pclmulqdq.rs @@ -23,7 +23,7 @@ extern "C" { #[inline] #[target_feature(enable = "pclmulqdq")] #[cfg_attr(test, assert_instr(pclmulqdq, imm8 = 0))] -pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: u8) -> __m128i { +pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => (pclmulqdq(a, b, $imm8)) } From 6d3b106f5163fa2af1205bd23254c4f91b6d19de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 04:50:40 +0300 Subject: [PATCH 5/9] return changes to stdsimd/arch/detect/x86.rs --- stdsimd/arch/detect/x86.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs index e7b95a93c7..4149efae14 100644 --- a/stdsimd/arch/detect/x86.rs +++ b/stdsimd/arch/detect/x86.rs @@ -30,6 +30,9 @@ macro_rules! is_target_feature_detected { ("aes") => { $crate::arch::detect::check_for( $crate::arch::detect::Feature::aes) }; + ("pclmulqdq") => { + $crate::arch::detect::check_for( + $crate::arch::detect::Feature::pclmulqdq) }; ("tsc") => { $crate::arch::detect::check_for( $crate::arch::detect::Feature::tsc) }; @@ -174,6 +177,8 @@ macro_rules! is_target_feature_detected { pub enum Feature { /// AES (Advanced Encryption Standard New Instructions AES-NI) aes, + /// CLMUL (Carry-less Multiplication) + pclmulqdq, /// TSC (Time Stamp Counter) tsc, /// MMX @@ -345,6 +350,7 @@ pub fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 20, Feature::sse4_2); enable(proc_info_ecx, 23, Feature::popcnt); enable(proc_info_ecx, 25, Feature::aes); + enable(proc_info_ecx, 1, Feature::pclmulqdq); enable(proc_info_edx, 4, Feature::tsc); enable(proc_info_edx, 23, Feature::mmx); enable(proc_info_edx, 24, Feature::fxsr); @@ -457,6 +463,7 @@ mod tests { #[test] fn dump() { println!("aes: {:?}", is_target_feature_detected!("aes")); + println!("pclmulqdq: {:?}", cfg_feature_enabled!("pclmulqdq")); println!("tsc: {:?}", is_target_feature_detected!("tsc")); println!("sse: {:?}", is_target_feature_detected!("sse")); println!("sse2: {:?}", is_target_feature_detected!("sse2")); @@ -498,6 +505,7 @@ mod tests { fn compare_with_cupid() { let information = cupid::master().unwrap(); assert_eq!(is_target_feature_detected!("aes"), information.aesni()); + assert_eq!(cfg_feature_enabled!("pclmulqdq"), information.pclmulqdq()); assert_eq!(is_target_feature_detected!("tsc"), information.tsc()); assert_eq!(is_target_feature_detected!("sse"), information.sse()); assert_eq!(is_target_feature_detected!("sse2"), information.sse2()); From 9ad33532271da3bcef96d252ddc24f6ecc0af657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 05:08:21 +0300 Subject: [PATCH 6/9] error fixes --- coresimd/{src => }/x86/i686/pclmulqdq.rs | 5 +++-- stdsimd/arch/detect/x86.rs | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) rename coresimd/{src => }/x86/i686/pclmulqdq.rs (97%) diff --git a/coresimd/src/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs similarity index 97% rename from coresimd/src/x86/i686/pclmulqdq.rs rename to coresimd/x86/i686/pclmulqdq.rs index 0937c2091e..16bdc96933 100644 --- a/coresimd/src/x86/i686/pclmulqdq.rs +++ b/coresimd/x86/i686/pclmulqdq.rs @@ -4,7 +4,8 @@ //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). //! //! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf -use x86::__m128i; + +use coresimd::x86::__m128i; #[cfg(test)] use stdsimd_test::assert_instr; @@ -40,7 +41,7 @@ mod tests { use stdsimd_test::simd_test; - use x86::*; + use coresimd::x86::*; #[simd_test = "pclmulqdq"] unsafe fn test_mm_clmulepi64_si128() { diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs index 4149efae14..83532f1168 100644 --- a/stdsimd/arch/detect/x86.rs +++ b/stdsimd/arch/detect/x86.rs @@ -463,7 +463,7 @@ mod tests { #[test] fn dump() { println!("aes: {:?}", is_target_feature_detected!("aes")); - println!("pclmulqdq: {:?}", cfg_feature_enabled!("pclmulqdq")); + println!("pclmulqdq: {:?}", is_target_feature_detected!("pclmulqdq")); println!("tsc: {:?}", is_target_feature_detected!("tsc")); println!("sse: {:?}", is_target_feature_detected!("sse")); println!("sse2: {:?}", is_target_feature_detected!("sse2")); @@ -505,7 +505,7 @@ mod tests { fn compare_with_cupid() { let information = cupid::master().unwrap(); assert_eq!(is_target_feature_detected!("aes"), information.aesni()); - assert_eq!(cfg_feature_enabled!("pclmulqdq"), information.pclmulqdq()); + assert_eq!(is_target_feature_detected!("pclmulqdq"), information.pclmulqdq()); assert_eq!(is_target_feature_detected!("tsc"), information.tsc()); assert_eq!(is_target_feature_detected!("sse"), information.sse()); assert_eq!(is_target_feature_detected!("sse2"), information.sse2()); From 007bccd48ab15799bee66e1edfbab8bd70d926a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 05:18:51 +0300 Subject: [PATCH 7/9] added rustc_args_required_const --- coresimd/x86/i686/pclmulqdq.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/coresimd/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs index 16bdc96933..f88edd64a6 100644 --- a/coresimd/x86/i686/pclmulqdq.rs +++ b/coresimd/x86/i686/pclmulqdq.rs @@ -24,6 +24,7 @@ extern "C" { #[inline] #[target_feature(enable = "pclmulqdq")] #[cfg_attr(test, assert_instr(pclmulqdq, imm8 = 0))] +#[rustc_args_required_const(2)] pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => (pclmulqdq(a, b, $imm8)) From 816f19d9fd1623aa1cc77321e44f51a8fc7f9b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 05:27:34 +0300 Subject: [PATCH 8/9] fixed assert_instr for _mm_clmulepi64_si128 --- coresimd/x86/i686/pclmulqdq.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/coresimd/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs index f88edd64a6..a9e8c33a1f 100644 --- a/coresimd/x86/i686/pclmulqdq.rs +++ b/coresimd/x86/i686/pclmulqdq.rs @@ -23,7 +23,10 @@ extern "C" { /// should be used. Immediate bits other than 0 and 4 are ignored. #[inline] #[target_feature(enable = "pclmulqdq")] -#[cfg_attr(test, assert_instr(pclmulqdq, imm8 = 0))] +#[cfg_attr(test, assert_instr(pclmullqlqdq, imm8 = 0))] +#[cfg_attr(test, assert_instr(pclmulhqlqdq, imm8 = 1))] +#[cfg_attr(test, assert_instr(pclmullqhqdq, imm8 = 16))] +#[cfg_attr(test, assert_instr(pclmulhqhqdq, imm8 = 17))] #[rustc_args_required_const(2)] pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { macro_rules! call { From c4e68ce96a141f74f796b451b5ddbd03970d6530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 18 Feb 2018 06:18:50 +0300 Subject: [PATCH 9/9] fixed pclmul assert_instr tests --- coresimd/x86/i686/pclmulqdq.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/coresimd/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs index a9e8c33a1f..8c4f4b8574 100644 --- a/coresimd/x86/i686/pclmulqdq.rs +++ b/coresimd/x86/i686/pclmulqdq.rs @@ -23,10 +23,11 @@ extern "C" { /// should be used. Immediate bits other than 0 and 4 are ignored. #[inline] #[target_feature(enable = "pclmulqdq")] -#[cfg_attr(test, assert_instr(pclmullqlqdq, imm8 = 0))] -#[cfg_attr(test, assert_instr(pclmulhqlqdq, imm8 = 1))] -#[cfg_attr(test, assert_instr(pclmullqhqdq, imm8 = 16))] -#[cfg_attr(test, assert_instr(pclmulhqhqdq, imm8 = 17))] +#[cfg_attr(all(test, not(target_os="linux")), assert_instr(pclmulqdq, imm8 = 0))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqlqdq, imm8 = 0))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqlqdq, imm8 = 1))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqhqdq, imm8 = 16))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqhqdq, imm8 = 17))] #[rustc_args_required_const(2)] pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { macro_rules! call {