Merged · Changes from 3 commits
143 changes: 103 additions & 40 deletions crates/core_arch/src/x86/avx2.rs
@@ -2565,35 +2565,67 @@ pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(IMM8);
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpslldq(a, $imm8)
};
}
transmute(constify_imm8!(imm8 * 8, call))
let r = vpslldq(a, IMM8 * 8);
transmute(r)
Member (review comment): You can just call `_mm256_bslli_epi128` here.
}
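
A minimal sketch of what the reviewer's suggested delegation could look like, assuming `_mm256_bslli_epi128` keeps the const-generic signature introduced in this change; attributes are omitted and this is not the committed code:

```rust
// Sketch only: forward the whole-register byte shift to the per-lane
// byte-shift intrinsic instead of re-deriving it here; both lower to vpslldq.
pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    _mm256_bslli_epi128::<IMM8>(a)
}
```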

/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpslldq(a, $imm8)
};
}
transmute(constify_imm8!(imm8 * 8, call))
pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(IMM8);
let a = a.as_i8x32();
let zero = _mm256_setzero_si256().as_i8x32();
let r: i8x32 = simd_shuffle32(
zero,
a,
[
32 - (IMM8 as u32 & 0xff),
33 - (IMM8 as u32 & 0xff),
34 - (IMM8 as u32 & 0xff),
35 - (IMM8 as u32 & 0xff),
36 - (IMM8 as u32 & 0xff),
37 - (IMM8 as u32 & 0xff),
38 - (IMM8 as u32 & 0xff),
39 - (IMM8 as u32 & 0xff),
40 - (IMM8 as u32 & 0xff),
41 - (IMM8 as u32 & 0xff),
42 - (IMM8 as u32 & 0xff),
43 - (IMM8 as u32 & 0xff),
44 - (IMM8 as u32 & 0xff),
45 - (IMM8 as u32 & 0xff),
46 - (IMM8 as u32 & 0xff),
47 - (IMM8 as u32 & 0xff),
48 - (IMM8 as u32 & 0xff) - 16,
49 - (IMM8 as u32 & 0xff) - 16,
50 - (IMM8 as u32 & 0xff) - 16,
51 - (IMM8 as u32 & 0xff) - 16,
52 - (IMM8 as u32 & 0xff) - 16,
53 - (IMM8 as u32 & 0xff) - 16,
54 - (IMM8 as u32 & 0xff) - 16,
55 - (IMM8 as u32 & 0xff) - 16,
56 - (IMM8 as u32 & 0xff) - 16,
57 - (IMM8 as u32 & 0xff) - 16,
58 - (IMM8 as u32 & 0xff) - 16,
59 - (IMM8 as u32 & 0xff) - 16,
60 - (IMM8 as u32 & 0xff) - 16,
61 - (IMM8 as u32 & 0xff) - 16,
62 - (IMM8 as u32 & 0xff) - 16,
63 - (IMM8 as u32 & 0xff) - 16,
],
);
transmute(r)
}

/// Shifts packed 32-bit integers in `a` left by the amount
@@ -2729,35 +2761,66 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpsrldq(a, $imm8)
};
}
transmute(constify_imm8!(imm8 * 8, call))
let r = vpsrldq(a, IMM8 * 8);
transmute(r)
Member (review comment): You can just call `_mm256_bsrli_epi128` here.
}
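
Likewise, a rough sketch of the mirrored suggestion for the right shift, again assuming the new const-generic `_mm256_bsrli_epi128` (not the committed code):

```rust
// Sketch only: delegate the whole-register byte shift right to the
// per-lane intrinsic; both lower to vpsrldq.
pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    _mm256_bsrli_epi128::<IMM8>(a)
}
```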

/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpsrldq(a, $imm8)
};
}
transmute(constify_imm8!(imm8 * 8, call))
pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(IMM8);
let a = a.as_i8x32();
let zero = _mm256_setzero_si256().as_i8x32();
let r: i8x32 = simd_shuffle32(
a,
zero,
[
0 + (IMM8 as u32 & 0xff) + 16,
1 + (IMM8 as u32 & 0xff) + 16,
2 + (IMM8 as u32 & 0xff) + 16,
3 + (IMM8 as u32 & 0xff) + 16,
4 + (IMM8 as u32 & 0xff) + 16,
5 + (IMM8 as u32 & 0xff) + 16,
6 + (IMM8 as u32 & 0xff) + 16,
7 + (IMM8 as u32 & 0xff) + 16,
8 + (IMM8 as u32 & 0xff) + 16,
9 + (IMM8 as u32 & 0xff) + 16,
10 + (IMM8 as u32 & 0xff) + 16,
11 + (IMM8 as u32 & 0xff) + 16,
12 + (IMM8 as u32 & 0xff) + 16,
13 + (IMM8 as u32 & 0xff) + 16,
14 + (IMM8 as u32 & 0xff) + 16,
15 + (IMM8 as u32 & 0xff) + 16,
16 + (IMM8 as u32 & 0xff),
17 + (IMM8 as u32 & 0xff),
18 + (IMM8 as u32 & 0xff),
19 + (IMM8 as u32 & 0xff),
20 + (IMM8 as u32 & 0xff),
21 + (IMM8 as u32 & 0xff),
22 + (IMM8 as u32 & 0xff),
23 + (IMM8 as u32 & 0xff),
24 + (IMM8 as u32 & 0xff),
25 + (IMM8 as u32 & 0xff),
26 + (IMM8 as u32 & 0xff),
27 + (IMM8 as u32 & 0xff),
28 + (IMM8 as u32 & 0xff),
29 + (IMM8 as u32 & 0xff),
30 + (IMM8 as u32 & 0xff),
31 + (IMM8 as u32 & 0xff),
],
);
transmute(r)
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
@@ -4824,7 +4887,7 @@ mod tests {
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_slli_si256() {
let a = _mm256_set1_epi64x(0xFFFFFFFF);
let r = _mm256_slli_si256(a, 3);
let r = _mm256_slli_si256::<3>(a);
assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
}

@@ -4923,7 +4986,7 @@ mod tests {
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm256_srli_si256(a, 3);
let r = _mm256_srli_si256::<3>(a);
#[rustfmt::skip]
let e = _mm256_setr_epi8(
4, 5, 6, 7, 8, 9, 10, 11,
56 changes: 17 additions & 39 deletions crates/core_arch/src/x86/f16c.rs
@@ -4,7 +4,7 @@

use crate::{
core_arch::{simd::*, x86::*},
hint::unreachable_unchecked,
// hint::unreachable_unchecked,
Member (review comment): Deleted commented code.
mem::transmute,
};

@@ -42,22 +42,6 @@ pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
transmute(llvm_vcvtph2ps_256(transmute(a)))
}

macro_rules! dispatch_rounding {
($rounding:ident, $call:ident) => {{
match $rounding {
0 => call!(0),
1 => call!(1),
2 => call!(2),
3 => call!(3),
4 => call!(4),
5 => call!(5),
6 => call!(6),
7 => call!(7),
_ => unreachable_unchecked(),
}
}};
}

/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
/// vector.
@@ -71,16 +55,13 @@ macro_rules! dispatch_rounding {
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
#[inline]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
let a = transmute(a);
macro_rules! call {
($rounding:expr) => {
llvm_vcvtps2ph_128(a, $rounding)
};
}
transmute(dispatch_rounding!(imm_rounding, call))
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
static_assert_imm3!(IMM_ROUNDING);
let a = a.as_f32x4();
let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
transmute(r)
}
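
For illustration, a hedged usage sketch of the new const-generic form: the rounding mode is now a compile-time argument, so a caller might write something like the following (assuming the `_MM_FROUND_*` constants exported elsewhere in the crate; values are illustrative only):

```rust
// Sketch: convert four f32 lanes to half precision using the current
// MXCSR rounding mode, then widen them back.
let v = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
let packed: __m128i = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(v);
let widened: __m128 = _mm_cvtph_ps(packed);
```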

/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
@@ -95,16 +76,13 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
#[inline]
#[target_feature(enable = "f16c")]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
let a = transmute(a);
macro_rules! call {
($rounding:expr) => {
llvm_vcvtps2ph_256(a, $rounding)
};
}
transmute(dispatch_rounding!(imm_rounding, call))
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
static_assert_imm3!(IMM_ROUNDING);
let a = a.as_f32x8();
let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
transmute(r)
}

#[cfg(test)]
@@ -116,7 +94,7 @@ mod tests {
unsafe fn test_mm_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32];
let float_vec: __m128 = transmute(array);
let halfs: __m128i = _mm_cvtps_ph(float_vec, 0);
let halfs: __m128i = _mm_cvtps_ph::<0>(float_vec);
let floats: __m128 = _mm_cvtph_ps(halfs);
let result: [f32; 4] = transmute(floats);
assert_eq!(result, array);
@@ -126,7 +104,7 @@ mod tests {
unsafe fn test_mm256_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
let float_vec: __m256 = transmute(array);
let halfs: __m128i = _mm256_cvtps_ph(float_vec, 0);
let halfs: __m128i = _mm256_cvtps_ph::<0>(float_vec);
let floats: __m256 = _mm256_cvtph_ps(halfs);
let result: [f32; 8] = transmute(floats);
assert_eq!(result, array);