@@ -1402,14 +1402,317 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
14021402 psubusb ( a, b)
14031403}
14041404
1405- // TODO __mm256_unpackhi_epi16 (__m256i a, __m256i b)
1406- // TODO __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b)
1407- // TODO __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b)
1408- // TODO __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b)
1409- // TODO __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b)
1410- // TODO __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b)
1411- // TODO __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
1412- // TODO __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
1405+ /// Unpack and interleave 8-bit integers from the high half of each
1406+ /// 128-bit lane in `a` and `b`.
1407+ ///
1408+ /// ```rust
1409+ /// # #![feature(cfg_target_feature)]
1410+ /// # #![feature(target_feature)]
1411+ /// #
1412+ /// # #[macro_use] extern crate stdsimd;
1413+ /// #
1414+ /// # fn main() {
1415+ /// # if cfg_feature_enabled!("avx2") {
1416+ /// # #[target_feature = "+avx2"]
1417+ /// # fn worker() {
1418+ /// use stdsimd::simd::i8x32;
1419+ /// use stdsimd::vendor::_mm256_unpackhi_epi8;
1420+ ///
1421+ /// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1422+ /// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
1423+ ///
1424+ /// let c: i8x32;
1425+ /// unsafe {
1426+ /// c = _mm256_unpackhi_epi8(a, b);
1427+ /// }
1428+ ///
1429+ /// let expected = i8x32::new(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30, 31,-31);
1430+ /// assert_eq!(c, expected);
1431+ ///
1432+ /// # }
1433+ /// # worker();
1434+ /// # }
1435+ /// # }
1436+ /// ```
1437+ #[ inline( always) ]
1438+ #[ target_feature = "+avx2" ]
1439+ #[ cfg_attr( test, assert_instr( vpunpckhbw) ) ]
1440+ pub unsafe fn _mm256_unpackhi_epi8 ( a : i8x32 , b : i8x32 ) -> i8x32 {
1441+ simd_shuffle32 ( a, b, [ 8 , 40 , 9 , 41 , 10 , 42 , 11 , 43 , 12 , 44 , 13 , 45 , 14 , 46 , 15 , 47 , 24 , 56 , 25 , 57 , 26 , 58 , 27 , 59 , 28 , 60 , 29 , 61 , 30 , 62 , 31 , 63 ] )
1442+ }
1443+
1444+ /// Unpack and interleave 8-bit integers from the low half of each
1445+ /// 128-bit lane of `a` and `b`.
1446+ ///
1447+ /// ```rust
1448+ /// # #![feature(cfg_target_feature)]
1449+ /// # #![feature(target_feature)]
1450+ /// #
1451+ /// # #[macro_use] extern crate stdsimd;
1452+ /// #
1453+ /// # fn main() {
1454+ /// # if cfg_feature_enabled!("avx2") {
1455+ /// # #[target_feature = "+avx2"]
1456+ /// # fn worker() {
1457+ /// use stdsimd::simd::i8x32;
1458+ /// use stdsimd::vendor::_mm256_unpacklo_epi8;
1459+ ///
1460+ /// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1461+ /// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
1462+ ///
1463+ /// let c: i8x32;
1464+ /// unsafe {
1465+ /// c = _mm256_unpacklo_epi8(a, b);
1466+ /// }
1467+ ///
1468+ /// let expected = i8x32::new(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7, 16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
1469+ /// assert_eq!(c, expected);
1470+ ///
1471+ /// # }
1472+ /// # worker();
1473+ /// # }
1474+ /// # }
1475+ /// ```
1476+ #[ inline( always) ]
1477+ #[ target_feature = "+avx2" ]
1478+ #[ cfg_attr( test, assert_instr( vpunpcklbw) ) ]
1479+ pub unsafe fn _mm256_unpacklo_epi8 ( a : i8x32 , b : i8x32 ) -> i8x32 {
1480+ simd_shuffle32 ( a, b, [ 0 , 32 , 1 , 33 , 2 , 34 , 3 , 35 , 4 , 36 , 5 , 37 , 6 , 38 , 7 , 39 , 16 , 48 , 17 , 49 , 18 , 50 , 19 , 51 , 20 , 52 , 21 , 53 , 22 , 54 , 23 , 55 ] )
1481+ }
1482+
1483+ /// Unpack and interleave 16-bit integers from the high half of each
1484+ /// 128-bit lane of `a` and `b`.
1485+ ///
1486+ /// ```rust
1487+ /// # #![feature(cfg_target_feature)]
1488+ /// # #![feature(target_feature)]
1489+ /// #
1490+ /// # #[macro_use] extern crate stdsimd;
1491+ /// #
1492+ /// # fn main() {
1493+ /// # if cfg_feature_enabled!("avx2") {
1494+ /// # #[target_feature = "+avx2"]
1495+ /// # fn worker() {
1496+ /// use stdsimd::simd::i16x16;
1497+ /// use stdsimd::vendor::_mm256_unpackhi_epi16;
1498+ ///
1499+ /// let a = i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1500+ /// let b = i16x16::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
1501+ ///
1502+ /// let c: i16x16;
1503+ /// unsafe {
1504+ /// c = _mm256_unpackhi_epi16(a, b);
1505+ /// }
1506+ ///
1507+ /// let expected = i16x16::new(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14, 15,-15);
1508+ /// assert_eq!(c, expected);
1509+ ///
1510+ /// # }
1511+ /// # worker();
1512+ /// # }
1513+ /// # }
1514+ /// ```
1515+ #[ inline( always) ]
1516+ #[ target_feature = "+avx2" ]
1517+ #[ cfg_attr( test, assert_instr( vpunpckhwd) ) ]
1518+ pub unsafe fn _mm256_unpackhi_epi16 ( a : i16x16 , b : i16x16 ) -> i16x16 {
1519+ simd_shuffle16 ( a, b, [ 4 , 20 , 5 , 21 , 6 , 22 , 7 , 23 , 12 , 28 , 13 , 29 , 14 , 30 , 15 , 31 ] )
1520+ }
1521+
1522+ /// Unpack and interleave 16-bit integers from the low half of each
1523+ /// 128-bit lane of `a` and `b`.
1524+ ///
1525+ /// ```rust
1526+ /// # #![feature(cfg_target_feature)]
1527+ /// # #![feature(target_feature)]
1528+ /// #
1529+ /// # #[macro_use] extern crate stdsimd;
1530+ /// #
1531+ /// # fn main() {
1532+ /// # if cfg_feature_enabled!("avx2") {
1533+ /// # #[target_feature = "+avx2"]
1534+ /// # fn worker() {
1535+ /// use stdsimd::simd::i16x16;
1536+ /// use stdsimd::vendor::_mm256_unpacklo_epi16;
1537+ ///
1538+ /// let a = i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1539+ /// let b = i16x16::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
1540+ ///
1541+ /// let c: i16x16;
1542+ /// unsafe {
1543+ /// c = _mm256_unpacklo_epi16(a, b);
1544+ /// }
1545+ ///
1546+ /// let expected = i16x16::new(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10, 11,-11);
1547+ /// assert_eq!(c, expected);
1548+ ///
1549+ /// # }
1550+ /// # worker();
1551+ /// # }
1552+ /// # }
1553+ /// ```
1554+ #[ inline( always) ]
1555+ #[ target_feature = "+avx2" ]
1556+ #[ cfg_attr( test, assert_instr( vpunpcklwd) ) ]
1557+ pub unsafe fn _mm256_unpacklo_epi16 ( a : i16x16 , b : i16x16 ) -> i16x16 {
1558+ simd_shuffle16 ( a, b, [ 0 , 16 , 1 , 17 , 2 , 18 , 3 , 19 , 8 , 24 , 9 , 25 , 10 , 26 , 11 , 27 ] )
1559+ }
1560+
1561+ /// Unpack and interleave 32-bit integers from the high half of each
1562+ /// 128-bit lane of `a` and `b`.
1563+ ///
1564+ /// ```rust
1565+ /// # #![feature(cfg_target_feature)]
1566+ /// # #![feature(target_feature)]
1567+ /// #
1568+ /// # #[macro_use] extern crate stdsimd;
1569+ /// #
1570+ /// # fn main() {
1571+ /// # if cfg_feature_enabled!("avx2") {
1572+ /// # #[target_feature = "+avx2"]
1573+ /// # fn worker() {
1574+ /// use stdsimd::simd::i32x8;
1575+ /// use stdsimd::vendor::_mm256_unpackhi_epi32;
1576+ ///
1577+ /// let a = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
1578+ /// let b = i32x8::new(0,-1,-2,-3,-4,-5,-6,-7);
1579+ ///
1580+ /// let c: i32x8;
1581+ /// unsafe {
1582+ /// c = _mm256_unpackhi_epi32(a, b);
1583+ /// }
1584+ ///
1585+ /// let expected = i32x8::new(2,-2, 3,-3, 6,-6, 7,-7);
1586+ /// assert_eq!(c, expected);
1587+ ///
1588+ /// # }
1589+ /// # worker();
1590+ /// # }
1591+ /// # }
1592+ /// ```
1593+ #[ inline( always) ]
1594+ #[ target_feature = "+avx2" ]
1595+ #[ cfg_attr( test, assert_instr( vpunpckhdq) ) ]
1596+ pub unsafe fn _mm256_unpackhi_epi32 ( a : i32x8 , b : i32x8 ) -> i32x8 {
1597+ simd_shuffle8 ( a, b, [ 2 , 10 , 3 , 11 , 6 , 14 , 7 , 15 ] )
1598+ }
1599+
1600+ /// Unpack and interleave 32-bit integers from the low half of each
1601+ /// 128-bit lane of `a` and `b`.
1602+ ///
1603+ /// ```rust
1604+ /// # #![feature(cfg_target_feature)]
1605+ /// # #![feature(target_feature)]
1606+ /// #
1607+ /// # #[macro_use] extern crate stdsimd;
1608+ /// #
1609+ /// # fn main() {
1610+ /// # if cfg_feature_enabled!("avx2") {
1611+ /// # #[target_feature = "+avx2"]
1612+ /// # fn worker() {
1613+ /// use stdsimd::simd::i32x8;
1614+ /// use stdsimd::vendor::_mm256_unpacklo_epi32;
1615+ ///
1616+ /// let a = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
1617+ /// let b = i32x8::new(0,-1,-2,-3,-4,-5,-6,-7);
1618+ ///
1619+ /// let c: i32x8;
1620+ /// unsafe {
1621+ /// c = _mm256_unpacklo_epi32(a, b);
1622+ /// }
1623+ ///
1624+ /// let expected = i32x8::new(0, 0, 1,-1, 4,-4, 5,-5);
1625+ /// assert_eq!(c, expected);
1626+ ///
1627+ /// # }
1628+ /// # worker();
1629+ /// # }
1630+ /// # }
1631+ /// ```
1632+ #[ inline( always) ]
1633+ #[ target_feature = "+avx2" ]
1634+ #[ cfg_attr( test, assert_instr( vpunpckldq) ) ]
1635+ pub unsafe fn _mm256_unpacklo_epi32 ( a : i32x8 , b : i32x8 ) -> i32x8 {
1636+ simd_shuffle8 ( a, b, [ 0 , 8 , 1 , 9 , 4 , 12 , 5 , 13 ] )
1637+ }
1638+
1639+ /// Unpack and interleave 64-bit integers from the high half of each
1640+ /// 128-bit lane of `a` and `b`.
1641+ ///
1642+ /// ```rust
1643+ /// # #![feature(cfg_target_feature)]
1644+ /// # #![feature(target_feature)]
1645+ /// #
1646+ /// # #[macro_use] extern crate stdsimd;
1647+ /// #
1648+ /// # fn main() {
1649+ /// # if cfg_feature_enabled!("avx2") {
1650+ /// # #[target_feature = "+avx2"]
1651+ /// # fn worker() {
1652+ /// use stdsimd::simd::i64x4;
1653+ /// use stdsimd::vendor::_mm256_unpackhi_epi64;
1654+ ///
1655+ /// let a = i64x4::new(0, 1, 2, 3);
1656+ /// let b = i64x4::new(0,-1,-2,-3);
1657+ ///
1658+ /// let c: i64x4;
1659+ /// unsafe {
1660+ /// c = _mm256_unpackhi_epi64(a, b);
1661+ /// }
1662+ ///
1663+ /// let expected = i64x4::new(1,-1, 3,-3);
1664+ /// assert_eq!(c, expected);
1665+ ///
1666+ /// # }
1667+ /// # worker();
1668+ /// # }
1669+ /// # }
1670+ /// ```
1671+ #[ inline( always) ]
1672+ #[ target_feature = "+avx2" ]
1673+ #[ cfg_attr( test, assert_instr( vpunpckhqdq) ) ]
1674+ pub unsafe fn _mm256_unpackhi_epi64 ( a : i64x4 , b : i64x4 ) -> i64x4 {
1675+ simd_shuffle4 ( a, b, [ 1 , 5 , 3 , 7 ] )
1676+ }
1677+
1678+ /// Unpack and interleave 64-bit integers from the low half of each
1679+ /// 128-bit lane of `a` and `b`.
1680+ ///
1681+ /// ```rust
1682+ /// # #![feature(cfg_target_feature)]
1683+ /// # #![feature(target_feature)]
1684+ /// #
1685+ /// # #[macro_use] extern crate stdsimd;
1686+ /// #
1687+ /// # fn main() {
1688+ /// # if cfg_feature_enabled!("avx2") {
1689+ /// # #[target_feature = "+avx2"]
1690+ /// # fn worker() {
1691+ /// use stdsimd::simd::i64x4;
1692+ /// use stdsimd::vendor::_mm256_unpacklo_epi64;
1693+ ///
1694+ /// let a = i64x4::new(0, 1, 2, 3);
1695+ /// let b = i64x4::new(0,-1,-2,-3);
1696+ ///
1697+ /// let c: i64x4;
1698+ /// unsafe {
1699+ /// c = _mm256_unpacklo_epi64(a, b);
1700+ /// }
1701+ ///
1702+ /// let expected = i64x4::new(0, 0, 2,-2);
1703+ /// assert_eq!(c, expected);
1704+ ///
1705+ /// # }
1706+ /// # worker();
1707+ /// # }
1708+ /// # }
1709+ /// ```
1710+ #[ inline( always) ]
1711+ #[ target_feature = "+avx2" ]
1712+ #[ cfg_attr( test, assert_instr( vpunpcklqdq) ) ]
1713+ pub unsafe fn _mm256_unpacklo_epi64 ( a : i64x4 , b : i64x4 ) -> i64x4 {
1714+ simd_shuffle4 ( a, b, [ 0 , 4 , 2 , 6 ] )
1715+ }
14131716
14141717/// Compute the bitwise XOR of 256 bits (representing integer data)
14151718/// in `a` and `b`
0 commit comments