Skip to content
This repository was archived by the owner on May 28, 2022. It is now read-only.

Commit 329bc88

Browse files
committed
raduga: Replace std::simd with the packed_simd crate
`std::simd` (feature-gated with `stdsimd`) was removed by this pull request: <rust-lang/stdarch#528>.
1 parent 6167379 commit 329bc88

File tree

6 files changed

+190
-156
lines changed

6 files changed

+190
-156
lines changed

EngineCore/Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

EngineCore/src/support/raduga/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@ authors = ["yvt <[email protected]>"]
66
[dependencies]
77
num-traits = "0.2.2"
88
x86intrin = "0.4.5"
9+
10+
[dependencies.packed_simd]
11+
git = "https://github.com/rust-lang-nursery/packed_simd.git"
12+
rev = "6e6bb03aa9d5bbf88a83ef7f792b7892ce6915ea"

EngineCore/src/support/raduga/src/intrin.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::arch::x86_64 as vendor;
1717
use self::vendor::__m256i;
1818

1919
#[cfg(target_feature = "avx2")]
20-
use std::simd::i32x8;
20+
use packed_simd::i32x8;
2121

2222
// A replacement for `stdsimd::vendor::_mm256_i32gather_epi32` which generates
2323
// horrible code for some reasons.

EngineCore/src/support/raduga/src/kernel/mapu8x4inplace.rs

Lines changed: 106 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
// This source code is a part of Nightingales.
55
//
66
#![allow(unused_imports)]
7+
use packed_simd::{self as simd, Cast};
78
#[cfg(target_arch = "x86")]
89
use std::arch::x86 as vendor;
910
#[cfg(target_arch = "x86_64")]
1011
use std::arch::x86_64 as vendor;
11-
use std::simd::{self, IntoBits};
12+
use std::mem::transmute;
1213
use {intrin, simd16};
1314
use {ScalarMode, SimdMode};
1415

@@ -21,7 +22,8 @@ pub trait MapU8x4InplaceKernel {
2122
pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
2223
/// Run a mapping kernel on a given slice.
2324
fn dispatch(&self, slice: &mut [u8]) {
24-
let _ = self.dispatch_simd16_masked(slice) || self.dispatch_simd16_unaligned(slice)
25+
let _ = self.dispatch_simd16_masked(slice)
26+
|| self.dispatch_simd16_unaligned(slice)
2527
|| self.dispatch_scalar(slice);
2628
}
2729

@@ -74,22 +76,22 @@ pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
7476
while i < end_addr {
7577
let indices1 = indices0 + simd::i32x8::splat(16);
7678

77-
let indices0b: simd::u32x8 = indices0.into_bits();
78-
let indices1b: simd::u32x8 = indices1.into_bits();
79+
let indices0b: simd::u32x8 = indices0.cast();
80+
let indices1b: simd::u32x8 = indices1.cast();
7981

80-
let indices0c: simd::i32x8 = (indices0b >> 1u32).into_bits();
81-
let indices1c: simd::i32x8 = (indices1b >> 1u32).into_bits();
82+
let indices0c: simd::i32x8 = (indices0b >> 1u32).cast();
83+
let indices1c: simd::i32x8 = (indices1b >> 1u32).cast();
8284

83-
let mask0 = indices0c.lt(bounds).into_bits();
84-
let mask1 = indices1c.lt(bounds).into_bits();
85+
let mask0 = transmute(indices0c.lt(bounds));
86+
let mask1 = transmute(indices1c.lt(bounds));
8587

86-
let a0 = vendor::_mm256_maskload_epi32(i as *const _, mask0).into_bits();
87-
let a1 = vendor::_mm256_maskload_epi32((i + 32) as *const _, mask1).into_bits();
88+
let a0 = vendor::_mm256_maskload_epi32(i as *const _, mask0);
89+
let a1 = vendor::_mm256_maskload_epi32((i + 32) as *const _, mask1);
8890

8991
let c = self.dispatch_simd16_m256([a0, a1]);
9092

91-
vendor::_mm256_maskstore_epi32(i as *mut _, mask0, c[0].into_bits());
92-
vendor::_mm256_maskstore_epi32((i + 32) as *mut _, mask1, c[1].into_bits());
93+
vendor::_mm256_maskstore_epi32(i as *mut _, mask0, c[0]);
94+
vendor::_mm256_maskstore_epi32((i + 32) as *mut _, mask1, c[1]);
9395

9496
indices0 += simd::i32x8::splat(32);
9597
i += 64;
@@ -111,13 +113,18 @@ pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
111113
let mut i = 0;
112114
while i + 63 < slice.len() {
113115
unsafe {
114-
let a0 = vendor::_mm_loadu_si128(p as *const vendor::__m128i).into_bits();
115-
let a1 =
116-
vendor::_mm_loadu_si128(p.offset(16) as *const vendor::__m128i).into_bits();
117-
let a2 =
118-
vendor::_mm_loadu_si128(p.offset(32) as *const vendor::__m128i).into_bits();
119-
let a3 =
120-
vendor::_mm_loadu_si128(p.offset(48) as *const vendor::__m128i).into_bits();
116+
let a0 = transmute(vendor::_mm_loadu_si128(
117+
p.offset(0) as *const vendor::__m128i
118+
));
119+
let a1 = transmute(vendor::_mm_loadu_si128(
120+
p.offset(16) as *const vendor::__m128i
121+
));
122+
let a2 = transmute(vendor::_mm_loadu_si128(
123+
p.offset(32) as *const vendor::__m128i
124+
));
125+
let a3 = transmute(vendor::_mm_loadu_si128(
126+
p.offset(48) as *const vendor::__m128i
127+
));
121128

122129
let f = self.dispatch_simd16_m128([a0, a1, a2, a3]);
123130

@@ -142,9 +149,8 @@ pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
142149
let mut i = 0;
143150
while i + 63 < slice.len() {
144151
unsafe {
145-
let a0 = vendor::_mm256_loadu_si256(p as *const vendor::__m256i).into_bits();
146-
let a1 =
147-
vendor::_mm256_loadu_si256(p.offset(32) as *const vendor::__m256i).into_bits();
152+
let a0 = vendor::_mm256_loadu_si256(p as *const vendor::__m256i);
153+
let a1 = vendor::_mm256_loadu_si256(p.offset(32) as *const vendor::__m256i);
148154

149155
let f = self.dispatch_simd16_m256([a0, a1]);
150156

@@ -174,55 +180,55 @@ pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
174180
let a0 = a[0]; // hgfedcba 3210
175181
let a1 = a[1]; // ponmlkji 3210
176182

177-
let transpose4x4 = simd::u8x32::new(
183+
let transpose4x4 = transmute(simd::u8x32::new(
178184
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6,
179185
10, 14, 3, 7, 11, 15,
180-
).into_bits();
186+
));
181187

182-
let b0 = vendor::_mm256_shuffle_epi8(a0.into_bits(), transpose4x4).into_bits(); // 3210 hgfe / 3210 dcba
183-
let b1 = vendor::_mm256_shuffle_epi8(a1.into_bits(), transpose4x4).into_bits(); // 3210 ponm / 3210 lkji
188+
let b0 = vendor::_mm256_shuffle_epi8(a0, transpose4x4); // 3210 hgfe / 3210 dcba
189+
let b1 = vendor::_mm256_shuffle_epi8(a1, transpose4x4); // 3210 ponm / 3210 lkji
184190

185-
let transpose4x2 = simd::u32x8::new(0, 4, 1, 5, 2, 6, 3, 7).into_bits();
191+
let transpose4x2 = transmute(simd::u32x8::new(0, 4, 1, 5, 2, 6, 3, 7));
186192

187-
let c0 = vendor::_mm256_permutevar8x32_epi32(b0, transpose4x2).into_bits(); // 3210 hgfedcba
188-
let c1 = vendor::_mm256_permutevar8x32_epi32(b1, transpose4x2).into_bits(); // 3210 ponmlkji
193+
let c0 = vendor::_mm256_permutevar8x32_epi32(b0, transpose4x2); // 3210 hgfedcba
194+
let c1 = vendor::_mm256_permutevar8x32_epi32(b1, transpose4x2); // 3210 ponmlkji
189195

190-
let d0 = vendor::_mm256_unpacklo_epi64(c0, c1).into_bits(); // 20 ponmlkjihgfedcba
191-
let d1 = vendor::_mm256_unpackhi_epi64(c0, c1).into_bits(); // 31 ponmlkjihgfedcba
196+
let d0 = vendor::_mm256_unpacklo_epi64(c0, c1); // 20 ponmlkjihgfedcba
197+
let d1 = vendor::_mm256_unpackhi_epi64(c0, c1); // 31 ponmlkjihgfedcba
192198

193-
let e0 = vendor::_mm256_extractf128_si256(d0, 0).into_bits(); // 0 ponmlkjihgfedcba
194-
let e1 = vendor::_mm256_extractf128_si256(d1, 0).into_bits(); // 1 ponmlkjihgfedcba
195-
let e2 = vendor::_mm256_extractf128_si256(d0, 1).into_bits(); // 2 ponmlkjihgfedcba
196-
let e3 = vendor::_mm256_extractf128_si256(d1, 1).into_bits(); // 3 ponmlkjihgfedcba
199+
let e0 = vendor::_mm256_extractf128_si256(d0, 0); // 0 ponmlkjihgfedcba
200+
let e1 = vendor::_mm256_extractf128_si256(d1, 0); // 1 ponmlkjihgfedcba
201+
let e2 = vendor::_mm256_extractf128_si256(d0, 1); // 2 ponmlkjihgfedcba
202+
let e3 = vendor::_mm256_extractf128_si256(d1, 1); // 3 ponmlkjihgfedcba
197203

198204
let f = self.apply::<simd16::Simd16Mode>([
199-
simd16::Simd16U8(e0),
200-
simd16::Simd16U8(e1),
201-
simd16::Simd16U8(e2),
202-
simd16::Simd16U8(e3),
205+
simd16::Simd16U8(transmute(e0)),
206+
simd16::Simd16U8(transmute(e1)),
207+
simd16::Simd16U8(transmute(e2)),
208+
simd16::Simd16U8(transmute(e3)),
203209
]);
204210

205-
let f0 = f[0].0.into_bits();
206-
let f1 = f[1].0.into_bits();
207-
let f2 = f[2].0.into_bits();
208-
let f3 = f[3].0.into_bits();
211+
let f0 = transmute(f[0].0);
212+
let f1 = transmute(f[1].0);
213+
let f2 = transmute(f[2].0);
214+
let f3 = transmute(f[3].0);
209215

210-
let g0 = vendor::_mm256_set_m128i(f1, f0).into_bits(); // 10 ponmlkjihgfedcba
211-
let g1 = vendor::_mm256_set_m128i(f3, f2).into_bits(); // 32 ponmlkjihgfedcba
216+
let g0 = vendor::_mm256_set_m128i(f1, f0); // 10 ponmlkjihgfedcba
217+
let g1 = vendor::_mm256_set_m128i(f3, f2); // 32 ponmlkjihgfedcba
212218

213-
let h0 = vendor::_mm256_permute4x64_epi64(g0, 0b11_01_10_00).into_bits(); // 10 ponmlkji / 10 hgfedcba
214-
let h1 = vendor::_mm256_permute4x64_epi64(g1, 0b11_01_10_00).into_bits(); // 32 ponmlkji / 32 hgfedcba
219+
let h0 = vendor::_mm256_permute4x64_epi64(g0, 0b11_01_10_00); // 10 ponmlkji / 10 hgfedcba
220+
let h1 = vendor::_mm256_permute4x64_epi64(g1, 0b11_01_10_00); // 32 ponmlkji / 32 hgfedcba
215221

216-
let i0 = intrin::mm256_permute2x128_si256(h0, h1, 0b0010_0000).into_bits(); // 3210 hgfedcba
217-
let i1 = intrin::mm256_permute2x128_si256(h0, h1, 0b0011_0001).into_bits(); // 3210 ponmlkji
222+
let i0 = intrin::mm256_permute2x128_si256(h0, h1, 0b0010_0000); // 3210 hgfedcba
223+
let i1 = intrin::mm256_permute2x128_si256(h0, h1, 0b0011_0001); // 3210 ponmlkji
218224

219-
let transpose2x4 = simd::u32x8::new(0, 2, 4, 6, 1, 3, 5, 7).into_bits();
225+
let transpose2x4 = transmute(simd::u32x8::new(0, 2, 4, 6, 1, 3, 5, 7));
220226

221-
let j0 = vendor::_mm256_permutevar8x32_epi32(i0, transpose2x4).into_bits(); // 3210 hgfe / 3210 dcba
222-
let j1 = vendor::_mm256_permutevar8x32_epi32(i1, transpose2x4).into_bits(); // 3210 ponm / 3210 lkji
227+
let j0 = vendor::_mm256_permutevar8x32_epi32(i0, transpose2x4); // 3210 hgfe / 3210 dcba
228+
let j1 = vendor::_mm256_permutevar8x32_epi32(i1, transpose2x4); // 3210 ponm / 3210 lkji
223229

224-
let k0 = vendor::_mm256_shuffle_epi8(j0, transpose4x4).into_bits(); // hgfedcba 3210
225-
let k1 = vendor::_mm256_shuffle_epi8(j1, transpose4x4).into_bits(); // ponmlkji 3210
230+
let k0 = vendor::_mm256_shuffle_epi8(j0, transpose4x4); // hgfedcba 3210
231+
let k1 = vendor::_mm256_shuffle_epi8(j1, transpose4x4); // ponmlkji 3210
226232

227233
[k0, k1]
228234
}
@@ -232,68 +238,68 @@ pub trait MapU8x4InplaceKernelExt: MapU8x4InplaceKernel {
232238
#[inline(always)]
233239
unsafe fn dispatch_simd16_m128(&self, x: [vendor::__m128i; 4]) -> [vendor::__m128i; 4] {
234240
// [ 3d 2d 1d 0d ] [ 3c 2c 1c 0c ] [ 3b 2b 1b 0b ] [ 3a 2a 1a 0a ]
235-
let a0 = x[0].into_bits(); // dcba 3210
236-
let a1 = x[1].into_bits(); // hgfe 3210
237-
let a2 = x[2].into_bits(); // lkji 3210
238-
let a3 = x[3].into_bits(); // ponm 3210
241+
let a0 = x[0]; // dcba 3210
242+
let a1 = x[1]; // hgfe 3210
243+
let a2 = x[2]; // lkji 3210
244+
let a3 = x[3]; // ponm 3210
239245

240246
// [ 3f 3b ] [ 2f 2b ] [ 1f 1b ] [ 0f 0b ] [ 3e 3a ] [ 2e 2a ] [ 1e 1a ] [ 0e 0a ]
241-
let b0 = vendor::_mm_unpacklo_epi8(a0, a1).into_bits(); // 3210 fb / 3210 ea
242-
let b1 = vendor::_mm_unpackhi_epi8(a0, a1).into_bits(); // 3210 hd / 3210 gc
243-
let b2 = vendor::_mm_unpacklo_epi8(a2, a3).into_bits(); // 3210 nj / 3210 mi
244-
let b3 = vendor::_mm_unpackhi_epi8(a2, a3).into_bits(); // 3210 pl / 3210 ok
247+
let b0 = vendor::_mm_unpacklo_epi8(a0, a1); // 3210 fb / 3210 ea
248+
let b1 = vendor::_mm_unpackhi_epi8(a0, a1); // 3210 hd / 3210 gc
249+
let b2 = vendor::_mm_unpacklo_epi8(a2, a3); // 3210 nj / 3210 mi
250+
let b3 = vendor::_mm_unpackhi_epi8(a2, a3); // 3210 pl / 3210 ok
245251

246252
// [ 3g 3c 3e 3a ] [ 2g 2c 2e 2a ] [ 1g 1c 1e 1a ] [ 0g 0c 0e 0a ]
247-
let c0 = vendor::_mm_unpacklo_epi16(b0, b1).into_bits(); // 3210 gcea
248-
let c1 = vendor::_mm_unpackhi_epi16(b0, b1).into_bits(); // 3210 hdfb
249-
let c2 = vendor::_mm_unpacklo_epi16(b2, b3).into_bits(); // 3210 okmi
250-
let c3 = vendor::_mm_unpackhi_epi16(b2, b3).into_bits(); // 3210 plnj
253+
let c0 = vendor::_mm_unpacklo_epi16(b0, b1); // 3210 gcea
254+
let c1 = vendor::_mm_unpackhi_epi16(b0, b1); // 3210 hdfb
255+
let c2 = vendor::_mm_unpacklo_epi16(b2, b3); // 3210 okmi
256+
let c3 = vendor::_mm_unpackhi_epi16(b2, b3); // 3210 plnj
251257

252258
// [ 1h 1d 1f 1b 1g 1c 1e 1a ] [ 0h 0d 0f 0b 0g 0c 0e 0a ]
253-
let d0 = vendor::_mm_unpacklo_epi32(c0, c1).into_bits(); // 10 hdfbgcea
254-
let d1 = vendor::_mm_unpackhi_epi32(c0, c1).into_bits(); // 32 hdfbgcea
255-
let d2 = vendor::_mm_unpacklo_epi32(c2, c3).into_bits(); // 10 plnjokmi
256-
let d3 = vendor::_mm_unpackhi_epi32(c2, c3).into_bits(); // 32 plnjokmi
259+
let d0 = vendor::_mm_unpacklo_epi32(c0, c1); // 10 hdfbgcea
260+
let d1 = vendor::_mm_unpackhi_epi32(c0, c1); // 32 hdfbgcea
261+
let d2 = vendor::_mm_unpacklo_epi32(c2, c3); // 10 plnjokmi
262+
let d3 = vendor::_mm_unpackhi_epi32(c2, c3); // 32 plnjokmi
257263

258264
// [ 0p 0l 0n 0j 0o 0k 0m 0i 0h 0d 0f 0b 0g 0c 0e 0a ]
259-
let e0 = vendor::_mm_unpacklo_epi64(d0, d2).into_bits(); // 0 plnjokmihdfbgcea
260-
let e1 = vendor::_mm_unpackhi_epi64(d0, d2).into_bits(); // 1 plnjokmihdfbgcea
261-
let e2 = vendor::_mm_unpacklo_epi64(d1, d3).into_bits(); // 2 plnjokmihdfbgcea
262-
let e3 = vendor::_mm_unpackhi_epi64(d1, d3).into_bits(); // 3 plnjokmihdfbgcea
265+
let e0 = vendor::_mm_unpacklo_epi64(d0, d2); // 0 plnjokmihdfbgcea
266+
let e1 = vendor::_mm_unpackhi_epi64(d0, d2); // 1 plnjokmihdfbgcea
267+
let e2 = vendor::_mm_unpacklo_epi64(d1, d3); // 2 plnjokmihdfbgcea
268+
let e3 = vendor::_mm_unpackhi_epi64(d1, d3); // 3 plnjokmihdfbgcea
263269

264270
let f = self.apply::<simd16::Simd16Mode>([
265-
simd16::Simd16U8(e0),
266-
simd16::Simd16U8(e1),
267-
simd16::Simd16U8(e2),
268-
simd16::Simd16U8(e3),
271+
simd16::Simd16U8(transmute(e0)),
272+
simd16::Simd16U8(transmute(e1)),
273+
simd16::Simd16U8(transmute(e2)),
274+
simd16::Simd16U8(transmute(e3)),
269275
]);
270276

271-
let f0 = f[0].0.into_bits();
272-
let f1 = f[1].0.into_bits();
273-
let f2 = f[2].0.into_bits();
274-
let f3 = f[3].0.into_bits();
277+
let f0 = transmute(f[0].0);
278+
let f1 = transmute(f[1].0);
279+
let f2 = transmute(f[2].0);
280+
let f3 = transmute(f[3].0);
275281

276282
// [ 1h 0h ] [ 1d 0d ] [ 1f 0f ] [ 1b 0b ] [ 1g 0g ] [ 1c 0c ] [ 1e 0e ] [ 1a 0a ]
277-
let g0 = vendor::_mm_unpacklo_epi8(f0, f1).into_bits(); // hdfbgcea 10
278-
let g1 = vendor::_mm_unpackhi_epi8(f0, f1).into_bits(); // plnjokmi 10
279-
let g2 = vendor::_mm_unpacklo_epi8(f2, f3).into_bits(); // hdfbgcea 32
280-
let g3 = vendor::_mm_unpackhi_epi8(f2, f3).into_bits(); // plnjokmi 32
283+
let g0 = vendor::_mm_unpacklo_epi8(f0, f1); // hdfbgcea 10
284+
let g1 = vendor::_mm_unpackhi_epi8(f0, f1); // plnjokmi 10
285+
let g2 = vendor::_mm_unpacklo_epi8(f2, f3); // hdfbgcea 32
286+
let g3 = vendor::_mm_unpackhi_epi8(f2, f3); // plnjokmi 32
281287

282288
// [ 3g 2g 1g 0g ] [ 3c 2c 1c 0c ] [ 3e 2e 1e 0e ] [ 3a 2a 1a 0a ]
283-
let h0 = vendor::_mm_unpacklo_epi16(g0, g2).into_bits(); // gcea 3210
284-
let h1 = vendor::_mm_unpackhi_epi16(g0, g2).into_bits(); // hdfb 3210
285-
let h2 = vendor::_mm_unpacklo_epi16(g1, g3).into_bits(); // okmi 3210
286-
let h3 = vendor::_mm_unpackhi_epi16(g1, g3).into_bits(); // plnj 3210
287-
288-
let i0 = vendor::_mm_unpacklo_epi32(h0, h1).into_bits(); // feba 3210
289-
let i1 = vendor::_mm_unpackhi_epi32(h0, h1).into_bits(); // hgdc 3210
290-
let i2 = vendor::_mm_unpacklo_epi32(h2, h3).into_bits(); // nmji 3210
291-
let i3 = vendor::_mm_unpackhi_epi32(h2, h3).into_bits(); // polk 3210
292-
293-
let j0 = vendor::_mm_unpacklo_epi64(i0, i1).into_bits(); // dcba 3210
294-
let j1 = vendor::_mm_unpackhi_epi64(i0, i1).into_bits(); // hgfe 3210
295-
let j2 = vendor::_mm_unpacklo_epi64(i2, i3).into_bits(); // lkji 3210
296-
let j3 = vendor::_mm_unpackhi_epi64(i2, i3).into_bits(); // ponm 3210
289+
let h0 = vendor::_mm_unpacklo_epi16(g0, g2); // gcea 3210
290+
let h1 = vendor::_mm_unpackhi_epi16(g0, g2); // hdfb 3210
291+
let h2 = vendor::_mm_unpacklo_epi16(g1, g3); // okmi 3210
292+
let h3 = vendor::_mm_unpackhi_epi16(g1, g3); // plnj 3210
293+
294+
let i0 = vendor::_mm_unpacklo_epi32(h0, h1); // feba 3210
295+
let i1 = vendor::_mm_unpackhi_epi32(h0, h1); // hgdc 3210
296+
let i2 = vendor::_mm_unpacklo_epi32(h2, h3); // nmji 3210
297+
let i3 = vendor::_mm_unpackhi_epi32(h2, h3); // polk 3210
298+
299+
let j0 = vendor::_mm_unpacklo_epi64(i0, i1); // dcba 3210
300+
let j1 = vendor::_mm_unpackhi_epi64(i0, i1); // hgfe 3210
301+
let j2 = vendor::_mm_unpacklo_epi64(i2, i3); // lkji 3210
302+
let j3 = vendor::_mm_unpackhi_epi64(i2, i3); // ponm 3210
297303

298304
[j0, j1, j2, j3]
299305
}

EngineCore/src/support/raduga/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
//! <a href="https://derpibooru.org/1155840">![Радуга Дэш](https://derpicdn.net/img/2016/5/17/1155840/large.png)</a>
1010
//!
1111
//! [`faster`]: https://docs.adamniederer.com/faster/index.html
12-
#![feature(stdsimd)]
1312
extern crate num_traits;
13+
extern crate packed_simd;
1414

1515
#[allow(dead_code)]
1616
mod intrin;
@@ -25,6 +25,7 @@ pub mod utils;
2525

2626
pub mod prelude {
2727
#[doc(no_inline)]
28-
pub use {kernel::*, utils::*, IntPacked, Packed, PackedI16, PackedU16, PackedU32, PackedU8,
29-
SimdMode};
28+
pub use {
29+
kernel::*, utils::*, IntPacked, Packed, PackedI16, PackedU16, PackedU32, PackedU8, SimdMode,
30+
};
3031
}

0 commit comments

Comments
 (0)