Skip to content

Commit 02ecd72

Browse files
gnzlbg authored and alexcrichton committed
add mmx module, mmx run-time detection, intrinsics (#220)
* [sse] _mm_cvtps_pi32, _mm_cvt_ps2pi
* [mmx] run-time detection support
* [x86] add mmx module
* [x86] make __m64 public
* [sse] add _mm_cvtps_pi{8,16}, _mm_cvttps_pi32, _mm_cvtt_ps2pi
* move new intrinsics from i586 to i686 module
* mmx requires i686
1 parent af5a428 commit 02ecd72

File tree

6 files changed

+205
-16
lines changed

6 files changed

+205
-16
lines changed

coresimd/src/runtime/x86.rs

+6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ use super::bit;
2929
#[macro_export]
3030
#[doc(hidden)]
3131
macro_rules! __unstable_detect_feature {
32+
("mmx") => {
33+
$crate::vendor::__unstable_detect_feature(
34+
$crate::vendor::__Feature::mmx{}) };
3235
("sse") => {
3336
$crate::vendor::__unstable_detect_feature(
3437
$crate::vendor::__Feature::sse{}) };
@@ -165,6 +168,8 @@ macro_rules! __unstable_detect_feature {
165168
#[allow(non_camel_case_types)]
166169
#[repr(u8)]
167170
pub enum __Feature {
171+
/// MMX
172+
mmx,
168173
/// SSE (Streaming SIMD Extensions)
169174
sse,
170175
/// SSE2 (Streaming SIMD Extensions 2)
@@ -332,6 +337,7 @@ pub fn detect_features() -> usize {
332337
enable(proc_info_ecx, 20, __Feature::sse4_2);
333338
enable(proc_info_ecx, 23, __Feature::popcnt);
334339
enable(proc_info_edx, 24, __Feature::fxsr);
340+
enable(proc_info_edx, 23, __Feature::mmx);
335341
enable(proc_info_edx, 25, __Feature::sse);
336342
enable(proc_info_edx, 26, __Feature::sse2);
337343

coresimd/src/x86/i586/sse.rs

-8
Original file line numberDiff line numberDiff line change
@@ -626,10 +626,6 @@ pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
626626
_mm_cvtss_si32(a)
627627
}
628628

629-
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
630-
// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
631-
// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
632-
633629
/// Convert the lowest 32 bit float in the input vector to a 32 bit integer
634630
/// with
635631
/// truncation.
@@ -655,10 +651,6 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
655651
_mm_cvttss_si32(a)
656652
}
657653

658-
// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
659-
// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
660-
// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
661-
662654
/// Extract the lowest 32 bit float from the input vector.
663655
#[inline(always)]
664656
#[target_feature = "+sse"]

coresimd/src/x86/i686/mmx.rs

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
//! `i686` MMX instruction set.
2+
//!
3+
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
4+
//! header.
5+
//!
6+
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
7+
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
8+
//!
9+
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
10+
11+
use v64::{i16x4, i32x2, i8x8};
12+
use x86::__m64;
13+
use core::mem;
14+
15+
#[cfg(test)]
16+
use stdsimd_test::assert_instr;
17+
18+
/// Constructs a 64-bit integer vector initialized to zero.
19+
#[inline(always)]
20+
#[target_feature = "+mmx,+sse"]
21+
// FIXME: this produces a movl instead of xorps on x86
22+
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
23+
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
24+
pub unsafe fn _mm_setzero_si64() -> __m64 {
25+
mem::transmute(0_i64)
26+
}
27+
28+
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
29+
/// using signed saturation.
30+
///
31+
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
32+
/// less than 0x80 are saturated to 0x80.
33+
#[inline(always)]
34+
#[target_feature = "+mmx,+sse"]
35+
#[cfg_attr(test, assert_instr(packsswb))]
36+
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
37+
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
38+
}
39+
40+
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
41+
/// using signed saturation.
42+
///
43+
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
44+
/// values less than 0x8000 are saturated to 0x8000.
45+
#[inline(always)]
46+
#[target_feature = "+mmx,+sse"]
47+
#[cfg_attr(test, assert_instr(packssdw))]
48+
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
49+
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
50+
}
51+
52+
#[allow(improper_ctypes)]
53+
extern "C" {
54+
#[link_name = "llvm.x86.mmx.packsswb"]
55+
fn packsswb(a: __m64, b: __m64) -> __m64;
56+
#[link_name = "llvm.x86.mmx.packssdw"]
57+
fn packssdw(a: __m64, b: __m64) -> __m64;
58+
}
59+
60+
#[cfg(test)]
61+
mod tests {
62+
use v64::{i16x4, i32x2, i8x8};
63+
use x86::i686::mmx;
64+
use x86::__m64;
65+
use stdsimd_test::simd_test;
66+
67+
#[simd_test = "sse"] // FIXME: should be mmx
68+
unsafe fn _mm_setzero_si64() {
69+
let r: __m64 = ::std::mem::transmute(0_i64);
70+
assert_eq!(r, mmx::_mm_setzero_si64());
71+
}
72+
73+
#[simd_test = "sse"] // FIXME: should be mmx
74+
unsafe fn _mm_packs_pi16() {
75+
let a = i16x4::new(-1, 2, -3, 4);
76+
let b = i16x4::new(-5, 6, -7, 8);
77+
let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
78+
assert_eq!(r, mmx::_mm_packs_pi16(a, b));
79+
}
80+
81+
#[simd_test = "sse"] // FIXME: should be mmx
82+
unsafe fn _mm_packs_pi32() {
83+
let a = i32x2::new(-1, 2);
84+
let b = i32x2::new(-5, 6);
85+
let r = i16x4::new(-1, 2, -5, 6);
86+
assert_eq!(r, mmx::_mm_packs_pi32(a, b));
87+
}
88+
}

coresimd/src/x86/i686/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
//! `i686` intrinsics
22
3+
mod mmx;
4+
pub use self::mmx::*;
5+
36
mod sse;
47
pub use self::sse::*;
58

coresimd/src/x86/i686/sse.rs

+103-8
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
//! `i686` Streaming SIMD Extensions (SSE)
22
3-
use v64::{i16x4, u8x8};
3+
use v128::f32x4;
4+
use v64::{i16x4, i32x2, i8x8, u8x8};
5+
use x86::__m64;
46
use core::mem;
7+
use x86::i586;
8+
use x86::i686::mmx;
59

610
#[cfg(test)]
711
use stdsimd_test::assert_instr;
812

9-
/// This type is only required for mapping vector types to llvm's `x86_mmx`
10-
/// type.
11-
#[allow(non_camel_case_types)]
12-
#[repr(simd)]
13-
struct __m64(i64);
14-
1513
#[allow(improper_ctypes)]
1614
extern "C" {
1715
#[link_name = "llvm.x86.mmx.pmaxs.w"]
@@ -22,6 +20,10 @@ extern "C" {
2220
fn pminsw(a: __m64, b: __m64) -> __m64;
2321
#[link_name = "llvm.x86.mmx.pminu.b"]
2422
fn pminub(a: __m64, b: __m64) -> __m64;
23+
#[link_name = "llvm.x86.sse.cvtps2pi"]
24+
fn cvtps2pi(a: f32x4) -> __m64;
25+
#[link_name = "llvm.x86.sse.cvttps2pi"]
26+
fn cvttps2pi(a: f32x4) -> __m64;
2527
}
2628

2729
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
@@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
9698
_mm_min_pu8(a, b)
9799
}
98100

101+
/// Convert the two lower packed single-precision (32-bit) floating-point
102+
/// elements in `a` to packed 32-bit integers with truncation.
103+
#[inline(always)]
104+
#[target_feature = "+sse"]
105+
#[cfg_attr(test, assert_instr(cvttps2pi))]
106+
pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
107+
mem::transmute(cvttps2pi(a))
108+
}
109+
110+
/// Convert the two lower packed single-precision (32-bit) floating-point
111+
/// elements in `a` to packed 32-bit integers with truncation.
112+
#[inline(always)]
113+
#[target_feature = "+sse"]
114+
#[cfg_attr(test, assert_instr(cvttps2pi))]
115+
pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
116+
_mm_cvttps_pi32(a)
117+
}
118+
119+
/// Convert the two lower packed single-precision (32-bit) floating-point
120+
/// elements in `a` to packed 32-bit integers.
121+
#[inline(always)]
122+
#[target_feature = "+sse"]
123+
#[cfg_attr(test, assert_instr(cvtps2pi))]
124+
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
125+
mem::transmute(cvtps2pi(a))
126+
}
127+
128+
/// Convert the two lower packed single-precision (32-bit) floating-point
129+
/// elements in `a` to packed 32-bit integers.
130+
#[inline(always)]
131+
#[target_feature = "+sse"]
132+
#[cfg_attr(test, assert_instr(cvtps2pi))]
133+
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
134+
_mm_cvtps_pi32(a)
135+
}
136+
137+
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
138+
/// packed 16-bit integers.
139+
#[inline(always)]
140+
#[target_feature = "+sse"]
141+
#[cfg_attr(test, assert_instr(cvtps2pi))]
142+
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
143+
let b = _mm_cvtps_pi32(a);
144+
let a = i586::_mm_movehl_ps(a, a);
145+
let c = _mm_cvtps_pi32(a);
146+
mmx::_mm_packs_pi32(b, c)
147+
}
148+
149+
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
150+
/// packed 8-bit integers, and returns them in the lower 4 elements of the
151+
/// result.
152+
#[inline(always)]
153+
#[target_feature = "+sse"]
154+
#[cfg_attr(test, assert_instr(cvtps2pi))]
155+
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
156+
let b = _mm_cvtps_pi16(a);
157+
let c = mmx::_mm_setzero_si64();
158+
mmx::_mm_packs_pi16(b, mem::transmute(c))
159+
}
160+
99161
#[cfg(test)]
100162
mod tests {
101-
use v64::{i16x4, u8x8};
163+
use v128::f32x4;
164+
use v64::{i16x4, i32x2, i8x8, u8x8};
102165
use x86::i686::sse;
103166
use stdsimd_test::simd_test;
104167

@@ -141,4 +204,36 @@ mod tests {
141204
assert_eq!(r, sse::_mm_min_pu8(a, b));
142205
assert_eq!(r, sse::_m_pminub(a, b));
143206
}
207+
208+
#[simd_test = "sse"]
209+
unsafe fn _mm_cvtps_pi32() {
210+
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
211+
let r = i32x2::new(1, 2);
212+
213+
assert_eq!(r, sse::_mm_cvtps_pi32(a));
214+
assert_eq!(r, sse::_mm_cvt_ps2pi(a));
215+
}
216+
217+
#[simd_test = "sse"]
218+
unsafe fn _mm_cvttps_pi32() {
219+
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
220+
let r = i32x2::new(7, 2);
221+
222+
assert_eq!(r, sse::_mm_cvttps_pi32(a));
223+
assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
224+
}
225+
226+
#[simd_test = "sse"]
227+
unsafe fn _mm_cvtps_pi16() {
228+
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
229+
let r = i16x4::new(7, 2, 3, 4);
230+
assert_eq!(r, sse::_mm_cvtps_pi16(a));
231+
}
232+
233+
#[simd_test = "sse"]
234+
unsafe fn _mm_cvtps_pi8() {
235+
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
236+
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
237+
assert_eq!(r, sse::_mm_cvtps_pi8(a));
238+
}
144239
}

coresimd/src/x86/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ mod x86_64;
2626
#[cfg(target_arch = "x86_64")]
2727
pub use self::x86_64::*;
2828

29+
/// 64-bit wide integer vector type.
30+
#[allow(non_camel_case_types)]
31+
#[repr(simd)]
32+
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
33+
pub struct __m64(i64); // corresponds to llvm's `x86_mmx` type
2934
/// 128-bit wide signed integer vector type
3035
#[allow(non_camel_case_types)]
3136
pub type __m128i = ::v128::i8x16;

0 commit comments

Comments
 (0)