1
1
//! `i686` Streaming SIMD Extensions (SSE)
2
2
3
- use v64:: { i16x4, u8x8} ;
3
+ use v128:: f32x4;
4
+ use v64:: { i16x4, i32x2, i8x8, u8x8} ;
5
+ use x86:: __m64;
4
6
use core:: mem;
7
+ use x86:: i586;
8
+ use x86:: i686:: mmx;
5
9
6
10
#[ cfg( test) ]
7
11
use stdsimd_test:: assert_instr;
8
12
9
- /// This type is only required for mapping vector types to llvm's `x86_mmx`
10
- /// type.
11
- #[ allow( non_camel_case_types) ]
12
- #[ repr( simd) ]
13
- struct __m64 ( i64 ) ;
14
-
15
13
#[ allow( improper_ctypes) ]
16
14
extern "C" {
17
15
#[ link_name = "llvm.x86.mmx.pmaxs.w" ]
@@ -22,6 +20,10 @@ extern "C" {
22
20
fn pminsw ( a : __m64 , b : __m64 ) -> __m64 ;
23
21
#[ link_name = "llvm.x86.mmx.pminu.b" ]
24
22
fn pminub ( a : __m64 , b : __m64 ) -> __m64 ;
23
+ #[ link_name = "llvm.x86.sse.cvtps2pi" ]
24
+ fn cvtps2pi ( a : f32x4 ) -> __m64 ;
25
+ #[ link_name = "llvm.x86.sse.cvttps2pi" ]
26
+ fn cvttps2pi ( a : f32x4 ) -> __m64 ;
25
27
}
26
28
27
29
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
@@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
96
98
_mm_min_pu8 ( a, b)
97
99
}
98
100
101
+ /// Convert the two lower packed single-precision (32-bit) floating-point
102
+ /// elements in `a` to packed 32-bit integers with truncation.
103
+ #[ inline( always) ]
104
+ #[ target_feature = "+sse" ]
105
+ #[ cfg_attr( test, assert_instr( cvttps2pi) ) ]
106
+ pub unsafe fn _mm_cvttps_pi32 ( a : f32x4 ) -> i32x2 {
107
+ mem:: transmute ( cvttps2pi ( a) )
108
+ }
109
+
110
+ /// Convert the two lower packed single-precision (32-bit) floating-point
111
+ /// elements in `a` to packed 32-bit integers with truncation.
112
+ #[ inline( always) ]
113
+ #[ target_feature = "+sse" ]
114
+ #[ cfg_attr( test, assert_instr( cvttps2pi) ) ]
115
+ pub unsafe fn _mm_cvtt_ps2pi ( a : f32x4 ) -> i32x2 {
116
+ _mm_cvttps_pi32 ( a)
117
+ }
118
+
119
+ /// Convert the two lower packed single-precision (32-bit) floating-point
120
+ /// elements in `a` to packed 32-bit integers.
121
+ #[ inline( always) ]
122
+ #[ target_feature = "+sse" ]
123
+ #[ cfg_attr( test, assert_instr( cvtps2pi) ) ]
124
+ pub unsafe fn _mm_cvtps_pi32 ( a : f32x4 ) -> i32x2 {
125
+ mem:: transmute ( cvtps2pi ( a) )
126
+ }
127
+
128
+ /// Convert the two lower packed single-precision (32-bit) floating-point
129
+ /// elements in `a` to packed 32-bit integers.
130
+ #[ inline( always) ]
131
+ #[ target_feature = "+sse" ]
132
+ #[ cfg_attr( test, assert_instr( cvtps2pi) ) ]
133
+ pub unsafe fn _mm_cvt_ps2pi ( a : f32x4 ) -> i32x2 {
134
+ _mm_cvtps_pi32 ( a)
135
+ }
136
+
137
+ /// Convert packed single-precision (32-bit) floating-point elements in `a` to
138
+ /// packed 16-bit integers.
139
+ #[ inline( always) ]
140
+ #[ target_feature = "+sse" ]
141
+ #[ cfg_attr( test, assert_instr( cvtps2pi) ) ]
142
+ pub unsafe fn _mm_cvtps_pi16 ( a : f32x4 ) -> i16x4 {
143
+ let b = _mm_cvtps_pi32 ( a) ;
144
+ let a = i586:: _mm_movehl_ps ( a, a) ;
145
+ let c = _mm_cvtps_pi32 ( a) ;
146
+ mmx:: _mm_packs_pi32 ( b, c)
147
+ }
148
+
149
+ /// Convert packed single-precision (32-bit) floating-point elements in `a` to
150
+ /// packed 8-bit integers, and returns theem in the lower 4 elements of the
151
+ /// result.
152
+ #[ inline( always) ]
153
+ #[ target_feature = "+sse" ]
154
+ #[ cfg_attr( test, assert_instr( cvtps2pi) ) ]
155
+ pub unsafe fn _mm_cvtps_pi8 ( a : f32x4 ) -> i8x8 {
156
+ let b = _mm_cvtps_pi16 ( a) ;
157
+ let c = mmx:: _mm_setzero_si64 ( ) ;
158
+ mmx:: _mm_packs_pi16 ( b, mem:: transmute ( c) )
159
+ }
160
+
99
161
#[ cfg( test) ]
100
162
mod tests {
101
- use v64:: { i16x4, u8x8} ;
163
+ use v128:: f32x4;
164
+ use v64:: { i16x4, i32x2, i8x8, u8x8} ;
102
165
use x86:: i686:: sse;
103
166
use stdsimd_test:: simd_test;
104
167
@@ -141,4 +204,36 @@ mod tests {
141
204
assert_eq ! ( r, sse:: _mm_min_pu8( a, b) ) ;
142
205
assert_eq ! ( r, sse:: _m_pminub( a, b) ) ;
143
206
}
207
+
208
+ #[ simd_test = "sse" ]
209
+ unsafe fn _mm_cvtps_pi32 ( ) {
210
+ let a = f32x4:: new ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
211
+ let r = i32x2:: new ( 1 , 2 ) ;
212
+
213
+ assert_eq ! ( r, sse:: _mm_cvtps_pi32( a) ) ;
214
+ assert_eq ! ( r, sse:: _mm_cvt_ps2pi( a) ) ;
215
+ }
216
+
217
+ #[ simd_test = "sse" ]
218
+ unsafe fn _mm_cvttps_pi32 ( ) {
219
+ let a = f32x4:: new ( 7.0 , 2.0 , 3.0 , 4.0 ) ;
220
+ let r = i32x2:: new ( 7 , 2 ) ;
221
+
222
+ assert_eq ! ( r, sse:: _mm_cvttps_pi32( a) ) ;
223
+ assert_eq ! ( r, sse:: _mm_cvtt_ps2pi( a) ) ;
224
+ }
225
+
226
+ #[ simd_test = "sse" ]
227
+ unsafe fn _mm_cvtps_pi16 ( ) {
228
+ let a = f32x4:: new ( 7.0 , 2.0 , 3.0 , 4.0 ) ;
229
+ let r = i16x4:: new ( 7 , 2 , 3 , 4 ) ;
230
+ assert_eq ! ( r, sse:: _mm_cvtps_pi16( a) ) ;
231
+ }
232
+
233
+ #[ simd_test = "sse" ]
234
+ unsafe fn _mm_cvtps_pi8 ( ) {
235
+ let a = f32x4:: new ( 7.0 , 2.0 , 3.0 , 4.0 ) ;
236
+ let r = i8x8:: new ( 7 , 2 , 3 , 4 , 0 , 0 , 0 , 0 ) ;
237
+ assert_eq ! ( r, sse:: _mm_cvtps_pi8( a) ) ;
238
+ }
144
239
}
0 commit comments