Skip to content

Commit 423f68b

Browse files
MaloJaffre authored and
alexcrichton committed
Add _MM_TRANSPOSE4_PS pseudo-macro. (#106)
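This adds the unusual `_MM_TRANSPOSE4_PS` macro, implemented here as a function because there do not seem to be many better alternatives. It also adds a test, and adds `#[allow(non_snake_case)]` to the code generated by `#[simd_test]` so that the upper-case test name does not trigger a lint warning.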
1 parent 73f7a27 commit 423f68b

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

src/x86/sse.rs

+31
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,22 @@ pub unsafe fn _mm_undefined_ps() -> f32x4 {
875875
f32x4::splat(mem::uninitialized())
876876
}
877877

878+
/// Transpose the 4x4 matrix formed by 4 rows of f32x4 in place.
879+
#[inline(always)]
880+
#[allow(non_snake_case)]
881+
#[target_feature = "+sse"]
882+
pub unsafe fn _MM_TRANSPOSE4_PS(row0: &mut f32x4, row1: &mut f32x4, row2: &mut f32x4, row3: &mut f32x4) {
883+
let tmp0 = _mm_unpacklo_ps(*row0, *row1);
884+
let tmp2 = _mm_unpacklo_ps(*row2, *row3);
885+
let tmp1 = _mm_unpackhi_ps(*row0, *row1);
886+
let tmp3 = _mm_unpackhi_ps(*row2, *row3);
887+
888+
mem::replace(row0, _mm_movelh_ps(tmp0, tmp2));
889+
mem::replace(row1,_mm_movehl_ps(tmp2, tmp0));
890+
mem::replace(row2, _mm_movelh_ps(tmp1, tmp3));
891+
mem::replace(row3, _mm_movehl_ps(tmp3, tmp1));
892+
}
893+
878894
#[allow(improper_ctypes)]
879895
extern {
880896
#[link_name = "llvm.x86.sse.add.ss"]
@@ -1291,4 +1307,19 @@ mod tests {
12911307
sse::_MM_GET_EXCEPTION_STATE() & sse::_MM_EXCEPT_UNDERFLOW != 0;
12921308
assert_eq!(underflow, true);
12931309
}
1310+
1311+
#[simd_test = "sse"]
1312+
unsafe fn _MM_TRANSPOSE4_PS() {
1313+
let mut a = f32x4::new(1.0, 2.0, 3.0, 4.0);
1314+
let mut b = f32x4::new(5.0, 6.0, 7.0, 8.0);
1315+
let mut c = f32x4::new(9.0, 10.0, 11.0, 12.0);
1316+
let mut d = f32x4::new(13.0, 14.0, 15.0, 16.0);
1317+
1318+
sse::_MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
1319+
1320+
assert_eq!(a, f32x4::new(1.0, 5.0, 9.0, 13.0));
1321+
assert_eq!(b, f32x4::new(2.0, 6.0, 10.0, 14.0));
1322+
assert_eq!(c, f32x4::new(3.0, 7.0, 11.0, 15.0));
1323+
assert_eq!(d, f32x4::new(4.0, 8.0, 12.0, 16.0));
1324+
}
12941325
}

stdsimd-test/simd-test-macro/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub fn simd_test(attr: proc_macro::TokenStream,
4646
let name: TokenStream = name.as_str().parse().unwrap();
4747

4848
let ret: TokenStream = quote! {
49+
#[allow(non_snake_case)]
4950
#[test]
5051
fn #name() {
5152
if cfg_feature_enabled!(#target_feature) {

0 commit comments

Comments
 (0)