Skip to content

Commit db8c6ac

Browse files
gnzlbg authored and alexcrichton committed
[sse] _mm_stream_{ps,pi} (#219)
1 parent 02ecd72 commit db8c6ac

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

coresimd/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#![allow(unused_features)]
1414
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
1515
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
16-
const_atomic_usize_new, stmt_expr_attributes)]
16+
const_atomic_usize_new, stmt_expr_attributes, core_intrinsics)]
1717
#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))]
1818
#![cfg_attr(feature = "cargo-clippy",
1919
allow(inline_always, too_many_arguments, cast_sign_loss,

coresimd/src/x86/i586/sse.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,6 +1680,30 @@ extern "C" {
16801680
fn cmpss(a: f32x4, b: f32x4, imm8: i8) -> f32x4;
16811681
}
16821682

1683+
/// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
1684+
///
1685+
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
1686+
/// exception _may_ be generated.
1687+
#[inline(always)]
1688+
#[target_feature = "+sse"]
1689+
#[cfg_attr(test, assert_instr(movntps))]
1690+
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: f32x4) {
1691+
// NOTE(review): the transmute target type is inferred from the intrinsic's
// signature - presumably `*mut f32x4`, so the whole vector is written by one
// store; confirm against the `nontemporal_store` declaration.
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
1692+
}
1693+
1694+
/// Stores 64 bits of integer data from `a` into the memory at `mem_addr` using
1695+
/// a non-temporal memory hint.
1696+
#[inline(always)]
1697+
#[target_feature = "+sse"]
1698+
// Generates `movnti` on i686 and x86_64 but just a `mov` on i586, so the
// instruction is only asserted on x86_64 or on x86 with `sse2` enabled.
1699+
#[cfg_attr(all(test,
1700+
any(target_arch = "x86_64",
1701+
all(target_arch = "x86", target_feature = "sse2"))),
1702+
assert_instr(movnti))]
1703+
pub unsafe fn _mm_stream_pi(mem_addr: *mut i64, a: i64) {
1704+
::core::intrinsics::nontemporal_store(mem_addr, a);
1705+
}
1706+
16831707
#[cfg(test)]
16841708
mod tests {
16851709
use v128::*;
@@ -3261,4 +3285,28 @@ mod tests {
32613285
assert_eq!(c, f32x4::new(3.0, 7.0, 11.0, 15.0));
32623286
assert_eq!(d, f32x4::new(4.0, 8.0, 12.0, 16.0));
32633287
}
3288+
3289+
// Four `f32` values in a 16-byte-aligned struct, used by the `_mm_stream_ps`
// test as a store target that meets that intrinsic's alignment requirement.
#[repr(align(16))]
3290+
struct Memory {
3291+
pub data: [f32; 4],
3292+
}
3293+
3294+
// Streams a splatted vector into an aligned buffer and checks that every
// element of the buffer now equals the corresponding lane of the vector.
#[simd_test = "sse"]
3295+
unsafe fn _mm_stream_ps() {
3296+
let a = f32x4::splat(7.0);
3297+
// Pre-fill with -1.0 so an element left unwritten would fail the check below.
let mut mem = Memory { data: [-1.0; 4] };
3298+
3299+
sse::_mm_stream_ps(&mut mem.data[0] as *mut f32, a);
3300+
for i in 0..4 {
3301+
assert_eq!(mem.data[i], a.extract(i as u32));
3302+
}
3303+
}
3304+
3305+
// Streams a 64-bit integer into a heap-allocated `i64` and checks that the
// stored value matches.
#[simd_test = "sse"]
3306+
unsafe fn _mm_stream_pi() {
3307+
let a: i64 = 7;
3308+
// Initialised to -1 so the assertion fails if the store did not happen.
let mut mem = ::std::boxed::Box::<i64>::new(-1);
3309+
sse::_mm_stream_pi(&mut *mem as *mut i64, a);
3310+
assert_eq!(a, *mem);
3311+
}
32643312
}

0 commit comments

Comments
 (0)