Skip to content

Commit 57c4239

Browse files
Document movnt needs sfence
For every intrinsic that may generate any of the MOVNT family of instructions, specify it must be followed by `_mm_sfence`. Also, ask people to not think too hard about what actually happens with write-combining memory buffers. They probably don't want to know, and in terms of the Rust abstract machine, we aren't actually entirely sure yet.
1 parent 195e56f commit 57c4239

File tree

4 files changed

+72
-0
lines changed

4 files changed

+72
-0
lines changed

crates/core_arch/src/x86/avx.rs

+10
Original file line numberDiff line numberDiff line change
@@ -1683,6 +1683,16 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
16831683
/// aligned memory location. To minimize caching, the data is flagged as
16841684
/// non-temporal (unlikely to be used again soon)
16851685
///
1686+
/// # Safety
1687+
///
1688+
/// After using this intrinsic, but before any atomic operations occur, a call
1689+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1690+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1691+
///
1692+
/// Reading and writing to the memory stored-to by any other means, after any
1693+
/// nontemporal store has been used to write to that memory, is discouraged.
1694+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
1695+
///
16861696
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
16871697
#[inline]
16881698
#[target_feature(enable = "avx")]

crates/core_arch/src/x86/avx512f.rs

+30
Original file line numberDiff line numberDiff line change
@@ -26144,6 +26144,16 @@ pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) ->
2614426144

2614526145
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2614626146
///
26147+
/// # Safety
26148+
///
26149+
/// After using this intrinsic, but before any atomic operations occur, a call
26150+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26151+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26152+
///
26153+
/// Reading and writing to the memory stored-to by any other means, after any
26154+
/// nontemporal store has been used to write to that memory, is discouraged.
26155+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
26156+
///
2614726157
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
2614826158
#[inline]
2614926159
#[target_feature(enable = "avx512f")]
@@ -26155,6 +26165,16 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
2615526165

2615626166
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2615726167
///
26168+
/// # Safety
26169+
///
26170+
/// After using this intrinsic, but before any atomic operations occur, a call
26171+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26172+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26173+
///
26174+
/// Reading and writing to the memory stored-to by any other means, after any
26175+
/// nontemporal store has been used to write to that memory, is discouraged.
26176+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
26177+
///
2615826178
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
2615926179
#[inline]
2616026180
#[target_feature(enable = "avx512f")]
@@ -26166,6 +26186,16 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
2616626186

2616726187
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2616826188
///
26189+
/// # Safety
26190+
///
26191+
/// After using this intrinsic, but before any atomic operations occur, a call
26192+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26193+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26194+
///
26195+
/// Reading and writing to the memory stored-to by any other means, after any
26196+
/// nontemporal store has been used to write to that memory, is discouraged.
26197+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
26198+
///
2616926199
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
2617026200
#[inline]
2617126201
#[target_feature(enable = "avx512f")]

crates/core_arch/src/x86/sse4a.rs

+22
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
6262
/// Non-temporal store of `a.0` into `p`.
6363
///
6464
/// Writes 64-bit data to a memory location without polluting the caches.
65+
///
66+
/// # Safety
67+
///
68+
/// After using this intrinsic, but before any atomic operations occur, a call
69+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
70+
/// usage of this intrinsic must always end in `_mm_sfence()`.
71+
///
72+
/// Reading and writing to the memory stored-to by any other means, after any
73+
/// nontemporal store has been used to write to that memory, is discouraged.
74+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
75+
///
6576
#[inline]
6677
#[target_feature(enable = "sse4a")]
6778
#[cfg_attr(test, assert_instr(movntsd))]
@@ -73,6 +84,17 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
7384
/// Non-temporal store of `a.0` into `p`.
7485
///
7586
/// Writes 32-bit data to a memory location without polluting the caches.
87+
///
88+
/// # Safety
89+
///
90+
/// After using this intrinsic, but before any atomic operations occur, a call
91+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
92+
/// usage of this intrinsic must always end in `_mm_sfence()`.
93+
///
94+
/// Reading and writing to the memory stored-to by any other means, after any
95+
/// nontemporal store has been used to write to that memory, is discouraged.
96+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
97+
///
7698
#[inline]
7799
#[target_feature(enable = "sse4a")]
78100
#[cfg_attr(test, assert_instr(movntss))]

crates/core_arch/src/x86_64/sse2.rs

+10
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
6666
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
6767
/// used again soon).
6868
///
69+
/// # Safety
70+
///
71+
/// After using this intrinsic, but before any atomic operations occur, a call
72+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
73+
/// usage of this intrinsic must always end in `_mm_sfence()`.
74+
///
75+
/// Reading and writing to the memory stored-to by any other means, after any
76+
/// nontemporal store has been used to write to that memory, is discouraged.
77+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
78+
///
6979
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64)
7080
#[inline]
7181
#[target_feature(enable = "sse2")]

0 commit comments

Comments
 (0)