diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index c4f76cf092..bc647fff0b 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -1683,6 +1683,17 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
 /// aligned memory location. To minimize caching, the data is flagged as
 /// non-temporal (unlikely to be used again soon)
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1696,6 +1707,17 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
 /// to a 32-byte aligned memory location. To minimize caching, the data is
 /// flagged as non-temporal (unlikely to be used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1711,6 +1733,17 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
 /// caching, the data is flagged as non-temporal (unlikely to be used again
 /// soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
 #[inline]
 #[target_feature(enable = "avx")]
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index a5c9d6f693..74c1351770 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -26144,6 +26144,17 @@ pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) ->
 
 /// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
 #[inline]
 #[target_feature(enable = "avx512f")]
@@ -26155,6 +26166,17 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
 
 /// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
 #[inline]
 #[target_feature(enable = "avx512f")]
@@ -26166,6 +26188,17 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
 
 /// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
 #[inline]
 #[target_feature(enable = "avx512f")]
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index f4fdb50469..b4e00d65b4 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -1277,6 +1277,17 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
 #[inline]
 #[target_feature(enable = "sse2")]
@@ -1290,6 +1301,17 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
 #[inline]
 #[target_feature(enable = "sse2")]
@@ -2469,6 +2491,17 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
 #[inline]
 #[target_feature(enable = "sse2")]
diff --git a/crates/core_arch/src/x86/sse4a.rs b/crates/core_arch/src/x86/sse4a.rs
index 976c907cb2..6df295d0a2 100644
--- a/crates/core_arch/src/x86/sse4a.rs
+++ b/crates/core_arch/src/x86/sse4a.rs
@@ -62,6 +62,18 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
 /// Non-temporal store of `a.0` into `p`.
 ///
 /// Writes 64-bit data to a memory location without polluting the caches.
+///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 #[inline]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntsd))]
@@ -73,6 +85,18 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
 /// Non-temporal store of `a.0` into `p`.
 ///
 /// Writes 32-bit data to a memory location without polluting the caches.
+///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, but before the
+/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
+/// stalls and yet-unspecified program behavior.
+///
 #[inline]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntss))]
diff --git a/crates/core_arch/src/x86_64/sse2.rs b/crates/core_arch/src/x86_64/sse2.rs
index bf2394ebab..e1534813ac 100644
--- a/crates/core_arch/src/x86_64/sse2.rs
+++ b/crates/core_arch/src/x86_64/sse2.rs
@@ -66,6 +66,16 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 ///
+/// # Safety
+///
+/// After using this intrinsic, but before any atomic operations occur, a call
+/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
+/// usage of this intrinsic must always end in `_mm_sfence()`.
+///
+/// Reading and writing to the memory stored-to by any other means, after any
+/// nontemporal store has been used to write to that memory, is discouraged.
+/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64)
 #[inline]
 #[target_feature(enable = "sse2")]