rust-lang · scottmcm · Mar 24, 2024 · clarfonthey · Mar 27, 2024 · scottmcm
diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs
@@ -6,13 +6,14 @@ mod macros;
 use crate::cmp;
 use crate::fmt;
 use crate::hint::assert_unchecked;
+use crate::intrinsics;
 use crate::iter::{
     FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
 };
 use crate::marker::PhantomData;
 use crate::mem::{self, SizedTypeProperties};
 use crate::num::NonZero;
-use crate::ptr::{self, without_provenance, without_provenance_mut, NonNull};
+use crate::ptr::{self, NonNull};
 
 use super::{from_raw_parts, from_raw_parts_mut};
 
@@ -65,10 +66,14 @@ pub struct Iter<'a, T: 'a> {
     ///
     /// This address will be used for all ZST elements, never changed.
     ptr: NonNull<T>,
-    /// For non-ZSTs, the non-null pointer to the past-the-end element.
+    /// For non-ZSTs, the address of the past-the-end element.  This is
+    /// intentionally *not* a pointer, so that it doesn't carry provenance.
+    /// If you're turning this into a pointer, you need to use the provenance from
+    /// `ptr` instead.  (If this carried provenance, the compiler wouldn't know
+    /// that reads from the start and the end are actually the same provenance.)
     ///
-    /// For ZSTs, this is `ptr::dangling(len)`.
-    end_or_len: *const T,
+    /// For ZSTs, this is the length.
+    end_addr_or_len: usize,
     _marker: PhantomData<&'a T>,
 }
 
@@ -91,10 +96,9 @@ impl<'a, T> Iter<'a, T> {
         let ptr: NonNull<T> = NonNull::from(slice).cast();
         // SAFETY: Similar to `IterMut::new`.
         unsafe {
-            let end_or_len =
-                if T::IS_ZST { without_provenance(len) } else { ptr.as_ptr().add(len) };
+            let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
 
-            Self { ptr, end_or_len, _marker: PhantomData }
+            Self { ptr, end_addr_or_len, _marker: PhantomData }
         }
     }
 
@@ -144,7 +148,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
 impl<T> Clone for Iter<'_, T> {
     #[inline]
     fn clone(&self) -> Self {
-        Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
+        Iter { ptr: self.ptr, end_addr_or_len: self.end_addr_or_len, _marker: self._marker }
     }
 }
 
@@ -188,10 +192,14 @@ pub struct IterMut<'a, T: 'a> {
     ///
     /// This address will be used for all ZST elements, never changed.
     ptr: NonNull<T>,
-    /// For non-ZSTs, the non-null pointer to the past-the-end element.
+    /// For non-ZSTs, the address of the past-the-end element.  This is
+    /// intentionally *not* a pointer, so that it doesn't carry provenance.
+    /// If you're turning this into a pointer, you need to use the provenance from
+    /// `ptr` instead.  (If this carried provenance, the compiler wouldn't know
+    /// that reads from the start and the end are actually the same provenance.)
     ///
-    /// For ZSTs, this is `ptr::without_provenance_mut(len)`.
-    end_or_len: *mut T,
+    /// For ZSTs, this is the length.
+    end_addr_or_len: usize,
     _marker: PhantomData<&'a mut T>,
 }
 
@@ -229,10 +237,9 @@ impl<'a, T> IterMut<'a, T> {
         // See the `next_unchecked!` and `is_empty!` macros as well as the
         // `post_inc_start` method for more information.
         unsafe {
-            let end_or_len =
-                if T::IS_ZST { without_provenance_mut(len) } else { ptr.as_ptr().add(len) };
+            let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
 
-            Self { ptr, end_or_len, _marker: PhantomData }
+            Self { ptr, end_addr_or_len, _marker: PhantomData }
         }
     }
 
@@ -3423,3 +3430,12 @@ impl<'a, T: 'a + fmt::Debug, P> fmt::Debug for ChunkByMut<'a, T, P> {
         f.debug_struct("ChunkByMut").field("slice", &self.slice).finish()
     }
 }
+
+/// Same as `p.addr().get()`, but faster to compile by avoiding a bunch of
+/// intermediate steps and unneeded UB checks, which also inlines better.
+#[inline]
+fn addr_usize<T>(p: NonNull<T>) -> usize {
+    // SAFETY: `NonNull` for a sized type has the same layout as `usize`,
+    // and we intentionally don't want to expose here.
+    unsafe { intrinsics::transmute(p) }
+}
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
@@ -1,36 +1,60 @@
 //! Macros used by iterators of slice.
 
-/// Convenience & performance macro for consuming the `end_or_len` field, by
+/// Convenience macro for updating the `end_addr_or_len` field for non-ZSTs.
+macro_rules! set_end {
+    ($this:ident . end = $new_end:expr) => {{
+        $this.end_addr_or_len = addr_usize($new_end);
+    }};
+}
+
+/// Convenience & performance macro for consuming the `end_addr_or_len` field, by
 /// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
 /// or is not a ZST respectively.
 ///
-/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
-/// it'll get the appropriate non-null metadata in the backend without needing
-/// to call `assume` manually.
+/// When giving a `NonNull<T>` for the end, it creates it by offsetting from the
+/// `ptr` so that the backend knows that both pointers have the same provenance.
 macro_rules! if_zst {
     (mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
         #![allow(unused_unsafe)] // we're sometimes used within an unsafe block
 
         if T::IS_ZST {
-            // SAFETY: for ZSTs, the pointer is storing a provenance-free length,
-            // so consuming and updating it as a `usize` is fine.
-            let $len = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<usize>() };
+            let $len = &mut $this.end_addr_or_len;
             $zst_body
         } else {
-            // SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
-            let $end = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<NonNull<T>>() };
+            // SAFETY: By type invariant `end >= ptr`, and thus the subtraction
+            // cannot overflow, and the iter represents a single allocated
+            // object so the `add` will also be in-range.
+            let $end = unsafe {
+                let ptr_addr = addr_usize($this.ptr);
+                // Need to load as `NonZero` to get `!range` metadata
+                let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
+                // Not using `with_addr` because we have ordering information that
+                // we can take advantage of here that `with_addr` cannot.
+                let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
+                $this.ptr.byte_add(byte_diff)
+            };
             $other_body
         }
     }};
     ($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
         #![allow(unused_unsafe)] // we're sometimes used within an unsafe block
 
         if T::IS_ZST {
-            let $len = $this.end_or_len.addr();
+            let $len = $this.end_addr_or_len;
             $zst_body
         } else {
-            // SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
-            let $end = unsafe { *ptr::addr_of!($this.end_or_len).cast::<NonNull<T>>() };
+            // SAFETY: By type invariant `end >= ptr`, and thus the subtraction
+            // cannot overflow, and the iter represents a single allocated
+            // object so the `add` will also be in-range.
+            let $end = unsafe {
+                let ptr_addr = addr_usize($this.ptr);
+                // Need to load as `NonZero` to get `!range` metadata
+                let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
+                // Not using `with_addr` because we have ordering information that
+                // we can take advantage of here that `with_addr` cannot.
+                let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
+                $this.ptr.byte_add(byte_diff)
+            };
             $other_body
         }
     }};
@@ -128,8 +152,9 @@ macro_rules! iterator {
                     // which is guaranteed to not overflow an `isize`. Also, the resulting pointer
                     // is in bounds of `slice`, which fulfills the other requirements for `offset`.
                     end => unsafe {
-                        *end = end.sub(offset);
-                        *end
+                        let new_end = end.sub(offset);
+                        set_end!(self.end = new_end);
+                        new_end
                     },
                 )
             }
@@ -184,7 +209,7 @@ macro_rules! iterator {
                     // This iterator is now empty.
                     if_zst!(mut self,
                         len => *len = 0,
-                        end => self.ptr = *end,
+                        end => self.ptr = end,
                     );
                     return None;
                 }
@@ -409,7 +434,7 @@ macro_rules! iterator {
                     // This iterator is now empty.
                     if_zst!(mut self,
                         len => *len = 0,
-                        end => *end = self.ptr,
+                        _end => set_end!(self.end = self.ptr),
                     );
                     return None;
                 }

diff --git a/tests/codegen/issues/issue-37945.rs b/tests/codegen/issues/issue-37945.rs
@@ -3,32 +3,33 @@
 
 // Check that LLVM understands that `Iter` pointer is not null. Issue #37945.
 
+// There used to be a comparison against `null`, so we check that it's not there
+// and that the appropriate parameter metadata is.
+
 #![crate_type = "lib"]
 
 use std::slice::Iter;
 
 #[no_mangle]
 pub fn is_empty_1(xs: Iter<f32>) -> bool {
-// CHECK-LABEL: @is_empty_1(
-// CHECK-NEXT:  start:
-// CHECK-NEXT:    [[A:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
-// CHECK-NEXT:    tail call void @llvm.assume(i1 [[A]])
-// The order between %xs.0 and %xs.1 on the next line doesn't matter
-// and different LLVM versions produce different order.
-// CHECK-NEXT:    [[B:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
-// CHECK-NEXT:    ret i1 [[B:%.*]]
+// CHECK-LABEL: @is_empty_1
+// CHECK-SAME: (ptr noundef nonnull %xs.0,
+
+// CHECK-NOT: null
+// CHECK-NOT: i32 0
+// CHECK-NOT: i64 0
+
     {xs}.next().is_none()
 }
 
 #[no_mangle]
 pub fn is_empty_2(xs: Iter<f32>) -> bool {
 // CHECK-LABEL: @is_empty_2
-// CHECK-NEXT:  start:
-// CHECK-NEXT:    [[C:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
-// CHECK-NEXT:    tail call void @llvm.assume(i1 [[C]])
-// The order between %xs.0 and %xs.1 on the next line doesn't matter
-// and different LLVM versions produce different order.
-// CHECK-NEXT:    [[D:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
-// CHECK-NEXT:    ret i1 [[D:%.*]]
+// CHECK-SAME: (ptr noundef nonnull %xs.0,
+
+// CHECK-NOT: null
+// CHECK-NOT: i32 0
+// CHECK-NOT: i64 0
+
     xs.map(|&x| x).next().is_none()
 }
diff --git a/tests/codegen/slice-iter-len-eq-zero.rs b/tests/codegen/slice-iter-len-eq-zero.rs
@@ -7,7 +7,8 @@ type Demo = [u8; 3];
 #[no_mangle]
 pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
     // CHECK-NOT: sub
-    // CHECK: %[[RET:.+]] = icmp eq ptr {{%1|%0}}, {{%1|%0}}
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %0 to [[USIZE:i[0-9]+]]
+    // CHECK: %[[RET:.+]] = icmp eq [[USIZE]] %[[ADDR]], %1
     // CHECK: ret i1 %[[RET]]
     y.len() == 0
 }

diff --git a/tests/codegen/slice-iter-nonnull.rs b/tests/codegen/slice-iter-nonnull.rs
@@ -10,36 +10,41 @@
 // needed as the code changed to read it as a `NonNull`, and thus gets the
 // appropriate `!nonnull` annotations naturally.
 
+// Well, now that the end is stored without provenance, the end pointer gets a
+// `!range` annotation instead of a `!nonnull` one.
+
 // CHECK-LABEL: @slice_iter_next(
 #[no_mangle]
 pub fn slice_iter_next<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
-    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
-    // CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
-    // CHECK-SAME: !nonnull
-    // CHECK-SAME: !noundef
     // CHECK: %[[START:.+]] = load ptr, ptr %it,
     // CHECK-SAME: !nonnull
     // CHECK-SAME: !noundef
-    // CHECK: icmp eq ptr %[[START]], %[[END]]
+    // CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE:i32|i64]]
+    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
+    // CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
+    // CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
+    // CHECK-SAME: !noundef
+    // CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
 
-    // CHECK: store ptr{{.+}}, ptr %it,
+    // CHECK: store ptr {{.+}}, ptr %it,
 
     it.next()
 }
 
 // CHECK-LABEL: @slice_iter_next_back(
 #[no_mangle]
 pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
-    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
-    // CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
-    // CHECK-SAME: !nonnull
-    // CHECK-SAME: !noundef
     // CHECK: %[[START:.+]] = load ptr, ptr %it,
     // CHECK-SAME: !nonnull
     // CHECK-SAME: !noundef
-    // CHECK: icmp eq ptr %[[START]], %[[END]]
+    // CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
+    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
+    // CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
+    // CHECK-SAME: !range ![[NONZERO_META]]
+    // CHECK-SAME: !noundef
+    // CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
 
-    // CHECK: store ptr{{.+}}, ptr %[[ENDP]],
+    // CHECK: store [[USIZE]] {{.+}}, ptr %[[ENDP]],
 
     it.next_back()
 }
@@ -54,11 +59,12 @@ pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'
 #[no_mangle]
 pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
     // CHECK-NOT: slice
-    // CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
+    // CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
+    // CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
     // CHECK-NOT: slice
     // CHECK: insertvalue {{.+}} ptr %slice.0, 0
     // CHECK-NOT: slice
-    // CHECK: insertvalue {{.+}} ptr %[[END]], 1
+    // CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
     // CHECK-NOT: slice
     // CHECK: }
     slice.iter()
@@ -69,11 +75,12 @@ pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
 #[no_mangle]
 pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
     // CHECK-NOT: slice
-    // CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
+    // CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
+    // CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
     // CHECK-NOT: slice
     // CHECK: insertvalue {{.+}} ptr %slice.0, 0
     // CHECK-NOT: slice
-    // CHECK: insertvalue {{.+}} ptr %[[END]], 1
+    // CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
     // CHECK-NOT: slice
     // CHECK: }
     slice.iter_mut()
@@ -82,33 +89,36 @@ pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
 // CHECK-LABEL: @slice_iter_is_empty
 #[no_mangle]
 pub fn slice_iter_is_empty(it: &std::slice::Iter<'_, u32>) -> bool {
-    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
-    // CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
-    // CHECK-SAME: !nonnull
-    // CHECK-SAME: !noundef
     // CHECK: %[[START:.+]] = load ptr, ptr %it,
     // CHECK-SAME: !nonnull
     // CHECK-SAME: !noundef
+    // CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
+    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
+    // CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
+    // CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
+    // CHECK-SAME: !noundef
 
-    // CHECK: %[[RET:.+]] = icmp eq ptr %[[START]], %[[END]]
+    // CHECK: %[[RET:.+]] = icmp eq i64 %[[END_ADDR]], %[[START_ADDR]]
     // CHECK: ret i1 %[[RET]]
     it.is_empty()
 }
 
 // CHECK-LABEL: @slice_iter_len
 #[no_mangle]
 pub fn slice_iter_len(it: &std::slice::Iter<'_, u32>) -> usize {
-    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
-    // CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
-    // CHECK-SAME: !nonnull
-    // CHECK-SAME: !noundef
     // CHECK: %[[START:.+]] = load ptr, ptr %it,
     // CHECK-SAME: !nonnull
     // CHECK-SAME: !noundef
+    // CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
+    // CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
+    // CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
+    // CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
+    // CHECK-SAME: !noundef
 
-    // CHECK: ptrtoint
-    // CHECK: ptrtoint
-    // CHECK: sub nuw
-    // CHECK: lshr exact
+    // CHECK: %[[BYTE_DIFF:.+]] = sub nuw [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
+    // CHECK: %[[ELEM_DIFF:.+]] = lshr exact [[USIZE]] %[[BYTE_DIFF]], 2
+    // CHECK: ret [[USIZE]] %[[ELEM_DIFF]]
     it.len()
 }
+
+// CHECK: ![[NONZERO_META]] = !{[[USIZE]] 1, [[USIZE]] 0}