Skip to content

Make slice iterators carry only a single provenance #122971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions library/core/src/slice/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ mod macros;
use crate::cmp;
use crate::fmt;
use crate::hint::assert_unchecked;
use crate::intrinsics;
use crate::iter::{
FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
};
use crate::marker::PhantomData;
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZero;
use crate::ptr::{self, without_provenance, without_provenance_mut, NonNull};
use crate::ptr::{self, NonNull};

use super::{from_raw_parts, from_raw_parts_mut};

Expand Down Expand Up @@ -65,10 +66,14 @@ pub struct Iter<'a, T: 'a> {
///
/// This address will be used for all ZST elements, never changed.
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
/// For non-ZSTs, the address of the past-the-end element. This is
/// intentionally *not* a pointer, so that it doesn't carry provenance.
/// If you're turning this into a pointer, you need to use the provenance from
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
/// that reads from the start and the end are actually the same provenance.)
///
/// For ZSTs, this is `ptr::dangling(len)`.
end_or_len: *const T,
/// For ZSTs, this is the length.
end_addr_or_len: usize,
_marker: PhantomData<&'a T>,
}

Expand All @@ -91,10 +96,9 @@ impl<'a, T> Iter<'a, T> {
let ptr: NonNull<T> = NonNull::from(slice).cast();
// SAFETY: Similar to `IterMut::new`.
unsafe {
let end_or_len =
if T::IS_ZST { without_provenance(len) } else { ptr.as_ptr().add(len) };
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };

Self { ptr, end_or_len, _marker: PhantomData }
Self { ptr, end_addr_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -144,7 +148,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
impl<T> Clone for Iter<'_, T> {
#[inline]
fn clone(&self) -> Self {
Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
Iter { ptr: self.ptr, end_addr_or_len: self.end_addr_or_len, _marker: self._marker }
}
}

Expand Down Expand Up @@ -188,10 +192,14 @@ pub struct IterMut<'a, T: 'a> {
///
/// This address will be used for all ZST elements, never changed.
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
/// For non-ZSTs, the address of the past-the-end element. This is
/// intentionally *not* a pointer, so that it doesn't carry provenance.
/// If you're turning this into a pointer, you need to use the provenance from
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
/// that reads from the start and the end are actually the same provenance.)
///
/// For ZSTs, this is `ptr::without_provenance_mut(len)`.
end_or_len: *mut T,
/// For ZSTs, this is the length.
end_addr_or_len: usize,
_marker: PhantomData<&'a mut T>,
}

Expand Down Expand Up @@ -229,10 +237,9 @@ impl<'a, T> IterMut<'a, T> {
// See the `next_unchecked!` and `is_empty!` macros as well as the
// `post_inc_start` method for more information.
unsafe {
let end_or_len =
if T::IS_ZST { without_provenance_mut(len) } else { ptr.as_ptr().add(len) };
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };

Self { ptr, end_or_len, _marker: PhantomData }
Self { ptr, end_addr_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -3423,3 +3430,12 @@ impl<'a, T: 'a + fmt::Debug, P> fmt::Debug for ChunkByMut<'a, T, P> {
f.debug_struct("ChunkByMut").field("slice", &self.slice).finish()
}
}

/// Same as `p.addr().get()`, but faster to compile by avoiding a bunch of
/// intermediate steps and unneeded UB checks, which also inlines better.
#[inline]
fn addr_usize<T>(p: NonNull<T>) -> usize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a case where the existence of this method can be made unnecessary? It feels weird that the alternative would perform so poorly, or involve UB checks at all.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The runtime performance of .addr().get() is completely fine.

The problem is just in the sheer volume of MIR that it ends up producing -- just transmuting directly is literally an order of magnitude less stuff: https://rust.godbolt.org/z/cnzTW51oh

And with how often these are used, that makes a difference.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I can remove a bit of that, at least: #123139

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm mostly just wondering if this is sufficient motivation to add this method on NonNull directly (it can be pub(crate) for now), whether this hack is in fact specific to this module, or whether this function should be marked as FIXME and should be addressed later.

// SAFETY: `NonNull` for a sized type has the same layout as `usize`,
// and we intentionally don't want to expose here.
unsafe { intrinsics::transmute(p) }
}
57 changes: 41 additions & 16 deletions library/core/src/slice/iter/macros.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,60 @@
//! Macros used by iterators of slice.

/// Convenience & performance macro for consuming the `end_or_len` field, by
/// Convenience macro for updating the `end_addr_or_len` field for non-ZSTs.
macro_rules! set_end {
($this:ident . end = $new_end:expr) => {{
$this.end_addr_or_len = addr_usize($new_end);
}};
}

/// Convenience & performance macro for consuming the `end_addr_or_len` field, by
/// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
/// or is not a ZST respectively.
///
/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
/// it'll get the appropriate non-null metadata in the backend without needing
/// to call `assume` manually.
/// When giving a `NonNull<T>` for the end, it creates it by offsetting from the
/// `ptr` so that the backend knows that both pointers have the same provenance.
macro_rules! if_zst {
(mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

if T::IS_ZST {
// SAFETY: for ZSTs, the pointer is storing a provenance-free length,
// so consuming and updating it as a `usize` is fine.
let $len = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<usize>() };
let $len = &mut $this.end_addr_or_len;
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<NonNull<T>>() };
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
// cannot overflow, and the iter represents a single allocated
// object so the `add` will also be in-range.
let $end = unsafe {
let ptr_addr = addr_usize($this.ptr);
// Need to load as `NonZero` to get `!range` metadata
let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
// Not using `with_addr` because we have ordering information that
// we can take advantage of here that `with_addr` cannot.
let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
$this.ptr.byte_add(byte_diff)
};
Comment on lines +24 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's unfortunate. I think this will make #120682 impossible since there my approach for forward iteration is to only operate off end and never read ptr, only write to it. That makes the writes easy to hoist. If re-acquiring the provenance requires reading from ptr in each next that no longer works.

It'd be great if instead we could somehow tell LLVM that two pointers have the same provenance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we keep it a pointer so it can be used as such when it's useful and do ptr-addr-arithmetic-ptr dances otherwise? or would that lose the intended benefits?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the problem with "when it's useful" is that it has to mean potentially-two-provenances, so start + len and end are not equivalent, and thus anything that uses both isn't fixed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My goal with that PR is to make unchecked indexing less hazardous and also applicable to vec::IntoIter with T: Drop which would mean zip(vec::IntoIter, slice::Iter) would now get the unchecked indexing too in those cases which it didn't previously. So there would also be perf benefits from that approach.

Also, have you checked how this affects looping over next_back or rev? It seems that it would have to recalculate the length / pointer provenance on each iteration. In the end it might optimize away, but the IR probably becomes more gnarly.

I think the problem with "when it's useful" is that it has to mean potentially-two-provenances

For those methods. But the other methods could treat the field as usize, no?

$other_body
}
}};
($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

if T::IS_ZST {
let $len = $this.end_or_len.addr();
let $len = $this.end_addr_or_len;
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { *ptr::addr_of!($this.end_or_len).cast::<NonNull<T>>() };
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
// cannot overflow, and the iter represents a single allocated
// object so the `add` will also be in-range.
let $end = unsafe {
let ptr_addr = addr_usize($this.ptr);
// Need to load as `NonZero` to get `!range` metadata
let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
// Not using `with_addr` because we have ordering information that
// we can take advantage of here that `with_addr` cannot.
let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
$this.ptr.byte_add(byte_diff)
};
$other_body
}
}};
Expand Down Expand Up @@ -128,8 +152,9 @@ macro_rules! iterator {
// which is guaranteed to not overflow an `isize`. Also, the resulting pointer
// is in bounds of `slice`, which fulfills the other requirements for `offset`.
end => unsafe {
*end = end.sub(offset);
*end
let new_end = end.sub(offset);
set_end!(self.end = new_end);
new_end
},
)
}
Expand Down Expand Up @@ -184,7 +209,7 @@ macro_rules! iterator {
// This iterator is now empty.
if_zst!(mut self,
len => *len = 0,
end => self.ptr = *end,
end => self.ptr = end,
);
return None;
}
Expand Down Expand Up @@ -409,7 +434,7 @@ macro_rules! iterator {
// This iterator is now empty.
if_zst!(mut self,
len => *len = 0,
end => *end = self.ptr,
_end => set_end!(self.end = self.ptr),
);
return None;
}
Expand Down
31 changes: 16 additions & 15 deletions tests/codegen/issues/issue-37945.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,33 @@

// Check that LLVM understands that `Iter` pointer is not null. Issue #37945.

// There used to be a comparison against `null`, so we check that it's not there
// and that the appropriate parameter metadata is.

#![crate_type = "lib"]

use std::slice::Iter;

#[no_mangle]
pub fn is_empty_1(xs: Iter<f32>) -> bool {
// CHECK-LABEL: @is_empty_1(
// CHECK-NEXT: start:
// CHECK-NEXT: [[A:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
// CHECK-NEXT: tail call void @llvm.assume(i1 [[A]])
// The order between %xs.0 and %xs.1 on the next line doesn't matter
// and different LLVM versions produce different order.
// CHECK-NEXT: [[B:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
// CHECK-NEXT: ret i1 [[B:%.*]]
// CHECK-LABEL: @is_empty_1
// CHECK-SAME: (ptr noundef nonnull %xs.0,

// CHECK-NOT: null
// CHECK-NOT: i32 0
// CHECK-NOT: i64 0

{xs}.next().is_none()
}

#[no_mangle]
pub fn is_empty_2(xs: Iter<f32>) -> bool {
// CHECK-LABEL: @is_empty_2
// CHECK-NEXT: start:
// CHECK-NEXT: [[C:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
// CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
// The order between %xs.0 and %xs.1 on the next line doesn't matter
// and different LLVM versions produce different order.
// CHECK-NEXT: [[D:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
// CHECK-NEXT: ret i1 [[D:%.*]]
// CHECK-SAME: (ptr noundef nonnull %xs.0,

// CHECK-NOT: null
// CHECK-NOT: i32 0
// CHECK-NOT: i64 0

xs.map(|&x| x).next().is_none()
}
3 changes: 2 additions & 1 deletion tests/codegen/slice-iter-len-eq-zero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ type Demo = [u8; 3];
#[no_mangle]
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
// CHECK-NOT: sub
// CHECK: %[[RET:.+]] = icmp eq ptr {{%1|%0}}, {{%1|%0}}
// CHECK: %[[ADDR:.+]] = ptrtoint ptr %0 to [[USIZE:i[0-9]+]]
// CHECK: %[[RET:.+]] = icmp eq [[USIZE]] %[[ADDR]], %1
// CHECK: ret i1 %[[RET]]
y.len() == 0
}
Expand Down
68 changes: 39 additions & 29 deletions tests/codegen/slice-iter-nonnull.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,41 @@
// needed as the code changed to read it as a `NonNull`, and thus gets the
// appropriate `!nonnull` annotations naturally.

// Well, now that the end is stored without provenance, the end pointer gets a
// `!range` annotation instead of a `!nonnull` one.

// CHECK-LABEL: @slice_iter_next(
#[no_mangle]
pub fn slice_iter_next<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START:.+]] = load ptr, ptr %it,
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: icmp eq ptr %[[START]], %[[END]]
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE:i32|i64]]
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
// CHECK-SAME: !noundef
// CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]

// CHECK: store ptr{{.+}}, ptr %it,
// CHECK: store ptr {{.+}}, ptr %it,

it.next()
}

// CHECK-LABEL: @slice_iter_next_back(
#[no_mangle]
pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START:.+]] = load ptr, ptr %it,
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: icmp eq ptr %[[START]], %[[END]]
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
// CHECK-SAME: !range ![[NONZERO_META]]
// CHECK-SAME: !noundef
// CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]

// CHECK: store ptr{{.+}}, ptr %[[ENDP]],
// CHECK: store [[USIZE]] {{.+}}, ptr %[[ENDP]],

it.next_back()
}
Expand All @@ -54,11 +59,12 @@ pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'
#[no_mangle]
pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
// CHECK-NOT: slice
// CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
// CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
// CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
// CHECK-NOT: slice
// CHECK: insertvalue {{.+}} ptr %slice.0, 0
// CHECK-NOT: slice
// CHECK: insertvalue {{.+}} ptr %[[END]], 1
// CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
// CHECK-NOT: slice
// CHECK: }
slice.iter()
Expand All @@ -69,11 +75,12 @@ pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
#[no_mangle]
pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
// CHECK-NOT: slice
// CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
// CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
// CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
// CHECK-NOT: slice
// CHECK: insertvalue {{.+}} ptr %slice.0, 0
// CHECK-NOT: slice
// CHECK: insertvalue {{.+}} ptr %[[END]], 1
// CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
// CHECK-NOT: slice
// CHECK: }
slice.iter_mut()
Expand All @@ -82,33 +89,36 @@ pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
// CHECK-LABEL: @slice_iter_is_empty
#[no_mangle]
pub fn slice_iter_is_empty(it: &std::slice::Iter<'_, u32>) -> bool {
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START:.+]] = load ptr, ptr %it,
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
// CHECK-SAME: !noundef

// CHECK: %[[RET:.+]] = icmp eq ptr %[[START]], %[[END]]
// CHECK: %[[RET:.+]] = icmp eq i64 %[[END_ADDR]], %[[START_ADDR]]
// CHECK: ret i1 %[[RET]]
it.is_empty()
}

// CHECK-LABEL: @slice_iter_len
#[no_mangle]
pub fn slice_iter_len(it: &std::slice::Iter<'_, u32>) -> usize {
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START:.+]] = load ptr, ptr %it,
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
// CHECK-SAME: !noundef

// CHECK: ptrtoint
// CHECK: ptrtoint
// CHECK: sub nuw
// CHECK: lshr exact
// CHECK: %[[BYTE_DIFF:.+]] = sub nuw [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
// CHECK: %[[ELEM_DIFF:.+]] = lshr exact [[USIZE]] %[[BYTE_DIFF]], 2
// CHECK: ret [[USIZE]] %[[ELEM_DIFF]]
it.len()
}

// CHECK: ![[NONZERO_META]] = !{[[USIZE]] 1, [[USIZE]] 0}
Loading
Loading