Skip to content

Commit c49d0aa

Browse files
committed
add wrapping_offset_from which allows wrapping but still requires ptrs to be for the same allocation
1 parent b9d608c commit c49d0aa

File tree

12 files changed

+378
-62
lines changed

12 files changed

+378
-62
lines changed

compiler/rustc_codegen_ssa/src/mir/intrinsic.rs

+18-13
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
438438
bx.icmp(IntPredicate::IntEQ, a, b)
439439
}
440440

441-
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
441+
sym::ptr_offset_from | sym::ptr_offset_from_unsigned | sym::ptr_wrapping_offset_from => {
442442
let ty = substs.type_at(0);
443443
let pointee_size = bx.layout_of(ty).size;
444444

@@ -447,18 +447,23 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
447447
let a = bx.ptrtoint(a, bx.type_isize());
448448
let b = bx.ptrtoint(b, bx.type_isize());
449449
let pointee_size = bx.const_usize(pointee_size.bytes());
450-
if name == sym::ptr_offset_from {
451-
// This is the same sequence that Clang emits for pointer subtraction.
452-
// It can be neither `nsw` nor `nuw` because the input is treated as
453-
// unsigned but then the output is treated as signed, so neither works.
454-
let d = bx.sub(a, b);
455-
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
456-
bx.exactsdiv(d, pointee_size)
457-
} else {
458-
// The `_unsigned` version knows the relative ordering of the pointers,
459-
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
460-
let d = bx.unchecked_usub(a, b);
461-
bx.exactudiv(d, pointee_size)
450+
match name {
451+
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
452+
// This is the same sequence that Clang emits for pointer subtraction.
453+
// Even for `ptr_offset_from`, which cannot wrap, this can be neither `nsw`
454+
// nor `nuw` because the input is treated as unsigned but then the output is
455+
// treated as signed, so neither works.
456+
let d = bx.sub(a, b);
457+
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
458+
bx.exactsdiv(d, pointee_size)
459+
}
460+
sym::ptr_offset_from_unsigned => {
461+
// The `_unsigned` version knows the relative ordering of the pointers,
462+
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
463+
let d = bx.unchecked_usub(a, b);
464+
bx.exactudiv(d, pointee_size)
465+
}
466+
_ => bug!(),
462467
}
463468
}
464469

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+49-40
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
270270
let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self);
271271
self.write_pointer(offset_ptr, dest)?;
272272
}
273-
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
273+
sym::ptr_offset_from
274+
| sym::ptr_offset_from_unsigned
275+
| sym::ptr_wrapping_offset_from => {
274276
let a = self.read_pointer(&args[0])?;
275277
let b = self.read_pointer(&args[1])?;
276278

@@ -288,6 +290,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
288290
(Err(_), _) | (_, Err(_)) => {
289291
// We managed to find a valid allocation for one pointer, but not the other.
290292
// That means they are definitely not pointing to the same allocation.
293+
// FIXME: if at least one pointer was a wildcard pointer, we should not throw UB here
294+
// but just use their absolute addresses instead.
291295
throw_ub_custom!(
292296
fluent::const_eval_different_allocations,
293297
name = intrinsic_name,
@@ -315,57 +319,62 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
315319
let b_offset = ImmTy::from_uint(b_offset, usize_layout);
316320
self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)?
317321
};
318-
if overflowed {
319-
// a < b
320-
if intrinsic_name == sym::ptr_offset_from_unsigned {
321-
throw_ub_custom!(
322-
fluent::const_eval_unsigned_offset_from_overflow,
323-
a_offset = a_offset,
324-
b_offset = b_offset,
325-
);
326-
}
327-
// The signed form of the intrinsic allows this. If we interpret the
328-
// difference as isize, we'll get the proper signed difference. If that
329-
// seems *positive*, they were more than isize::MAX apart.
330-
let dist = val.to_target_isize(self)?;
331-
if dist >= 0 {
332-
throw_ub_custom!(
333-
fluent::const_eval_offset_from_underflow,
334-
name = intrinsic_name,
335-
);
336-
}
337-
dist
338-
} else {
339-
// b >= a
340-
let dist = val.to_target_isize(self)?;
341-
// If converting to isize produced a *negative* result, we had an overflow
342-
// because they were more than isize::MAX apart.
343-
if dist < 0 {
344-
throw_ub_custom!(
345-
fluent::const_eval_offset_from_overflow,
346-
name = intrinsic_name,
347-
);
322+
let dist = val.to_target_isize(self)?;
323+
if intrinsic_name != sym::ptr_wrapping_offset_from {
324+
// Overflow check.
325+
if overflowed {
326+
// a < b
327+
if intrinsic_name == sym::ptr_offset_from_unsigned {
328+
throw_ub_custom!(
329+
fluent::const_eval_unsigned_offset_from_overflow,
330+
a_offset = a_offset,
331+
b_offset = b_offset,
332+
);
333+
}
334+
// The signed form of the intrinsic allows this. If we interpret the
335+
// difference as isize, we'll get the proper signed difference. If that
336+
// seems *positive*, they were more than isize::MAX apart.
337+
if dist >= 0 {
338+
throw_ub_custom!(
339+
fluent::const_eval_offset_from_underflow,
340+
name = intrinsic_name,
341+
);
342+
}
343+
} else {
344+
// a >= b, no overflow during subtraction.
345+
// If converting to isize produced a *negative* result, we had an overflow
346+
// when converting to `isize` because they were more than isize::MAX apart.
347+
if dist < 0 {
348+
throw_ub_custom!(
349+
fluent::const_eval_offset_from_overflow,
350+
name = intrinsic_name,
351+
);
352+
}
348353
}
349-
dist
350354
}
355+
dist
351356
};
352357

353358
// Check that the range between them is dereferenceable ("in-bounds or one past the
354359
// end of the same allocation"). This is like the check in ptr_offset_inbounds.
355-
let min_ptr = if dist >= 0 { b } else { a };
356-
self.check_ptr_access_align(
357-
min_ptr,
358-
Size::from_bytes(dist.unsigned_abs()),
359-
Align::ONE,
360-
CheckInAllocMsg::OffsetFromTest,
361-
)?;
360+
if intrinsic_name != sym::ptr_wrapping_offset_from {
361+
let min_ptr = if dist >= 0 { b } else { a };
362+
self.check_ptr_access_align(
363+
min_ptr,
364+
Size::from_bytes(dist.unsigned_abs()),
365+
Align::ONE,
366+
CheckInAllocMsg::OffsetFromTest,
367+
)?;
368+
}
362369

363370
// Perform division by size to compute return value.
364371
let ret_layout = if intrinsic_name == sym::ptr_offset_from_unsigned {
365372
assert!(0 <= dist && dist <= self.target_isize_max());
366373
usize_layout
367374
} else {
368-
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
375+
if intrinsic_name != sym::ptr_wrapping_offset_from {
376+
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
377+
}
369378
isize_layout
370379
};
371380
let pointee_layout = self.layout_of(substs.type_at(0))?;

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
355355
tcx.mk_unit(),
356356
),
357357

358-
sym::ptr_offset_from => {
358+
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
359359
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.isize)
360360
}
361361
sym::ptr_offset_from_unsigned => {

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,7 @@ symbols! {
11561156
ptr_offset_from,
11571157
ptr_offset_from_unsigned,
11581158
ptr_unique,
1159+
ptr_wrapping_offset_from,
11591160
pub_macro_rules,
11601161
pub_restricted,
11611162
public,

library/core/src/intrinsics.rs

+6
Original file line numberDiff line numberDiff line change
@@ -2337,6 +2337,12 @@ extern "rust-intrinsic" {
23372337
#[rustc_nounwind]
23382338
pub fn ptr_offset_from_unsigned<T>(ptr: *const T, base: *const T) -> usize;
23392339

2340+
/// See documentation of `<*const T>::wrapping_offset_from` for details.
2341+
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
2342+
#[rustc_nounwind]
2343+
#[cfg(not(bootstrap))]
2344+
pub fn ptr_wrapping_offset_from<T>(ptr: *const T, base: *const T) -> isize;
2345+
23402346
/// See documentation of `<*const T>::guaranteed_eq` for details.
23412347
/// Returns `2` if the result is unknown.
23422348
/// Returns `1` if the pointers are guaranteed equal

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
//
100100
// Library features:
101101
// tidy-alphabetical-start
102+
#![cfg_attr(not(bootstrap), feature(ptr_wrapping_offset_from))]
102103
#![feature(char_indices_offset)]
103104
#![feature(const_align_of_val)]
104105
#![feature(const_align_of_val_raw)]

library/core/src/ptr/const_ptr.rs

+112-2
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,9 @@ impl<T: ?Sized> *const T {
605605
/// Calculates the distance between two pointers. The returned value is in
606606
/// units of T: the distance in bytes divided by `mem::size_of::<T>()`.
607607
///
608-
/// This function is the inverse of [`offset`].
608+
/// This function is the inverse of [`offset`]: it is valid to call if and only if
609+
/// `self` could have been computed as `origin.offset(n)` for some `n`, and it will
610+
/// then return that `n`.
609611
///
610612
/// [`offset`]: #method.offset
611613
///
@@ -644,6 +646,12 @@ impl<T: ?Sized> *const T {
644646
/// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on
645647
/// such large allocations either.)
646648
///
649+
/// The requirement for pointers to be derived from the same allocated object is primarily
650+
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
651+
/// objects do not have a known distance to each other. However, the requirement also exists at
652+
/// runtime, and may be exploited by optimizations. To avoid this requirement, use
653+
/// `(self as isize).wrapping_sub(origin as isize) / mem::size_of::<T>() as isize`.
654+
///
647655
/// [`add`]: #method.add
648656
/// [allocated object]: crate::ptr#allocated-object
649657
///
@@ -701,7 +709,7 @@ impl<T: ?Sized> *const T {
701709
/// units of **bytes**.
702710
///
703711
/// This is purely a convenience for casting to a `u8` pointer and
704-
/// using [offset_from][pointer::offset_from] on it. See that method for
712+
/// using [`offset_from`][pointer::offset_from] on it. See that method for
705713
/// documentation and safety requirements.
706714
///
707715
/// For non-`Sized` pointees this operation considers only the data pointers,
@@ -799,6 +807,108 @@ impl<T: ?Sized> *const T {
799807
unsafe { intrinsics::ptr_offset_from_unsigned(self, origin) }
800808
}
801809

810+
/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
811+
/// is in units of T: the distance in bytes divided by `mem::size_of::<T>()`.
812+
///
813+
/// This function is the inverse of [`wrapping_offset`]: it is valid to call if and only if
814+
/// `self` could have been computed as `origin.wrapping_offset(n)` for some `n`, and it will
815+
/// then return that `n`.
816+
///
817+
/// [`wrapping_offset`]: #method.wrapping_offset
818+
///
819+
/// # Safety
820+
///
821+
/// If any of the following conditions are violated, the result is Undefined
822+
/// Behavior:
823+
///
824+
/// * Both pointers must be *derived from* a pointer to the same [allocated object].
825+
/// (See below for an example.)
826+
///
827+
/// * The distance between the pointers, in bytes, must be an exact multiple
828+
/// of the size of `T`.
829+
///
830+
/// Unlike [`offset_from`][pointer::offset_from], this method does *not* require the pointers to
831+
/// be in-bounds of the object they are derived from, nor does it impose any restrictions
832+
/// regarding the maximum distance or wrapping around the address space.
833+
///
834+
/// The requirement for pointers to be derived from the same allocated object is primarily
835+
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
836+
/// objects do not have a known distance to each other. However, the requirement also exists at
837+
/// runtime, and may be exploited by optimizations. To avoid this requirement, use
838+
/// `(self as isize).wrapping_sub(origin as isize) / mem::size_of::<T>() as isize`.
839+
///
840+
/// [allocated object]: crate::ptr#allocated-object
841+
///
842+
/// # Panics
843+
///
844+
/// This function panics if `T` is a Zero-Sized Type ("ZST").
845+
///
846+
/// # Examples
847+
///
848+
/// Basic usage:
849+
///
850+
/// ```
851+
/// #![feature(ptr_wrapping_offset_from)]
852+
/// let a = [0; 2];
853+
/// let ptr1: *const i32 = &a[1];
854+
/// let ptr2: *const i32 = a.as_ptr().wrapping_offset(3); // out-of-bounds!
855+
/// unsafe {
856+
/// assert_eq!(ptr2.wrapping_offset_from(ptr1), 2);
857+
/// assert_eq!(ptr1.wrapping_offset_from(ptr2), -2);
858+
/// assert_eq!(ptr1.wrapping_offset(2), ptr2);
859+
/// assert_eq!(ptr2.wrapping_offset(-2), ptr1);
860+
/// }
861+
/// ```
862+
///
863+
/// *Incorrect* usage:
864+
///
865+
/// ```rust,no_run
866+
/// #![feature(ptr_wrapping_offset_from)]
867+
/// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8;
868+
/// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8;
869+
/// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize);
870+
/// // Make ptr2_other an "alias" of ptr2, but derived from ptr1.
871+
/// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff);
872+
/// assert_eq!(ptr2 as usize, ptr2_other as usize);
873+
/// // Since ptr2_other and ptr2 are derived from pointers to different objects,
874+
/// // computing their offset is undefined behavior, even though
875+
/// // they point to the same address!
876+
/// unsafe {
877+
/// let zero = ptr2_other.wrapping_offset_from(ptr2); // Undefined Behavior
878+
/// }
879+
/// ```
880+
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
881+
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
882+
#[inline]
883+
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
884+
#[cfg(not(bootstrap))]
885+
pub const unsafe fn wrapping_offset_from(self, origin: *const T) -> isize
886+
where
887+
T: Sized,
888+
{
889+
// SAFETY: the caller must uphold the safety contract for `ptr_wrapping_offset_from`.
890+
unsafe { intrinsics::ptr_wrapping_offset_from(self, origin) }
891+
}
892+
893+
/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
894+
/// is in units of **bytes**.
895+
///
896+
/// This is purely a convenience for casting to a `u8` pointer and using
897+
/// [`wrapping_offset_from`][pointer::wrapping_offset_from] on it. See that method for
898+
/// documentation and safety requirements.
899+
///
900+
/// For non-`Sized` pointees this operation considers only the data pointers,
901+
/// ignoring the metadata.
902+
#[inline(always)]
903+
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
904+
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
905+
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
906+
#[cfg(not(bootstrap))]
907+
pub const unsafe fn wrapping_byte_offset_from<U: ?Sized>(self, origin: *const U) -> isize {
908+
// SAFETY: the caller must uphold the safety contract for `wrapping_offset_from`.
909+
unsafe { self.cast::<u8>().wrapping_offset_from(origin.cast::<u8>()) }
910+
}
911+
802912
/// Returns whether two pointers are guaranteed to be equal.
803913
///
804914
/// At runtime this function behaves like `Some(self == other)`.

0 commit comments

Comments
 (0)