Skip to content

Miri provenance cleanup #96165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/const_eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub(crate) fn const_caller_location(
if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &loc_place).is_err() {
bug!("intern_const_alloc_recursive should not error in this case")
}
ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
ConstValue::Scalar(Scalar::from_maybe_pointer(loc_place.ptr, &tcx))
}

/// Convert an evaluated constant to a type level constant
Expand Down
28 changes: 21 additions & 7 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ pub trait Machine<'mir, 'tcx>: Sized {
/// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to.
type PointerTag: Provenance + Eq + Hash + 'static;

/// When getting the AllocId of a pointer, some extra data is also obtained from the tag
/// that is passed to memory access hooks so they can do things with it.
type TagExtra: Copy + 'static;

/// Machines can define extra (non-instance) things that represent values of function pointers.
/// For example, Miri uses this to return a function pointer from `dlsym`
/// that can later be called to execute the right thing.
Expand Down Expand Up @@ -122,6 +126,8 @@ pub trait Machine<'mir, 'tcx>: Sized {

/// Whether, when checking alignment, we should `force_int` and thus support
/// custom alignment logic based on whatever the integer address happens to be.
///
/// Requires PointerTag::OFFSET_IS_ADDR to be true.
fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool;

/// Whether to enforce the validity invariant
Expand Down Expand Up @@ -285,11 +291,14 @@ pub trait Machine<'mir, 'tcx>: Sized {
addr: u64,
) -> Pointer<Option<Self::PointerTag>>;

/// Convert a pointer with provenance into an allocation-offset pair.
/// Convert a pointer with provenance into an allocation-offset pair
/// and extra provenance info.
///
/// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`.
fn ptr_get_alloc(
ecx: &InterpCx<'mir, 'tcx, Self>,
ptr: Pointer<Self::PointerTag>,
) -> (AllocId, Size);
) -> (AllocId, Size, Self::TagExtra);
Copy link
Member Author

@RalfJung RalfJung Apr 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, this is currently still redundant -- we could make this ptr_get_offset and have it return just a Size, and get all the other data directly from the PointerTag.

But in preparation for @carbotaniuman's PR, I think we should leave this the way it is. After that PR, Provenance::get_alloc_id should be called only for diagnostics and when Provenance::OFFSET_IS_ADDR == false, and ptr_get_alloc should be used throughout the machine to extract information from provenance. (And that's what we already do, so it would be silly to change it back and forth.)


/// Called to initialize the "extra" state of an allocation and make the pointers
/// it contains (in relocations) tagged. The way we construct allocations is
Expand Down Expand Up @@ -321,7 +330,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &Self,
_alloc_extra: &Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand All @@ -333,7 +342,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand All @@ -345,7 +354,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand Down Expand Up @@ -397,6 +406,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
// (CTFE and ConstProp) use the same instance. Here, we share that code.
pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
type PointerTag = AllocId;
type TagExtra = ();

type ExtraFnVal = !;

type MemoryMap =
Expand Down Expand Up @@ -474,9 +485,12 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
}

#[inline(always)]
fn ptr_get_alloc(_ecx: &InterpCx<$mir, $tcx, Self>, ptr: Pointer<AllocId>) -> (AllocId, Size) {
fn ptr_get_alloc(
_ecx: &InterpCx<$mir, $tcx, Self>,
ptr: Pointer<AllocId>,
) -> (AllocId, Size, Self::TagExtra) {
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = ptr.into_parts();
(alloc_id, offset)
(alloc_id, offset, ())
}
}
73 changes: 36 additions & 37 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
&self,
ptr: Pointer<AllocId>,
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = ptr.into_parts();
let alloc_id = ptr.provenance;
// We need to handle `extern static`.
match self.tcx.get_global_alloc(alloc_id) {
Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
Expand All @@ -171,7 +170,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
_ => {}
}
// And we need to get the tag.
Ok(M::tag_alloc_base_pointer(self, Pointer::new(alloc_id, offset)))
Ok(M::tag_alloc_base_pointer(self, ptr))
}

pub fn create_fn_alloc_ptr(
Expand Down Expand Up @@ -238,7 +237,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
new_align: Align,
kind: MemoryKind<M::MemoryKind>,
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
if offset.bytes() != 0 {
throw_ub_format!(
"reallocating {:?} which does not point to the beginning of an object",
Expand All @@ -255,14 +254,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
};
// This will also call the access hooks.
self.mem_copy(
ptr.into(),
ptr,
Align::ONE,
new_ptr.into(),
Align::ONE,
old_size.min(new_size),
/*nonoverlapping*/ true,
)?;
self.deallocate_ptr(ptr.into(), old_size_and_align, kind)?;
self.deallocate_ptr(ptr, old_size_and_align, kind)?;

Ok(new_ptr)
}
Expand All @@ -274,7 +273,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
old_size_and_align: Option<(Size, Align)>,
kind: MemoryKind<M::MemoryKind>,
) -> InterpResult<'tcx> {
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, tag) = self.ptr_get_alloc_id(ptr)?;
trace!("deallocating: {}", alloc_id);

if offset.bytes() != 0 {
Expand Down Expand Up @@ -330,7 +329,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
*self.tcx,
&mut self.machine,
&mut alloc.extra,
ptr.provenance,
(alloc_id, tag),
alloc_range(Size::ZERO, size),
)?;

Expand All @@ -350,17 +349,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::PointerTag>>,
size: Size,
align: Align,
) -> InterpResult<'tcx, Option<(AllocId, Size, Pointer<M::PointerTag>)>> {
) -> InterpResult<'tcx, Option<(AllocId, Size, M::TagExtra)>> {
let align = M::enforce_alignment(&self).then_some(align);
self.check_and_deref_ptr(
ptr,
size,
align,
CheckInAllocMsg::MemoryAccessTest,
|alloc_id, offset, ptr| {
|alloc_id, offset, tag| {
let (size, align) =
self.get_alloc_size_and_align(alloc_id, AllocCheck::Dereferenceable)?;
Ok((size, align, (alloc_id, offset, ptr)))
Ok((size, align, (alloc_id, offset, tag)))
},
)
}
Expand Down Expand Up @@ -401,11 +400,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
size: Size,
align: Option<Align>,
msg: CheckInAllocMsg,
alloc_size: impl FnOnce(
AllocId,
Size,
Pointer<M::PointerTag>,
) -> InterpResult<'tcx, (Size, Align, T)>,
alloc_size: impl FnOnce(AllocId, Size, M::TagExtra) -> InterpResult<'tcx, (Size, Align, T)>,
) -> InterpResult<'tcx, Option<T>> {
fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
if offset % align.bytes() == 0 {
Expand Down Expand Up @@ -433,8 +428,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
}
None
}
Ok((alloc_id, offset, ptr)) => {
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, ptr)?;
Ok((alloc_id, offset, tag)) => {
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, tag)?;
// Test bounds. This also ensures non-null.
// It is sufficient to check this for the end pointer. Also check for overflow!
if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
Expand All @@ -450,10 +445,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// we want the error to be about the bounds.
if let Some(align) = align {
if M::force_int_for_alignment_check(self) {
let addr = Scalar::from_pointer(ptr, &self.tcx)
.to_machine_usize(&self.tcx)
.expect("ptr-to-int cast for align check should never fail");
check_offset_align(addr, align)?;
// `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true.
check_offset_align(ptr.addr().bytes(), align)?;
} else {
// Check allocation alignment and offset alignment.
if alloc_align.bytes() < align.bytes() {
Expand Down Expand Up @@ -569,14 +562,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
size,
align,
CheckInAllocMsg::MemoryAccessTest,
|alloc_id, offset, ptr| {
|alloc_id, offset, tag| {
let alloc = self.get_alloc_raw(alloc_id)?;
Ok((alloc.size(), alloc.align, (alloc_id, offset, ptr, alloc)))
Ok((alloc.size(), alloc.align, (alloc_id, offset, tag, alloc)))
},
)?;
if let Some((alloc_id, offset, ptr, alloc)) = ptr_and_alloc {
if let Some((alloc_id, offset, tag, alloc)) = ptr_and_alloc {
let range = alloc_range(offset, size);
M::memory_read(*self.tcx, &self.machine, &alloc.extra, ptr.provenance, range)?;
M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, tag), range)?;
Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id }))
} else {
// Even in this branch we have to be sure that we actually access the allocation, in
Expand Down Expand Up @@ -631,13 +624,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
align: Align,
) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::PointerTag, M::AllocExtra>>> {
let parts = self.get_ptr_access(ptr, size, align)?;
if let Some((alloc_id, offset, ptr)) = parts {
if let Some((alloc_id, offset, tag)) = parts {
let tcx = *self.tcx;
// FIXME: can we somehow avoid looking up the allocation twice here?
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
let range = alloc_range(offset, size);
M::memory_written(tcx, machine, &mut alloc.extra, ptr.provenance, range)?;
M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, tag), range)?;
Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id }))
} else {
Ok(None)
Expand Down Expand Up @@ -732,7 +725,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::PointerTag>>,
) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
trace!("get_fn({:?})", ptr);
let (alloc_id, offset, _ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
if offset.bytes() != 0 {
throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset)))
}
Expand Down Expand Up @@ -1009,16 +1002,16 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// and once below to get the underlying `&[mut] Allocation`.

// Source alloc preparations and access hooks.
let Some((src_alloc_id, src_offset, src)) = src_parts else {
let Some((src_alloc_id, src_offset, src_tag)) = src_parts else {
// Zero-sized *source*, that means dst is also zero-sized and we have nothing to do.
return Ok(());
};
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
let src_range = alloc_range(src_offset, size);
M::memory_read(*tcx, &self.machine, &src_alloc.extra, src.provenance, src_range)?;
M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_tag), src_range)?;
// We need the `dest` ptr for the next operation, so we get it now.
// We already did the source checks and called the hooks so we are good to return early.
let Some((dest_alloc_id, dest_offset, dest)) = dest_parts else {
let Some((dest_alloc_id, dest_offset, dest_tag)) = dest_parts else {
// Zero-sized *destination*.
return Ok(());
};
Expand All @@ -1040,7 +1033,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// Destination alloc preparations and access hooks.
let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?;
let dest_range = alloc_range(dest_offset, size * num_copies);
M::memory_written(*tcx, extra, &mut dest_alloc.extra, dest.provenance, dest_range)?;
M::memory_written(
*tcx,
extra,
&mut dest_alloc.extra,
(dest_alloc_id, dest_tag),
dest_range,
)?;
let dest_bytes = dest_alloc
.get_bytes_mut_ptr(&tcx, dest_range)
.map_err(|e| e.to_interp_error(dest_alloc_id))?
Expand Down Expand Up @@ -1159,11 +1158,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
pub fn ptr_try_get_alloc_id(
&self,
ptr: Pointer<Option<M::PointerTag>>,
) -> Result<(AllocId, Size, Pointer<M::PointerTag>), u64> {
) -> Result<(AllocId, Size, M::TagExtra), u64> {
match ptr.into_pointer_or_addr() {
Ok(ptr) => {
let (alloc_id, offset) = M::ptr_get_alloc(self, ptr);
Ok((alloc_id, offset, ptr))
let (alloc_id, offset, extra) = M::ptr_get_alloc(self, ptr);
Ok((alloc_id, offset, extra))
}
Err(addr) => Err(addr.bytes()),
}
Expand All @@ -1174,7 +1173,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
pub fn ptr_get_alloc_id(
&self,
ptr: Pointer<Option<M::PointerTag>>,
) -> InterpResult<'tcx, (AllocId, Size, Pointer<M::PointerTag>)> {
) -> InterpResult<'tcx, (AllocId, Size, M::TagExtra)> {
self.ptr_try_get_alloc_id(ptr).map_err(|offset| {
err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into()
})
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
if let Some(ref mut ref_tracking) = self.ref_tracking {
// Proceed recursively even for ZST, no reason to skip them!
// `!` is a ZST and we want to validate it.
if let Ok((alloc_id, _offset, _ptr)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
if let Ok((alloc_id, _offset, _tag)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
// Special handling for pointers to statics (irrespective of their type).
let alloc_kind = self.ecx.tcx.get_global_alloc(alloc_id);
if let Some(GlobalAlloc::Static(did)) = alloc_kind {
Expand Down
17 changes: 17 additions & 0 deletions compiler/rustc_middle/src/mir/interpret/pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ pub struct Pointer<Tag = AllocId> {
}

static_assert_size!(Pointer, 16);
// `Option<Tag>` pointers are also passed around quite a bit
// (but not stored in permanent machine state).
static_assert_size!(Pointer<Option<AllocId>>, 16);

// We want the `Debug` output to be readable as it is used by `derive(Debug)` for
// all the Miri types.
Expand Down Expand Up @@ -198,12 +201,26 @@ impl<Tag> From<Pointer<Tag>> for Pointer<Option<Tag>> {
}

impl<Tag> Pointer<Option<Tag>> {
/// Convert this pointer that *might* have a tag into a pointer that *definitely* has a tag, or
/// an absolute address.
///
/// This is rarely what you want; call `ptr_try_get_alloc_id` instead.
pub fn into_pointer_or_addr(self) -> Result<Pointer<Tag>, Size> {
match self.provenance {
Some(tag) => Ok(Pointer::new(tag, self.offset)),
None => Err(self.offset),
}
}

/// Returns the absolute address the pointer points to.
/// Only works if Tag::OFFSET_IS_ADDR is true!
pub fn addr(self) -> Size
where
Tag: Provenance,
{
assert!(Tag::OFFSET_IS_ADDR);
self.offset
}
}

impl<Tag> Pointer<Option<Tag>> {
Expand Down