From b75b67fa4a77deef9e4436ae342fd07030853cb3 Mon Sep 17 00:00:00 2001
From: Ben Kimock <kimockb@gmail.com>
Date: Sun, 16 Feb 2025 16:16:41 -0500
Subject: [PATCH] Add a .bss-like scheme for encoded const allocs

---
 compiler/rustc_abi/src/lib.rs                 |   4 +-
 .../src/mir/interpret/allocation.rs           | 117 +++++++++++++++++-
 2 files changed, 117 insertions(+), 4 deletions(-)

diff --git a/compiler/rustc_abi/src/lib.rs b/compiler/rustc_abi/src/lib.rs
index dbb4bed5cdd99..bf7508e0c4b02 100644
--- a/compiler/rustc_abi/src/lib.rs
+++ b/compiler/rustc_abi/src/lib.rs
@@ -791,7 +791,7 @@ impl Align {
     }
 
     #[inline]
-    pub fn bytes(self) -> u64 {
+    pub const fn bytes(self) -> u64 {
         1 << self.pow2
     }
 
@@ -801,7 +801,7 @@ impl Align {
     }
 
     #[inline]
-    pub fn bits(self) -> u64 {
+    pub const fn bits(self) -> u64 {
         self.bytes() * 8
     }
 
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 95bc9b71fe0ad..845640a1772af 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -15,7 +15,9 @@ use provenance_map::*;
 use rustc_abi::{Align, HasDataLayout, Size};
 use rustc_ast::Mutability;
 use rustc_data_structures::intern::Interned;
-use rustc_macros::{HashStable, TyDecodable, TyEncodable};
+use rustc_macros::HashStable;
+use rustc_serialize::{Decodable, Encodable};
+use rustc_type_ir::{TyDecoder, TyEncoder};
 
 use super::{
     AllocId, BadBytesAccess, CtfeProvenance, InterpErrorKind, InterpResult, Pointer,
@@ -77,7 +79,7 @@ impl AllocBytes for Box<[u8]> {
 /// module provides higher-level access.
 // Note: for performance reasons when interning, some of the `Allocation` fields can be partially
 // hashed. (see the `Hash` impl below for more details), so the impl is not derived.
-#[derive(Clone, Eq, PartialEq, TyEncodable, TyDecodable)]
+#[derive(Clone, Eq, PartialEq)]
 #[derive(HashStable)]
 pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
     /// The actual bytes of the allocation.
@@ -101,6 +103,117 @@ pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
     pub extra: Extra,
 }
 
+/// Packs an allocation's `Align`, `Mutability`, and an "all bytes are zero" flag into one byte.
+///
+/// The alignment's log2 always fits in 6 bits, which leaves the two high bits for the flags.
+struct AllocFlags {
+    align: Align,
+    mutability: Mutability,
+    all_zero: bool,
+}
+
+impl<E: TyEncoder> Encodable<E> for AllocFlags {
+    fn encode(&self, encoder: &mut E) {
+        // Make sure Align::MAX can be stored with the high 2 bits unset.
+        const {
+            let max_supported_align_repr = u8::MAX >> 2;
+            let max_supported_align = 1 << max_supported_align_repr;
+            assert!(Align::MAX.bytes() <= max_supported_align)
+        }
+
+        let mut flags = self.align.bytes().trailing_zeros() as u8;
+        flags |= match self.mutability {
+            Mutability::Not => 0,
+            Mutability::Mut => 1 << 6,
+        };
+        flags |= (self.all_zero as u8) << 7;
+        flags.encode(encoder);
+    }
+}
+
+impl<D: TyDecoder> Decodable<D> for AllocFlags {
+    fn decode(decoder: &mut D) -> Self {
+        let flags: u8 = Decodable::decode(decoder);
+        let align = flags & 0b0011_1111;
+        let mutability = flags & 0b0100_0000;
+        let all_zero = flags & 0b1000_0000;
+
+        let align = Align::from_bytes(1 << align).unwrap();
+        let mutability = match mutability {
+            0 => Mutability::Not,
+            _ => Mutability::Mut,
+        };
+        let all_zero = all_zero > 0;
+
+        AllocFlags { align, mutability, all_zero }
+    }
+}
+
+/// Efficiently detect whether a slice of `u8` is all zero.
+///
+/// This is used in encoding of [`Allocation`] to special-case all-zero allocations. It is only
+/// optimized a little, because for many allocations the encoding of the actual bytes does not
+/// dominate runtime.
+#[inline]
+fn all_zero(buf: &[u8]) -> bool {
+    // In the empty case we wouldn't encode any contents even without this system where we
+    // special-case allocations whose contents are all 0. We can return anything in the empty case.
+    if buf.is_empty() {
+        return true;
+    }
+    // Just fast-rejecting based on the first element significantly reduces the amount that we end
+    // up walking the whole array.
+    if buf[0] != 0 {
+        return false;
+    }
+
+    // This strategy of combining all slice elements with & or | is unbeatable for the large
+    // all-zero case because it is so well-understood by autovectorization.
+    buf.iter().fold(true, |acc, b| acc & (*b == 0))
+}
+
+/// Custom encoder for [`Allocation`] to more efficiently represent the case where all bytes are 0.
+impl<Prov: Provenance, Extra, Bytes, E: TyEncoder> Encodable<E> for Allocation<Prov, Extra, Bytes>
+where
+    Bytes: AllocBytes,
+    ProvenanceMap<Prov>: Encodable<E>,
+    Extra: Encodable<E>,
+{
+    fn encode(&self, encoder: &mut E) {
+        let all_zero = all_zero(&self.bytes);
+        AllocFlags { align: self.align, mutability: self.mutability, all_zero }.encode(encoder);
+
+        encoder.emit_usize(self.bytes.len());
+        if !all_zero {
+            encoder.emit_raw_bytes(&self.bytes);
+        }
+        self.provenance.encode(encoder);
+        self.init_mask.encode(encoder);
+        self.extra.encode(encoder);
+    }
+}
+
+impl<Prov: Provenance, Extra, Bytes, D: TyDecoder> Decodable<D> for Allocation<Prov, Extra, Bytes>
+where
+    Bytes: AllocBytes,
+    ProvenanceMap<Prov>: Decodable<D>,
+    Extra: Decodable<D>,
+{
+    fn decode(decoder: &mut D) -> Self {
+        let AllocFlags { align, mutability, all_zero } = Decodable::decode(decoder);
+
+        let len = decoder.read_usize();
+        let bytes = if all_zero { vec![0u8; len] } else { decoder.read_raw_bytes(len).to_vec() };
+        let bytes = Bytes::from_bytes(bytes, align);
+
+        let provenance = Decodable::decode(decoder);
+        let init_mask = Decodable::decode(decoder);
+        let extra = Decodable::decode(decoder);
+
+        Self { bytes, provenance, init_mask, align, mutability, extra }
+    }
+}
+
 /// This is the maximum size we will hash at a time, when interning an `Allocation` and its
 /// `InitMask`. Note, we hash that amount of bytes twice: at the start, and at the end of a buffer.
 /// Used when these two structures are large: we only partially hash the larger fields in that
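
Note on the flag byte: `AllocFlags` packs log2(align) into the low 6 bits, mutability
into bit 6, and the all-zero marker into bit 7; the `const` block guards the invariant
that an alignment's log2 leaves the top two bits free. A minimal standalone sketch of
that round-trip, using plain stand-in types rather than rustc's `Align` and
`Mutability` (the names `Flags`, `pack`, and `unpack` are hypothetical, for
illustration only):

    #[derive(Debug, PartialEq)]
    struct Flags {
        align_log2: u8, // log2 of the alignment; must fit in the low 6 bits
        mutable: bool,
        all_zero: bool,
    }

    fn pack(f: &Flags) -> u8 {
        // Low 6 bits: log2(align). Bit 6: mutability. Bit 7: all-zero marker.
        assert!(f.align_log2 < 1 << 6);
        f.align_log2 | ((f.mutable as u8) << 6) | ((f.all_zero as u8) << 7)
    }

    fn unpack(byte: u8) -> Flags {
        Flags {
            align_log2: byte & 0b0011_1111,
            mutable: byte & 0b0100_0000 != 0,
            all_zero: byte & 0b1000_0000 != 0,
        }
    }

    fn main() {
        let f = Flags { align_log2: 3, mutable: false, all_zero: true }; // align = 8
        let byte = pack(&f);
        assert_eq!(byte, 0b1000_0011);
        assert_eq!(unpack(byte), f);
    }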
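
Note on the ".bss-like" scheme itself: an all-zero allocation is stored as its flag
byte plus a length, and its bytes are rematerialized with `vec![0u8; len]` on decode,
just as .bss sections store only a size. A self-contained sketch of the same idea
against a plain `Vec<u8>` instead of the compiler's encoder (the `encode`, `decode`,
and `all_zero` helpers here are illustrative stand-ins, not the patch's API):

    fn all_zero(buf: &[u8]) -> bool {
        // Fast-reject on the first byte, then a branchless `&` fold that
        // autovectorizes well for the large all-zero case.
        if buf.is_empty() {
            return true;
        }
        if buf[0] != 0 {
            return false;
        }
        buf.iter().fold(true, |acc, b| acc & (*b == 0))
    }

    fn encode(bytes: &[u8], out: &mut Vec<u8>) {
        let zero = all_zero(bytes);
        out.push(zero as u8); // stand-in for the all_zero bit in AllocFlags
        out.extend_from_slice(&(bytes.len() as u64).to_le_bytes());
        if !zero {
            out.extend_from_slice(bytes); // only non-zero payloads carry their bytes
        }
    }

    fn decode(input: &[u8]) -> Vec<u8> {
        let zero = input[0] != 0;
        let len = u64::from_le_bytes(input[1..9].try_into().unwrap()) as usize;
        if zero { vec![0u8; len] } else { input[9..9 + len].to_vec() }
    }

    fn main() {
        let mut out = Vec::new();
        encode(&[0u8; 4096], &mut out);
        // 4096 zero bytes shrink to a 9-byte record: 1 flag byte + 8 length bytes.
        assert_eq!(out.len(), 9);
        assert_eq!(decode(&out), vec![0u8; 4096]);
    }

The first-byte fast-reject matters because most non-zero allocations reveal themselves
immediately, so the full fold only runs over buffers that begin with zero.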