Use const generics in SipHasher128's short_write #93671

Merged
1 commit merged on Feb 12, 2022
61 changes: 29 additions & 32 deletions compiler/rustc_data_structures/src/sip128.rs
@@ -202,28 +202,26 @@ impl SipHasher128 {
         hasher
     }
 
-    // A specialized write function for values with size <= 8.
     #[inline]
-    fn short_write<T>(&mut self, x: T) {
-        let size = mem::size_of::<T>();
+    pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
         let nbuf = self.nbuf;
-        debug_assert!(size <= 8);
+        debug_assert!(LEN <= 8);
         debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
+        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
 
-        if nbuf + size < BUFFER_SIZE {
+        if nbuf + LEN < BUFFER_SIZE {
             unsafe {
                 // The memcpy call is optimized away because the size is known.
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-                ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
+                ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
             }
 
-            self.nbuf = nbuf + size;
+            self.nbuf = nbuf + LEN;
 
             return;
         }
 
-        unsafe { self.short_write_process_buffer(x) }
+        unsafe { self.short_write_process_buffer(bytes) }
     }
 
     // A specialized write function for values with size <= 8 that should only
@@ -233,18 +231,17 @@ impl SipHasher128 {
     // `self.nbuf` must cause `self.buf` to become fully initialized (and not
     // overflow) if it wasn't already.
     #[inline(never)]
-    unsafe fn short_write_process_buffer<T>(&mut self, x: T) {
-        let size = mem::size_of::<T>();
+    unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
         let nbuf = self.nbuf;
-        debug_assert!(size <= 8);
+        debug_assert!(LEN <= 8);
         debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + size >= BUFFER_SIZE);
-        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
+        debug_assert!(nbuf + LEN >= BUFFER_SIZE);
+        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
 
         // Copy first part of input into end of buffer, possibly into spill
         // element. The memcpy call is optimized away because the size is known.
         let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-        ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
+        ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
 
         // Process buffer.
         for i in 0..BUFFER_CAPACITY {
@@ -254,17 +251,17 @@
             self.state.v0 ^= elem;
         }
 
-        // Copy remaining input into start of buffer by copying size - 1
-        // elements from spill (at most size - 1 bytes could have overflowed
+        // Copy remaining input into start of buffer by copying LEN - 1
+        // elements from spill (at most LEN - 1 bytes could have overflowed
         // into the spill). The memcpy call is optimized away because the size
-        // is known. And the whole copy is optimized away for size == 1.
+        // is known. And the whole copy is optimized away for LEN == 1.
         let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
-        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, size - 1);
+        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, LEN - 1);
 
         // This function should only be called when the write fills the buffer.
-        // Therefore, when size == 1, the new `self.nbuf` must be zero. The size
-        // is statically known, so the branch is optimized away.
-        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE };
+        // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
+        // LEN is statically known, so the branch is optimized away.
+        self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
         self.processed += BUFFER_SIZE;
     }
 
@@ -412,52 +409,52 @@
 impl Hasher for SipHasher128 {
     #[inline]
     fn write_u8(&mut self, i: u8) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }
 
     #[inline]
     fn write_u16(&mut self, i: u16) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }
 
     #[inline]
     fn write_u32(&mut self, i: u32) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }
 
     #[inline]
     fn write_u64(&mut self, i: u64) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }
 
     #[inline]
     fn write_usize(&mut self, i: usize) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }
 
     #[inline]
     fn write_i8(&mut self, i: i8) {
-        self.short_write(i as u8);
+        self.short_write((i as u8).to_ne_bytes());
    }
 
     #[inline]
     fn write_i16(&mut self, i: i16) {
-        self.short_write(i as u16);
+        self.short_write((i as u16).to_ne_bytes());
     }
 
     #[inline]
     fn write_i32(&mut self, i: i32) {
-        self.short_write(i as u32);
+        self.short_write((i as u32).to_ne_bytes());
     }
 
     #[inline]
     fn write_i64(&mut self, i: i64) {
-        self.short_write(i as u64);
+        self.short_write((i as u64).to_ne_bytes());
     }
 
     #[inline]
     fn write_isize(&mut self, i: isize) {
-        self.short_write(i as usize);
+        self.short_write((i as usize).to_ne_bytes());
     }
 
     #[inline]
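
For illustration only (not part of the diff above): the sketch below shows the call pattern the new API relies on. Each `Hasher` method converts its integer to a fixed-size byte array with `to_ne_bytes()`, and the const parameter `LEN` is inferred from that array's length, so the copy size is a compile-time constant at every call site, just as `mem::size_of::<T>()` was in the old generic version, but without reinterpreting a `&T` as a raw byte pointer. `ToyBuffer` and its `BUFFER_SIZE` are invented stand-ins for this sketch; the real `SipHasher128` additionally handles the spill element and buffer processing shown in the hunks above.

// Standalone sketch (not from the PR): a toy buffer type showing how `LEN`
// is inferred from the caller's `to_ne_bytes()` array at compile time.
const BUFFER_SIZE: usize = 64; // invented value for the sketch

struct ToyBuffer {
    buf: [u8; BUFFER_SIZE],
    nbuf: usize,
}

impl ToyBuffer {
    fn new() -> Self {
        ToyBuffer { buf: [0; BUFFER_SIZE], nbuf: 0 }
    }

    // `LEN` is a compile-time constant supplied by the caller's array type,
    // so the copy below lowers to a fixed-size move, as in the real hasher.
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        debug_assert!(LEN <= 8);
        // The real implementation spills into an extra buffer element here;
        // the toy version just refuses to overflow.
        assert!(self.nbuf + LEN <= BUFFER_SIZE, "toy sketch: no spill handling");
        self.buf[self.nbuf..self.nbuf + LEN].copy_from_slice(&bytes);
        self.nbuf += LEN;
    }
}

fn main() {
    let mut b = ToyBuffer::new();
    b.short_write(0xABu8.to_ne_bytes()); // LEN = 1
    b.short_write(0x1234u16.to_ne_bytes()); // LEN = 2
    b.short_write(0xDEAD_BEEFu32.to_ne_bytes()); // LEN = 4
    b.short_write(42u64.to_ne_bytes()); // LEN = 8
    assert_eq!(b.nbuf, 1 + 2 + 4 + 8);
}
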
24 changes: 10 additions & 14 deletions compiler/rustc_data_structures/src/stable_hasher.rs
@@ -80,30 +80,30 @@ impl Hasher for StableHasher {
 
     #[inline]
     fn write_u16(&mut self, i: u16) {
-        self.state.write_u16(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }
 
     #[inline]
     fn write_u32(&mut self, i: u32) {
-        self.state.write_u32(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }
 
     #[inline]
     fn write_u64(&mut self, i: u64) {
-        self.state.write_u64(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }
 
     #[inline]
     fn write_u128(&mut self, i: u128) {
-        self.state.write_u128(i.to_le());
+        self.state.write(&i.to_le_bytes());
     }
 
     #[inline]
     fn write_usize(&mut self, i: usize) {
         // Always treat usize as u64 so we get the same results on 32 and 64 bit
         // platforms. This is important for symbol hashes when cross compiling,
         // for example.
-        self.state.write_u64((i as u64).to_le());
+        self.state.short_write((i as u64).to_le_bytes());
     }
 
     #[inline]
@@ -113,22 +113,22 @@ impl Hasher for StableHasher {
 
     #[inline]
     fn write_i16(&mut self, i: i16) {
-        self.state.write_i16(i.to_le());
+        self.state.short_write((i as u16).to_le_bytes());
     }
 
     #[inline]
     fn write_i32(&mut self, i: i32) {
-        self.state.write_i32(i.to_le());
+        self.state.short_write((i as u32).to_le_bytes());
     }
 
     #[inline]
     fn write_i64(&mut self, i: i64) {
-        self.state.write_i64(i.to_le());
+        self.state.short_write((i as u64).to_le_bytes());
     }
 
     #[inline]
     fn write_i128(&mut self, i: i128) {
-        self.state.write_i128(i.to_le());
+        self.state.write(&(i as u128).to_le_bytes());
     }
 
     #[inline]
@@ -144,7 +144,7 @@
         #[inline(never)]
         fn hash_value(state: &mut SipHasher128, value: u64) {
             state.write_u8(0xFF);
-            state.write_u64(value.to_le());
+            state.short_write(value.to_le_bytes());
         }
 
         // `isize` values often seem to have a small (positive) numeric value in practice.
@@ -161,10 +161,6 @@
         // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
         // `isize`s that fit within a different amount of bytes, they should always produce a different
         // byte stream for the hasher.
-        //
-        // To ensure that this optimization hashes the exact same bytes on both little-endian and
-        // big-endian architectures, we compare the value with 0xFF before we convert the number
-        // into a unified representation (little-endian).
         if value < 0xFF {
             self.state.write_u8(value as u8);
         } else {
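
A side note on the `write_isize` fast path that the trailing comments describe (illustration only; `encode_isize_for_hashing` is an invented name, not a function in rustc): values below 0xFF are fed to the hasher as a single byte, while anything else is prefixed with a 0xFF marker byte followed by the full 8 little-endian bytes, so the one-byte and nine-byte encodings can never produce the same byte stream.

// Standalone sketch (not from the PR): the length-prefixed encoding that
// `write_isize` keeps using; `encode_isize_for_hashing` is an invented name.
fn encode_isize_for_hashing(i: isize) -> Vec<u8> {
    // Widen to u64 first so 32- and 64-bit targets feed identical bytes to
    // the hasher, matching the usize handling shown in the diff above.
    let value = i as u64;
    if value < 0xFF {
        // Common case: small values are hashed as a single byte.
        vec![value as u8]
    } else {
        // Everything else gets a 0xFF marker followed by all 8 bytes in
        // little-endian order, so it can never collide with the 1-byte form.
        let mut out = vec![0xFF];
        out.extend_from_slice(&value.to_le_bytes());
        out
    }
}

fn main() {
    assert_eq!(encode_isize_for_hashing(3), vec![3]);
    assert_eq!(
        encode_isize_for_hashing(0xFF),
        vec![0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0],
    );
    // The single-byte form is always < 0xFF, so the two encodings are disjoint.
    assert!(encode_isize_for_hashing(0xFE)[0] < 0xFF);
}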