Skip to content

Commit 3015c54

Browse files
committed
Port from Rust 1.44.0, faster short writes
1 parent 146ba21 commit 3015c54

File tree

2 files changed: 88 additions and 64 deletions.

src/sip.rs

Lines changed: 44 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ use core::hash;
1515
use core::marker::PhantomData;
1616
use core::mem;
1717
use core::ptr;
18-
use core::slice;
1918

2019
/// An implementation of SipHash 1-3.
2120
///
@@ -98,7 +97,7 @@ macro_rules! compress {
9897
}};
9998
}
10099

101-
/// Load an integer of the desired type from a byte stream, in LE order. Uses
100+
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
102101
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
103102
/// to load it from a possibly unaligned address.
104103
///
@@ -116,7 +115,9 @@ macro_rules! load_int_le {
116115
}};
117116
}
118117

119-
/// Load an u64 using up to 7 bytes of a byte slice.
118+
/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
119+
/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
120+
/// sizes and avoid calling `memcpy`, which is good for speed.
120121
///
121122
/// Unsafe because: unchecked indexing at start..start+len
122123
#[inline]
@@ -232,35 +233,41 @@ impl<S: Sip> Hasher<S> {
232233
self.ntail = 0;
233234
}
234235

235-
// Specialized write function that is only valid for buffers with len <= 8.
236-
// It's used to force inlining of write_u8 and write_usize, those would normally be inlined
237-
// except for composite types (that includes slices and str hashing because of delimiter).
238-
// Without this extra push the compiler is very reluctant to inline delimiter writes,
239-
// degrading performance substantially for the most common use cases.
240-
#[inline(always)]
241-
fn short_write(&mut self, msg: &[u8]) {
242-
debug_assert!(msg.len() <= 8);
243-
let length = msg.len();
244-
self.length += length;
236+
// A specialized write function for values with size <= 8.
237+
//
238+
// The hashing of multi-byte integers depends on endianness. E.g.:
239+
// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
240+
// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
241+
//
242+
// This function does the right thing for little-endian hardware. On
243+
// big-endian hardware `x` must be byte-swapped first to give the right
244+
// behaviour. After any byte-swapping, the input must be zero-extended to
245+
// 64 bits. The caller is responsible for the byte-swapping and
246+
// zero-extension.
247+
#[inline]
248+
fn short_write<T>(&mut self, _x: T, x: u64) {
249+
let size = mem::size_of::<T>();
250+
self.length += size;
245251

252+
// The original number must be zero-extended, not sign-extended.
253+
debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
254+
255+
// The number of bytes needed to fill `self.tail`.
246256
let needed = 8 - self.ntail;
247-
let fill = cmp::min(length, needed);
248-
if fill == 8 {
249-
self.tail = unsafe { load_int_le!(msg, 0, u64) };
250-
} else {
251-
self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
252-
if length < needed {
253-
self.ntail += length;
254-
return;
255-
}
257+
258+
self.tail |= x << (8 * self.ntail);
259+
if size < needed {
260+
self.ntail += size;
261+
return;
256262
}
263+
264+
// `self.tail` is full, process it.
257265
self.state.v3 ^= self.tail;
258266
S::c_rounds(&mut self.state);
259267
self.state.v0 ^= self.tail;
260268

261-
// Buffered tail is now flushed, process new input.
262-
self.ntail = length - needed;
263-
self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
269+
self.ntail = size - needed;
270+
self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
264271
}
265272
}
266273

@@ -301,19 +308,24 @@ impl hash::Hasher for SipHasher24 {
301308
}
302309

303310
impl<S: Sip> hash::Hasher for Hasher<S> {
304-
// see short_write comment for explanation
305311
#[inline]
306312
fn write_usize(&mut self, i: usize) {
307-
let bytes = unsafe {
308-
slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
309-
};
310-
self.short_write(bytes);
313+
self.short_write(i, i.to_le() as u64);
311314
}
312315

313-
// see short_write comment for explanation
314316
#[inline]
315317
fn write_u8(&mut self, i: u8) {
316-
self.short_write(&[i]);
318+
self.short_write(i, i as u64);
319+
}
320+
321+
#[inline]
322+
fn write_u32(&mut self, i: u32) {
323+
self.short_write(i, i.to_le() as u64);
324+
}
325+
326+
#[inline]
327+
fn write_u64(&mut self, i: u64) {
328+
self.short_write(i, i.to_le() as u64);
317329
}
318330

319331
#[inline]

src/sip128.rs

Lines changed: 44 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ use core::hash;
1515
use core::marker::PhantomData;
1616
use core::mem;
1717
use core::ptr;
18-
use core::slice;
1918

2019
/// A 128-bit (2x64) hash output
2120
#[derive(Debug, Clone, Copy, Default)]
@@ -114,7 +113,7 @@ macro_rules! compress {
114113
}};
115114
}
116115

117-
/// Load an integer of the desired type from a byte stream, in LE order. Uses
116+
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
118117
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
119118
/// to load it from a possibly unaligned address.
120119
///
@@ -132,7 +131,9 @@ macro_rules! load_int_le {
132131
}};
133132
}
134133

135-
/// Load an u64 using up to 7 bytes of a byte slice.
134+
/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
135+
/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
136+
/// sizes and avoid calling `memcpy`, which is good for speed.
136137
///
137138
/// Unsafe because: unchecked indexing at start..start+len
138139
#[inline]
@@ -277,35 +278,41 @@ impl<S: Sip> Hasher<S> {
277278
self.ntail = 0;
278279
}
279280

280-
// Specialized write function that is only valid for buffers with len <= 8.
281-
// It's used to force inlining of write_u8 and write_usize, those would normally be inlined
282-
// except for composite types (that includes slices and str hashing because of delimiter).
283-
// Without this extra push the compiler is very reluctant to inline delimiter writes,
284-
// degrading performance substantially for the most common use cases.
285-
#[inline(always)]
286-
fn short_write(&mut self, msg: &[u8]) {
287-
debug_assert!(msg.len() <= 8);
288-
let length = msg.len();
289-
self.length += length;
281+
// A specialized write function for values with size <= 8.
282+
//
283+
// The hashing of multi-byte integers depends on endianness. E.g.:
284+
// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
285+
// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
286+
//
287+
// This function does the right thing for little-endian hardware. On
288+
// big-endian hardware `x` must be byte-swapped first to give the right
289+
// behaviour. After any byte-swapping, the input must be zero-extended to
290+
// 64 bits. The caller is responsible for the byte-swapping and
291+
// zero-extension.
292+
#[inline]
293+
fn short_write<T>(&mut self, _x: T, x: u64) {
294+
let size = mem::size_of::<T>();
295+
self.length += size;
290296

297+
// The original number must be zero-extended, not sign-extended.
298+
debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
299+
300+
// The number of bytes needed to fill `self.tail`.
291301
let needed = 8 - self.ntail;
292-
let fill = cmp::min(length, needed);
293-
if fill == 8 {
294-
self.tail = unsafe { load_int_le!(msg, 0, u64) };
295-
} else {
296-
self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
297-
if length < needed {
298-
self.ntail += length;
299-
return;
300-
}
302+
303+
self.tail |= x << (8 * self.ntail);
304+
if size < needed {
305+
self.ntail += size;
306+
return;
301307
}
308+
309+
// `self.tail` is full, process it.
302310
self.state.v3 ^= self.tail;
303311
S::c_rounds(&mut self.state);
304312
self.state.v0 ^= self.tail;
305313

306-
// Buffered tail is now flushed, process new input.
307-
self.ntail = length - needed;
308-
self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
314+
self.ntail = size - needed;
315+
self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
309316
}
310317
}
311318

@@ -369,19 +376,24 @@ impl hash::Hasher for SipHasher24 {
369376
}
370377

371378
impl<S: Sip> hash::Hasher for Hasher<S> {
372-
// see short_write comment for explanation
373379
#[inline]
374380
fn write_usize(&mut self, i: usize) {
375-
let bytes = unsafe {
376-
slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
377-
};
378-
self.short_write(bytes);
381+
self.short_write(i, i.to_le() as u64);
379382
}
380383

381-
// see short_write comment for explanation
382384
#[inline]
383385
fn write_u8(&mut self, i: u8) {
384-
self.short_write(&[i]);
386+
self.short_write(i, i as u64);
387+
}
388+
389+
#[inline]
390+
fn write_u32(&mut self, i: u32) {
391+
self.short_write(i, i.to_le() as u64);
392+
}
393+
394+
#[inline]
395+
fn write_u64(&mut self, i: u64) {
396+
self.short_write(i, i.to_le() as u64);
385397
}
386398

387399
#[inline]

0 commit comments

Comments
 (0)