Skip to content

Commit 006e467

Browse files
committed
Optimize core::ptr::align_offset
- Remove redundant masking from mod_pow_2_inv; the call site already takes care of this (after the mul). According to benchmarking, the best performance is achieved when this masking is present in the fast branch (for small moduli), but absent from the slow branch.
1 parent fe1ac25 commit 006e467

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

src/libcore/ptr/mod.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,9 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
10511051
/// * The requested modulo `m` is a power-of-two, so `mpow` can be an argument;
10521052
/// * `x < m`; (if `x >= m`, pass in `x % m` instead)
10531053
///
1054+
/// It also sometimes leaves reducing the result modulo `m` to the caller, so the result may be
1055+
/// larger than `m`.
1056+
///
10541057
/// Implementation of this function shall not panic. Ever.
10551058
#[inline]
10561059
fn mod_pow_2_inv(x: usize, mpow: usize, mask: usize) -> usize {
@@ -1067,6 +1070,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
10671070
let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
10681071

10691072
if mpow <= INV_TABLE_MOD_POW {
1073+
// This is explicitly left here, as benchmarking shows this improves performance.
10701074
table_inverse & mask
10711075
} else {
10721076
// We iterate "up" using the following formula:
@@ -1089,7 +1093,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
10891093
// anyway.
10901094
inverse = inverse.wrapping_mul(2usize.wrapping_sub(x.wrapping_mul(inverse)));
10911095
if going_modpow >= mpow {
1092-
return inverse & mask;
1096+
return inverse;
10931097
}
10941098
going_modpow <<= 1;
10951099
}
@@ -1147,6 +1151,8 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
11471151
let a2minus1 = a2.wrapping_sub(1);
11481152
let s2 = smoda >> gcdpow;
11491153
let minusp2 = a2.wrapping_sub(pmoda >> gcdpow);
1154+
// mod_pow_2_inv returns a result which may be outside the range of `a'`, but it's fine to
1155+
// multiply modulo `usize::max_value() + 1` (wrapping) here, and then take modulo `a'` afterwards.
11501156
return (minusp2.wrapping_mul(mod_pow_2_inv(s2, apow.wrapping_sub(gcdpow), a2minus1)))
11511157
& a2minus1;
11521158
}

0 commit comments

Comments
 (0)