Skip to content

Commit b03eb6d

Browse files
tczajkacuviper
authored andcommitted
Improve performance of the basic division algorithm.
We avoid computing a multiple of the divisor (and thus allocating memory) in the division loop. Instead, directly subtract a multiple of the divisors from the dividend. We also compute a more accurate guess for the next digit of the result. The guess is now exact in almost all cases. In those cases where it's not exact, it will be off by exactly 1.
1 parent 091fa96 commit b03eb6d

File tree

3 files changed

+117
-52
lines changed

3 files changed

+117
-52
lines changed

src/algorithms.rs

Lines changed: 102 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,35 @@ pub(crate) fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
266266
}
267267
}
268268

269+
/// Subtract a multiple.
270+
/// a -= b * c
271+
/// Returns a borrow (if a < b then borrow > 0).
272+
fn sub_mul_digit_same_len(a: &mut [BigDigit], b: &[BigDigit], c: BigDigit) -> BigDigit {
273+
debug_assert!(a.len() == b.len());
274+
275+
// carry is between -big_digit::MAX and 0, so to avoid overflow we store
276+
// offset_carry = carry + big_digit::MAX
277+
let mut offset_carry = big_digit::MAX;
278+
279+
for (x, y) in a.iter_mut().zip(b) {
280+
// We want to calculate sum = x - y * c + carry.
281+
// sum >= -(big_digit::MAX * big_digit::MAX) - big_digit::MAX
282+
// sum <= big_digit::MAX
283+
// Offsetting sum by (big_digit::MAX << big_digit::BITS) puts it in DoubleBigDigit range.
284+
let offset_sum = big_digit::to_doublebigdigit(big_digit::MAX, *x)
285+
- big_digit::MAX as DoubleBigDigit
286+
+ offset_carry as DoubleBigDigit
287+
- *y as DoubleBigDigit * c as DoubleBigDigit;
288+
289+
let (new_offset_carry, new_x) = big_digit::from_doublebigdigit(offset_sum);
290+
offset_carry = new_offset_carry;
291+
*x = new_x;
292+
}
293+
294+
// Return the borrow.
295+
big_digit::MAX - offset_carry
296+
}
297+
269298
fn bigint_from_slice(slice: &[BigDigit]) -> BigInt {
270299
BigInt::from(biguint_from_vec(slice.to_vec()))
271300
}
@@ -631,78 +660,99 @@ pub(crate) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
631660
(q, r >> shift)
632661
}
633662

634-
/// an implementation of Knuth, TAOCP vol 2 section 4.3, algorithm D
635-
///
636-
/// # Correctness
637-
///
638-
/// This function requires the following conditions to run correctly and/or effectively
639-
///
640-
/// - `a > b`
641-
/// - `d.data.len() > 1`
642-
/// - `d.data.last().unwrap().leading_zeros() == 0`
663+
/// An implementation of the base division algorithm.
664+
/// Knuth, TAOCP vol 2 section 4.3.1, algorithm D, with an improvement from exercises 19-21.
643665
fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
644-
// The algorithm works by incrementally calculating "guesses", q0, for part of the
645-
// remainder. Once we have any number q0 such that q0 * b <= a, we can set
666+
debug_assert!(
667+
a.data.len() >= b.data.len()
668+
&& b.data.len() > 1
669+
&& b.data.last().unwrap().leading_zeros() == 0
670+
);
671+
672+
// The algorithm works by incrementally calculating "guesses", q0, for the next digit of the
673+
// quotient. Once we have any number q0 such that (q0 << j) * b <= a, we can set
646674
//
647-
// q += q0
648-
// a -= q0 * b
675+
// q += q0 << j
676+
// a -= (q0 << j) * b
649677
//
650678
// and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
651679
//
652-
// q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
653-
// - this should give us a guess that is "close" to the actual quotient, but is possibly
654-
// greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
655-
// until we have a guess such that q0 * b <= a.
680+
// q0, our guess, is calculated by dividing the last three digits of a by the last two digits of
681+
// b - this will give us a guess that is close to the actual quotient, but is possibly greater.
682+
// It can only be greater by 1 and only in rare cases, with probability at most
683+
// 2^-(big_digit::BITS-1) for random a, see TAOCP 4.3.1 exercise 21.
656684
//
685+
// If the quotient turns out to be too large, we adjust it by 1:
686+
// q -= 1 << j
687+
// a += b << j
688+
689+
// a0 stores an additional extra most significant digit of the dividend, not stored in a.
690+
let mut a0 = 0;
691+
692+
// [b1, b0] are the two most significant digits of the divisor. They never change.
693+
let b0 = *b.data.last().unwrap();
694+
let b1 = b.data[b.data.len() - 2];
657695

658-
let bn = *b.data.last().unwrap();
659696
let q_len = a.data.len() - b.data.len() + 1;
660697
let mut q = BigUint {
661698
data: vec![0; q_len],
662699
};
663700

664-
// We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
665-
// sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
666-
// can be bigger).
667-
//
668-
let mut tmp = BigUint {
669-
data: Vec::with_capacity(2),
670-
};
671-
672701
for j in (0..q_len).rev() {
673-
// When calculating our next guess q0, we don't need to consider the digits below j
674-
// + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
675-
// digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
676-
// two numbers will be zero in all digits up to (j + b.data.len() - 1).
677-
let offset = j + b.data.len() - 1;
678-
if offset >= a.data.len() {
679-
continue;
702+
debug_assert!(a.data.len() == b.data.len() + j);
703+
704+
let a1 = *a.data.last().unwrap();
705+
let a2 = a.data[a.data.len() - 2];
706+
707+
// The first q0 estimate is [a1,a0] / b0. It will never be too small, it may be too large
708+
// by at most 2.
709+
let (mut q0, mut r) = if a0 < b0 {
710+
let (q0, r) = div_wide(a0, a1, b0);
711+
(q0, r as DoubleBigDigit)
712+
} else {
713+
debug_assert!(a0 == b0);
714+
// Avoid overflowing q0, we know the quotient fits in BigDigit.
715+
// [a1,a0] = b0 * (1<<BITS - 1) + (a0 + a1)
716+
(big_digit::MAX, a0 as DoubleBigDigit + a1 as DoubleBigDigit)
717+
};
718+
719+
// r = [a1,a0] - q0 * b0
720+
//
721+
// Now we want to compute a more precise estimate [a2,a1,a0] / [b1,b0] which can only be
722+
// less or equal to the current q0.
723+
//
724+
// q0 is too large if:
725+
// [a2,a1,a0] < q0 * [b1,b0]
726+
// (r << BITS) + a2 < q0 * b1
727+
while r <= big_digit::MAX as DoubleBigDigit
728+
&& big_digit::to_doublebigdigit(r as BigDigit, a2)
729+
< q0 as DoubleBigDigit * b1 as DoubleBigDigit
730+
{
731+
q0 -= 1;
732+
r += b0 as DoubleBigDigit;
680733
}
681734

682-
// just avoiding a heap allocation:
683-
let mut a0 = tmp;
684-
a0.data.truncate(0);
685-
a0.data.extend(a.data[offset..].iter().cloned());
686-
687-
// q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
688-
// implicitly at the end, when adding and subtracting to a and q. Not only do we
689-
// save the cost of the shifts, the rest of the arithmetic gets to work with
690-
// smaller numbers.
691-
let (mut q0, _) = div_rem_digit(a0, bn);
692-
let mut prod = b * &q0;
693-
694-
while cmp_slice(&prod.data[..], &a.data[j..]) == Greater {
695-
q0 -= 1u32;
696-
prod -= b;
735+
// q0 is now either the correct quotient digit, or in rare cases 1 too large.
736+
// Subtract (q0 << j) from a. This may overflow, in which case we will have to correct.
737+
738+
let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], &b.data, q0);
739+
if borrow > a0 {
740+
// q0 is too large. We need to add back one multiple of b.
741+
q0 -= 1;
742+
borrow -= __add2(&mut a.data[j..], &b.data);
697743
}
744+
// The top digit of a, stored in a0, has now been zeroed.
745+
debug_assert!(borrow == a0);
698746

699-
add2(&mut q.data[j..], &q0.data[..]);
700-
sub2(&mut a.data[j..], &prod.data[..]);
701-
a.normalize();
747+
q.data[j] = q0;
702748

703-
tmp = q0;
749+
// Pop off the next top digit of a.
750+
a0 = a.data.pop().unwrap();
704751
}
705752

753+
a.data.push(a0);
754+
a.normalize();
755+
706756
debug_assert!(a < *b);
707757

708758
(q.normalized(), a)

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ mod big_digit {
266266
pub(crate) const HALF: BigDigit = (1 << HALF_BITS) - 1;
267267

268268
const LO_MASK: DoubleBigDigit = (1 << BITS) - 1;
269+
pub(crate) const MAX: BigDigit = LO_MASK as BigDigit;
269270

270271
#[inline]
271272
fn get_hi(n: DoubleBigDigit) -> BigDigit {

tests/biguint.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,20 @@ fn test_div_rem() {
897897
}
898898
}
899899

900+
#[test]
901+
fn test_div_rem_big_multiple() {
902+
let a = BigUint::from(3u32).pow(100u32);
903+
let a2 = &a * &a;
904+
905+
let (div, rem) = a2.div_rem(&a);
906+
assert_eq!(div, a);
907+
assert!(rem.is_zero());
908+
909+
let (div, rem) = (&a2 - 1u32).div_rem(&a);
910+
assert_eq!(div, &a - 1u32);
911+
assert_eq!(rem, &a - 1u32);
912+
}
913+
900914
#[test]
901915
fn test_div_ceil() {
902916
fn check(a: &BigUint, b: &BigUint, d: &BigUint, m: &BigUint) {

0 commit comments

Comments
 (0)