@@ -266,6 +266,35 @@ pub(crate) fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
266
266
}
267
267
}
268
268
269
+ /// Subtract a multiple.
270
+ /// a -= b * c
271
+ /// Returns a borrow (if a < b then borrow > 0).
272
+ fn sub_mul_digit_same_len ( a : & mut [ BigDigit ] , b : & [ BigDigit ] , c : BigDigit ) -> BigDigit {
273
+ debug_assert ! ( a. len( ) == b. len( ) ) ;
274
+
275
+ // carry is between -big_digit::MAX and 0, so to avoid overflow we store
276
+ // offset_carry = carry + big_digit::MAX
277
+ let mut offset_carry = big_digit:: MAX ;
278
+
279
+ for ( x, y) in a. iter_mut ( ) . zip ( b) {
280
+ // We want to calculate sum = x - y * c + carry.
281
+ // sum >= -(big_digit::MAX * big_digit::MAX) - big_digit::MAX
282
+ // sum <= big_digit::MAX
283
+ // Offsetting sum by (big_digit::MAX << big_digit::BITS) puts it in DoubleBigDigit range.
284
+ let offset_sum = big_digit:: to_doublebigdigit ( big_digit:: MAX , * x)
285
+ - big_digit:: MAX as DoubleBigDigit
286
+ + offset_carry as DoubleBigDigit
287
+ - * y as DoubleBigDigit * c as DoubleBigDigit ;
288
+
289
+ let ( new_offset_carry, new_x) = big_digit:: from_doublebigdigit ( offset_sum) ;
290
+ offset_carry = new_offset_carry;
291
+ * x = new_x;
292
+ }
293
+
294
+ // Return the borrow.
295
+ big_digit:: MAX - offset_carry
296
+ }
297
+
269
298
fn bigint_from_slice ( slice : & [ BigDigit ] ) -> BigInt {
270
299
BigInt :: from ( biguint_from_vec ( slice. to_vec ( ) ) )
271
300
}
@@ -631,78 +660,99 @@ pub(crate) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
631
660
( q, r >> shift)
632
661
}
633
662
634
- /// an implementation of Knuth, TAOCP vol 2 section 4.3, algorithm D
635
- ///
636
- /// # Correctness
637
- ///
638
- /// This function requires the following conditions to run correctly and/or effectively
639
- ///
640
- /// - `a > b`
641
- /// - `d.data.len() > 1`
642
- /// - `d.data.last().unwrap().leading_zeros() == 0`
663
+ /// An implementation of the base division algorithm.
664
+ /// Knuth, TAOCP vol 2 section 4.3.1, algorithm D, with an improvement from exercises 19-21.
643
665
fn div_rem_core ( mut a : BigUint , b : & BigUint ) -> ( BigUint , BigUint ) {
644
- // The algorithm works by incrementally calculating "guesses", q0, for part of the
645
- // remainder. Once we have any number q0 such that q0 * b <= a, we can set
666
+ debug_assert ! (
667
+ a. data. len( ) >= b. data. len( )
668
+ && b. data. len( ) > 1
669
+ && b. data. last( ) . unwrap( ) . leading_zeros( ) == 0
670
+ ) ;
671
+
672
+ // The algorithm works by incrementally calculating "guesses", q0, for the next digit of the
673
+ // quotient. Once we have any number q0 such that (q0 << j) * b <= a, we can set
646
674
//
647
- // q += q0
648
- // a -= q0 * b
675
+ // q += q0 << j
676
+ // a -= (q0 << j) * b
649
677
//
650
678
// and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
651
679
//
652
- // q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
653
- // - this should give us a guess that is " close" to the actual quotient, but is possibly
654
- // greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
655
- // until we have a guess such that q0 * b <= a .
680
+ // q0, our guess, is calculated by dividing the last three digits of a by the last two digits of
681
+ // b - this will give us a guess that is close to the actual quotient, but is possibly greater.
682
+ // It can only be greater by 1 and only in rare cases, with probability at most
683
+ // 2^-(big_digit::BITS-1) for random a, see TAOCP 4.3.1 exercise 21 .
656
684
//
685
+ // If the quotient turns out to be too large, we adjust it by 1:
686
+ // q -= 1 << j
687
+ // a += b << j
688
+
689
+ // a0 stores an additional extra most significant digit of the dividend, not stored in a.
690
+ let mut a0 = 0 ;
691
+
692
+ // [b1, b0] are the two most significant digits of the divisor. They never change.
693
+ let b0 = * b. data . last ( ) . unwrap ( ) ;
694
+ let b1 = b. data [ b. data . len ( ) - 2 ] ;
657
695
658
- let bn = * b. data . last ( ) . unwrap ( ) ;
659
696
let q_len = a. data . len ( ) - b. data . len ( ) + 1 ;
660
697
let mut q = BigUint {
661
698
data : vec ! [ 0 ; q_len] ,
662
699
} ;
663
700
664
- // We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
665
- // sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
666
- // can be bigger).
667
- //
668
- let mut tmp = BigUint {
669
- data : Vec :: with_capacity ( 2 ) ,
670
- } ;
671
-
672
701
for j in ( 0 ..q_len) . rev ( ) {
673
- // When calculating our next guess q0, we don't need to consider the digits below j
674
- // + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
675
- // digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
676
- // two numbers will be zero in all digits up to (j + b.data.len() - 1).
677
- let offset = j + b. data . len ( ) - 1 ;
678
- if offset >= a. data . len ( ) {
679
- continue ;
702
+ debug_assert ! ( a. data. len( ) == b. data. len( ) + j) ;
703
+
704
+ let a1 = * a. data . last ( ) . unwrap ( ) ;
705
+ let a2 = a. data [ a. data . len ( ) - 2 ] ;
706
+
707
+ // The first q0 estimate is [a1,a0] / b0. It will never be too small, it may be too large
708
+ // by at most 2.
709
+ let ( mut q0, mut r) = if a0 < b0 {
710
+ let ( q0, r) = div_wide ( a0, a1, b0) ;
711
+ ( q0, r as DoubleBigDigit )
712
+ } else {
713
+ debug_assert ! ( a0 == b0) ;
714
+ // Avoid overflowing q0, we know the quotient fits in BigDigit.
715
+ // [a1,a0] = b0 * (1<<BITS - 1) + (a0 + a1)
716
+ ( big_digit:: MAX , a0 as DoubleBigDigit + a1 as DoubleBigDigit )
717
+ } ;
718
+
719
+ // r = [a1,a0] - q0 * b0
720
+ //
721
+ // Now we want to compute a more precise estimate [a2,a1,a0] / [b1,b0] which can only be
722
+ // less or equal to the current q0.
723
+ //
724
+ // q0 is too large if:
725
+ // [a2,a1,a0] < q0 * [b1,b0]
726
+ // (r << BITS) + a2 < q0 * b1
727
+ while r <= big_digit:: MAX as DoubleBigDigit
728
+ && big_digit:: to_doublebigdigit ( r as BigDigit , a2)
729
+ < q0 as DoubleBigDigit * b1 as DoubleBigDigit
730
+ {
731
+ q0 -= 1 ;
732
+ r += b0 as DoubleBigDigit ;
680
733
}
681
734
682
- // just avoiding a heap allocation:
683
- let mut a0 = tmp;
684
- a0. data . truncate ( 0 ) ;
685
- a0. data . extend ( a. data [ offset..] . iter ( ) . cloned ( ) ) ;
686
-
687
- // q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
688
- // implicitly at the end, when adding and subtracting to a and q. Not only do we
689
- // save the cost of the shifts, the rest of the arithmetic gets to work with
690
- // smaller numbers.
691
- let ( mut q0, _) = div_rem_digit ( a0, bn) ;
692
- let mut prod = b * & q0;
693
-
694
- while cmp_slice ( & prod. data [ ..] , & a. data [ j..] ) == Greater {
695
- q0 -= 1u32 ;
696
- prod -= b;
735
+ // q0 is now either the correct quotient digit, or in rare cases 1 too large.
736
+ // Subtract (q0 << j) from a. This may overflow, in which case we will have to correct.
737
+
738
+ let mut borrow = sub_mul_digit_same_len ( & mut a. data [ j..] , & b. data , q0) ;
739
+ if borrow > a0 {
740
+ // q0 is too large. We need to add back one multiple of b.
741
+ q0 -= 1 ;
742
+ borrow -= __add2 ( & mut a. data [ j..] , & b. data ) ;
697
743
}
744
+ // The top digit of a, stored in a0, has now been zeroed.
745
+ debug_assert ! ( borrow == a0) ;
698
746
699
- add2 ( & mut q. data [ j..] , & q0. data [ ..] ) ;
700
- sub2 ( & mut a. data [ j..] , & prod. data [ ..] ) ;
701
- a. normalize ( ) ;
747
+ q. data [ j] = q0;
702
748
703
- tmp = q0;
749
+ // Pop off the next top digit of a.
750
+ a0 = a. data . pop ( ) . unwrap ( ) ;
704
751
}
705
752
753
+ a. data . push ( a0) ;
754
+ a. normalize ( ) ;
755
+
706
756
debug_assert ! ( a < * b) ;
707
757
708
758
( q. normalized ( ) , a)
0 commit comments