Skip to content

Commit e626135

Browse files
committed
ieee: optimize from/to (decimal) string conversions using smallvec (and benchmarks).
1 parent f40945e commit e626135

File tree

3 files changed

+27
-13
lines changed

3 files changed

+27
-13
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2021"
55

66
[dependencies]
77
bitflags = "1.3.2"
8+
smallvec = { version = "1.11.0", features = ["const_generics", "union"] }
89

910
[dev-dependencies]
1011
criterion = { version = "0.5.1", features = ["html_reports"] }

src/ieee.rs

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
22
use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
33

4-
use alloc::vec::Vec;
54
use core::cmp::{self, Ordering};
65
use core::convert::TryFrom;
76
use core::fmt::{self, Write};
@@ -34,6 +33,21 @@ fn limbs_for_bits(bits: usize) -> usize {
3433
(bits + LIMB_BITS - 1) / LIMB_BITS
3534
}
3635

36+
/// Growable `[Limb]` (i.e. heap-allocated and typically `Vec`/`SmallVec`/etc.),
37+
/// used only by algorithms that may require arbitrary (dynamically determined) precision,
38+
/// i.e. conversions from/to decimal strings.
39+
///
40+
/// Note: the specific type was chosen by starting with `SmallVec<[_; 1]>` and
41+
/// increasing the inline length as long as benchmarks were showing improvements,
42+
/// or at least the `Double::from_str` ones, which roughly had these behaviors:
43+
/// * `Vec<_>` -> `SmallVec<[_; 1]>`: ~15% speedup, but only for shorter inputs
44+
/// * `SmallVec<[_; 1]>` -> `SmallVec<[_; 2]>`: ~10% speedup for longer inputs
45+
/// * `SmallVec<[_; 2]>` -> `SmallVec<[_; 3]>`: noise and/or diminishing returns
46+
///
47+
/// Note: the choice of type described above, and the factors in its decision,
48+
/// are specific to `Limb` being `u128`, so if `Limb` changes, this should too.
49+
type DynPrecisionLimbVec = smallvec::SmallVec<[Limb; 2]>;
50+
3751
/// Enum that represents what fraction of the LSB truncated bits of an fp number
3852
/// represent.
3953
///
@@ -382,7 +396,7 @@ impl<S: Semantics> fmt::Display for IeeeFloat<S> {
382396

383397
// Decompose the number into an APInt and an exponent.
384398
let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
385-
let mut sig = vec![self.sig[0]];
399+
let mut sig: DynPrecisionLimbVec = [self.sig[0]].into_iter().collect();
386400

387401
// Ignore trailing binary zeros.
388402
let trailing_zeros = sig[0].trailing_zeros();
@@ -405,9 +419,9 @@ impl<S: Semantics> fmt::Display for IeeeFloat<S> {
405419

406420
// Multiply significand by 5^e.
407421
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
408-
let mut sig_scratch = vec![];
409-
let mut p5 = vec![];
410-
let mut p5_scratch = vec![];
422+
let mut sig_scratch = DynPrecisionLimbVec::new();
423+
let mut p5 = DynPrecisionLimbVec::new();
424+
let mut p5_scratch = DynPrecisionLimbVec::new();
411425
while texp != 0 {
412426
if p5.is_empty() {
413427
p5.push(5);
@@ -432,7 +446,7 @@ impl<S: Semantics> fmt::Display for IeeeFloat<S> {
432446
}
433447

434448
// Fill the buffer.
435-
let mut buffer = vec![];
449+
let mut buffer = smallvec::SmallVec::<[u8; 64]>::new();
436450

437451
// Ignore digits from the significand until it is no more
438452
// precise than is required for the desired precision.
@@ -1911,7 +1925,7 @@ impl<S: Semantics> IeeeFloat<S> {
19111925
// to hold the full significand, and an extra limb required by
19121926
// tcMultiplyPart.
19131927
let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
1914-
let mut dec_sig = Vec::with_capacity(max_limbs);
1928+
let mut dec_sig = DynPrecisionLimbVec::with_capacity(max_limbs);
19151929

19161930
// Convert to binary efficiently - we do almost all multiplication
19171931
// in a Limb. Only when this would overflow do we do a single
@@ -1970,11 +1984,11 @@ impl<S: Semantics> IeeeFloat<S> {
19701984

19711985
const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
19721986

1973-
let mut p5_scratch = vec![];
1974-
let mut p5 = vec![FIRST_EIGHT_POWERS[4]];
1987+
let mut p5_scratch = DynPrecisionLimbVec::new();
1988+
let mut p5: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[4]].into_iter().collect();
19751989

1976-
let mut r_scratch = vec![];
1977-
let mut r = vec![FIRST_EIGHT_POWERS[power & 7]];
1990+
let mut r_scratch = DynPrecisionLimbVec::new();
1991+
let mut r: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[power & 7]].into_iter().collect();
19781992
power >>= 3;
19791993

19801994
while power > 0 {
@@ -2007,7 +2021,7 @@ impl<S: Semantics> IeeeFloat<S> {
20072021
let calc_precision = (LIMB_BITS << attempt) - 1;
20082022
attempt += 1;
20092023

2010-
let calc_normal_from_limbs = |sig: &mut Vec<Limb>, limbs: &[Limb]| -> StatusAnd<ExpInt> {
2024+
let calc_normal_from_limbs = |sig: &mut DynPrecisionLimbVec, limbs: &[Limb]| -> StatusAnd<ExpInt> {
20112025
sig.resize(limbs_for_bits(calc_precision), 0);
20122026
let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
20132027

src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
#[macro_use]
3737
extern crate bitflags;
3838

39-
#[macro_use]
4039
extern crate alloc;
4140

4241
use core::cmp::Ordering;

0 commit comments

Comments
 (0)