Switch the Standard distribution for floats to [0, 1)

pitdicker · pitdicker · commit caaaf0aea4a8 · 2018-05-03T12:48:25.000+02:00
diff --git a/benches/distributions.rs b/benches/distributions.rs
@@ -96,6 +96,8 @@ distr!(distr_standard_codepoint, char, Standard);
 
 distr_float!(distr_standard_f32, f32, Standard);
 distr_float!(distr_standard_f64, f64, Standard);
+distr_float!(distr_open01_f32, f32, Open01);
+distr_float!(distr_open01_f64, f64, Open01);
 
 // distributions
 distr_float!(distr_exp, f64, Exp::new(1.23 * 4.56));
diff --git a/src/distributions/float.rs b/src/distributions/float.rs
@@ -14,6 +14,30 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
 
+/// A distribution to sample floating point numbers uniformly in the open
+/// interval `(0, 1)`, i.e. not including either endpoint.
+///
+/// All values that can be generated are of the form `n * ε + ε/2`. For `f32`
+/// the 23 most significant random bits of a `u32` are used, for `f64` 52 from
+/// a `u64`. The conversion uses a transmute-based method.
+///
+/// To sample from the half-open range `[0, 1)` instead, use the [`Standard`]
+/// distribution.
+///
+/// # Example
+/// ```rust
+/// use rand::{thread_rng, Rng};
+/// use rand::distributions::Open01;
+///
+/// let val: f32 = thread_rng().sample(Open01);
+/// println!("f32 from (0, 1): {}", val);
+/// ```
+///
+/// [`Standard`]: struct.Standard.html
+#[derive(Clone, Copy, Debug)]
+pub struct Open01;
+
+
 pub(crate) trait IntoFloat {
     type F;
 
@@ -29,8 +53,7 @@ pub(crate) trait IntoFloat {
 }
 
 macro_rules! float_impls {
-    ($ty:ty, $uty:ty, $fraction_bits:expr, $exponent_bias:expr,
-     $next_u:ident) => {
+    ($ty:ty, $uty:ty, $fraction_bits:expr, $exponent_bias:expr) => {
         impl IntoFloat for $uty {
             type F = $ty;
             #[inline(always)]
@@ -43,26 +66,42 @@ macro_rules! float_impls {
         }
 
         impl Distribution<$ty> for Standard {
-            /// Generate a floating point number in the open interval `(0, 1)`
-            /// (not including either endpoint) with a uniform distribution.
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; [0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$ty>() * 8;
+                let precision = $fraction_bits + 1;
+                let scale = 1.0 / ((1 as $uty << precision) as $ty);
+
+                let value: $uty = rng.gen();
+                scale * (value >> (float_size - precision)) as $ty
+            }
+        }
+
+        impl Distribution<$ty> for Open01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Transmute-based method; 23/52 random bits; (0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
                 const EPSILON: $ty = 1.0 / (1u64 << $fraction_bits) as $ty;
                 let float_size = mem::size_of::<$ty>() * 8;
 
-                let value = rng.$next_u();
+                let value: $uty = rng.gen();
                 let fraction = value >> (float_size - $fraction_bits);
                 fraction.into_float_with_exponent(0) - (1.0 - EPSILON / 2.0)
             }
         }
     }
 }
-float_impls! { f32, u32, 23, 127, next_u32 }
-float_impls! { f64, u64, 52, 1023, next_u64 }
+float_impls! { f32, u32, 23, 127 }
+float_impls! { f64, u64, 52, 1023 }
 
 
 #[cfg(test)]
 mod tests {
     use Rng;
+    use distributions::Open01;
     use mock::StepRng;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
@@ -71,19 +110,34 @@ mod tests {
     #[test]
     fn floating_point_edge_cases() {
         let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.gen::<f32>(), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.gen::<f64>(), 0.0 + EPSILON64 / 2.0);
+        assert_eq!(zeros.gen::<f32>(), 0.0);
+        assert_eq!(zeros.gen::<f64>(), 0.0);
 
-        let mut one = StepRng::new(1 << 9, 0);
-        let one32 = one.gen::<f32>();
-        assert!(EPSILON32 < one32 && one32 < EPSILON32 * 2.0);
+        let mut one32 = StepRng::new(1 << 8, 0);
+        assert_eq!(one32.gen::<f32>(), EPSILON32 / 2.0);
 
-        let mut one = StepRng::new(1 << 12, 0);
-        let one64 = one.gen::<f64>();
-        assert!(EPSILON64 < one64 && one64 < EPSILON64 * 2.0);
+        let mut one64 = StepRng::new(1 << 11, 0);
+        assert_eq!(one64.gen::<f64>(), EPSILON64 / 2.0);
 
         let mut max = StepRng::new(!0, 0);
         assert_eq!(max.gen::<f32>(), 1.0 - EPSILON32 / 2.0);
         assert_eq!(max.gen::<f64>(), 1.0 - EPSILON64 / 2.0);
     }
+
+    #[test]
+    fn open01_edge_cases() {
+        let mut zeros = StepRng::new(0, 0);
+        assert_eq!(zeros.sample::<f32, _>(Open01), 0.0 + EPSILON32 / 2.0);
+        assert_eq!(zeros.sample::<f64, _>(Open01), 0.0 + EPSILON64 / 2.0);
+
+        let mut one32 = StepRng::new(1 << 9, 0);
+        assert_eq!(one32.sample::<f32, _>(Open01), EPSILON32 / 2.0 * 3.0);
+
+        let mut one64 = StepRng::new(1 << 12, 0);
+        assert_eq!(one64.sample::<f64, _>(Open01), EPSILON64 / 2.0 * 3.0);
+
+        let mut max = StepRng::new(!0, 0);
+        assert_eq!(max.sample::<f32, _>(Open01), 1.0 - EPSILON32 / 2.0);
+        assert_eq!(max.sample::<f64, _>(Open01), 1.0 - EPSILON64 / 2.0);
+    }
 }
diff --git a/src/distributions/gamma.rs b/src/distributions/gamma.rs
@@ -15,7 +15,7 @@ use self::ChiSquaredRepr::*;
 
 use Rng;
 use distributions::normal::StandardNormal;
-use distributions::{Distribution, Exp};
+use distributions::{Distribution, Exp, Open01};
 
 /// The Gamma distribution `Gamma(shape, scale)` distribution.
 ///
@@ -142,7 +142,7 @@ impl Distribution<f64> for Gamma {
 }
 impl Distribution<f64> for GammaSmallShape {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
-        let u: f64 = rng.gen();
+        let u: f64 = rng.sample(Open01);
 
         self.large_shape.sample(rng) * u.powf(self.inv_shape)
     }
@@ -157,7 +157,7 @@ impl Distribution<f64> for GammaLargeShape {
             }
 
             let v = v_cbrt * v_cbrt * v_cbrt;
-            let u: f64 = rng.gen();
+            let u: f64 = rng.sample(Open01);
 
             let x_sqr = x * x;
             if u < 1.0 - 0.0331 * x_sqr * x_sqr ||
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
@@ -27,6 +27,7 @@ use Rng;
 
 pub use self::other::Alphanumeric;
 pub use self::uniform::Uniform;
+pub use self::float::Open01;
 #[deprecated(since="0.5.0", note="use Uniform instead")]
 pub use self::uniform::Uniform as Range;
 #[cfg(feature="std")]
@@ -247,7 +248,7 @@ impl<'a, D, R, T> Iterator for DistIter<'a, D, R, T>
 ///   unassigned/reserved code points.
 /// * `bool`: Generates `false` or `true`, each with probability 0.5.
 /// * Floating point types (`f32` and `f64`): Uniformly distributed in the
-///   open range `(0, 1)`.
+///   half-open range `[0, 1)`.
 ///
 /// The following aggregate types also implement the distribution `Standard` as
 /// long as their component types implement it:
@@ -263,7 +264,7 @@ impl<'a, D, R, T> Iterator for DistIter<'a, D, R, T>
 /// use rand::distributions::Standard;
 ///
 /// let val: f32 = SmallRng::from_entropy().sample(Standard);
-/// println!("f32 from (0,1): {}", val);
+/// println!("f32 from [0, 1): {}", val);
 /// ```
 ///
 /// With dynamic dispatch (type erasure of `Rng`):
@@ -275,42 +276,29 @@ impl<'a, D, R, T> Iterator for DistIter<'a, D, R, T>
 /// let mut rng = thread_rng();
 /// let erased_rng: &mut RngCore = &mut rng;
 /// let val: f32 = erased_rng.sample(Standard);
-/// println!("f32 from (0, 1): {}", val);
+/// println!("f32 from [0, 1): {}", val);
 /// ```
 ///
-/// # Open interval for floats
-/// In theory it is possible to choose between an open interval `(0, 1)`, and
-/// the half-open intervals `[0, 1)` and `(0, 1]`. All can give a distribution
-/// with perfectly uniform intervals. Many libraries in other programming
-/// languages default to the closed-open interval `[0, 1)`. We choose here to go
-/// with *open*, with the arguments:
+/// # Floating point implementation
+/// The floating point implementations for `Standard` generate a random value in
+/// the half-open interval `[0, 1)`.
 ///
-/// - The chance to generate a specific value, like exactly 0.0, is *tiny*. No
-///   (or almost no) sensible code relies on an exact floating-point value to be
-///   generated with a very small chance (1 in 2<sup>23</sup> (approx. 8
-///   million) for `f32`, and 1 in 2<sup>52</sup> for `f64`). What is relied on
-///   is having a uniform distribution and a mean of `0.5`.
-/// - Several common algorithms rely on never seeing the value `0.0` generated,
-///   i.e. they rely on an open interval. For example when the logarithm of the
-///   value is taken, or used as a devisor.
+/// All values that can be generated are multiples of ε/2. For `f32` the 24 most
+/// significant random bits of a `u32` are used, for `f64` 53 from a `u64`.
+/// The conversion uses the common multiply-based approach.
 ///
-/// In other words, the guarantee some value *could* be generated is less useful
-/// than the guarantee some value (`0.0`) is never generated. That makes an open
-/// interval a nicer choice.
+/// The [`Open01`] distribution provides an alternative: it generates values in
+/// the open interval `(0, 1)`, with one fewer random bit. It uses a
+/// transmute-based method for the conversion to a floating point value, which
+/// may be slightly faster on some architectures.
 ///
-/// Consider using `Rng::gen_range` if you really need a half-open interval (as
-/// the ranges use a half-open interval). It has the same performance. Example:
-///
-/// ```
-/// use rand::{thread_rng, Rng};
+/// `Rng::gen_range(0.0, 1.0)` also uses the transmute-based method, but
+/// produces values in the half-open interval `[0, 1)`, just like `Standard`.
 ///
-/// let mut rng = thread_rng();
-/// let val = rng.gen_range(0.0f32, 1.0);
-/// println!("f32 from [0, 1): {}", val);
-/// ```
+/// If you wish to sample from the half-open interval `(0, 1]`, consider using
+/// `1.0 - rng.gen()`.
 ///
-/// [`Exp1`]: struct.Exp1.html
-/// [`StandardNormal`]: struct.StandardNormal.html
+/// [`Open01`]: struct.Open01.html
 #[derive(Debug)]
 pub struct Standard;
 
diff --git a/src/distributions/normal.rs b/src/distributions/normal.rs
@@ -11,7 +11,7 @@
 //! The normal and derived distributions.
 
 use Rng;
-use distributions::{ziggurat, ziggurat_tables, Distribution};
+use distributions::{ziggurat, ziggurat_tables, Distribution, Open01};
 
 /// Samples floating-point numbers according to the normal distribution
 /// `N(0, 1)` (a.k.a.  a standard normal, or Gaussian). This is equivalent to
@@ -55,8 +55,8 @@ impl Distribution<f64> for StandardNormal {
             let mut y = 0.0f64;
 
             while -2.0 * y < x * x {
-                let x_: f64 = rng.gen();
-                let y_: f64 = rng.gen();
+                let x_: f64 = rng.sample(Open01);
+                let y_: f64 = rng.sample(Open01);
 
                 x = x_.ln() / ziggurat_tables::ZIG_NORM_R;
                 y = y_.ln();