Merge pull request #420 from pitdicker/closed_open_float_conversion

dhardy · web-flow · commit 0950203339f3 · 2018-05-03T15:42:35.000+01:00
Switch the `Standard` distribution for floats to [0, 1)
diff --git a/benches/distributions.rs b/benches/distributions.rs
@@ -96,6 +96,10 @@ distr!(distr_standard_codepoint, char, Standard);
 
 distr_float!(distr_standard_f32, f32, Standard);
 distr_float!(distr_standard_f64, f64, Standard);
+distr_float!(distr_open01_f32, f32, Open01);
+distr_float!(distr_open01_f64, f64, Open01);
+distr_float!(distr_openclosed01_f32, f32, OpenClosed01);
+distr_float!(distr_openclosed01_f64, f64, OpenClosed01);
 
 // distributions
 distr_float!(distr_exp, f64, Exp::new(1.23 * 4.56));
diff --git a/src/distributions/float.rs b/src/distributions/float.rs
@@ -14,6 +14,60 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
 
+/// A distribution to sample floating point numbers uniformly in the half-open
+/// interval `(0, 1]`, i.e. including 1 but not 0.
+///
+/// All values that can be generated are of the form `n * ε/2`. For `f32`
+/// the 23 most significant random bits of a `u32` are used and for `f64` the
+/// 53 most significant bits of a `u64` are used. The conversion uses the
+/// multiplicative method.
+///
+/// See also: [`Standard`] which samples from `[0, 1)`, [`Open01`]
+/// which samples from `(0, 1)` and [`Uniform`] which samples from arbitrary
+/// ranges.
+///
+/// # Example
+/// ```rust
+/// use rand::{thread_rng, Rng};
+/// use rand::distributions::OpenClosed01;
+///
+/// let val: f32 = thread_rng().sample(OpenClosed01);
+/// println!("f32 from (0, 1): {}", val);
+/// ```
+///
+/// [`Standard`]: struct.Standard.html
+/// [`Open01`]: struct.Open01.html
+/// [`Uniform`]: uniform/struct.Uniform.html
+#[derive(Clone, Copy, Debug)]
+pub struct OpenClosed01;
+
+/// A distribution to sample floating point numbers uniformly in the open
+/// interval `(0, 1)`, i.e. not including either endpoint.
+///
+/// All values that can be generated are of the form `n * ε + ε/2`. For `f32`
+/// the 22 most significant random bits of an `u32` are used, for `f64` 52 from
+/// an `u64`. The conversion uses a transmute-based method.
+///
+/// See also: [`Standard`] which samples from `[0, 1)`, [`OpenClosed01`]
+/// which samples from `(0, 1]` and [`Uniform`] which samples from arbitrary
+/// ranges.
+///
+/// # Example
+/// ```rust
+/// use rand::{thread_rng, Rng};
+/// use rand::distributions::Open01;
+///
+/// let val: f32 = thread_rng().sample(Open01);
+/// println!("f32 from (0, 1): {}", val);
+/// ```
+///
+/// [`Standard`]: struct.Standard.html
+/// [`OpenClosed01`]: struct.OpenClosed01.html
+/// [`Uniform`]: uniform/struct.Uniform.html
+#[derive(Clone, Copy, Debug)]
+pub struct Open01;
+
+
 pub(crate) trait IntoFloat {
     type F;
 
@@ -29,8 +83,7 @@ pub(crate) trait IntoFloat {
 }
 
 macro_rules! float_impls {
-    ($ty:ty, $uty:ty, $fraction_bits:expr, $exponent_bias:expr,
-     $next_u:ident) => {
+    ($ty:ty, $uty:ty, $fraction_bits:expr, $exponent_bias:expr) => {
         impl IntoFloat for $uty {
             type F = $ty;
             #[inline(always)]
@@ -43,47 +96,111 @@ macro_rules! float_impls {
         }
 
         impl Distribution<$ty> for Standard {
-            /// Generate a floating point number in the open interval `(0, 1)`
-            /// (not including either endpoint) with a uniform distribution.
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; [0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$ty>() * 8;
+                let precision = $fraction_bits + 1;
+                let scale = 1.0 / ((1 as $uty << precision) as $ty);
+
+                let value: $uty = rng.gen();
+                scale * (value >> (float_size - precision)) as $ty
+            }
+        }
+
+        impl Distribution<$ty> for OpenClosed01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; (0, 1] interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$ty>() * 8;
+                let precision = $fraction_bits + 1;
+                let scale = 1.0 / ((1 as $uty << precision) as $ty);
+
+                let value: $uty = rng.gen();
+                let value = value >> (float_size - precision);
+                // Add 1 to shift up; will not overflow because of right-shift:
+                scale * (value + 1) as $ty
+            }
+        }
+
+        impl Distribution<$ty> for Open01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Transmute-based method; 23/52 random bits; (0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
                 const EPSILON: $ty = 1.0 / (1u64 << $fraction_bits) as $ty;
                 let float_size = mem::size_of::<$ty>() * 8;
 
-                let value = rng.$next_u();
+                let value: $uty = rng.gen();
                 let fraction = value >> (float_size - $fraction_bits);
                 fraction.into_float_with_exponent(0) - (1.0 - EPSILON / 2.0)
             }
         }
     }
 }
-float_impls! { f32, u32, 23, 127, next_u32 }
-float_impls! { f64, u64, 52, 1023, next_u64 }
+float_impls! { f32, u32, 23, 127 }
+float_impls! { f64, u64, 52, 1023 }
 
 
 #[cfg(test)]
 mod tests {
     use Rng;
+    use distributions::{Open01, OpenClosed01};
     use mock::StepRng;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
     const EPSILON64: f64 = ::core::f64::EPSILON;
 
     #[test]
-    fn floating_point_edge_cases() {
+    fn standard_fp_edge_cases() {
         let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.gen::<f32>(), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.gen::<f64>(), 0.0 + EPSILON64 / 2.0);
+        assert_eq!(zeros.gen::<f32>(), 0.0);
+        assert_eq!(zeros.gen::<f64>(), 0.0);
 
-        let mut one = StepRng::new(1 << 9, 0);
-        let one32 = one.gen::<f32>();
-        assert!(EPSILON32 < one32 && one32 < EPSILON32 * 2.0);
+        let mut one32 = StepRng::new(1 << 8, 0);
+        assert_eq!(one32.gen::<f32>(), EPSILON32 / 2.0);
 
-        let mut one = StepRng::new(1 << 12, 0);
-        let one64 = one.gen::<f64>();
-        assert!(EPSILON64 < one64 && one64 < EPSILON64 * 2.0);
+        let mut one64 = StepRng::new(1 << 11, 0);
+        assert_eq!(one64.gen::<f64>(), EPSILON64 / 2.0);
 
         let mut max = StepRng::new(!0, 0);
         assert_eq!(max.gen::<f32>(), 1.0 - EPSILON32 / 2.0);
         assert_eq!(max.gen::<f64>(), 1.0 - EPSILON64 / 2.0);
     }
+
+    #[test]
+    fn openclosed01_edge_cases() {
+        let mut zeros = StepRng::new(0, 0);
+        assert_eq!(zeros.sample::<f32, _>(OpenClosed01), 0.0 + EPSILON32 / 2.0);
+        assert_eq!(zeros.sample::<f64, _>(OpenClosed01), 0.0 + EPSILON64 / 2.0);
+
+        let mut one32 = StepRng::new(1 << 8, 0);
+        assert_eq!(one32.sample::<f32, _>(OpenClosed01), EPSILON32);
+
+        let mut one64 = StepRng::new(1 << 11, 0);
+        assert_eq!(one64.sample::<f64, _>(OpenClosed01), EPSILON64);
+
+        let mut max = StepRng::new(!0, 0);
+        assert_eq!(max.sample::<f32, _>(OpenClosed01), 1.0);
+        assert_eq!(max.sample::<f64, _>(OpenClosed01), 1.0);
+    }
+
+    #[test]
+    fn open01_edge_cases() {
+        let mut zeros = StepRng::new(0, 0);
+        assert_eq!(zeros.sample::<f32, _>(Open01), 0.0 + EPSILON32 / 2.0);
+        assert_eq!(zeros.sample::<f64, _>(Open01), 0.0 + EPSILON64 / 2.0);
+
+        let mut one32 = StepRng::new(1 << 9, 0);
+        assert_eq!(one32.sample::<f32, _>(Open01), EPSILON32 / 2.0 * 3.0);
+
+        let mut one64 = StepRng::new(1 << 12, 0);
+        assert_eq!(one64.sample::<f64, _>(Open01), EPSILON64 / 2.0 * 3.0);
+
+        let mut max = StepRng::new(!0, 0);
+        assert_eq!(max.sample::<f32, _>(Open01), 1.0 - EPSILON32 / 2.0);
+        assert_eq!(max.sample::<f64, _>(Open01), 1.0 - EPSILON64 / 2.0);
+    }
 }
diff --git a/src/distributions/gamma.rs b/src/distributions/gamma.rs
@@ -15,7 +15,7 @@ use self::ChiSquaredRepr::*;
 
 use Rng;
 use distributions::normal::StandardNormal;
-use distributions::{Distribution, Exp};
+use distributions::{Distribution, Exp, Open01};
 
 /// The Gamma distribution `Gamma(shape, scale)` distribution.
 ///
@@ -142,7 +142,7 @@ impl Distribution<f64> for Gamma {
 }
 impl Distribution<f64> for GammaSmallShape {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
-        let u: f64 = rng.gen();
+        let u: f64 = rng.sample(Open01);
 
         self.large_shape.sample(rng) * u.powf(self.inv_shape)
     }
@@ -157,7 +157,7 @@ impl Distribution<f64> for GammaLargeShape {
             }
 
             let v = v_cbrt * v_cbrt * v_cbrt;
-            let u: f64 = rng.gen();
+            let u: f64 = rng.sample(Open01);
 
             let x_sqr = x * x;
             if u < 1.0 - 0.0331 * x_sqr * x_sqr ||
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
@@ -27,6 +27,7 @@ use Rng;
 
 pub use self::other::Alphanumeric;
 pub use self::uniform::Uniform;
+pub use self::float::{OpenClosed01, Open01};
 #[deprecated(since="0.5.0", note="use Uniform instead")]
 pub use self::uniform::Uniform as Range;
 #[cfg(feature="std")]
@@ -227,16 +228,13 @@ impl<'a, D, R, T> Iterator for DistIter<'a, D, R, T>
 }
 
 
-/// A generic random value distribution. Generates values for various types
-/// with numerically uniform distribution.
+/// A generic random value distribution, implemented for many primitive types.
+/// Usually generates values with a numerically uniform distribution, and with a
+/// range appropriate to the type.
 /// 
-/// For floating-point numbers, this generates values from the open range
-/// `(0, 1)` (i.e. excluding 0.0 and 1.0).
-///
 /// ## Built-in Implementations
 ///
-/// This crate implements the distribution `Standard` for various primitive
-/// types.  Assuming the provided `Rng` is well-behaved, these implementations
+/// Assuming the provided `Rng` is well-behaved, these implementations
 /// generate values with the following ranges and distributions:
 ///
 /// * Integers (`i32`, `u32`, `isize`, `usize`, etc.): Uniformly distributed
@@ -247,71 +245,45 @@ impl<'a, D, R, T> Iterator for DistIter<'a, D, R, T>
 ///   unassigned/reserved code points.
 /// * `bool`: Generates `false` or `true`, each with probability 0.5.
 /// * Floating point types (`f32` and `f64`): Uniformly distributed in the
-///   open range `(0, 1)`.
+///   half-open range `[0, 1)`. See notes below.
 ///
 /// The following aggregate types also implement the distribution `Standard` as
 /// long as their component types implement it:
 ///
 /// * Tuples and arrays: Each element of the tuple or array is generated
 ///   independently, using the `Standard` distribution recursively.
-/// * `Option<T>`: Returns `None` with probability 0.5; otherwise generates a
-///   random `T` and returns `Some(T)`.
+/// * `Option<T>` where `Standard` is implemented for `T`: Returns `None` with
+///   probability 0.5; otherwise generates a random `x: T` and returns `Some(x)`.
 ///
 /// # Example
 /// ```rust
 /// use rand::{FromEntropy, SmallRng, Rng};
 /// use rand::distributions::Standard;
 ///
 /// let val: f32 = SmallRng::from_entropy().sample(Standard);
-/// println!("f32 from (0,1): {}", val);
-/// ```
-///
-/// With dynamic dispatch (type erasure of `Rng`):
-/// 
-/// ```rust
-/// use rand::{thread_rng, Rng, RngCore};
-/// use rand::distributions::Standard;
-///
-/// let mut rng = thread_rng();
-/// let erased_rng: &mut RngCore = &mut rng;
-/// let val: f32 = erased_rng.sample(Standard);
-/// println!("f32 from (0, 1): {}", val);
+/// println!("f32 from [0, 1): {}", val);
 /// ```
 ///
-/// # Open interval for floats
-/// In theory it is possible to choose between an open interval `(0, 1)`, and
-/// the half-open intervals `[0, 1)` and `(0, 1]`. All can give a distribution
-/// with perfectly uniform intervals. Many libraries in other programming
-/// languages default to the closed-open interval `[0, 1)`. We choose here to go
-/// with *open*, with the arguments:
+/// # Floating point implementation
+/// The floating point implementations for `Standard` generate a random value in
+/// the half-open interval `[0, 1)`, i.e. including 0 but not 1.
 ///
-/// - The chance to generate a specific value, like exactly 0.0, is *tiny*. No
-///   (or almost no) sensible code relies on an exact floating-point value to be
-///   generated with a very small chance (1 in 2<sup>23</sup> (approx. 8
-///   million) for `f32`, and 1 in 2<sup>52</sup> for `f64`). What is relied on
-///   is having a uniform distribution and a mean of `0.5`.
-/// - Several common algorithms rely on never seeing the value `0.0` generated,
-///   i.e. they rely on an open interval. For example when the logarithm of the
-///   value is taken, or used as a devisor.
+/// All values that can be generated are of the form `n * ε/2`. For `f32`
+/// the 23 most significant random bits of a `u32` are used and for `f64` the
+/// 53 most significant bits of a `u64` are used. The conversion uses the
+/// multiplicative method: `(rng.gen::<$uty>() >> N) as $ty * (ε/2)`.
 ///
-/// In other words, the guarantee some value *could* be generated is less useful
-/// than the guarantee some value (`0.0`) is never generated. That makes an open
-/// interval a nicer choice.
+/// See also: [`Open01`] which samples from `(0, 1)`, [`OpenClosed01`] which
+/// samples from `(0, 1]` and `Rng::gen_range(0, 1)` which also samples from
+/// `[0, 1)`. Note that `Open01` and `gen_range` (which uses [`Uniform`]) use
+/// transmute-based methods which yield 1 bit less precision but may perform
+/// faster on some architectures (on modern Intel CPUs all methods have
+/// approximately equal performance).
 ///
-/// Consider using `Rng::gen_range` if you really need a half-open interval (as
-/// the ranges use a half-open interval). It has the same performance. Example:
-///
-/// ```
-/// use rand::{thread_rng, Rng};
-///
-/// let mut rng = thread_rng();
-/// let val = rng.gen_range(0.0f32, 1.0);
-/// println!("f32 from [0, 1): {}", val);
-/// ```
-///
-/// [`Exp1`]: struct.Exp1.html
-/// [`StandardNormal`]: struct.StandardNormal.html
-#[derive(Debug)]
+/// [`Open01`]: struct.Open01.html
+/// [`OpenClosed01`]: struct.OpenClosed01.html
+/// [`Uniform`]: uniform/struct.Uniform.html
+#[derive(Clone, Copy, Debug)]
 pub struct Standard;
 
 #[allow(deprecated)]
diff --git a/src/distributions/normal.rs b/src/distributions/normal.rs
@@ -11,7 +11,7 @@
 //! The normal and derived distributions.
 
 use Rng;
-use distributions::{ziggurat, ziggurat_tables, Distribution};
+use distributions::{ziggurat, ziggurat_tables, Distribution, Open01};
 
 /// Samples floating-point numbers according to the normal distribution
 /// `N(0, 1)` (a.k.a.  a standard normal, or Gaussian). This is equivalent to
@@ -55,8 +55,8 @@ impl Distribution<f64> for StandardNormal {
             let mut y = 0.0f64;
 
             while -2.0 * y < x * x {
-                let x_: f64 = rng.gen();
-                let y_: f64 = rng.gen();
+                let x_: f64 = rng.sample(Open01);
+                let y_: f64 = rng.sample(Open01);
 
                 x = x_.ln() / ziggurat_tables::ZIG_NORM_R;
                 y = y_.ln();