HdrHistogram
diff --git a/‎examples/cli.rs
Lines changed: 145 additions & 17 deletions b/‎examples/cli.rs
Lines changed: 145 additions & 17 deletions
diff --git a/‎src/iterators/all.rs
Lines changed: 4 additions & 0 deletions b/‎src/iterators/all.rs
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/iterators/linear.rs
Lines changed: 6 additions & 2 deletions b/‎src/iterators/linear.rs
Lines changed: 6 additions & 2 deletions
diff --git a/‎src/iterators/log.rs
Lines changed: 6 additions & 2 deletions b/‎src/iterators/log.rs
Lines changed: 6 additions & 2 deletions
diff --git a/‎src/iterators/mod.rs
Lines changed: 26 additions & 10 deletions b/‎src/iterators/mod.rs
Lines changed: 26 additions & 10 deletions
@@ -3,17 +3,20 @@
 extern crate hdrsample;
 extern crate clap;
 
-use std::io::BufRead;
+use std::io;
+use std::io::{Write, BufRead};
+use std::fmt::Display;
 
 use clap::{App, Arg, SubCommand};
 
-use hdrsample::Histogram;
-use hdrsample::serialization::{V2Serializer, V2DeflateSerializer};
+use hdrsample::{Histogram, RecordError};
+use hdrsample::serialization::{V2Serializer, V2SerializeError, V2DeflateSerializer, V2DeflateSerializeError, Deserializer, DeserializeError};
 
 fn main() {
     let default_max = format!("{}", u64::max_value());
     let matches = App::new("hdrsample cli")
             .subcommand(SubCommand::with_name("serialize")
+                    .about("Transform number-per-line input from stdin into a serialized histogram on stdout")
                     .arg(Arg::with_name("min")
                             .long("min")
                             .help("Minimum discernible value")
@@ -37,8 +40,26 @@ fn main() {
                             .short("r")
                             .long("resize")
                             .help("Enable auto resize")))
+            .subcommand(SubCommand::with_name("iter-quantiles")
+                    .about("Display quantiles to stdout from serialized histogram stdin")
+                    .arg(Arg::with_name("ticks")
+                            .short("t")
+                            .long("ticks-per-half")
+                            .takes_value(true)
+                            .required(true)
+                            .help("Ticks per half distance"))
+                    .arg(Arg::with_name("quantile-precision")
+                            .long("quantile-precision")
+                            .takes_value(true)
+                            .default_value("20")))
             .get_matches();
 
+    let stdin = std::io::stdin();
+    let stdin = stdin.lock();
+
+    let stdout = std::io::stdout();
+    let stdout = stdout.lock();
+
     match matches.subcommand_name() {
         Some("serialize") => {
             let sub_matches = matches.subcommand_matches("serialize").unwrap();
@@ -52,28 +73,135 @@ fn main() {
                 h.auto(true);
             }
 
-            serialize(h, sub_matches.is_present("compression"));
-        },
+            serialize(stdin, stdout, h, sub_matches.is_present("compression"))
+        }
+        Some("iter-quantiles") => {
+            let sub_matches = matches.subcommand_matches("iter-quantiles").unwrap();
+            let ticks_per_half = sub_matches.value_of("ticks").unwrap().parse().unwrap();
+            let quantile_precision = sub_matches.value_of("quantile-precision").unwrap().parse().unwrap();
+            quantiles(stdin, stdout, quantile_precision, ticks_per_half)
+        }
         _ => unreachable!()
-    }
+    }.expect("Subcommand failed")
 }
 
-fn serialize(mut h: Histogram<u64>, compression: bool) {
-    let stdin = std::io::stdin();
-    let stdin_handle = stdin.lock();
-
-    for num in stdin_handle.lines()
+/// Read numbers, one per line, from `reader`, record each into `h`, and write the serialized histogram to `writer` (using the deflate serializer when `compression` is true).
+fn serialize<R: BufRead, W: Write>(reader: R, mut writer: W, mut h: Histogram<u64>, compression: bool) -> Result<(), CliError> {
+    for num in reader.lines()
             .map(|l| l.expect("Should be able to read stdin")) // NOTE(review): a read failure panics here; it is not returned as a CliError
             .map(|s| s.parse().expect("Each line must be a u64")) { // NOTE(review): a line that fails to parse panics as well
-        h.record(num).unwrap();
+        h.record(num)?; // a record failure is returned to the caller via From<RecordError>
     }
 
-    let stdout = std::io::stdout();
-    let mut stdout_handle = stdout.lock();
-
     if compression {
-        V2DeflateSerializer::new().serialize(&h, &mut stdout_handle).unwrap();
+        V2DeflateSerializer::new().serialize(&h, &mut writer)?; // error becomes CliError::HistogramSerializeCompressedError
     } else {
-        V2Serializer::new().serialize(&h, &mut stdout_handle).unwrap();
+        V2Serializer::new().serialize(&h, &mut writer)?; // error becomes CliError::HistogramSerializeError
+    }
+
+    Ok(())
+}
+
+/// Print a quantile table for a serialized histogram, in a format similar to the Java impl's
+/// `AbstractHistogram#outputPercentileDistribution`.
+///
+/// The histogram is deserialized from `reader`. A header, one row per step of
+/// `iter_quantiles(ticks_per_half)`, and three `#[...]` summary lines are written to `writer`.
+/// Quantile columns are printed with `quantile_precision` digits after the decimal point.
+///
+/// Returns a `CliError` if deserialization fails or if any write to `writer` fails.
+fn quantiles<R: BufRead, W: Write>(mut reader: R, mut writer: W, quantile_precision: usize, ticks_per_half: u32) -> Result<(), CliError> {
+    let hist: Histogram<u64> = Deserializer::new().deserialize(&mut reader)?;
+
+    // Quantile columns need room for the digits plus the leading "0." of each number.
+    let quantile_width = quantile_precision + 2;
+    write!(
+        writer,
+        "{:>12} {:>quantile_width$} {:>quantile_width$} {:>10} {:>14}\n\n",
+        "Value",
+        "QuantileValue",
+        "QuantileIteration",
+        "TotalCount",
+        "1/(1-Quantile)",
+        quantile_width = quantile_width
+    )?;
+
+    // Running total of the counts covered so far, shown in the TotalCount column.
+    let mut sum = 0;
+    for v in hist.iter_quantiles(ticks_per_half) {
+        sum += v.count_since_last_iteration();
+
+        // Every row shares its first four columns; only the last one depends on the quantile.
+        // Formatting straight into the writer avoids building a temporary String per row.
+        write!(
+            writer,
+            "{:12} {:1.*} {:1.*} {:10} ",
+            v.value(),
+            quantile_precision,
+            v.quantile(),
+            quantile_precision,
+            v.quantile_iterated_to(),
+            sum
+        )?;
+        if v.quantile() < 1.0 {
+            writeln!(writer, "{:14.2}", 1_f64 / (1_f64 - v.quantile()))?;
+        } else {
+            // 1/(1-q) would divide by zero at q == 1.0, so show infinity instead.
+            writeln!(writer, "{:>14}", "∞")?;
+        }
+    }
+
+    // Writes one `#[label = value, label = value]` summary line.
+    fn write_extra_data<T1: Display, T2: Display, W: Write>(
+        writer: &mut W, label1: &str, data1: T1, label2: &str, data2: T2) -> Result<(), io::Error> {
+        writeln!(writer, "#[{:10} = {:12.2}, {:14} = {:12.2}]", label1, data1, label2, data2)
+    }
+
+    write_extra_data(&mut writer, "Mean", hist.mean(), "StdDeviation", hist.stdev())?;
+    write_extra_data(&mut writer, "Max", hist.max(), "Total count", hist.count())?;
+    write_extra_data(&mut writer, "Buckets", hist.buckets(), "SubBuckets", hist.len())?;
+
+    Ok(())
+}
+
+
+// Umbrella error type that lets subcommands use `?` by mapping each common error to a variant.
+// Normally I frown on excessive use of From as it's too "magic", but in the limited confines of
+// subcommands, the convenience seems worth it.
+#[derive(Debug)] // Debug is what `.expect("Subcommand failed")` in main prints on failure
+enum CliError {
+    IoError(io::Error), // e.g. a failed write to the output writer
+    HistogramSerializeError(V2SerializeError), // from V2Serializer::serialize
+    HistogramSerializeCompressedError(V2DeflateSerializeError), // from V2DeflateSerializer::serialize
+    HistogramDeserializeError(DeserializeError), // from Deserializer::deserialize
+    HistogramRecordError(RecordError) // from Histogram::record
+}
+
+impl From<io::Error> for CliError {
+    fn from(e: io::Error) -> Self {
+        CliError::IoError(e)
+    }
+}
+
+// Lets `?` turn a `V2SerializeError` into a `CliError`.
+impl From<V2SerializeError> for CliError {
+    fn from(err: V2SerializeError) -> CliError {
+        CliError::HistogramSerializeError(err)
+    }
+}
+
+// Lets `?` turn a `V2DeflateSerializeError` into a `CliError`.
+impl From<V2DeflateSerializeError> for CliError {
+    fn from(err: V2DeflateSerializeError) -> CliError {
+        CliError::HistogramSerializeCompressedError(err)
+    }
+}
+
+// Lets `?` turn a `RecordError` into a `CliError`.
+impl From<RecordError> for CliError {
+    fn from(err: RecordError) -> CliError {
+        CliError::HistogramRecordError(err)
+    }
+}
+
+// Lets `?` turn a `DeserializeError` into a `CliError`.
+impl From<DeserializeError> for CliError {
+    fn from(err: DeserializeError) -> CliError {
+        CliError::HistogramDeserializeError(err)
     }
 }
@@ -26,4 +26,8 @@ impl<T: Counter> PickyIterator<T> for Iter {
     fn more(&mut self, _: usize) -> bool {
         true
     }
+
+    fn quantile_iterated_to(&self) -> Option<f64> {
+        None // no separate target quantile here; the iterator falls back to the current value's quantile
+    }
 }
@@ -20,8 +20,8 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
         assert!(value_units_per_bucket > 0, "value_units_per_bucket must be > 0");
         HistogramIterator::new(hist,
                                Iter {
-                                   hist: hist,
-                                   value_units_per_bucket: value_units_per_bucket,
+                                   hist,
+                                   value_units_per_bucket,
                                    // won't underflow because value_units_per_bucket > 0
                                    current_step_highest_value_reporting_level: value_units_per_bucket - 1,
                                    current_step_lowest_value_reporting_level:
@@ -51,4 +51,8 @@ impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
         // TODO index + 1 could overflow 16-bit usize
         self.current_step_highest_value_reporting_level + 1 < self.hist.value_for(index + 1)
     }
+
+    fn quantile_iterated_to(&self) -> Option<f64> {
+        None // no separate target quantile here; the iterator falls back to the current value's quantile
+    }
 }
@@ -25,8 +25,8 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
         assert!(log_base > 1.0, "log_base must be > 1.0");
         HistogramIterator::new(hist,
                                Iter {
-                                   hist: hist,
-                                   log_base: log_base,
+                                   hist,
+                                   log_base,
                                    next_value_reporting_level: value_units_in_first_bucket as f64,
                                    current_step_highest_value_reporting_level: value_units_in_first_bucket -
                                                                           1,
@@ -60,4 +60,8 @@ impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
         self.hist.lowest_equivalent(self.next_value_reporting_level as u64) <
             self.hist.value_for(next_index)
     }
+
+    fn quantile_iterated_to(&self) -> Option<f64> {
+        None // no separate target quantile here; the iterator falls back to the current value's quantile
+    }
 }
@@ -23,6 +23,11 @@ pub trait PickyIterator<T: Counter> {
     fn pick(&mut self, index: usize, total_count_to_index: u64) -> bool;
     /// should we keep iterating even though all future indices are zeros?
     fn more(&mut self, index: usize) -> bool;
+
+    /// Supply the quantile iterated to in the last `pick()`, if available. If `None` is returned,
+    /// the quantile of the current value will be used instead. Probably only useful for the
+    /// quantile iterator.
+    fn quantile_iterated_to(&self) -> Option<f64>;
 }
 
 /// `HistogramIterator` provides a base iterator for a `Histogram`.
@@ -53,43 +58,49 @@ pub struct HistogramIterator<'a, T: 'a + Counter, P: PickyIterator<T>> {
 pub struct IterationValue<T: Counter> {
     value: u64,
     quantile: f64,
+    quantile_iterated_to: f64,
     count_at_value: T,
     count_since_last_iteration: u64
 }
 
 impl<T: Counter> IterationValue<T> {
     /// Create a new IterationValue.
-    pub fn new(value: u64, quantile: f64, count_at_value: T, count_since_last_iteration: u64)
-            -> IterationValue<T> {
+    pub fn new(value: u64, quantile: f64, quantile_iterated_to: f64, count_at_value: T,
+               count_since_last_iteration: u64) -> IterationValue<T> {
         IterationValue {
             value,
             quantile,
+            quantile_iterated_to,
             count_at_value,
             count_since_last_iteration
         }
     }
 
-    /// the lowest value stored in the current histogram bin
+    /// The highest value equivalent to the current histogram bin (as filled in by `HistogramIterator`)
     pub fn value(&self) -> u64 {
         self.value
     }
 
-    /// percent of recorded values that are equivalent to or below `value`.
+    /// Percent of recorded values that are equivalent to or below `value`.
     /// This is simply the quantile multiplied by 100.0, so if you care about maintaining the best
     /// floating-point precision, use `quantile()` instead.
     pub fn percentile(&self) -> f64 {
         self.quantile * 100.0
     }
 
-    /// quantile of recorded values that are equivalent to or below `value`
+    /// Quantile of recorded values that are equivalent to or below `value`
     pub fn quantile(&self) -> f64 { self.quantile }
 
-    /// recorded count for values equivalent to `value`
+    /// Quantile iterated to, which in the case of quantile iteration may be different from
+    /// `quantile` because slightly different quantiles can still map to the same bucket.
+    pub fn quantile_iterated_to(&self) -> f64 { self.quantile_iterated_to }
+
+    /// Recorded count for values equivalent to `value`
     pub fn count_at_value(&self) -> T {
         self.count_at_value
     }
 
-    /// number of values traversed since the last iteration step
+    /// Number of values traversed since the last iteration step
     pub fn count_since_last_iteration(&self) -> u64 {
         self.count_since_last_iteration
     }
@@ -109,9 +120,11 @@ impl<'a, T: Counter, P: PickyIterator<T>> HistogramIterator<'a, T, P> {
     }
 
     fn current(&self) -> IterationValue<T> {
+        let quantile = self.total_count_to_index as f64 / self.hist.count() as f64;
         IterationValue {
             value: self.hist.highest_equivalent(self.hist.value_for(self.current_index)),
-            quantile: self.total_count_to_index as f64 / self.hist.count() as f64,
+            quantile,
+            quantile_iterated_to: self.picker.quantile_iterated_to().unwrap_or(quantile),
             count_at_value: self.hist.count_at_index(self.current_index)
                 .expect("current index cannot exceed counts length"),
             count_since_last_iteration: self.total_count_to_index - self.prev_total_count
@@ -142,7 +155,9 @@ impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
                 return None;
             }
 
-            // have we yielded all non-zeros in the histogram?
+            // TODO should check if we've reached max, not count, to avoid early termination
+            // on histograms with very large counts whose total would exceed u64::max_value()
+            // Have we yielded all non-zeros in the histogram?
             let total = self.hist.count();
             if self.prev_total_count == total {
                 // is the picker done?
@@ -163,7 +178,7 @@ impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
                     // if we've seen all counts, no other counts should be non-zero
                     if self.total_count_to_index == total {
                         // TODO this can fail when total count overflows
-                        assert!(count == T::zero());
+                        assert_eq!(count, T::zero());
                     }
 
                     // TODO overflow
@@ -182,6 +197,7 @@ impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
                 // exposed to the same value again after yielding. not sure why this is the
                 // behavior we want, but it's what the original Java implementation dictates.
 
+                // TODO count starting at 0 each time we emit a value to be less prone to overflow
                 self.prev_total_count = self.total_count_to_index;
                 return Some(val);
             }
Original file line number	Diff line number	Diff line change
`@@ -26,4 +26,8 @@ impl<T: Counter> PickyIterator<T> for Iter {`
`26`	`26`	`fn more(&mut self, _: usize) -> bool {`
`27`	`27`	`true`
`28`	`28`	`}`
	`29`	`+`
	`30`	`+ fn quantile_iterated_to(&self) -> Option<f64> {`
	`31`	`+ None`
	`32`	`+ }`
`29`	`33`	`}`