perfect_float feature for deserialization accuracy #541


Closed
wants to merge 1 commit into from
8 changes: 8 additions & 0 deletions .travis.yml
@@ -9,10 +9,17 @@ matrix:
- cargo test --features arbitrary_precision
- cargo test --features raw_value
- cargo test --features unbounded_depth
- cargo test --features perfect_float

- rust: stable
- rust: beta

- rust: 1.31.0
script:
- cargo build
- cargo build --features preserve_order
- cargo build --features arbitrary_precision

- rust: 1.36.0
script:
- cargo check --manifest-path tests/crate/Cargo.toml --no-default-features --features alloc
@@ -27,3 +34,4 @@ script:
- cargo build
- cargo build --features preserve_order
- cargo build --features arbitrary_precision
- cargo build --features perfect_float
8 changes: 8 additions & 0 deletions Cargo.toml
@@ -20,6 +20,7 @@ appveyor = { repository = "serde-rs/json" }
serde = { version = "1.0.60", default-features = false }
indexmap = { version = "1.2", optional = true }
itoa = { version = "0.4.3", default-features = false }
lexical-core = { version = "0.7", optional = true, features = ["format"] }
ryu = "1.0"

[dev-dependencies]
@@ -63,6 +64,13 @@ preserve_order = ["indexmap"]
# written back to a JSON string without loss of precision.
arbitrary_precision = []

# Deserialize floats with perfect accuracy and consistent rounding. This
# changes the implementation for parsing both floats and integers. This
# implementation is slower than the default and contains a lookup table that
# increases artifact size. It does not affect serialization. This
# feature has no effect if arbitrary_precision is enabled.
perfect_float = ["lexical-core"]

I would recommend using perfect_float by default. @dtolnay will ultimately decide, however. Correctness should always be favored by default, and performance at the cost of correctness should be opt-in, not opt-out.
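To make the trade-off concrete: parsers that evaluate `significand * 10^exponent` in f64 arithmetic round twice, and the combined error can select the wrong neighboring float. A minimal Python sketch of the effect (illustrative only, not the serde_json code path):

```python
# Naive parse of "0.3": significand 3, decimal exponent -1,
# evaluated in double arithmetic. Each operation rounds, and the
# combined error lands one ulp above the correctly rounded value.
naive = 3.0 * (10.0 ** -1)

# Correctly rounded parse: the nearest double to the decimal 0.3.
correct = float("0.3")

assert naive != correct
assert naive == 0.30000000000000004
```

A correctly rounded parser always returns the nearest representable float to the decimal input, so results are identical across platforms and parser versions.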


# Provide a RawValue type that can hold unprocessed JSON during deserialization.
raw_value = []

120 changes: 120 additions & 0 deletions src/de.rs
@@ -90,6 +90,7 @@ impl<'a> Deserializer<read::StrRead<'a>> {
}
}

#[cfg(not(feature = "perfect_float"))]
macro_rules! overflow {
($a:ident * 10 + $b:ident, $c:expr) => {
$a >= $c / 10 && ($a > $c / 10 || $b > $c % 10)
@@ -304,6 +305,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
self.fix_position(err)
}

#[cfg(not(feature = "perfect_float"))]
fn deserialize_prim_number<V>(&mut self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
@@ -330,6 +332,33 @@ }
}
}

#[cfg(feature = "perfect_float")]
fn deserialize_prim_number<V>(&mut self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
let peek = match tri!(self.parse_whitespace()) {
Some(b) => b,
None => {
return Err(self.peek_error(ErrorCode::EofWhileParsingValue));
}
};

let value = match peek {
b'-' => {
self.eat_char();
tri!(self.parse_any_number(false)).visit(visitor)
}
b'0'..=b'9' => tri!(self.parse_any_number(true)).visit(visitor),
_ => Err(self.peek_invalid_type(&visitor)),
};

match value {
Ok(value) => Ok(value),
Err(err) => Err(self.fix_position(err)),
}
}

serde_if_integer128! {
fn scan_integer128(&mut self, buf: &mut String) -> Result<()> {
match tri!(self.next_char_or_null()) {
@@ -380,6 +409,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
Ok(())
}

#[cfg(not(feature = "perfect_float"))]
fn parse_integer(&mut self, positive: bool) -> Result<ParserNumber> {
let next = match tri!(self.next_char()) {
Some(b) => b,
@@ -428,6 +458,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

#[cfg(not(feature = "perfect_float"))]
fn parse_long_integer(
&mut self,
positive: bool,
@@ -455,6 +486,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

#[cfg(not(feature = "perfect_float"))]
fn parse_number(&mut self, positive: bool, significand: u64) -> Result<ParserNumber> {
Ok(match tri!(self.peek_or_null()) {
b'.' => ParserNumber::F64(tri!(self.parse_decimal(positive, significand, 0))),
@@ -476,6 +508,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
})
}

#[cfg(not(feature = "perfect_float"))]
fn parse_decimal(
&mut self,
positive: bool,
@@ -516,6 +549,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

#[cfg(not(feature = "perfect_float"))]
fn parse_exponent(
&mut self,
positive: bool,
@@ -573,6 +607,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {

// This cold code should not be inlined into the middle of the hot
// exponent-parsing loop above.
#[cfg(not(feature = "perfect_float"))]
#[cold]
#[inline(never)]
fn parse_exponent_overflow(
@@ -626,6 +661,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}

#[cfg(not(feature = "arbitrary_precision"))]
#[cfg(not(feature = "perfect_float"))]
fn parse_any_number(&mut self, positive: bool) -> Result<ParserNumber> {
self.parse_integer(positive)
}
@@ -640,6 +676,88 @@ impl<'de, R: Read<'de>> Deserializer<R> {
Ok(ParserNumber::String(buf))
}

#[cfg(feature = "perfect_float")]
fn parse_any_number(&mut self, positive: bool) -> Result<ParserNumber> {
let mut float = false;
self.scratch.clear();
if !positive {
self.scratch.push(b'-');
}

loop {
match self.peek_or_null()? {
c @ b'0'..=b'9' => {
self.eat_char();
self.scratch.push(c);
}
c @ b'e' | c @ b'E' | c @ b'+' | c @ b'-' | c @ b'.' => {
self.eat_char();
self.scratch.push(c);
float = true;
}
_ => break,
}
}
if self.scratch.len() > 20 {
float = true;
}
match (float, positive) {
(true, _) => {
match lexical_core::parse_format::<f64>(
&self.scratch[..],
lexical_core::NumberFormat::JSON,
) {
Ok(val) => {
if val.is_infinite() {
Err(self.error(ErrorCode::NumberOutOfRange))
} else {
Ok(ParserNumber::F64(val))
}
}
Err(err) => match err.code {
lexical_core::ErrorCode::ExponentWithoutFraction
| lexical_core::ErrorCode::MissingExponentSign
| lexical_core::ErrorCode::EmptyFraction
| lexical_core::ErrorCode::EmptyMantissa
| lexical_core::ErrorCode::EmptyExponent
| lexical_core::ErrorCode::MissingMantissaSign => {
Err(self.error(ErrorCode::EofWhileParsingValue))
}
_ => Err(self.error(ErrorCode::InvalidNumber)),
},
}
}
(false, false) => {
match lexical_core::parse_format::<i64>(
&self.scratch[..],
lexical_core::NumberFormat::JSON,
) {
Ok(val) => Ok(ParserNumber::I64(val)),
Err(err) => match err.code {
lexical_core::ErrorCode::Empty => {
Err(self.error(ErrorCode::EofWhileParsingValue))
}
_ => Err(self.error(ErrorCode::InvalidNumber)),
},
}
}
(false, true) => {
match lexical_core::parse_format::<u64>(
&self.scratch[..],
lexical_core::NumberFormat::JSON,
) {
Ok(val) => Ok(ParserNumber::U64(val)),
Err(err) => match err.code {
lexical_core::ErrorCode::Empty => {
Err(self.error(ErrorCode::EofWhileParsingValue))
}
_ => Err(self.error(ErrorCode::InvalidNumber)),
},
}
}
}
}

#[cfg(feature = "arbitrary_precision")]
fn scan_or_eof(&mut self, buf: &mut String) -> Result<u8> {
match tri!(self.next_char()) {
@@ -742,6 +860,7 @@ impl<'de, R: Read<'de>> Deserializer<R> {
Ok(())
}

#[cfg(not(feature = "perfect_float"))]
fn f64_from_parts(
&mut self,
positive: bool,
@@ -1022,6 +1141,7 @@ impl FromStr for Number {
}
}

#[cfg(not(feature = "perfect_float"))]
static POW10: [f64; 309] = [
1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, //
1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, //
76 changes: 58 additions & 18 deletions tests/test.rs
@@ -732,19 +732,9 @@ fn test_parse_number_errors() {
("0x80", "trailing characters at line 1 column 2"),
("\\0", "expected value at line 1 column 1"),
("1.", "EOF while parsing a value at line 1 column 2"),
("1.a", "invalid number at line 1 column 3"),
("1.e1", "invalid number at line 1 column 3"),
("1e", "EOF while parsing a value at line 1 column 2"),
("1e+", "EOF while parsing a value at line 1 column 3"),
("1a", "trailing characters at line 1 column 2"),
(
"100e777777777777777777777777777",
"number out of range at line 1 column 14",
),
(
"-100e777777777777777777777777777",
"number out of range at line 1 column 15",
),
(
"1000000000000000000000000000000000000000000000000000000000000\
000000000000000000000000000000000000000000000000000000000000\
@@ -773,6 +763,38 @@ fn test_parse_number_errors() {
"number out of range at line 1 column 303",
),
]);

// the error messages here vary by implementation
test_parse_err::<f64>(&[
(
"1.a",
#[cfg(not(feature = "perfect_float"))]
"invalid number at line 1 column 3",
#[cfg(feature = "perfect_float")]
"EOF while parsing a value at line 1 column 2",
),
(
"1.e1",
#[cfg(not(feature = "perfect_float"))]
"invalid number at line 1 column 3",
#[cfg(feature = "perfect_float")]
"EOF while parsing a value at line 1 column 4",
),
(
"100e777777777777777777777777777",
#[cfg(not(feature = "perfect_float"))]
"number out of range at line 1 column 14",
#[cfg(feature = "perfect_float")]
"number out of range at line 1 column 31",
),
(
"-100e777777777777777777777777777",
#[cfg(not(feature = "perfect_float"))]
"number out of range at line 1 column 15",
#[cfg(feature = "perfect_float")]
"number out of range at line 1 column 32",
),
]);
}

#[test]
@@ -840,14 +862,18 @@ fn test_parse_f64() {
("0.00e+00", 0.0),
("0.00e-00", 0.0),
("3.5E-2147483647", 0.0),
(
&format!("{}", (i64::MIN as f64) - 1.0),
(i64::MIN as f64) - 1.0,
),
(
&format!("{}", (u64::MAX as f64) + 1.0),
(u64::MAX as f64) + 1.0,
),
// (i64::MIN as f64) - 1.0
("-9223372036854776000.0", -9223372036854776000.0),
// (u64::MAX as f64) + 1.0
("18446744073709552000.0", 18446744073709552000.0),
(&format!("{}", f64::EPSILON), f64::EPSILON),
(
"0.0000000000000000000000000000000000000000000000000123e50",
@@ -900,6 +918,28 @@ fn test_parse_f64() {
1e308,
),
]);

#[cfg(not(feature = "perfect_float"))]
How come these don't pass with perfect_float?


@dtolnay The issue is that the value in the test isn't the correctly rounded result. It is a close-to-halfway representation, and the correct bit pattern is actually:

0100001111110000000000000000000000000000000000000000000000000000

The annotated bit pattern is:

Sign Exponent Significand
0 10000111111 0000000000000000000000000000000000000000000000000000

As you can see, the least significant bit of the significand is 0, so for anything at or below the halfway point (a bit set just past the significand), the default IEEE 754 round-nearest-ties-even scheme rounds down to the even neighbor. The correct value is 18446744073709551616.0.
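The bit pattern above can be checked without numpy; a small sketch using only the standard `struct` module, assuming IEEE 754 binary64 doubles:

```python
import struct

def to_bits(x: float) -> int:
    """Reinterpret a float64 as its raw 64-bit pattern."""
    return struct.unpack(">Q", struct.pack(">d", x))[0]

# "18446744073709552000.0" sits below the halfway point between
# 2^64 and the next double up, so round-nearest-ties-even maps it
# down to exactly 2^64 = 18446744073709551616.0.
parsed = float("18446744073709552000.0")
assert parsed == 2.0 ** 64

# Sign 0, biased exponent 0b10000111111 (1087), significand all zeros:
assert to_bits(parsed) == 0x43F0000000000000
```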


To experiment with halfway representations, the following Python snippet (using numpy) can be used:

import numpy as np

class FloatMixin:
    '''Mixin for floating-point methods.'''

    def __init__(self, value):
        self.value = self.float_type(value)

    def to_bits(self):
        '''Extract bitwise representation of float.'''
        return np.frombuffer(self.value.tobytes(), dtype=self.int_type)[0]

    @classmethod
    def from_bits(cls, value):
        '''Construct a float from its bitwise representation.'''
        return cls(np.frombuffer(value.tobytes(), dtype=cls.float_type)[0])

    def to_hex(self):
        '''Convert double to hex.'''
        return '{0:0{1}x}'.format(self.to_bits(), self.value.itemsize * 2)

    def is_denormal(self):
        '''Returns true if the float is a denormal.'''
        return self.to_bits() & self.EXPONENT_MASK == 0

    def is_special(self):
        '''Returns true if the float is NaN or Infinite.'''
        return self.to_bits() & self.EXPONENT_MASK == self.EXPONENT_MASK

    def is_nan(self):
        '''Returns true if the float is NaN.'''
        return self.is_special() and self.to_bits() & self.MANTISSA_MASK != 0

    def is_inf(self):
        '''Returns true if the float is Infinite.'''
        return self.is_special() and self.to_bits() & self.MANTISSA_MASK == 0

    def exponent(self):
        '''Get exponent component from the float.'''

        if self.is_denormal():
            return self.DENORMAL_EXPONENT
        bits = self.to_bits()
        exp_bits = bits & self.EXPONENT_MASK
        biased_e = np.int32(exp_bits >> self.int_type(self.MANTISSA_SIZE))
        return biased_e - self.EXPONENT_BIAS

    def mantissa(self):
        '''Get mantissa component from the float.'''

        bits = self.to_bits()
        s = bits & self.MANTISSA_MASK
        if not self.is_denormal():
            return s + self.HIDDEN_BIT_MASK
        return s


class Float32(FloatMixin):
    '''Wrapper around a 32-bit floating point value.'''

    SIGN_MASK           = np.uint32(0x80000000)
    EXPONENT_MASK       = np.uint32(0x7F800000)
    HIDDEN_BIT_MASK     = np.uint32(0x00800000)
    MANTISSA_MASK       = np.uint32(0x007FFFFF)
    MANTISSA_SIZE       = np.int32(23)
    EXPONENT_BIAS       = np.int32(127 + MANTISSA_SIZE)
    DENORMAL_EXPONENT   = np.int32(1 - EXPONENT_BIAS)
    float_type = np.float32
    int_type = np.uint32


class Float64(FloatMixin):
    '''Wrapper around a 64-bit floating point value.'''

    SIGN_MASK           = np.uint64(0x8000000000000000)
    EXPONENT_MASK       = np.uint64(0x7FF0000000000000)
    HIDDEN_BIT_MASK     = np.uint64(0x0010000000000000)
    MANTISSA_MASK       = np.uint64(0x000FFFFFFFFFFFFF)
    MANTISSA_SIZE       = np.int32(52)
    EXPONENT_BIAS       = np.int32(1023 + MANTISSA_SIZE)
    DENORMAL_EXPONENT   = np.int32(1 - EXPONENT_BIAS)
    float_type = np.float64
    int_type = np.uint64

Now, to calculate b+1, the next float up from a given float b, and b+h, the halfway point, do:

b = Float64(18446744073709551616.0)
b1 = Float64.from_bits(b.to_bits() + Float64.int_type(1))
bh = (int(b.value) + int(b1.value)) // 2

In this case, for b of 18446744073709551616.0, b+1 is 18446744073709555712.0, and b+h is 18446744073709553664.0. b+h cannot be represented as an exact floating-point number, but we can confirm it is accurate with a few tests:

>>> float("18446744073709553664.0")  # b+h
18446744073709551616.0
>>> float("18446744073709553665.0")  # above b+h
18446744073709555712.0


@dtolnay An accurate float parser does not mean every decimal representation can be represented exactly as a floating-point number; it does mean that the closest representation is always used, and that these conversions are stable.

I hope my explanation makes sense?


I understand that in general not every decimal representation can be exactly represented as a floating-point number; that is obvious to me. The part I don't think you answered is why these particular values in the test no longer round trip.

My expectation for f64::to_string is that it produces a decimal representation (hopefully with as few significant digits as possible, but this is not necessary) which is mathematically closer to the original input float than to any other float.

My expectation for f64::from_str is that it produces the float which is mathematically closest to the input digits.

This test was supposed to test that pair of expectations. Of the two expectations, is one or both untrue of the standard library? Is one or both untrue of the implementation in this PR?

For the standard library my understanding is that the following line succeeds for any input float n, and that's certainly true for the two specific floats in this test.

assert_eq!(n.to_string().parse::<f64>().unwrap(), n);
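The same round-trip expectation can be sketched in Python, whose `repr` gives the shortest decimal string that parses back to the bit-identical float (an illustration of the property, not the serde_json code):

```python
# Floats from the discussion above; the long literals round to
# (i64::MIN as f64) - 1.0 and (u64::MAX as f64) + 1.0 respectively.
values = [
    -9223372036854775809.0,  # rounds to -2^63
    18446744073709551617.0,  # rounds to 2^64
    31.245270191439438,
    121.48791951161945,
]

for n in values:
    # The shortest decimal form must parse back to the same float.
    assert float(repr(n)) == n
```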

@Alexhuszagh Alexhuszagh Feb 7, 2020

@dtolnay Oh I see what you mean, my mistake. However, the actual results seem to work after adding the test back in anyway:

    #[cfg(not(feature = "arbitrary_precision"))]
    #[cfg(feature = "perfect_float")]
    test_parse_ok(vec![
        ("18446744073709552000.0", 18446744073709552000.0),
        ("31.245270191439438", 31.245270191439438),
        ("121.48791951161945", 121.48791951161945),
    ]);

I'm not sure why it originally failed; I have extensive tests that ensure this works in my codebase (and I test on ~20 different architectures for each release).

#[cfg(not(feature = "arbitrary_precision"))]
test_parse_ok(vec![
(
// "-9223372036854776000", note it formats with no trailing ".0"
&format!("{}", (i64::MIN as f64) - 1.0),
(i64::MIN as f64) - 1.0,
),
(
// "18446744073709552000", note it formats with no trailing ".0"
&format!("{}", (u64::MAX as f64) + 1.0),
(u64::MAX as f64) + 1.0,
),
]);

#[cfg(not(feature = "arbitrary_precision"))]
#[cfg(feature = "perfect_float")]
test_parse_ok(vec![
("31.245270191439438", 31.245270191439438),
("121.48791951161945", 121.48791951161945),
]);
}

#[test]