-
Notifications
You must be signed in to change notification settings - Fork 580
perfect_float feature for deserialization accuracy #541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -732,19 +732,9 @@ fn test_parse_number_errors() { | |||||||
("0x80", "trailing characters at line 1 column 2"), | ||||||||
("\\0", "expected value at line 1 column 1"), | ||||||||
("1.", "EOF while parsing a value at line 1 column 2"), | ||||||||
("1.a", "invalid number at line 1 column 3"), | ||||||||
("1.e1", "invalid number at line 1 column 3"), | ||||||||
("1e", "EOF while parsing a value at line 1 column 2"), | ||||||||
("1e+", "EOF while parsing a value at line 1 column 3"), | ||||||||
("1a", "trailing characters at line 1 column 2"), | ||||||||
( | ||||||||
"100e777777777777777777777777777", | ||||||||
"number out of range at line 1 column 14", | ||||||||
), | ||||||||
( | ||||||||
"-100e777777777777777777777777777", | ||||||||
"number out of range at line 1 column 15", | ||||||||
), | ||||||||
( | ||||||||
"1000000000000000000000000000000000000000000000000000000000000\ | ||||||||
000000000000000000000000000000000000000000000000000000000000\ | ||||||||
|
@@ -773,6 +763,38 @@ fn test_parse_number_errors() { | |||||||
"number out of range at line 1 column 303", | ||||||||
), | ||||||||
]); | ||||||||
|
||||||||
// the error messages here vary by implementation | ||||||||
test_parse_err::<f64>(&[ | ||||||||
( | ||||||||
"1.a", | ||||||||
#[cfg(not(feature = "perfect_float"))] | ||||||||
"invalid number at line 1 column 3", | ||||||||
#[cfg(feature = "perfect_float")] | ||||||||
"EOF while parsing a value at line 1 column 2", | ||||||||
), | ||||||||
( | ||||||||
"1.e1", | ||||||||
#[cfg(not(feature = "perfect_float"))] | ||||||||
"invalid number at line 1 column 3", | ||||||||
#[cfg(feature = "perfect_float")] | ||||||||
"EOF while parsing a value at line 1 column 4", | ||||||||
), | ||||||||
( | ||||||||
"100e777777777777777777777777777", | ||||||||
#[cfg(not(feature = "perfect_float"))] | ||||||||
"number out of range at line 1 column 14", | ||||||||
#[cfg(feature = "perfect_float")] | ||||||||
"number out of range at line 1 column 31", | ||||||||
), | ||||||||
( | ||||||||
"-100e777777777777777777777777777", | ||||||||
#[cfg(not(feature = "perfect_float"))] | ||||||||
"number out of range at line 1 column 15", | ||||||||
#[cfg(feature = "perfect_float")] | ||||||||
"number out of range at line 1 column 32", | ||||||||
), | ||||||||
]); | ||||||||
} | ||||||||
|
||||||||
#[test] | ||||||||
|
@@ -840,14 +862,10 @@ fn test_parse_f64() { | |||||||
("0.00e+00", 0.0), | ||||||||
("0.00e-00", 0.0), | ||||||||
("3.5E-2147483647", 0.0), | ||||||||
( | ||||||||
&format!("{}", (i64::MIN as f64) - 1.0), | ||||||||
(i64::MIN as f64) - 1.0, | ||||||||
), | ||||||||
( | ||||||||
&format!("{}", (u64::MAX as f64) + 1.0), | ||||||||
(u64::MAX as f64) + 1.0, | ||||||||
), | ||||||||
// (i64::MIN as f64) - 1.0 | ||||||||
("-9223372036854776000.0", -9223372036854776000.0), | ||||||||
// (u64::MAX as f64) + 1.0 | ||||||||
("18446744073709552000.0", 18446744073709552000.0), | ||||||||
(&format!("{}", f64::EPSILON), f64::EPSILON), | ||||||||
( | ||||||||
"0.0000000000000000000000000000000000000000000000000123e50", | ||||||||
|
@@ -900,6 +918,28 @@ fn test_parse_f64() { | |||||||
1e308, | ||||||||
), | ||||||||
]); | ||||||||
|
||||||||
#[cfg(not(feature = "perfect_float"))] | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How come these don't pass with perfect_float? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dtolnay The issue is because that value actually isn't the correct representation of a perfect float. That is a close-to-halfway representation, and the correct bit-pattern is actually:
The annotated bit-pattern is:
As you can see, the significand has a 0 in its least-significant bit, so for anything below or equal to the halfway point (a bit set just past the significant digits), the default IEEE 754 rounding scheme of round-to-nearest, ties-to-even will round down. The correct value is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For simple code to practice getting halfway representations, use the following code: import numpy as np
class FloatMixin:
    '''Mixin for floating-point methods.

    Provides bit-level inspection helpers for a NumPy floating-point scalar.
    A concrete subclass must define the class attributes used below:
    ``float_type``, ``int_type``, ``EXPONENT_MASK``, ``MANTISSA_MASK``,
    ``HIDDEN_BIT_MASK``, ``MANTISSA_SIZE``, ``EXPONENT_BIAS`` and
    ``DENORMAL_EXPONENT``.
    '''

    def __init__(self, value):
        '''Store `value` converted to the subclass's ``float_type``.'''
        self.value = self.float_type(value)

    def to_bits(self):
        '''Extract bitwise representation of float.'''
        # Reinterpret the scalar's raw bytes as one unsigned integer of the
        # subclass's ``int_type``.
        return np.frombuffer(self.value.tobytes(), dtype=self.int_type)[0]

    @classmethod
    def from_bits(cls, value):
        '''Create a float from its bitwise representation.

        `value` must expose ``tobytes()`` (e.g. the NumPy integer returned by
        ``to_bits``); its raw bytes are reinterpreted as ``float_type``.
        '''
        return cls(np.frombuffer(value.tobytes(), dtype=cls.float_type)[0])

    def to_hex(self):
        '''Convert the float's bit pattern to a zero-padded hex string.'''
        # Two hex digits per byte of the underlying scalar.
        return '{0:0{1}x}'.format(self.to_bits(), self.value.itemsize * 2)

    def is_denormal(self):
        '''Returns true if the float is a denormal.'''
        # All exponent bits clear. Note this is also true for zero.
        return self.to_bits() & self.EXPONENT_MASK == 0

    def is_special(self):
        '''Returns true if the float is NaN or Infinite.'''
        # All exponent bits set.
        return self.to_bits() & self.EXPONENT_MASK == self.EXPONENT_MASK

    def is_nan(self):
        '''Returns true if the float is NaN.'''
        return self.is_special() and self.to_bits() & self.MANTISSA_MASK != 0

    def is_inf(self):
        '''Returns true if the float is Infinite.'''
        return self.is_special() and self.to_bits() & self.MANTISSA_MASK == 0

    def exponent(self):
        '''Get exponent component from the float.

        Returns ``DENORMAL_EXPONENT`` when the exponent bits are all zero,
        otherwise the stored exponent field minus ``EXPONENT_BIAS``.
        '''
        if self.is_denormal():
            return self.DENORMAL_EXPONENT
        bits = self.to_bits()
        exp_bits = bits & self.EXPONENT_MASK
        # The shift count is cast to the subclass's integer type. It has to be
        # looked up through ``self``: there is no module-level ``int_type``.
        biased_e = np.int32(exp_bits >> self.int_type(self.MANTISSA_SIZE))
        return biased_e - self.EXPONENT_BIAS

    def mantissa(self):
        '''Get mantissa component from the float.

        The hidden bit is added for non-denormal values; denormals return the
        stored mantissa bits unchanged.
        '''
        bits = self.to_bits()
        s = bits & self.MANTISSA_MASK
        if not self.is_denormal():
            return s + self.HIDDEN_BIT_MASK
        return s
class Float32(FloatMixin):
    '''Wrapper around a 32-bit floating point value.'''

    # NumPy scalar types used to hold the value and its raw bit pattern.
    float_type = np.float32
    int_type = np.uint32

    # Bit-field masks over the 32-bit pattern: bit 31 is the sign,
    # bits 23-30 the exponent, bits 0-22 the stored mantissa.
    SIGN_MASK = np.uint32(0x8000_0000)
    EXPONENT_MASK = np.uint32(0x7F80_0000)
    MANTISSA_MASK = np.uint32(0x007F_FFFF)
    # Bit 23: the hidden bit that sits just above the stored mantissa.
    HIDDEN_BIT_MASK = np.uint32(0x0080_0000)

    # Number of stored mantissa bits.
    MANTISSA_SIZE = np.int32(23)
    # 127 plus the mantissa width.
    EXPONENT_BIAS = np.int32(127 + MANTISSA_SIZE)
    # Exponent reported when the exponent bits are all zero.
    DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS)
class Float64(FloatMixin):
'''Wrapper around a 64-bit floating point value.'''
SIGN_MASK = np.uint64(0x8000000000000000)
EXPONENT_MASK = np.uint64(0x7FF0000000000000)
HIDDEN_BIT_MASK = np.uint64(0x0010000000000000)
MANTISSA_MASK = np.uint64(0x000FFFFFFFFFFFFF)
MANTISSA_SIZE = np.int32(52)
EXPONENT_BIAS = np.int32(1023 + MANTISSA_SIZE)
DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS)
float_type = np.float64
int_type = np.uint64 Now, to calculate b = Float64(18446744073709551616.0)
b1 = Float64.from_bits(b.to_bits() + Float64.int_type(1))
bh = (int(b.value) + int(b1.value)) // 2 In this case, for >>> float("18446744073709553664.0") # b+h
18446744073709551616.0
>>> float("18446744073709553665.0") # above b+h
18446744073709555712.0 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dtolnay The accurate float parser does not mean every decimal representation can be exactly represented as a floating-point number, however, it does mean that the closest representation will always be used, and that these conversions are stable. I hope my explanation makes sense? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand that in general not every decimal representation can be exactly represented as a floating-point number, that is obvious to me. The part I don't think you answered is why these particular values in the test no longer round trip. My expectation for f64::to_string is that it produces a decimal representation (hopefully with as few significant digits as possible, but this is not necessary) which is mathematically closer to the original input float than to any other float. My expectation for f64::from_str is that it produces the float which is mathematically closest to the input digits. This test was supposed to test that pair of expectations. Of the two expectations, is one or both untrue of the standard library? Is one or both untrue of the implementation in this PR? For the standard library my understanding is that the following line succeeds for any input float assert_eq!(n.to_string().parse::<f64>().unwrap(), n); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dtolnay Oh I see what you mean, my mistake. However, the actual results seem to work after adding the test back in anyway: #[cfg(not(feature = "arbitrary_precision"))]
#[cfg(feature = "perfect_float")]
test_parse_ok(vec![
("18446744073709552000.0", 18446744073709552000.0),
("31.245270191439438", 31.245270191439438),
("121.48791951161945", 121.48791951161945),
]); I'm not sure why it originally failed; I have extensive tests that ensure this works in my codebase (and I test on ~20 different architectures for each release). |
||||||||
#[cfg(not(feature = "arbitrary_precision"))] | ||||||||
test_parse_ok(vec![ | ||||||||
( | ||||||||
// "-9223372036854776000", note it formats with no trailing ".0" | ||||||||
&format!("{}", (i64::MIN as f64) - 1.0), | ||||||||
(i64::MIN as f64) - 1.0, | ||||||||
), | ||||||||
( | ||||||||
// "18446744073709552000", note it formats with no trailing ".0" | ||||||||
&format!("{}", (u64::MAX as f64) + 1.0), | ||||||||
(u64::MAX as f64) + 1.0, | ||||||||
), | ||||||||
]); | ||||||||
|
||||||||
#[cfg(not(feature = "arbitrary_precision"))] | ||||||||
#[cfg(feature = "perfect_float")] | ||||||||
test_parse_ok(vec![ | ||||||||
("31.245270191439438", 31.245270191439438), | ||||||||
("121.48791951161945", 121.48791951161945), | ||||||||
]); | ||||||||
} | ||||||||
|
||||||||
#[test] | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would recommend using
perfect_float
by default. @dtolnay will ultimately decide, however. Correctness should always be favored by default, and performance at the cost of correctness should be opt-in, not opt-out.