Skip to content

Commit 805e5be

Browse files
committed
Fixes #1384, by ensuring specials are properly parsed.
1 parent 615e6a4 commit 805e5be

File tree

3 files changed

+348
-59
lines changed

3 files changed

+348
-59
lines changed

src/number/complete.rs

Lines changed: 160 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Parsers recognizing numbers, complete input version
22
3+
use core::{f32, f64};
34
use crate::branch::alt;
45
use crate::bytes::complete::tag;
56
use crate::character::complete::{char, digit1, sign};
@@ -1426,11 +1427,13 @@ where
14261427
)(input)
14271428
}
14281429

1430+
///
1431+
14291432
/// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data
14301433
///
14311434
/// *Complete version*: Can parse until the end of input.
14321435
///
1433-
pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (bool, T, T, i32), E>
1436+
pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (T, T, i32), E>
14341437
where
14351438
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
14361439
T: Clone + Offset,
@@ -1441,8 +1444,7 @@ where
14411444
T: for<'a> Compare<&'a [u8]>,
14421445
T: AsBytes,
14431446
{
1444-
let (i, sign) = sign(input.clone())?;
1445-
1447+
let i = input.clone();
14461448
//let (i, zeroes) = take_while(|c: <T as InputTakeAtPosition>::Item| c.as_char() == '0')(i)?;
14471449
let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) {
14481450
Some(index) => i.take_split(index),
@@ -1514,7 +1516,79 @@ where
15141516
(i2, 0)
15151517
};
15161518

1517-
Ok((i, (sign, integer, fraction, exp)))
1519+
Ok((i, (integer, fraction, exp)))
1520+
}
1521+
1522+
// Parses a finite float of type `$t` from `$input`.
//
// The input is split by `recognize_float_parts` into integer digits, fraction digits and an
// exponent, which are then converted by `minimal_lexical::parse_float`. Expands to
// `Ok((remaining_input, value))`. A failure of `recognize_float_parts` is propagated with `?`,
// so the macro has to be expanded inside a function returning a compatible `Result`.
macro_rules! float_finite {
    ($input:ident, $t:ty) => {{
        let (rest, (int_part, frac_part, exp)) = recognize_float_parts($input)?;

        let int_digits = int_part.as_bytes().iter();
        let frac_digits = frac_part.as_bytes().iter();
        let value: $t = minimal_lexical::parse_float(int_digits, frac_digits, exp);

        Ok((rest, value))
    }};
}
1535+
1536+
// Parses a non-finite float of type `$t` from the start of `$input`.
//
// Recognized spellings, compared case-insensitively: "nan" (3 bytes), "infinity" (8 bytes)
// and "inf" (3 bytes). "infinity" is tried before "inf" so that the longer spelling is
// consumed whole. Expands to `Ok((remaining_input, value))`; when nothing matches, the
// enclosing function returns `Err::Error` built with `ErrorKind::Float`.
macro_rules! float_nonfinite {
    ($input:ident, $t:ty) => {{
        let bytes = $input.as_bytes();

        // `Some((value, bytes_consumed))` when a special spelling starts the input.
        let special = if bytes.len() < 3 {
            None
        } else if crate::number::case_insensitive_cmp(bytes, b"nan") {
            Some((<$t>::NAN, 3))
        } else if bytes.len() >= 8 && crate::number::case_insensitive_cmp(bytes, b"infinity") {
            Some((<$t>::INFINITY, 8))
        } else if crate::number::case_insensitive_cmp(bytes, b"inf") {
            Some((<$t>::INFINITY, 3))
        } else {
            None
        };

        match special {
            Some((value, consumed)) => Ok(($input.slice(consumed..), value)),
            None => return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))),
        }
    }};
}
1556+
1557+
/// Recognizes floating point number in text format and returns a f32.
1558+
///
1559+
/// *Complete version*: Can parse until the end of input. This only handles
1560+
/// finite (non-special floats).
1561+
/// ```
1562+
pub fn float_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
1563+
where
1564+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1565+
T: Clone + Offset,
1566+
T: InputIter + InputLength + InputTake,
1567+
<T as InputIter>::Item: AsChar + Copy,
1568+
<T as InputIter>::IterElem: Clone,
1569+
T: InputTakeAtPosition,
1570+
<T as InputTakeAtPosition>::Item: AsChar,
1571+
T: AsBytes,
1572+
T: for<'a> Compare<&'a [u8]>,
1573+
{
1574+
float_finite!(input, f32)
1575+
}
1576+
1577+
/// Recognizes floating point number in text format and returns a f32.
1578+
/// This only handles non-finite (special) values.
1579+
pub fn float_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
1580+
where
1581+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1582+
T: Clone + Offset,
1583+
T: InputIter + InputLength + InputTake,
1584+
<T as InputIter>::Item: AsChar + Copy,
1585+
<T as InputIter>::IterElem: Clone,
1586+
T: InputTakeAtPosition,
1587+
<T as InputTakeAtPosition>::Item: AsChar,
1588+
T: AsBytes,
1589+
T: for<'a> Compare<&'a [u8]>,
1590+
{
1591+
float_nonfinite!(input, f32)
15181592
}
15191593

15201594
/// Recognizes floating point number in text format and returns a f32.
@@ -1546,30 +1620,62 @@ where
15461620
T: AsBytes,
15471621
T: for<'a> Compare<&'a [u8]>,
15481622
{
1549-
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
1623+
let (input, sign) = sign(input.clone())?;
1624+
let res = float_finite::<T, E>(input.clone());
1625+
let (i, mut float) = res.or(float_nonfinite::<T, E>(input))?;
15501626

1551-
let mut float: f32 = minimal_lexical::parse_float(
1552-
integer.as_bytes().iter(),
1553-
fraction.as_bytes().iter(),
1554-
exponent,
1555-
);
15561627
if !sign {
15571628
float = -float;
15581629
}
1559-
15601630
Ok((i, float))
15611631
}
15621632

1563-
/// Recognizes floating point number in text format and returns a f32.
1633+
/// Recognizes floating point number in text format and returns a f64.
1634+
///
1635+
/// *Complete version*: Can parse until the end of input. This only handles
1636+
/// finite (non-special floats).
1637+
pub fn double_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
1638+
where
1639+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1640+
T: Clone + Offset,
1641+
T: InputIter + InputLength + InputTake,
1642+
<T as InputIter>::Item: AsChar + Copy,
1643+
<T as InputIter>::IterElem: Clone,
1644+
T: InputTakeAtPosition,
1645+
<T as InputTakeAtPosition>::Item: AsChar,
1646+
T: AsBytes,
1647+
T: for<'a> Compare<&'a [u8]>,
1648+
{
1649+
float_finite!(input, f64)
1650+
}
1651+
1652+
/// Recognizes floating point number in text format and returns a f64.
1653+
/// This only handles non-finite (special) values.
1654+
pub fn double_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
1655+
where
1656+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1657+
T: Clone + Offset,
1658+
T: InputIter + InputLength + InputTake,
1659+
<T as InputIter>::Item: AsChar + Copy,
1660+
<T as InputIter>::IterElem: Clone,
1661+
T: InputTakeAtPosition,
1662+
<T as InputTakeAtPosition>::Item: AsChar,
1663+
T: AsBytes,
1664+
T: for<'a> Compare<&'a [u8]>,
1665+
{
1666+
float_nonfinite!(input, f64)
1667+
}
1668+
1669+
/// Recognizes floating point number in text format and returns a f64.
15641670
///
15651671
/// *Complete version*: Can parse until the end of input.
15661672
/// ```rust
15671673
/// # use nom::{Err, error::ErrorKind, Needed};
15681674
/// # use nom::Needed::Size;
1569-
/// use nom::number::complete::float;
1675+
/// use nom::number::complete::double;
15701676
///
15711677
/// let parser = |s| {
1572-
/// float(s)
1678+
/// double(s)
15731679
/// };
15741680
///
15751681
/// assert_eq!(parser("11e-1"), Ok(("", 1.1)));
@@ -1589,18 +1695,14 @@ where
15891695
T: AsBytes,
15901696
T: for<'a> Compare<&'a [u8]>,
15911697
{
1592-
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
1698+
let (input, sign) = sign(input.clone())?;
1699+
let res = double_finite::<T, E>(input.clone());
1700+
let (i, mut double) = res.or(double_nonfinite::<T, E>(input))?;
15931701

1594-
let mut float: f64 = minimal_lexical::parse_float(
1595-
integer.as_bytes().iter(),
1596-
fraction.as_bytes().iter(),
1597-
exponent,
1598-
);
15991702
if !sign {
1600-
float = -float;
1703+
double = -double;
16011704
}
1602-
1603-
Ok((i, float))
1705+
Ok((i, double))
16041706
}
16051707

16061708
#[cfg(test)]
@@ -1618,6 +1720,23 @@ mod tests {
16181720
};
16191721
);
16201722

1723+
// Compares two float parser results, treating NaN as equal to NaN.
//
// A plain `assert_eq!` cannot be used for NaN results because NaN != NaN. When the expected
// (`$right`) result is `Ok` with a NaN value, the parsed (`$left`) result must also be `Ok`
// with a NaN value *and* the same remaining input. Every other combination falls back to
// `assert_eq!` on the whole results.
macro_rules! assert_float_eq {
    ($left: expr, $right: expr) => {
        let left: $crate::IResult<_, _, (_, ErrorKind)> = $left;
        let right: $crate::IResult<_, _, (_, ErrorKind)> = $right;
        match (left, right) {
            (Ok((left_rest, left_float)), Ok((right_rest, right_float)))
                if right_float.is_nan() =>
            {
                // The value cannot be compared directly, but the unconsumed input can.
                assert_eq!(left_rest, right_rest);
                assert!(left_float.is_nan());
            }
            (left, right) => assert_eq!(left, right),
        }
    };
}
1739+
16211740
#[test]
16221741
fn i8_tests() {
16231742
assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0)));
@@ -1942,6 +2061,8 @@ mod tests {
19422061
"12.34",
19432062
"-1.234E-12",
19442063
"-1.234e-12",
2064+
"NaN",
2065+
"inf",
19452066
];
19462067

19472068
for test in test_cases.drain(..) {
@@ -1951,15 +2072,24 @@ mod tests {
19512072
println!("now parsing: {} -> {}", test, expected32);
19522073

19532074
let larger = format!("{}", test);
1954-
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
2075+
if expected32.is_finite() {
2076+
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
2077+
}
19552078

1956-
assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
1957-
assert_parse!(float(&larger[..]), Ok(("", expected32)));
2079+
assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
2080+
assert_float_eq!(float(&larger[..]), Ok(("", expected32)));
19582081

1959-
assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
1960-
assert_parse!(double(&larger[..]), Ok(("", expected64)));
2082+
assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
2083+
assert_float_eq!(double(&larger[..]), Ok(("", expected64)));
19612084
}
19622085

2086+
// b"infinity" and case-insensitive floats don't work until recent
2087+
// rustc versions, so just test they work here.
2088+
assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN)));
2089+
assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY)));
2090+
assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN)));
2091+
assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY)));
2092+
19632093
let remaining_exponent = "-1.234E-";
19642094
assert_parse!(
19652095
recognize_float(remaining_exponent),
@@ -2051,8 +2181,8 @@ mod tests {
20512181
}
20522182

20532183
fn parse_f64(i: &str) -> IResult<&str, f64, ()> {
2054-
match recognize_float(i) {
2055-
Err(e) => Err(e),
2184+
match recognize_float::<_, ()>(i) {
2185+
Err(_) => Err(Err::Error(())),
20562186
Ok((i, s)) => {
20572187
if s.is_empty() {
20582188
return Err(Err::Error(()));

src/number/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,13 @@ pub enum Endianness {
1313
/// Will match the host's endianness
1414
Native,
1515
}
16+
17+
/// Case-insensitively checks whether `x` starts with the ASCII letters in `y`.
///
/// Only the first `y.len()` bytes of `x` are compared; trailing bytes of `x` are ignored.
/// Returns `false` when `x` is shorter than `y`. The result is only meaningful if `y`
/// consists solely of ASCII letters.
#[inline]
fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool {
    // `zip` stops at the shorter slice, so without this guard a truncated (or empty) `x`
    // would compare equal to `y`.
    if x.len() < y.len() {
        return false;
    }
    // This uses the trick that 'a' - 'A' == 0x20, and this is true
    // for all ASCII letters, so as long as `yi` is a valid ASCII letter,
    // `xi ^ yi` can only be 0 or 0x20 when the letters match. OR-ing all the
    // differences together therefore yields 0 or 0x20 iff every byte matched.
    let d = x.iter().zip(y).fold(0u8, |d, (xi, yi)| d | (xi ^ yi));
    d == 0 || d == 0x20
}

0 commit comments

Comments
 (0)