Skip to content

Commit 40bd305

Browse files
committed
Improve String.Basic.number, String.Basic.intDecimal
1 parent a49594d commit 40bd305

File tree

3 files changed

+78
-47
lines changed

3 files changed

+78
-47
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ New features:
5151

5252
Bugfixes:
5353

54+
- Improve correctness and speed of `number` and `intDecimal`. (#189 by @jamesdbrock)
55+
5456
Other improvements:
57+
5558
- Drop `math` dependency; update imports (#167 by @JordanMartinez)
5659

5760
## [v8.4.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.4.0) - 2022-03-15

src/Parsing/String/Basic.purs

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ import Prelude
2222

2323
import Data.Array (elem, notElem)
2424
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper)
25-
import Data.Either (Either(..))
25+
import Data.Either (Either(..), either)
2626
import Data.Int as Data.Int
2727
import Data.Maybe (Maybe(..))
2828
import Data.Number (infinity, nan)
2929
import Data.Number as Data.Number
3030
import Data.String (CodePoint, singleton, takeWhile)
3131
import Data.String.CodePoints (codePointFromChar)
3232
import Data.String.CodeUnits as SCU
33-
import Data.Tuple (Tuple(..), fst)
33+
import Data.Tuple (fst)
3434
import Parsing (ParserT, fail)
35-
import Parsing.Combinators (choice, skipMany, (<?>), (<~?>))
36-
import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint)
37-
import Parsing.String as Parser.String
35+
import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
36+
import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
37+
import Partial.Unsafe (unsafeCrashWith)
3838

3939
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
4040
digit :: forall m. ParserT String m Char
@@ -84,26 +84,27 @@ alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
8484
-- | * `"NaN"`
8585
-- | * `"-Infinity"`
8686
number :: forall m. ParserT String m Number
87-
-- TODO because the JavaScript parseFloat function will successfully parse
88-
-- a Number up until it doesn't understand something and then return
89-
-- the partially parsed Number, this parser will sometimes consume more
90-
-- String that it actually parses. Example "1..3" will parse as 1.0.
91-
-- So this needs improvement.
9287
number =
9388
choice
94-
[ Parser.String.string "Infinity" *> pure infinity
95-
, Parser.String.string "+Infinity" *> pure infinity
96-
, Parser.String.string "-Infinity" *> pure (negate infinity)
97-
, Parser.String.string "NaN" *> pure nan
98-
, do
99-
Tuple section _ <- Parser.String.match do
100-
_ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
101-
skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
89+
[ string "Infinity" *> pure infinity
90+
, string "+Infinity" *> pure infinity
91+
, string "-Infinity" *> pure (negate infinity)
92+
, string "NaN" *> pure nan
93+
, tryRethrow $ do
94+
section <- numberRegex
10295
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
10396
case Data.Number.fromString section of
104-
Nothing -> fail $ "Could not parse Number " <> section
97+
Nothing -> fail $ "Number.fromString failed"
98+
-- Maybe this parser should set consumed flag if regex matches but fromString fails?
99+
-- But currently regex allows some illegal inputs, like "."
100+
-- Anyway this primitiv-ish parser should always backtrack on fail.
105101
Just x -> pure x
106-
]
102+
] <|> fail "Expected Number"
103+
104+
numberRegex :: forall m. ParserT String m String
105+
numberRegex = either unsafeCrashWith identity $ regex pattern mempty
106+
where
107+
pattern = "[+-]?[0-9]*(\\.[0-9]*)?([eE][+-]?[0-9]*(\\.[0-9]*))?"
107108

108109
-- | Parser based on the __Data.Int.fromString__ function.
109110
-- |
@@ -114,17 +115,20 @@ number =
114115
-- | * `"-3"`
115116
-- | * `"+300"`
116117
intDecimal :: forall m. ParserT String m Int
117-
intDecimal = do
118-
Tuple section _ <- Parser.String.match do
119-
_ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
120-
skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
118+
intDecimal = tryRethrow do
119+
section <- intDecimalRegex <|> fail "Expected Int"
121120
case Data.Int.fromString section of
122-
Nothing -> fail $ "Could not parse Int " <> section
121+
Nothing -> fail $ "Int.fromString failed"
123122
Just x -> pure x
124123

124+
intDecimalRegex :: forall m. ParserT String m String
125+
intDecimalRegex = either unsafeCrashWith identity $ regex pattern mempty
126+
where
127+
pattern = "[+-]?[0-9]*"
128+
125129
-- | Helper function
126130
satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
127-
satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)
131+
satisfyCP p = satisfy (p <<< codePointFromChar)
128132

129133
-- | Match zero or more whitespace characters satisfying
130134
-- | `Data.CodePoint.Unicode.isSpace`. Always succeeds.

test/Main.purs

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@ import Control.Lazy (fix)
1212
import Control.Monad.State (State, modify, runState)
1313
import Data.Array (some, toUnfoldable)
1414
import Data.Array as Array
15-
import Data.Bifunctor (rmap)
16-
import Data.Either (Either(..), hush)
15+
import Data.Bifunctor (lmap, rmap)
16+
import Data.Either (Either(..), either, hush)
1717
import Data.Foldable (oneOf)
1818
import Data.List (List(..), fromFoldable, (:))
1919
import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
2020
import Data.List.NonEmpty as NE
2121
import Data.Maybe (Maybe(..), fromJust)
2222
import Data.NonEmpty ((:|))
23-
import Data.Number (infinity, isNaN)
23+
import Data.Number (infinity, nan)
24+
import Data.Number as Data.Number
2425
import Data.String (toUpper)
2526
import Data.String.CodePoints as SCP
2627
import Data.String.CodeUnits (fromCharArray, singleton)
@@ -684,19 +685,52 @@ main = do
684685

685686
log "\nTESTS number\n"
686687

687-
parseTest "Infinity" infinity number
688-
parseTest "+Infinity" infinity number
689-
parseTest "-Infinity" (negate infinity) number
690-
parseErrorTestPosition number "+xxx" (mkPos 2)
688+
-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"
689+
assertEqual' "number Infinity"
690+
{ actual: runParser "Infinity" number
691+
, expected: Right infinity
692+
}
693+
assertEqual' "number +Infinity"
694+
{ actual: runParser "+Infinity" number
695+
, expected: Right infinity
696+
}
697+
assertEqual' "number -Infinity"
698+
{ actual: runParser "-Infinity" number
699+
, expected: Right (negate infinity)
700+
}
701+
assertEqual' "number +xxx"
702+
{ actual: lmap parseErrorPosition $ runParser "+xxx" number
703+
, expected: Left $ Position { index: 0, line: 1, column: 1 }
704+
}
691705

692-
parseTest "-3.0E-1.0" (-0.3) number
706+
assertEqual' "number 1"
707+
{ actual: runParser "-3.0E-1.0" number
708+
, expected: Right (-0.3)
709+
}
693710

694711
-- test from issue #73
695-
parseTest "0.7531531167929774" 0.7531531167929774 number
712+
assertEqual' "number 2"
713+
{ actual: runParser "0.7531531167929774" number
714+
, expected: Right 0.7531531167929774
715+
}
696716

697717
-- test from issue #115
698-
parseTest "-6.0" (-6.0) number
699-
parseTest "+6.0" (6.0) number
718+
assertEqual' "number 3"
719+
{ actual: runParser "-6.0" number
720+
, expected: Right (-6.0)
721+
}
722+
assertEqual' "number 4"
723+
{ actual: runParser "+6.0" number
724+
, expected: Right (6.0)
725+
}
726+
727+
assert' "number NaN 1" $ either (\_ -> false) Data.Number.isNaN (runParser (show nan) number)
728+
assert' "number NaN 2" $ either (\_ -> false) Data.Number.isNaN (runParser "NaN" number)
729+
730+
assertEqual' "number 5"
731+
{ actual: runParser "1..3" number
732+
, expected: Right (1.0)
733+
}
700734

701735
log "\nTESTS Operator\n"
702736
-- test from issue #161
@@ -757,16 +791,6 @@ main = do
757791
"c"
758792
"Expected \"b\""
759793

760-
-- we can't test "NaN" with `parseTest` because nan doesn't compare equal
761-
case runParser "NaN" number of
762-
Right actual -> do
763-
assert' ("expected: NaN, actual: " <> show actual) (isNaN actual)
764-
logShow actual
765-
Left err -> assert' ("error: " <> show err) false
766-
767-
-- TODO This shows the current limitations of the number parser. Ideally this parse should fail.
768-
parseTest "1..3" 1.0 $ number <* eof
769-
770794
log "\nTESTS intDecimal\n"
771795
parseTest "-300" (-300) intDecimal
772796

0 commit comments

Comments
 (0)