Skip to content

Commit 7ef3fb5

Browse files
committed
Improve String.Basic.number, String.Basic.intDecimal
1 parent dbf9344 commit 7ef3fb5

File tree

3 files changed

+78
-47
lines changed

3 files changed

+78
-47
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ New features:
5151

5252
Bugfixes:
5353

54+
- Improve correctness and speed of `number` and `intDecimal`. (#189 by @jamesdbrock)
55+
5456
Other improvements:
57+
5558
- Drop `math` dependency; update imports (#167 by @JordanMartinez)
5659

5760
## [v8.4.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.4.0) - 2022-03-15

src/Parsing/String/Basic.purs

+30-26
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ import Prelude
2222

2323
import Data.Array (elem, notElem)
2424
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper)
25-
import Data.Either (Either(..))
25+
import Data.Either (Either(..), either)
2626
import Data.Int as Data.Int
2727
import Data.Maybe (Maybe(..))
2828
import Data.Number (infinity, nan)
2929
import Data.Number as Data.Number
3030
import Data.String (CodePoint, singleton, takeWhile)
3131
import Data.String.CodePoints (codePointFromChar)
3232
import Data.String.CodeUnits as SCU
33-
import Data.Tuple (Tuple(..), fst)
33+
import Data.Tuple (fst)
3434
import Parsing (ParserT, fail)
35-
import Parsing.Combinators (choice, skipMany, (<?>), (<~?>))
36-
import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint)
37-
import Parsing.String as Parser.String
35+
import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
36+
import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
37+
import Partial.Unsafe (unsafeCrashWith)
3838

3939
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
4040
digit :: forall m. ParserT String m Char
@@ -84,26 +84,27 @@ alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
8484
-- | * `"NaN"`
8585
-- | * `"-Infinity"`
8686
number :: forall m. ParserT String m Number
87-
-- TODO because the JavaScript parseFloat function will successfully parse
88-
-- a Number up until it doesn't understand something and then return
89-
-- the partially parsed Number, this parser will sometimes consume more
90-
-- String that it actually parses. Example "1..3" will parse as 1.0.
91-
-- So this needs improvement.
9287
number =
9388
choice
94-
[ Parser.String.string "Infinity" *> pure infinity
95-
, Parser.String.string "+Infinity" *> pure infinity
96-
, Parser.String.string "-Infinity" *> pure (negate infinity)
97-
, Parser.String.string "NaN" *> pure nan
98-
, do
99-
Tuple section _ <- Parser.String.match do
100-
_ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
101-
skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
89+
[ string "Infinity" *> pure infinity
90+
, string "+Infinity" *> pure infinity
91+
, string "-Infinity" *> pure (negate infinity)
92+
, string "NaN" *> pure nan
93+
, tryRethrow $ do
94+
section <- numberRegex
10295
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
10396
case Data.Number.fromString section of
104-
Nothing -> fail $ "Could not parse Number " <> section
97+
Nothing -> fail $ "Number.fromString failed"
98+
-- Maybe this parser should set consumed flag if regex matches but fromString fails?
99+
-- But currently regex allows some illegal inputs, like "."
100+
-- Anyway this primitiv-ish parser should always backtrack on fail.
105101
Just x -> pure x
106-
]
102+
] <|> fail "Expected Number"
103+
104+
numberRegex :: forall m. ParserT String m String
105+
numberRegex = either unsafeCrashWith identity $ regex pattern mempty
106+
where
107+
pattern = "[+-]?[0-9]*(\\.[0-9]*)?([eE][+-]?[0-9]*(\\.[0-9]*))?"
107108

108109
-- | Parser based on the __Data.Int.fromString__ function.
109110
-- |
@@ -114,17 +115,20 @@ number =
114115
-- | * `"-3"`
115116
-- | * `"+300"`
116117
intDecimal :: forall m. ParserT String m Int
117-
intDecimal = do
118-
Tuple section _ <- Parser.String.match do
119-
_ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
120-
skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
118+
intDecimal = tryRethrow do
119+
section <- intDecimalRegex <|> fail "Expected Int"
121120
case Data.Int.fromString section of
122-
Nothing -> fail $ "Could not parse Int " <> section
121+
Nothing -> fail $ "Int.fromString failed"
123122
Just x -> pure x
124123

124+
intDecimalRegex :: forall m. ParserT String m String
125+
intDecimalRegex = either unsafeCrashWith identity $ regex pattern mempty
126+
where
127+
pattern = "[+-]?[0-9]*"
128+
125129
-- | Helper function
126130
satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
127-
satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)
131+
satisfyCP p = satisfy (p <<< codePointFromChar)
128132

129133
-- | Match zero or more whitespace characters satisfying
130134
-- | `Data.CodePoint.Unicode.isSpace`. Always succeeds.

test/Main.purs

+45-21
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@ import Control.Lazy (fix)
1212
import Control.Monad.State (State, modify, runState)
1313
import Data.Array (some, toUnfoldable)
1414
import Data.Array as Array
15-
import Data.Bifunctor (rmap)
16-
import Data.Either (Either(..), hush)
15+
import Data.Bifunctor (lmap, rmap)
16+
import Data.Either (Either(..), either, hush)
1717
import Data.Foldable (oneOf)
1818
import Data.List (List(..), fromFoldable, (:))
1919
import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
2020
import Data.List.NonEmpty as NE
2121
import Data.Maybe (Maybe(..), fromJust)
2222
import Data.NonEmpty ((:|))
23-
import Data.Number (infinity, isNaN)
23+
import Data.Number (infinity, nan)
24+
import Data.Number as Data.Number
2425
import Data.String (toUpper)
2526
import Data.String.CodePoints as SCP
2627
import Data.String.CodeUnits (fromCharArray, singleton)
@@ -681,19 +682,52 @@ main = do
681682

682683
log "\nTESTS number\n"
683684

684-
parseTest "Infinity" infinity number
685-
parseTest "+Infinity" infinity number
686-
parseTest "-Infinity" (negate infinity) number
687-
parseErrorTestPosition number "+xxx" (mkPos 2)
685+
-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"
686+
assertEqual' "number Infinity"
687+
{ actual: runParser "Infinity" number
688+
, expected: Right infinity
689+
}
690+
assertEqual' "number +Infinity"
691+
{ actual: runParser "+Infinity" number
692+
, expected: Right infinity
693+
}
694+
assertEqual' "number -Infinity"
695+
{ actual: runParser "-Infinity" number
696+
, expected: Right (negate infinity)
697+
}
698+
assertEqual' "number +xxx"
699+
{ actual: lmap parseErrorPosition $ runParser "+xxx" number
700+
, expected: Left $ Position { index: 0, line: 1, column: 1 }
701+
}
688702

689-
parseTest "-3.0E-1.0" (-0.3) number
703+
assertEqual' "number 1"
704+
{ actual: runParser "-3.0E-1.0" number
705+
, expected: Right (-0.3)
706+
}
690707

691708
-- test from issue #73
692-
parseTest "0.7531531167929774" 0.7531531167929774 number
709+
assertEqual' "number 2"
710+
{ actual: runParser "0.7531531167929774" number
711+
, expected: Right 0.7531531167929774
712+
}
693713

694714
-- test from issue #115
695-
parseTest "-6.0" (-6.0) number
696-
parseTest "+6.0" (6.0) number
715+
assertEqual' "number 3"
716+
{ actual: runParser "-6.0" number
717+
, expected: Right (-6.0)
718+
}
719+
assertEqual' "number 4"
720+
{ actual: runParser "+6.0" number
721+
, expected: Right (6.0)
722+
}
723+
724+
assert' "number NaN 1" $ either (\_ -> false) Data.Number.isNaN (runParser (show nan) number)
725+
assert' "number NaN 2" $ either (\_ -> false) Data.Number.isNaN (runParser "NaN" number)
726+
727+
assertEqual' "number 5"
728+
{ actual: runParser "1..3" number
729+
, expected: Right (1.0)
730+
}
697731

698732
log "\nTESTS Operator\n"
699733
-- test from issue #161
@@ -754,16 +788,6 @@ main = do
754788
"c"
755789
"Expected \"b\""
756790

757-
-- we can't test "NaN" with `parseTest` because nan doesn't compare equal
758-
case runParser "NaN" number of
759-
Right actual -> do
760-
assert' ("expected: NaN, actual: " <> show actual) (isNaN actual)
761-
logShow actual
762-
Left err -> assert' ("error: " <> show err) false
763-
764-
-- TODO This shows the current limitations of the number parser. Ideally this parse should fail.
765-
parseTest "1..3" 1.0 $ number <* eof
766-
767791
log "\nTESTS intDecimal\n"
768792
parseTest "-300" (-300) intDecimal
769793

0 commit comments

Comments
 (0)