From 40bd305b21e4d3716975d43077b8c5d06b2add05 Mon Sep 17 00:00:00 2001 From: James Brock Date: Mon, 18 Apr 2022 23:38:35 +0900 Subject: [PATCH] Improve String.Basic.number, String.Basic.intDecimal --- CHANGELOG.md | 3 ++ src/Parsing/String/Basic.purs | 56 +++++++++++++++-------------- test/Main.purs | 66 ++++++++++++++++++++++++----------- 3 files changed, 78 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ed7316..0c160b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,7 +51,10 @@ New features: Bugfixes: +- Improve correctness and speed of `number` and `intDecimal`. (#189 by @jamesdbrock) + Other improvements: + - Drop `math` dependency; update imports (#167 by @JordanMartinez) ## [v8.4.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.4.0) - 2022-03-15 diff --git a/src/Parsing/String/Basic.purs b/src/Parsing/String/Basic.purs index adca850..c35771f 100644 --- a/src/Parsing/String/Basic.purs +++ b/src/Parsing/String/Basic.purs @@ -22,7 +22,7 @@ import Prelude import Data.Array (elem, notElem) import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper) -import Data.Either (Either(..)) +import Data.Either (Either(..), either) import Data.Int as Data.Int import Data.Maybe (Maybe(..)) import Data.Number (infinity, nan) @@ -30,11 +30,11 @@ import Data.Number as Data.Number import Data.String (CodePoint, singleton, takeWhile) import Data.String.CodePoints (codePointFromChar) import Data.String.CodeUnits as SCU -import Data.Tuple (Tuple(..), fst) +import Data.Tuple (fst) import Parsing (ParserT, fail) -import Parsing.Combinators (choice, skipMany, (), (<~?>)) -import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint) -import Parsing.String as Parser.String +import Parsing.Combinators (choice, tryRethrow, (), (<|>), (<~?>)) +import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string) +import Partial.Unsafe (unsafeCrashWith) -- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`. digit :: forall m. ParserT String m Char @@ -84,26 +84,27 @@ alphaNum = satisfyCP isAlphaNum "letter or digit" -- | * `"NaN"` -- | * `"-Infinity"` number :: forall m. ParserT String m Number --- TODO because the JavaScript parseFloat function will successfully parse --- a Number up until it doesn't understand something and then return --- the partially parsed Number, this parser will sometimes consume more --- String that it actually parses. Example "1..3" will parse as 1.0. --- So this needs improvement. number = choice - [ Parser.String.string "Infinity" *> pure infinity - , Parser.String.string "+Infinity" *> pure infinity - , Parser.String.string "-Infinity" *> pure (negate infinity) - , Parser.String.string "NaN" *> pure nan - , do - Tuple section _ <- Parser.String.match do - _ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] - skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] + [ string "Infinity" *> pure infinity + , string "+Infinity" *> pure infinity + , string "-Infinity" *> pure (negate infinity) + , string "NaN" *> pure nan + , tryRethrow $ do + section <- numberRegex -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat case Data.Number.fromString section of - Nothing -> fail $ "Could not parse Number " <> section + Nothing -> fail $ "Number.fromString failed" + -- Maybe this parser should set consumed flag if regex matches but fromString fails? + -- But currently regex allows some illegal inputs, like "." + -- Anyway this primitiv-ish parser should always backtrack on fail. Just x -> pure x - ] + ] <|> fail "Expected Number" + +numberRegex :: forall m. ParserT String m String +numberRegex = either unsafeCrashWith identity $ regex pattern mempty + where + pattern = "[+-]?[0-9]*(\\.[0-9]*)?([eE][+-]?[0-9]*(\\.[0-9]*))?" -- | Parser based on the __Data.Int.fromString__ function. -- | @@ -114,17 +115,20 @@ number = -- | * `"-3"` -- | * `"+300"` intDecimal :: forall m. ParserT String m Int -intDecimal = do - Tuple section _ <- Parser.String.match do - _ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] - skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] +intDecimal = tryRethrow do + section <- intDecimalRegex <|> fail "Expected Int" case Data.Int.fromString section of - Nothing -> fail $ "Could not parse Int " <> section + Nothing -> fail $ "Int.fromString failed" Just x -> pure x +intDecimalRegex :: forall m. ParserT String m String +intDecimalRegex = either unsafeCrashWith identity $ regex pattern mempty + where + pattern = "[+-]?[0-9]*" + -- | Helper function satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char -satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar) +satisfyCP p = satisfy (p <<< codePointFromChar) -- | Match zero or more whitespace characters satisfying -- | `Data.CodePoint.Unicode.isSpace`. Always succeeds. diff --git a/test/Main.purs b/test/Main.purs index 26611c1..516fbba 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -12,15 +12,16 @@ import Control.Lazy (fix) import Control.Monad.State (State, modify, runState) import Data.Array (some, toUnfoldable) import Data.Array as Array -import Data.Bifunctor (rmap) -import Data.Either (Either(..), hush) +import Data.Bifunctor (lmap, rmap) +import Data.Either (Either(..), either, hush) import Data.Foldable (oneOf) import Data.List (List(..), fromFoldable, (:)) import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons') import Data.List.NonEmpty as NE import Data.Maybe (Maybe(..), fromJust) import Data.NonEmpty ((:|)) -import Data.Number (infinity, isNaN) +import Data.Number (infinity, nan) +import Data.Number as Data.Number import Data.String (toUpper) import Data.String.CodePoints as SCP import Data.String.CodeUnits (fromCharArray, singleton) @@ -684,19 +685,52 @@ main = do log "\nTESTS number\n" - parseTest "Infinity" infinity number - parseTest "+Infinity" infinity number - parseTest "-Infinity" (negate infinity) number - parseErrorTestPosition number "+xxx" (mkPos 2) + -- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity" + assertEqual' "number Infinity" + { actual: runParser "Infinity" number + , expected: Right infinity + } + assertEqual' "number +Infinity" + { actual: runParser "+Infinity" number + , expected: Right infinity + } + assertEqual' "number -Infinity" + { actual: runParser "-Infinity" number + , expected: Right (negate infinity) + } + assertEqual' "number +xxx" + { actual: lmap parseErrorPosition $ runParser "+xxx" number + , expected: Left $ Position { index: 0, line: 1, column: 1 } + } - parseTest "-3.0E-1.0" (-0.3) number + assertEqual' "number 1" + { actual: runParser "-3.0E-1.0" number + , expected: Right (-0.3) + } -- test from issue #73 - parseTest "0.7531531167929774" 0.7531531167929774 number + assertEqual' "number 2" + { actual: runParser "0.7531531167929774" number + , expected: Right 0.7531531167929774 + } -- test from issue #115 - parseTest "-6.0" (-6.0) number - parseTest "+6.0" (6.0) number + assertEqual' "number 3" + { actual: runParser "-6.0" number + , expected: Right (-6.0) + } + assertEqual' "number 4" + { actual: runParser "+6.0" number + , expected: Right (6.0) + } + + assert' "number NaN 1" $ either (\_ -> false) Data.Number.isNaN (runParser (show nan) number) + assert' "number NaN 2" $ either (\_ -> false) Data.Number.isNaN (runParser "NaN" number) + + assertEqual' "number 5" + { actual: runParser "1..3" number + , expected: Right (1.0) + } log "\nTESTS Operator\n" -- test from issue #161 @@ -757,16 +791,6 @@ main = do "c" "Expected \"b\"" - -- we can't test "NaN" with `parseTest` because nan doesn't compare equal - case runParser "NaN" number of - Right actual -> do - assert' ("expected: NaN, actual: " <> show actual) (isNaN actual) - logShow actual - Left err -> assert' ("error: " <> show err) false - - -- TODO This shows the current limitations of the number parser. Ideally this parse should fail. - parseTest "1..3" 1.0 $ number <* eof - log "\nTESTS intDecimal\n" parseTest "-300" (-300) intDecimal