Improve String.Basic.number, String.Basic.intDecimal

jamesdbrock · jamesdbrock · commit 7ef3fb53b615 · 2022-04-24T00:30:49.000+09:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -51,7 +51,10 @@ New features:
 
 Bugfixes:
 
+- Improve correctness and speed of `number` and `intDecimal`. (#189 by @jamesdbrock)
+
 Other improvements:
+
 - Drop `math` dependency; update imports (#167 by @JordanMartinez)
 
 ## [v8.4.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.4.0) - 2022-03-15
diff --git a/src/Parsing/String/Basic.purs b/src/Parsing/String/Basic.purs
@@ -22,19 +22,19 @@ import Prelude
 
 import Data.Array (elem, notElem)
 import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper)
-import Data.Either (Either(..))
+import Data.Either (Either(..), either)
 import Data.Int as Data.Int
 import Data.Maybe (Maybe(..))
 import Data.Number (infinity, nan)
 import Data.Number as Data.Number
 import Data.String (CodePoint, singleton, takeWhile)
 import Data.String.CodePoints (codePointFromChar)
 import Data.String.CodeUnits as SCU
-import Data.Tuple (Tuple(..), fst)
+import Data.Tuple (fst)
 import Parsing (ParserT, fail)
-import Parsing.Combinators (choice, skipMany, (<?>), (<~?>))
-import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint)
-import Parsing.String as Parser.String
+import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
+import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
+import Partial.Unsafe (unsafeCrashWith)
 
 -- | Parse a digit.  Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
 digit :: forall m. ParserT String m Char
@@ -84,26 +84,27 @@ alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
 -- | * `"NaN"`
 -- | * `"-Infinity"`
 number :: forall m. ParserT String m Number
--- TODO because the JavaScript parseFloat function will successfully parse
--- a Number up until it doesn't understand something and then return
--- the partially parsed Number, this parser will sometimes consume more
--- String that it actually parses. Example "1..3" will parse as 1.0.
--- So this needs improvement.
 number =
   choice
-    [ Parser.String.string "Infinity" *> pure infinity
-    , Parser.String.string "+Infinity" *> pure infinity
-    , Parser.String.string "-Infinity" *> pure (negate infinity)
-    , Parser.String.string "NaN" *> pure nan
-    , do
-        Tuple section _ <- Parser.String.match do
-          _ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
-          skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
+    [ string "Infinity" *> pure infinity
+    , string "+Infinity" *> pure infinity
+    , string "-Infinity" *> pure (negate infinity)
+    , string "NaN" *> pure nan
+    , tryRethrow $ do
+        section <- numberRegex
         -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
         case Data.Number.fromString section of
-          Nothing -> fail $ "Could not parse Number " <> section
+          Nothing -> fail $ "Number.fromString failed"
+          -- Maybe this parser should set consumed flag if regex matches but fromString fails?
+          -- But currently regex allows some illegal inputs, like "."
+          -- Anyway this primitiv-ish parser should always backtrack on fail.
           Just x -> pure x
-    ]
+    ] <|> fail "Expected Number"
+
+numberRegex :: forall m. ParserT String m String
+numberRegex = either unsafeCrashWith identity $ regex pattern mempty
+  where
+  pattern = "[+-]?[0-9]*(\\.[0-9]*)?([eE][+-]?[0-9]*(\\.[0-9]*))?"
 
 -- | Parser based on the __Data.Int.fromString__ function.
 -- |
@@ -114,17 +115,20 @@ number =
 -- | * `"-3"`
 -- | * `"+300"`
 intDecimal :: forall m. ParserT String m Int
-intDecimal = do
-  Tuple section _ <- Parser.String.match do
-    _ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
-    skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
+intDecimal = tryRethrow do
+  section <- intDecimalRegex <|> fail "Expected Int"
   case Data.Int.fromString section of
-    Nothing -> fail $ "Could not parse Int " <> section
+    Nothing -> fail $ "Int.fromString failed"
     Just x -> pure x
 
+intDecimalRegex :: forall m. ParserT String m String
+intDecimalRegex = either unsafeCrashWith identity $ regex pattern mempty
+  where
+  pattern = "[+-]?[0-9]*"
+
 -- | Helper function
 satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
-satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)
+satisfyCP p = satisfy (p <<< codePointFromChar)
 
 -- | Match zero or more whitespace characters satisfying
 -- | `Data.CodePoint.Unicode.isSpace`. Always succeeds.
diff --git a/test/Main.purs b/test/Main.purs
@@ -12,15 +12,16 @@ import Control.Lazy (fix)
 import Control.Monad.State (State, modify, runState)
 import Data.Array (some, toUnfoldable)
 import Data.Array as Array
-import Data.Bifunctor (rmap)
-import Data.Either (Either(..), hush)
+import Data.Bifunctor (lmap, rmap)
+import Data.Either (Either(..), either, hush)
 import Data.Foldable (oneOf)
 import Data.List (List(..), fromFoldable, (:))
 import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
 import Data.List.NonEmpty as NE
 import Data.Maybe (Maybe(..), fromJust)
 import Data.NonEmpty ((:|))
-import Data.Number (infinity, isNaN)
+import Data.Number (infinity, nan)
+import Data.Number as Data.Number
 import Data.String (toUpper)
 import Data.String.CodePoints as SCP
 import Data.String.CodeUnits (fromCharArray, singleton)
@@ -681,19 +682,52 @@ main = do
 
   log "\nTESTS number\n"
 
-  parseTest "Infinity" infinity number
-  parseTest "+Infinity" infinity number
-  parseTest "-Infinity" (negate infinity) number
-  parseErrorTestPosition number "+xxx" (mkPos 2)
+  -- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"
+  assertEqual' "number Infinity"
+    { actual: runParser "Infinity" number
+    , expected: Right infinity
+    }
+  assertEqual' "number +Infinity"
+    { actual: runParser "+Infinity" number
+    , expected: Right infinity
+    }
+  assertEqual' "number -Infinity"
+    { actual: runParser "-Infinity" number
+    , expected: Right (negate infinity)
+    }
+  assertEqual' "number +xxx"
+    { actual: lmap parseErrorPosition $ runParser "+xxx" number
+    , expected: Left $ Position { index: 0, line: 1, column: 1 }
+    }
 
-  parseTest "-3.0E-1.0" (-0.3) number
+  assertEqual' "number 1"
+    { actual: runParser "-3.0E-1.0" number
+    , expected: Right (-0.3)
+    }
 
   -- test from issue #73
-  parseTest "0.7531531167929774" 0.7531531167929774 number
+  assertEqual' "number 2"
+    { actual: runParser "0.7531531167929774" number
+    , expected: Right 0.7531531167929774
+    }
 
   -- test from issue #115
-  parseTest "-6.0" (-6.0) number
-  parseTest "+6.0" (6.0) number
+  assertEqual' "number 3"
+    { actual: runParser "-6.0" number
+    , expected: Right (-6.0)
+    }
+  assertEqual' "number 4"
+    { actual: runParser "+6.0" number
+    , expected: Right (6.0)
+    }
+
+  assert' "number NaN 1" $ either (\_ -> false) Data.Number.isNaN (runParser (show nan) number)
+  assert' "number NaN 2" $ either (\_ -> false) Data.Number.isNaN (runParser "NaN" number)
+
+  assertEqual' "number 5"
+    { actual: runParser "1..3" number
+    , expected: Right (1.0)
+    }
 
   log "\nTESTS Operator\n"
   -- test from issue #161
@@ -754,16 +788,6 @@ main = do
     "c"
     "Expected \"b\""
 
-  -- we can't test "NaN" with `parseTest` because nan doesn't compare equal
-  case runParser "NaN" number of
-    Right actual -> do
-      assert' ("expected: NaN, actual: " <> show actual) (isNaN actual)
-      logShow actual
-    Left err -> assert' ("error: " <> show err) false
-
-  -- TODO This shows the current limitations of the number parser. Ideally this parse should fail.
-  parseTest "1..3" 1.0 $ number <* eof
-
   log "\nTESTS intDecimal\n"
   parseTest "-300" (-300) intDecimal