Skip to content

Improve String.Basic.number, String.Basic.intDecimal #189

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ New features:

Bugfixes:

- Improve correctness and speed of `number` and `intDecimal`. (#189 by @jamesdbrock)

Other improvements:

- Drop `math` dependency; update imports (#167 by @JordanMartinez)

## [v8.4.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.4.0) - 2022-03-15
Expand Down
56 changes: 30 additions & 26 deletions src/Parsing/String/Basic.purs
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@ import Prelude

import Data.Array (elem, notElem)
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper)
import Data.Either (Either(..))
import Data.Either (Either(..), either)
import Data.Int as Data.Int
import Data.Maybe (Maybe(..))
import Data.Number (infinity, nan)
import Data.Number as Data.Number
import Data.String (CodePoint, singleton, takeWhile)
import Data.String.CodePoints (codePointFromChar)
import Data.String.CodeUnits as SCU
import Data.Tuple (Tuple(..), fst)
import Data.Tuple (fst)
import Parsing (ParserT, fail)
import Parsing.Combinators (choice, skipMany, (<?>), (<~?>))
import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint)
import Parsing.String as Parser.String
import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
import Partial.Unsafe (unsafeCrashWith)

-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
digit :: forall m. ParserT String m Char
Expand Down Expand Up @@ -84,26 +84,27 @@ alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
-- | * `"NaN"`
-- | * `"-Infinity"`
number :: forall m. ParserT String m Number
-- TODO because the JavaScript parseFloat function will successfully parse
-- a Number up until it doesn't understand something and then return
-- the partially parsed Number, this parser will sometimes consume more
-- String that it actually parses. Example "1..3" will parse as 1.0.
-- So this needs improvement.
number =
choice
[ Parser.String.string "Infinity" *> pure infinity
, Parser.String.string "+Infinity" *> pure infinity
, Parser.String.string "-Infinity" *> pure (negate infinity)
, Parser.String.string "NaN" *> pure nan
, do
Tuple section _ <- Parser.String.match do
_ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
[ string "Infinity" *> pure infinity
, string "+Infinity" *> pure infinity
, string "-Infinity" *> pure (negate infinity)
, string "NaN" *> pure nan
, tryRethrow $ do
section <- numberRegex
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
case Data.Number.fromString section of
Nothing -> fail $ "Could not parse Number " <> section
Nothing -> fail $ "Number.fromString failed"
-- Maybe this parser should set consumed flag if regex matches but fromString fails?
-- But currently regex allows some illegal inputs, like "."
-- Anyway this primitiv-ish parser should always backtrack on fail.
Just x -> pure x
]
] <|> fail "Expected Number"

numberRegex :: forall m. ParserT String m String
numberRegex = either unsafeCrashWith identity $ regex pattern mempty
where
pattern = "[+-]?[0-9]*(\\.[0-9]*)?([eE][+-]?[0-9]*(\\.[0-9]*))?"

-- | Parser based on the __Data.Int.fromString__ function.
-- |
Expand All @@ -114,17 +115,20 @@ number =
-- | * `"-3"`
-- | * `"+300"`
intDecimal :: forall m. ParserT String m Int
intDecimal = do
Tuple section _ <- Parser.String.match do
_ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
intDecimal = tryRethrow do
section <- intDecimalRegex <|> fail "Expected Int"
case Data.Int.fromString section of
Nothing -> fail $ "Could not parse Int " <> section
Nothing -> fail $ "Int.fromString failed"
Just x -> pure x

intDecimalRegex :: forall m. ParserT String m String
intDecimalRegex = either unsafeCrashWith identity $ regex pattern mempty
where
pattern = "[+-]?[0-9]*"

-- | Helper function
satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)
satisfyCP p = satisfy (p <<< codePointFromChar)

-- | Match zero or more whitespace characters satisfying
-- | `Data.CodePoint.Unicode.isSpace`. Always succeeds.
Expand Down
66 changes: 45 additions & 21 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@ import Control.Lazy (fix)
import Control.Monad.State (State, modify, runState)
import Data.Array (some, toUnfoldable)
import Data.Array as Array
import Data.Bifunctor (rmap)
import Data.Either (Either(..), hush)
import Data.Bifunctor (lmap, rmap)
import Data.Either (Either(..), either, hush)
import Data.Foldable (oneOf)
import Data.List (List(..), fromFoldable, (:))
import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
import Data.List.NonEmpty as NE
import Data.Maybe (Maybe(..), fromJust)
import Data.NonEmpty ((:|))
import Data.Number (infinity, isNaN)
import Data.Number (infinity, nan)
import Data.Number as Data.Number
import Data.String (toUpper)
import Data.String.CodePoints as SCP
import Data.String.CodeUnits (fromCharArray, singleton)
Expand Down Expand Up @@ -684,19 +685,52 @@ main = do

log "\nTESTS number\n"

parseTest "Infinity" infinity number
parseTest "+Infinity" infinity number
parseTest "-Infinity" (negate infinity) number
parseErrorTestPosition number "+xxx" (mkPos 2)
-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"
assertEqual' "number Infinity"
{ actual: runParser "Infinity" number
, expected: Right infinity
}
assertEqual' "number +Infinity"
{ actual: runParser "+Infinity" number
, expected: Right infinity
}
assertEqual' "number -Infinity"
{ actual: runParser "-Infinity" number
, expected: Right (negate infinity)
}
assertEqual' "number +xxx"
{ actual: lmap parseErrorPosition $ runParser "+xxx" number
, expected: Left $ Position { index: 0, line: 1, column: 1 }
}

parseTest "-3.0E-1.0" (-0.3) number
assertEqual' "number 1"
{ actual: runParser "-3.0E-1.0" number
, expected: Right (-0.3)
}

-- test from issue #73
parseTest "0.7531531167929774" 0.7531531167929774 number
assertEqual' "number 2"
{ actual: runParser "0.7531531167929774" number
, expected: Right 0.7531531167929774
}

-- test from issue #115
parseTest "-6.0" (-6.0) number
parseTest "+6.0" (6.0) number
assertEqual' "number 3"
{ actual: runParser "-6.0" number
, expected: Right (-6.0)
}
assertEqual' "number 4"
{ actual: runParser "+6.0" number
, expected: Right (6.0)
}

assert' "number NaN 1" $ either (\_ -> false) Data.Number.isNaN (runParser (show nan) number)
assert' "number NaN 2" $ either (\_ -> false) Data.Number.isNaN (runParser "NaN" number)

assertEqual' "number 5"
{ actual: runParser "1..3" number
, expected: Right (1.0)
}

log "\nTESTS Operator\n"
-- test from issue #161
Expand Down Expand Up @@ -757,16 +791,6 @@ main = do
"c"
"Expected \"b\""

-- we can't test "NaN" with `parseTest` because nan doesn't compare equal
case runParser "NaN" number of
Right actual -> do
assert' ("expected: NaN, actual: " <> show actual) (isNaN actual)
logShow actual
Left err -> assert' ("error: " <> show err) false

-- TODO This shows the current limitations of the number parser. Ideally this parse should fail.
parseTest "1..3" 1.0 $ number <* eof

log "\nTESTS intDecimal\n"
parseTest "-300" (-300) intDecimal

Expand Down