Skip to content

Move parsers from Parsing.String to Parsing.String.Basic #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ Breaking changes:
- Replace the `regex` parser. (#170 by @jamesdbrock)
- Reorganize Combinators for #154 (#182 by @jamesdbrock)
- Add the `index` field to `Position`. (#171 by @jamesdbrock)
- Move the parsers
  * `whiteSpace`
  * `skipSpaces`
  * `oneOf`
  * `oneOfCodePoints`
  * `noneOf`
  * `noneOfCodePoints`
  from `Parsing.String` to `Parsing.String.Basic`. (#183 by @jamesdbrock)

New features:

Expand Down
3 changes: 2 additions & 1 deletion bench/Json/Parsing.purs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ import Data.String.Regex.Flags (noFlags)
import Data.Tuple (Tuple(..))
import Parsing (ParserT, fail)
import Parsing.Combinators (between, choice, sepBy, try)
import Parsing.String (regex, skipSpaces, string)
import Parsing.String (regex, string)
import Parsing.String.Basic (skipSpaces)
import Partial.Unsafe (unsafeCrashWith)

json :: forall m. Monad m => ParserT String m Json
Expand Down
3 changes: 2 additions & 1 deletion src/Parsing/Indent.purs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ import Data.Maybe (Maybe(..))
import Parsing (ParserT, fail, position)
import Parsing.Combinators (option, optionMaybe)
import Parsing.Pos (Position(..), initialPos)
import Parsing.String (oneOf, string)
import Parsing.String (string)
import Parsing.String.Basic (oneOf)

-- | Indentation sensitive parser type. Usually `m` will
-- | be `Identity` as with any `ParserT`
Expand Down
4 changes: 2 additions & 2 deletions src/Parsing/Language.purs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import Prelude

import Control.Alt ((<|>))
import Parsing (ParserT)
import Parsing.String (char, oneOf)
import Parsing.String.Basic (alphaNum, letter)
import Parsing.String (char)
import Parsing.String.Basic (alphaNum, letter, oneOf)
import Parsing.Token (GenLanguageDef(..), LanguageDef, TokenParser, makeTokenParser, unGenLanguageDef)

-----------------------------------------------------------
Expand Down
50 changes: 7 additions & 43 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,15 @@
-- | - carriage-return (`0x0D`)
-- | - carriage-return-newline (`0x0D 0x0A`)
module Parsing.String
( string
, eof
, rest
( char
, string
, anyChar
, anyCodePoint
, satisfy
, satisfyCodePoint
, char
, takeN
, whiteSpace
, skipSpaces
, oneOf
, oneOfCodePoints
, noneOf
, noneOfCodePoints
, rest
, eof
, match
, regex
, consumeWith
Expand All @@ -57,22 +51,20 @@ module Parsing.String
import Prelude hiding (between)

import Control.Monad.State (get)
import Data.Array (elem, notElem)
import Data.Array.NonEmpty as NonEmptyArray
import Data.CodePoint.Unicode (isSpace)
import Data.Either (Either(..))
import Data.Enum (fromEnum, toEnum)
import Data.Function.Uncurried (mkFn5, runFn2)
import Data.Maybe (Maybe(..), fromJust)
import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton, splitAt, stripPrefix, takeWhile, uncons)
import Data.String (CodePoint, Pattern(..), codePointAt, length, null, splitAt, stripPrefix, uncons)
import Data.String as String
import Data.String.CodeUnits as SCU
import Data.String.Regex as Regex
import Data.String.Regex.Flags (RegexFlags)
import Data.Tuple (Tuple(..), fst)
import Data.Tuple (Tuple(..))
import Partial.Unsafe (unsafePartial)
import Parsing (ParseError(..), ParseState(..), ParserT(..))
import Parsing.Combinators ((<?>), (<~?>))
import Parsing.Combinators ((<?>))
import Parsing.Pos (Position(..))

-- | Match “end-of-file,” the end of the input stream.
Expand Down Expand Up @@ -159,34 +151,6 @@ takeN n = consumeWith \input -> do
else
Left $ "Could not take " <> show n <> " characters"

-- | Consume a (possibly empty) run of whitespace characters, as classified
-- | by `Data.CodePoint.Unicode.isSpace`, and return the consumed text.
-- | Always succeeds.
whiteSpace :: forall m. ParserT String m String
whiteSpace = map fst (match skipSpaces)

-- | Consume leading whitespace characters and discard them.
-- | Always succeeds, even when there is no whitespace to skip.
skipSpaces :: forall m. ParserT String m Unit
skipSpaces = consumeWith \input ->
  let
    -- Longest prefix of the input made only of `isSpace` code points.
    spaces = takeWhile isSpace input
  in
    Right
      { value: unit
      , consumed: spaces
      -- Drop by code-unit length so the remainder lines up with `spaces`.
      , remainder: SCU.drop (SCU.length spaces) input
      }

-- | Parse a single BMP `Char` that is a member of the given array.
-- | On failure the error message lists the accepted characters.
oneOf :: forall m. Array Char -> ParserT String m Char
oneOf allowed = satisfy (\c -> elem c allowed) <~?> \_ -> "one of " <> show allowed

-- | Parse a single BMP `Char` that is not a member of the given array.
-- | On failure the error message lists the rejected characters.
noneOf :: forall m. Array Char -> ParserT String m Char
noneOf excluded = satisfy (\c -> notElem c excluded) <~?> \_ -> "none of " <> show excluded

-- | Parse a single Unicode character (`CodePoint`) that is a member of the
-- | given array. On failure the error message lists the accepted characters,
-- | each rendered as a one-character `String`.
oneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
oneOfCodePoints allowed =
  satisfyCodePoint (\cp -> elem cp allowed) <~?> \_ -> "one of " <> show (map singleton allowed)

-- | Parse a single Unicode character (`CodePoint`) that is not a member of
-- | the given array. On failure the error message lists the rejected
-- | characters, each rendered as a one-character `String`.
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints excluded =
  satisfyCodePoint (\cp -> notElem cp excluded) <~?> \_ -> "none of " <> show (map singleton excluded)

-- | Updates a `Position` by adding the columns and lines in `String`.
updatePosString :: Position -> String -> String -> Position
updatePosString pos before after = case uncons before of
Expand Down
54 changes: 45 additions & 9 deletions src/Parsing/String/Basic.purs
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,30 @@ module Parsing.String.Basic
, alphaNum
, intDecimal
, number
, module Parsing.String
, whiteSpace
, skipSpaces
, oneOf
, oneOfCodePoints
, noneOf
, noneOfCodePoints
) where

import Prelude

import Data.Array (elem, notElem)
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isLower, isOctDigit, isSpace, isUpper)
import Data.Either (Either(..))
import Data.Int as Data.Int
import Data.Maybe (Maybe(..))
import Data.Number (infinity, nan)
import Data.Number as Data.Number
import Data.String (CodePoint)
import Data.String (CodePoint, singleton, takeWhile)
import Data.String.CodePoints (codePointFromChar)
import Data.Tuple (Tuple(..))
import Data.String.CodeUnits as SCU
import Data.Tuple (Tuple(..), fst)
import Parsing (ParserT, fail)
import Parsing.Combinators (choice, skipMany, (<?>))
import Parsing.String (noneOf, noneOfCodePoints, oneOf, oneOfCodePoints, skipSpaces, whiteSpace)
import Parsing.Combinators (choice, skipMany, (<?>), (<~?>))
import Parsing.String (consumeWith, match, satisfy, satisfyCodePoint)
import Parsing.String as Parser.String

-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
Expand Down Expand Up @@ -94,8 +102,8 @@ number =
, Parser.String.string "NaN" *> pure nan
, do
Tuple section _ <- Parser.String.match do
_ <- Parser.String.oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ Parser.String.oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
_ <- oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
case Data.Number.fromString section of
Nothing -> fail $ "Could not parse Number " <> section
Expand All @@ -113,12 +121,40 @@ number =
intDecimal :: forall m. ParserT String m Int
intDecimal = do
-- `match` yields the text consumed by the inner parser as the first
-- component of the `Tuple`; the inner parser's own result is ignored.
-- The inner parser accepts one sign-or-digit character followed by any
-- number of digit characters.
Tuple section _ <- Parser.String.match do
_ <- Parser.String.oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ Parser.String.oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
_ <- oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
skipMany $ oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
-- Convert the captured text; if `Data.Int.fromString` rejects it, fail the
-- parser with a message that includes the offending text.
case Data.Int.fromString section of
Nothing -> fail $ "Could not parse Int " <> section
Just x -> pure x

-- | Helper: build a single-`Char` parser from a `CodePoint` predicate.
-- | Each candidate `Char` is converted with `codePointFromChar` before the
-- | predicate is applied to it.
satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
satisfyCP isWanted = Parser.String.satisfy \c -> isWanted (codePointFromChar c)

-- | Consume a (possibly empty) run of whitespace characters, as classified
-- | by `Data.CodePoint.Unicode.isSpace`, and return the consumed text.
-- | Always succeeds.
whiteSpace :: forall m. ParserT String m String
whiteSpace = map fst (match skipSpaces)

-- | Consume leading whitespace characters and discard them.
-- | Always succeeds, even when there is no whitespace to skip.
skipSpaces :: forall m. ParserT String m Unit
skipSpaces = consumeWith \input ->
  let
    -- Longest prefix of the input made only of `isSpace` code points.
    spaces = takeWhile isSpace input
  in
    Right
      { value: unit
      , consumed: spaces
      -- Drop by code-unit length so the remainder lines up with `spaces`.
      , remainder: SCU.drop (SCU.length spaces) input
      }

-- | Parse a single BMP `Char` that is a member of the given array.
-- | On failure the error message lists the accepted characters.
oneOf :: forall m. Array Char -> ParserT String m Char
oneOf allowed = satisfy (\c -> elem c allowed) <~?> \_ -> "one of " <> show allowed

-- | Parse a single BMP `Char` that is not a member of the given array.
-- | On failure the error message lists the rejected characters.
noneOf :: forall m. Array Char -> ParserT String m Char
noneOf excluded = satisfy (\c -> notElem c excluded) <~?> \_ -> "none of " <> show excluded

-- | Parse a single Unicode character (`CodePoint`) that is a member of the
-- | given array. On failure the error message lists the accepted characters,
-- | each rendered as a one-character `String`.
oneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
oneOfCodePoints allowed =
  satisfyCodePoint (\cp -> elem cp allowed) <~?> \_ -> "one of " <> show (map singleton allowed)

-- | Parse a single Unicode character (`CodePoint`) that is not a member of
-- | the given array. On failure the error message lists the rejected
-- | characters, each rendered as a one-character `String`.
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints excluded =
  satisfyCodePoint (\cp -> notElem cp excluded) <~?> \_ -> "none of " <> show (map singleton excluded)
4 changes: 2 additions & 2 deletions src/Parsing/Token.purs
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ import Data.Tuple (Tuple(..))
import Parsing (ParseState(..), ParserT, consume, fail)
import Parsing.Combinators (between, choice, notFollowedBy, option, sepBy, sepBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>))
import Parsing.Pos (Position)
import Parsing.String (char, noneOf, oneOf, satisfy, satisfyCodePoint, string)
import Parsing.String (char, satisfy, satisfyCodePoint, string)
import Parsing.String.Basic (alphaNum, digit, hexDigit, letter, noneOf, octDigit, oneOf, space, upper)
import Parsing.String.Basic as Basic
import Parsing.String.Basic (digit, hexDigit, octDigit, upper, space, letter, alphaNum)

-- | A parser which returns the first token in the stream.
token :: forall m a. (a -> Position) -> ParserT (List a) m a
Expand Down
4 changes: 2 additions & 2 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ import Parsing.Combinators (between, chainl, chainl1, chainr, chainr1, choice, e
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
import Parsing.Pos (Position(..), initialPos)
import Parsing.String (anyChar, anyCodePoint, char, eof, regex, noneOfCodePoints, oneOfCodePoints, rest, satisfy, string, takeN, whiteSpace)
import Parsing.String.Basic (intDecimal, number, letter)
import Parsing.String (anyChar, anyCodePoint, char, eof, regex, rest, satisfy, string, takeN)
import Parsing.String.Basic (intDecimal, number, letter, noneOfCodePoints, oneOfCodePoints, whiteSpace)
import Parsing.Token (TokenParser, makeTokenParser, match, token, when)
import Parsing.Token as Parser.Token
import Partial.Unsafe (unsafePartial)
Expand Down