Skip to content

takeWhile #218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Breaking changes:

New features:

- Add `Parsing.String.Basic.takeWhile`, `takeWhile1` (#218 by @jamesdbrock)

Other improvements:

## [v10.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v10.1.0) - 2022-11-10
Expand Down
3 changes: 1 addition & 2 deletions src/Parsing.purs
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,7 @@ failWithPosition message pos = throwError (ParseError message pos)
-- |
-- | lmap (parseErrorHuman input 30) $ runParser input do
-- | inContext ("Megacity list: " <> _) do
-- | cityname <- inContext ("city name: " <> _) do
-- | fst <$> match (skipMany letter)
-- | cityname <- inContext ("city name: " <> _) (takeWhile isLetter)
-- | skipSpaces
-- | population <- inContext ("population: " <> _) intDecimal
-- | pure $ Tuple cityname population
Expand Down
2 changes: 1 addition & 1 deletion src/Parsing/Combinators.purs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ infixl 4 withErrorMessage as <?>
-- |
-- |```purescript
-- |parseBang :: Parser Char
-- |parseBang = char '!' <~?> \_ -> "Expected a bang"
-- |parseBang = char '!' <~?> \_ -> "a bang"
-- |```
-- |
-- | The description thunk is only applied inside the deferred fallback
-- | branch, and its result is prefixed with `"Expected "`.
withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a
withLazyErrorMessage p msg = p <|> defer expectation
  where
  -- Fallback alternative: build the failure message from the thunk.
  expectation _ = fail $ "Expected " <> msg unit
Expand Down
20 changes: 2 additions & 18 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
-- |
-- | All of these primitive parsers will consume when they succeed.
-- |
-- | All of these primitive parsers will not consume when they
-- | fail.
-- | All of these primitive parsers will not consume and will automatically
-- | backtrack when they fail.
-- |
-- | The behavior of these primitive parsers is based on the behavior of the
-- | `Data.String` module in the __strings__ package.
Expand Down Expand Up @@ -181,22 +181,6 @@ updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp o

-- | Combinator which returns both the result of a parse and the slice of
-- | the input that was consumed while it was being parsed.
-- |
-- | Because `String`s are not `Char` arrays in PureScript, `many` and `some`
-- | on `Char` parsers need to
-- | be used with `Data.String.CodeUnits.fromCharArray` to
-- | construct a `String`.
-- |
-- | ```
-- | fromCharArray <$> Data.Array.many (char 'x')
-- | ```
-- |
-- | It’s more efficient to achieve the same result by using this `match` combinator
-- | instead of `fromCharArray`.
-- |
-- | ```
-- | fst <$> match (Combinators.skipMany (char 'x'))
-- | ```
match :: forall m a. ParserT String m a -> ParserT String m (Tuple String a)
match p = do
ParseState input1 _ _ <- getParserT
Expand Down
86 changes: 75 additions & 11 deletions src/Parsing/String/Basic.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
-- |
-- | #### unicode dependency
-- |
-- | Some of the parsers in this module depend on the __unicode__ package.
-- | Some of the parsers in this module depend on the
-- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
-- | package.
-- | The __unicode__ package is large; about half a megabyte unminified.
-- | If code which depends on __parsing__ is “tree-shaken” or
-- | “dead-code-eliminated,” then
Expand All @@ -24,6 +26,8 @@ module Parsing.String.Basic
, alphaNum
, intDecimal
, number
, takeWhile
, takeWhile1
, whiteSpace
, skipSpaces
, oneOf
Expand All @@ -41,13 +45,13 @@ import Data.Int as Data.Int
import Data.Maybe (Maybe(..))
import Data.Number (infinity, nan)
import Data.Number as Data.Number
import Data.String (CodePoint, singleton, takeWhile)
import Data.String (CodePoint, singleton)
import Data.String as String
import Data.String.CodePoints (codePointFromChar)
import Data.String.CodeUnits as SCU
import Data.Tuple (fst)
import Parsing (ParserT, fail)
import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
import Parsing.String (consumeWith, regex, satisfy, satisfyCodePoint, string)
import Partial.Unsafe (unsafeCrashWith)

-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
Expand Down Expand Up @@ -112,7 +116,7 @@ number =
section <- numberRegex
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
case Data.Number.fromString section of
Nothing -> fail $ "Number.fromString failed"
Nothing -> fail "Expected Number"
Just x -> pure x
] <|> fail "Expected Number"

Expand All @@ -134,7 +138,7 @@ intDecimal :: forall m. ParserT String m Int
intDecimal = tryRethrow do
section <- intDecimalRegex <|> fail "Expected Int"
case Data.Int.fromString section of
Nothing -> fail $ "Int.fromString failed"
Nothing -> fail "Expected Int"
Just x -> pure x

-- Non-exported regex is compiled at startup time.
Expand All @@ -153,17 +157,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
-- | Always succeeds. Will consume only when matched whitespace string
-- | is non-empty.
whiteSpace :: forall m. ParserT String m String
whiteSpace = fst <$> match skipSpaces
whiteSpace = takeWhile isSpace

-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
-- | and throw them away.
-- |
-- | Always succeeds. Will only consume when some characters are skipped.
skipSpaces :: forall m. ParserT String m Unit
skipSpaces = consumeWith \input -> do
let consumed = takeWhile isSpace input
let remainder = SCU.drop (SCU.length consumed) input
Right { value: unit, consumed, remainder }
skipSpaces = void whiteSpace

-- | Match one of the BMP `Char`s in the array.
oneOf :: forall m. Array Char -> ParserT String m Char
Expand All @@ -180,3 +181,66 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
-- | Match any single Unicode code point which is not an element of the
-- | given array.
-- |
-- | The failure description is `"none of "` followed by the shown array of
-- | the excluded code points, each rendered as a one-character `String`.
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints ss = satisfyCodePoint isAllowed <~?> describe
  where
  -- A code point is accepted only when it is absent from the exclusion array.
  isAllowed cp = notElem cp ss
  -- Lazily built description handed to `<~?>`.
  describe _ = "none of " <> show (singleton <$> ss)

-- | Consume the longest prefix of the input whose characters all satisfy
-- | the predicate, and return that prefix as a `String`.
-- |
-- | The function handed to `consumeWith` always answers `Right`, so when the
-- | first character already fails the predicate the result is the empty
-- | `String` rather than an error.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile Data.CodePoint.Unicode.isLetter
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
-- |
-- | Prefer `takeWhile isLetter` over
-- | `fromCharArray <$> Data.Array.many letter`.
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile predicate = consumeWith splitPrefix
  where
  splitPrefix input =
    let
      prefix = String.takeWhile predicate input
      -- `prefix` is a leading slice of `input`, so dropping its length in
      -- code units leaves exactly the part that was not matched.
      unconsumed = SCU.drop (SCU.length prefix) input
    in
      Right { value: prefix, consumed: prefix, remainder: unconsumed }

-- | Take the longest `String` for which the characters satisfy the
-- | predicate. Require at least 1 character.
-- |
-- | When the predicate rejects the very first character, the parser fails
-- | with the generic message `"Expected character satisfying predicate"`.
-- | Attach a more specific expectation with `<?>`, as in the example below.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile1 Data.CodePoint.Unicode.isLetter <?> "a letter"
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile1 predicate =
  consumeWith \s ->
    let
      value = String.takeWhile predicate s
      -- Length in code units; used both for the non-empty check and for
      -- splitting off the remainder, so it is computed only once.
      len = SCU.length value
    in
      if len > 0 then
        Right
          { consumed: value
          , remainder: SCU.drop len s
          , value
          }
      else
        Left "Expected character satisfying predicate"
19 changes: 15 additions & 4 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import Control.Monad.State (State, lift, modify, runState)
import Data.Array (some, toUnfoldable)
import Data.Array as Array
import Data.Bifunctor (lmap, rmap)
import Data.CodePoint.Unicode as CodePoint.Unicode
import Data.Either (Either(..), either, fromLeft, hush)
import Data.Foldable (oneOf)
import Data.List (List(..), fromFoldable, (:))
Expand All @@ -36,12 +37,11 @@ import Effect.Unsafe (unsafePerformEffect)
import Node.Process (lookupEnv)
import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorMessage, parseErrorPosition, position, region, runParser)
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>), (<~?>))
import Parsing.Combinators as Combinators
import Parsing.Combinators.Array as Combinators.Array
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, parseErrorHuman, regex, rest, satisfy, string, takeN)
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, whiteSpace)
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, takeWhile, takeWhile1, whiteSpace)
import Parsing.String.Basic as String.Basic
import Parsing.String.Replace (breakCap, replace, replaceT, splitCap, splitCapT)
import Parsing.Token (TokenParser, makeTokenParser, token, when)
Expand Down Expand Up @@ -712,8 +712,7 @@ main = do
assertEqual' "region 1"
{ actual: runParser input do
inContext ("Megacity list: " <> _) do
cityname <- inContext ("city name: " <> _) do
fst <$> match (Combinators.skipMany letter)
cityname <- inContext ("city name: " <> _) (takeWhile CodePoint.Unicode.isLetter)
skipSpaces
population <- inContext ("population: " <> _) intDecimal
pure $ Tuple cityname population
Expand All @@ -725,6 +724,18 @@ main = do
, expected: Left $ ParseError "Expected 'c'" (Position { index: 1, column: 2, line: 1 })
}

assertEqual' "takeWhile 1"
{ actual: runParser "Tackling the Awkward" do
takeWhile CodePoint.Unicode.isLetter <* string " the Awkward"
, expected: Right "Tackling"
}

assertEqual' "takeWhile1 1"
{ actual: runParser "3ackling the Awkward" do
takeWhile1 CodePoint.Unicode.isLetter <* string " the Awkward" <?> "letter"
, expected: Left $ ParseError "Expected letter" (Position { index: 0, line: 1, column: 1 })
}

log "\nTESTS number\n"

-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"
Expand Down