Skip to content

anyTill String combinator #186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Breaking changes:

New features:

- Add the `anyTill` primitive `String` combinator. (#186 by @jamesdbrock)

Bugfixes:

Other improvements:
Expand Down
1 change: 1 addition & 0 deletions packages.dhall
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
let upstream =
https://raw.githubusercontent.com/purescript/package-sets/prepare-0.15/src/packages.dhall
sha256:b1c6d06132b7cbf1e93b1e5343044fba1604b50bfbe02d8f80a3002e71569c59

in upstream
6 changes: 3 additions & 3 deletions src/Parsing/Combinators.purs
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,16 @@ try (ParserT k1) = ParserT

-- | If the parser fails then backtrack the input stream to the unconsumed state.
-- |
-- | Like `try`, but will relocate the error to the `try` point.
-- | Like `try`, but will reposition the error to the `try` point.
-- |
-- | ```
-- | >>> runParser "ac" (try (char 'a' *> char 'b'))
-- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 2 }))
-- | Left (ParseError "Expected 'b'" (Position { index: 1, line: 1, column: 2 }))
-- | ```
-- |
-- | ```
-- | >>> runParser "ac" (tryRethrow (char 'a' *> char 'b'))
-- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 1 }))
-- | Left (ParseError "Expected 'b'" (Position { index: 0, line: 1, column: 1 }))
-- | ```
tryRethrow :: forall m s a. ParserT s m a -> ParserT s m a
tryRethrow (ParserT k1) = ParserT
Expand Down
40 changes: 37 additions & 3 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,13 @@ module Parsing.String
, eof
, match
, regex
, anyTill
, consumeWith
) where

import Prelude hiding (between)

import Control.Monad.Rec.Class (Step(..), tailRecM)
import Control.Monad.State (get)
import Data.Array.NonEmpty as NonEmptyArray
import Data.Either (Either(..))
Expand All @@ -62,9 +64,9 @@ import Data.String.CodeUnits as SCU
import Data.String.Regex as Regex
import Data.String.Regex.Flags (RegexFlags)
import Data.Tuple (Tuple(..))
import Partial.Unsafe (unsafePartial)
import Parsing (ParseError(..), ParseState(..), ParserT(..), Position(..))
import Parsing.Combinators ((<?>))
import Parsing.Combinators (alt, try, (<?>))
import Partial.Unsafe (unsafePartial)

-- | Match “end-of-file,” the end of the input stream.
eof :: forall m. ParserT String m Unit
Expand Down Expand Up @@ -263,11 +265,13 @@ regex pattern flags =
-- | Consume a portion of the input string while yielding a value.
-- |
-- | Takes a consumption function which takes the remaining input `String`
-- | as its argument and returns three fields:
-- | as its argument and returns either an error message, or three fields:
-- |
-- | * `value` is the value to return.
-- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
-- | * `remainder` is the new remaining input `String`.
-- |
-- | This function is used internally to construct primitive `String` parsers.
consumeWith
:: forall m a
. (String -> Either String { value :: a, consumed :: String, remainder :: String })
Expand All @@ -280,3 +284,33 @@ consumeWith f = ParserT
Right { value, consumed, remainder } ->
runFn2 done (ParseState remainder (updatePosString pos consumed remainder) true) value
)

-- | Scan forward through the input `String` until the phrase parser
-- | succeeds, trying it at the current position first and then again after
-- | each skipped code point.
-- |
-- | On success, yields a `Tuple` of the input section which was skipped
-- | before the match and the value parsed by the phrase.
-- | Fails if the phrase does not parse anywhere in the remaining input. To
-- | backtrack the input stream on failure, combine with `tryRethrow`.
-- |
-- | This combinator is equivalent to `manyTill_ anyCodePoint`, but it will be
-- | faster because the skipped section is returned as a slice of the input
-- | `String` instead of a `List CodePoint`.
anyTill
  :: forall m a
   . Monad m
  => ParserT String m a
  -> ParserT String m (Tuple String a)
anyTill p = do
  ParseState inputAtStart _ _ <- get
  Tuple inputAtMatch result <- tailRecM search unit
  -- The skipped prefix is whatever lies between the two remaining-input
  -- snapshots, measured in code units.
  let skippedLength = SCU.length inputAtStart - SCU.length inputAtMatch
  pure (Tuple (SCU.take skippedLength inputAtStart) result)
  where
  -- One search step: attempt the phrase here, otherwise skip a single code
  -- point and go around again.
  search _ = alt attemptHere skipOne

  -- Snapshot the remaining input before the phrase runs, so the caller can
  -- work out how much was skipped. `try` lets `alt` fall through to
  -- `skipOne` even when the phrase fails after consuming input.
  attemptHere = do
    ParseState remaining _ _ <- get
    result <- try p
    pure (Done (Tuple remaining result))

  skipOne = do
    _ <- anyCodePoint
    pure (Loop unit)
9 changes: 8 additions & 1 deletion test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import Parsing.Combinators (between, chainl, chainl1, chainr, chainr1, choice, e
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
import Parsing.Pos (Position(..), initialPos)
import Parsing.String (anyChar, anyCodePoint, char, eof, regex, rest, satisfy, string, takeN)
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, regex, rest, satisfy, string, takeN)
import Parsing.String.Basic (intDecimal, number, letter, noneOfCodePoints, oneOfCodePoints, whiteSpace)
import Parsing.Token (TokenParser, makeTokenParser, match, token, when)
import Parsing.Token as Parser.Token
Expand Down Expand Up @@ -827,3 +827,10 @@ main = do
let messageExpected = "context1 context2 Expected \"b\""
assert' ("expected message: " <> messageExpected <> ", message: " <> message) (message == messageExpected)
logShow messageExpected

log "\nTESTS anyTill\n"
parseTest "𝅘𝅥𝅮𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" (Tuple "" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮")
parseTest "𝅘𝅥𝅘𝅥𝅘𝅥𝅮𝅘𝅥𝅘𝅥" (Tuple "𝅘𝅥𝅘𝅥" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮")
parseTest "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥𝅮" (Tuple "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮") <* eof
parseErrorTestPosition (anyTill (string "𝅘𝅥𝅮")) "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" (Position { index: 4, line: 1, column: 5 })