diff --git a/CHANGELOG.md b/CHANGELOG.md index 029c9e4..b176f6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,8 @@ Breaking changes: New features: +- Add the `anyTill` primitive `String` combinator. (#186 by @jamesdbrock) + Bugfixes: Other improvements: diff --git a/packages.dhall b/packages.dhall index 582d6d3..6d6a1f6 100644 --- a/packages.dhall +++ b/packages.dhall @@ -1,4 +1,5 @@ let upstream = https://raw.githubusercontent.com/purescript/package-sets/prepare-0.15/src/packages.dhall + sha256:b1c6d06132b7cbf1e93b1e5343044fba1604b50bfbe02d8f80a3002e71569c59 in upstream diff --git a/src/Parsing/Combinators.purs b/src/Parsing/Combinators.purs index a2f6602..fed2532 100644 --- a/src/Parsing/Combinators.purs +++ b/src/Parsing/Combinators.purs @@ -173,16 +173,16 @@ try (ParserT k1) = ParserT -- | If the parser fails then backtrack the input stream to the unconsumed state. -- | --- | Like `try`, but will relocate the error to the `try` point. +-- | Like `try`, but will reposition the error to the `try` point. -- | -- | ``` -- | >>> runParser "ac" (try (char 'a' *> char 'b')) --- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 2 })) +-- | Left (ParseError "Expected 'b'" (Position { index: 1, line: 1, column: 2 })) -- | ``` -- | -- | ``` -- | >>> runParser "ac" (tryRethrow (char 'a' *> char 'b')) --- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 1 })) +-- | Left (ParseError "Expected 'b'" (Position { index: 0, line: 1, column: 1 })) -- | ``` tryRethrow :: forall m s a. ParserT s m a -> ParserT s m a tryRethrow (ParserT k1) = ParserT diff --git a/src/Parsing/String.purs b/src/Parsing/String.purs index e98a574..94b380c 100644 --- a/src/Parsing/String.purs +++ b/src/Parsing/String.purs @@ -45,11 +45,13 @@ module Parsing.String , eof , match , regex + , anyTill , consumeWith ) where import Prelude hiding (between) +import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Monad.State (get) import Data.Array.NonEmpty as NonEmptyArray import Data.Either (Either(..)) @@ -62,9 +64,9 @@ import Data.String.CodeUnits as SCU import Data.String.Regex as Regex import Data.String.Regex.Flags (RegexFlags) import Data.Tuple (Tuple(..)) -import Partial.Unsafe (unsafePartial) import Parsing (ParseError(..), ParseState(..), ParserT(..), Position(..)) -import Parsing.Combinators (()) +import Parsing.Combinators (alt, try, ()) +import Partial.Unsafe (unsafePartial) -- | Match β€œend-of-file,” the end of the input stream. eof :: forall m. ParserT String m Unit @@ -263,11 +265,13 @@ regex pattern flags = -- | Consume a portion of the input string while yielding a value. -- | -- | Takes a consumption function which takes the remaining input `String` --- | as its argument and returns three fields: +-- | as its argument and returns either an error message, or three fields: -- | -- | * `value` is the value to return. -- | * `consumed` is the input `String` that was consumed. It is used to update the parser position. -- | * `remainder` is the new remaining input `String`. +-- | +-- | This function is used internally to construct primitive `String` parsers. consumeWith :: forall m a . (String -> Either String { value :: a, consumed :: String, remainder :: String }) @@ -280,3 +284,33 @@ consumeWith f = ParserT Right { value, consumed, remainder } -> runFn2 done (ParseState remainder (updatePosString pos consumed remainder) true) value ) + +-- | Combinator which finds the first position in the input `String` where the +-- | phrase can parse. Returns both the +-- | parsed result and the unparsable input section searched before the parse. +-- | Will fail if no section of the input is parseable. To backtrack the input +-- | stream on failure, combine with `tryRethrow`. +-- | +-- | This combinator is equivalent to `manyTill_ anyCodePoint`, but it will be +-- | faster because it returns a slice of the input `String` for the +-- | section preceding the parse instead of a `List CodePoint`. +anyTill + :: forall m a + . Monad m + => ParserT String m a + -> ParserT String m (Tuple String a) +anyTill p = do + ParseState input1 _ _ <- get + Tuple input2 t <- tailRecM go unit + pure $ Tuple (SCU.take (SCU.length input1 - SCU.length input2) input1) t + where + go unit = alt + ( do + ParseState input2 _ _ <- get + t <- try p + pure $ Done $ Tuple input2 t + ) + ( do + _ <- anyCodePoint + pure $ Loop unit + ) \ No newline at end of file diff --git a/test/Main.purs b/test/Main.purs index 84074dc..cedc5de 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -30,7 +30,7 @@ import Parsing.Combinators (between, chainl, chainl1, chainr, chainr1, choice, e import Parsing.Expr (Assoc(..), Operator(..), buildExprParser) import Parsing.Language (haskellDef, haskellStyle, javaStyle) import Parsing.Pos (Position(..), initialPos) -import Parsing.String (anyChar, anyCodePoint, char, eof, regex, rest, satisfy, string, takeN) +import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, regex, rest, satisfy, string, takeN) import Parsing.String.Basic (intDecimal, number, letter, noneOfCodePoints, oneOfCodePoints, whiteSpace) import Parsing.Token (TokenParser, makeTokenParser, match, token, when) import Parsing.Token as Parser.Token @@ -827,3 +827,10 @@ main = do let messageExpected = "context1 context2 Expected \"b\"" assert' ("expected message: " <> messageExpected <> ", message: " <> message) (message == messageExpected) logShow messageExpected + + log "\nTESTS anyTill\n" + parseTest "π… π…Ÿπ…Ÿπ…Ÿπ…Ÿ" (Tuple "" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮") + parseTest "π…Ÿπ…Ÿπ… π…Ÿπ…Ÿ" (Tuple "π…Ÿπ…Ÿ" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮") + parseTest "π…Ÿπ…Ÿπ…Ÿπ…Ÿπ… " (Tuple "π…Ÿπ…Ÿπ…Ÿπ…Ÿ" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮") <* eof + parseErrorTestPosition (anyTill (string "𝅘𝅥𝅮")) "π…Ÿπ…Ÿπ…Ÿπ…Ÿ" (Position { index: 4, line: 1, column: 5 }) +