Skip to content

Commit ad87284

Browse files
committed
anyTill String combinator
This primitive combinator enables features like string split, search, and search-and-replace.
1 parent ea807e1 commit ad87284

File tree

4 files changed

+49
-7
lines changed

4 files changed

+49
-7
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ Breaking changes:
4040

4141
New features:
4242

43+
- Add the `anyTill` primitive `String` combinator. (#186 by @jamesdbrock)
44+
4345
Bugfixes:
4446

4547
Other improvements:

src/Parsing/Combinators.purs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,16 @@ try (ParserT k1) = ParserT
173173

174174
-- | If the parser fails then backtrack the input stream to the unconsumed state.
175175
-- |
176-
-- | Like `try`, but will relocate the error to the `try` point.
176+
-- | Like `try`, but will reposition the error to the `try` point.
177177
-- |
178178
-- | ```
179179
-- | >>> runParser "ac" (try (char 'a' *> char 'b'))
180-
-- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 2 }))
180+
-- | Left (ParseError "Expected 'b'" (Position { index: 1, line: 1, column: 2 }))
181181
-- | ```
182182
-- |
183183
-- | ```
184184
-- | >>> runParser "ac" (tryRethrow (char 'a' *> char 'b'))
185-
-- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 1 }))
185+
-- | Left (ParseError "Expected 'b'" (Position { index: 0, line: 1, column: 1 }))
186186
-- | ```
187187
tryRethrow :: forall m s a. ParserT s m a -> ParserT s m a
188188
tryRethrow (ParserT k1) = ParserT

src/Parsing/String.purs

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,13 @@ module Parsing.String
4545
, eof
4646
, match
4747
, regex
48+
, anyTill
4849
, consumeWith
4950
) where
5051

5152
import Prelude hiding (between)
5253

54+
import Control.Monad.Rec.Class (Step(..), tailRecM)
5355
import Control.Monad.State (get)
5456
import Data.Array.NonEmpty as NonEmptyArray
5557
import Data.Either (Either(..))
@@ -62,9 +64,9 @@ import Data.String.CodeUnits as SCU
6264
import Data.String.Regex as Regex
6365
import Data.String.Regex.Flags (RegexFlags)
6466
import Data.Tuple (Tuple(..))
65-
import Partial.Unsafe (unsafePartial)
6667
import Parsing (ParseError(..), ParseState(..), ParserT(..), Position(..))
67-
import Parsing.Combinators ((<?>))
68+
import Parsing.Combinators (try, (<?>), (<|>))
69+
import Partial.Unsafe (unsafePartial)
6870

6971
-- | Match “end-of-file,” the end of the input stream.
7072
eof :: forall m. ParserT String m Unit
@@ -263,11 +265,13 @@ regex pattern flags =
263265
-- | Consume a portion of the input string while yielding a value.
264266
-- |
265267
-- | Takes a consumption function which takes the remaining input `String`
266-
-- | as its argument and returns three fields:
268+
-- | as its argument and returns either an error message, or three fields:
267269
-- |
268270
-- | * `value` is the value to return.
269271
-- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
270272
-- | * `remainder` is the new remaining input `String`.
273+
-- |
274+
-- | This function is used internally to construct primitive `String` parsers.
271275
consumeWith
272276
:: forall m a
273277
. (String -> Either String { value :: a, consumed :: String, remainder :: String })
@@ -280,3 +284,32 @@ consumeWith f = ParserT
280284
Right { value, consumed, remainder } ->
281285
runFn2 done (ParseState remainder (updatePosString pos consumed remainder) true) value
282286
)
287+
288+
-- | Combinator which finds the first position in the input `String` where the
289+
-- | phrase can parse. Returns both the
290+
-- | parsed result and the unparsable input section searched before the parse.
291+
-- | Will fail if no section of the input is parseable. To backtrack the input
292+
-- | stream on failure, combine with `tryRethrow`.
293+
-- |
294+
-- | This combinator is equivalent to `manyTill_ anyCodePoint`, but it will be
295+
-- | faster because it returns a slice of the input `String` for the
296+
-- | section preceding the parse instead of a `List CodePoint`.
297+
anyTill
298+
:: forall m a
299+
. Monad m
300+
=> ParserT String m a
301+
-> ParserT String m (Tuple String a)
302+
anyTill p = do
303+
ParseState input1 _ _ <- get
304+
Tuple input2 t <- tailRecM go unit
305+
pure $ Tuple (SCU.take (SCU.length input1 - SCU.length input2) input1) t
306+
where
307+
go unit =
308+
do
309+
ParseState input2 _ _ <- get
310+
t <- try p
311+
pure $ Done $ Tuple input2 t
312+
<|>
313+
do
314+
_ <- anyCodePoint
315+
pure $ Loop unit

test/Main.purs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import Parsing.Combinators (between, chainl, chainl1, chainr, chainr1, choice, e
3030
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
3131
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
3232
import Parsing.Pos (Position(..), initialPos)
33-
import Parsing.String (anyChar, anyCodePoint, char, eof, regex, rest, satisfy, string, takeN)
33+
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, regex, rest, satisfy, string, takeN)
3434
import Parsing.String.Basic (intDecimal, number, letter, noneOfCodePoints, oneOfCodePoints, whiteSpace)
3535
import Parsing.Token (TokenParser, makeTokenParser, match, token, when)
3636
import Parsing.Token as Parser.Token
@@ -827,3 +827,10 @@ main = do
827827
let messageExpected = "context1 context2 Expected \"b\""
828828
assert' ("expected message: " <> messageExpected <> ", message: " <> message) (message == messageExpected)
829829
logShow messageExpected
830+
831+
log "\nTESTS anyTill\n"
832+
parseTest "𝅘𝅥𝅮𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" (Tuple "" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮")
833+
parseTest "𝅘𝅥𝅘𝅥𝅘𝅥𝅮𝅘𝅥𝅘𝅥" (Tuple "𝅘𝅥𝅘𝅥" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮")
834+
parseTest "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥𝅮" (Tuple "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" "𝅘𝅥𝅮") $ anyTill (string "𝅘𝅥𝅮") <* eof
835+
parseErrorTestPosition (anyTill (string "𝅘𝅥𝅮")) "𝅘𝅥𝅘𝅥𝅘𝅥𝅘𝅥" (Position {index:4,line:1,column:5})
836+

0 commit comments

Comments
 (0)