Skip to content

Commit 736f863

Browse files
committed
Add parsers rest, take, eof
- `Parser.String.rest` - `Parser.String.take` - `Parser.Token.eof` Bugfixes: - `Parser.String.eof` Set consumed on success so that this parser combines correctly with `notFollowedBy eof`. Added a test for this.
1 parent 609469e commit 736f863

File tree

6 files changed

+72
-14
lines changed

6 files changed

+72
-14
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,19 @@ Breaking changes:
88

99
New features:
1010

11+
- `Parser.String.rest` (#140 by @jamesdbrock)
12+
- `Parser.String.takeN` (#140 by @jamesdbrock)
13+
- `Parser.Token.eof` (#140 by @jamesdbrock)
14+
1115
Bugfixes:
1216

17+
- `Parser.String.eof` Set consumed on success so that this parser combines
18+
correctly with `notFollowedBy eof`. Added a test for this. (#140 by @jamesdbrock)
19+
1320
Other improvements:
1421

22+
- Documentation. (#140 by @jamesdbrock)
23+
1524
## [v8.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.1.0) - 2022-01-10
1625

1726
Other improvements: README Quick start monadic parsing tutorial

src/Text/Parsing/Parser.purs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,3 @@ failWithPosition message pos = throwError (ParseError message pos)
172172
-- | `region` as the parser backs out the call stack.
173173
region :: forall m s a. Monad m => (ParseError -> ParseError) -> ParserT s m a -> ParserT s m a
174174
region context p = catchError p $ \err -> throwError $ context err
175-

src/Text/Parsing/Parser/Combinators.purs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ skipMany1Rec p = p *> tailRecM go unit
387387
-- | Fail if the specified parser matches.
388388
notFollowedBy :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit
389389
notFollowedBy p = try $ (try p *> fail "Negated parser succeeded") <|> pure unit
390-
-- why do we need the outer try?
391390

392391
-- | Parse several phrases until the specified terminator matches.
393392
manyTill :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (List a)

src/Text/Parsing/Parser/String.purs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
-- |
33
-- | All of these primitive parsers will consume their input when they succeed.
44
-- |
5-
-- | All of these primitive parsers will consume no input (backtrack) when they
5+
-- | All of these primitive parsers will consume no input when they
66
-- | fail.
77
-- |
88
-- | The behavior of these primitive parsers is based on the behavior of the
@@ -22,11 +22,13 @@
2222
module Text.Parsing.Parser.String
2323
( string
2424
, eof
25+
, rest
2526
, anyChar
2627
, anyCodePoint
2728
, satisfy
2829
, satisfyCodePoint
2930
, char
31+
, takeN
3032
, whiteSpace
3133
, skipSpaces
3234
, oneOf
@@ -44,10 +46,10 @@ import Data.Char (fromCharCode)
4446
import Data.CodePoint.Unicode (isSpace)
4547
import Data.Foldable (elem)
4648
import Data.Maybe (Maybe(..))
47-
import Data.String (CodePoint, Pattern(..), null, singleton, stripPrefix, uncons)
49+
import Data.String (CodePoint, Pattern(..), length, null, singleton, splitAt, stripPrefix, uncons)
4850
import Data.String.CodeUnits as SCU
4951
import Data.Tuple (Tuple(..), fst)
50-
import Text.Parsing.Parser (ParseState(..), ParserT, fail)
52+
import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail)
5153
import Text.Parsing.Parser.Combinators (skipMany, tryRethrow, (<?>), (<~?>))
5254
import Text.Parsing.Parser.Pos (Position(..))
5355
import Unsafe.Coerce (unsafeCoerce)
@@ -56,7 +58,17 @@ import Unsafe.Coerce (unsafeCoerce)
5658
eof :: forall m. Monad m => ParserT String m Unit
5759
eof = do
5860
ParseState input _ _ <- get
59-
unless (null input) (fail "Expected EOF")
61+
if (null input)
62+
-- We must consume so this combines correctly with notFollowedBy
63+
then consume
64+
else (fail "Expected EOF")
65+
66+
-- | Match the entire rest of the input stream. Always succeeds.
67+
rest :: forall m. Monad m => ParserT String m String
68+
rest = do
69+
ParseState input position _ <- get
70+
put $ ParseState "" (updatePosString position input) true
71+
pure input
6072

6173
-- | Match the specified string.
6274
string :: forall m. Monad m => String -> ParserT String m String
@@ -111,8 +123,18 @@ satisfyCodePoint f = tryRethrow do
111123
char :: forall m. Monad m => Char -> ParserT String m Char
112124
char c = satisfy (_ == c) <?> show c
113125

126+
-- | Match a `String` exactly *N* characters long.
127+
takeN :: forall m. Monad m => Int -> ParserT String m String
128+
takeN n = do
129+
ParseState input position _ <- get
130+
let { before, after } = splitAt n input
131+
if length before == n then do
132+
put $ ParseState after (updatePosString position before) true
133+
pure before
134+
else fail ("Could not take " <> show n <> " characters")
135+
114136
-- | Match zero or more whitespace characters satisfying
115-
-- | `Data.CodePoint.Unicode.isSpace`.
137+
-- | `Data.CodePoint.Unicode.isSpace`. Always succeeds.
116138
whiteSpace :: forall m. Monad m => ParserT String m String
117139
whiteSpace = fst <$> match skipSpaces
118140

src/Text/Parsing/Parser/Token.purs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module Text.Parsing.Parser.Token
44
( token
55
, when
66
, match
7+
, eof
78
, LanguageDef
89
, GenLanguageDef(LanguageDef)
910
, unGenLanguageDef
@@ -23,7 +24,7 @@ module Text.Parsing.Parser.Token
2324
import Prelude hiding (between, when)
2425

2526
import Control.Lazy (fix)
26-
import Control.Monad.State (gets, modify_)
27+
import Control.Monad.State (get, gets, modify_)
2728
import Control.MonadPlus (guard, (<|>))
2829
import Data.Array as Array
2930
import Data.Char (fromCharCode, toCharCode)
@@ -43,12 +44,12 @@ import Data.String.CodeUnits as SCU
4344
import Data.String.Unicode as Unicode
4445
import Data.Tuple (Tuple(..))
4546
import Math (pow)
46-
import Text.Parsing.Parser (ParseState(..), ParserT, fail)
47+
import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail)
4748
import Text.Parsing.Parser.Combinators (between, choice, notFollowedBy, option, sepBy, sepBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>))
4849
import Text.Parsing.Parser.Pos (Position)
4950
import Text.Parsing.Parser.String (char, noneOf, oneOf, satisfy, satisfyCodePoint, string)
5051

51-
-- | Create a parser which Returns the first token in the stream.
52+
-- | A parser which returns the first token in the stream.
5253
token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a
5354
token tokpos = do
5455
input <- gets \(ParseState input _ _) -> input
@@ -59,7 +60,7 @@ token tokpos = do
5960
ParseState tail (tokpos head) true
6061
pure head
6162

62-
-- | Create a parser which matches any token satisfying the predicate.
63+
-- | A parser which matches any token satisfying the predicate.
6364
when :: forall m a. Monad m => (a -> Position) -> (a -> Boolean) -> ParserT (List a) m a
6465
when tokpos f = tryRethrow do
6566
a <- token tokpos
@@ -70,6 +71,15 @@ when tokpos f = tryRethrow do
7071
match :: forall a m. Monad m => Eq a => (a -> Position) -> a -> ParserT (List a) m a
7172
match tokpos tok = when tokpos (_ == tok)
7273

74+
-- | Match the “end-of-file,” the end of the input stream.
75+
eof :: forall a m. Monad m => ParserT (List a) m Unit
76+
eof = do
77+
ParseState input _ _ <- get
78+
if (List.null input)
79+
-- We must consume so this combines correctly with notFollowedBy
80+
then consume
81+
else (fail "Expected EOF")
82+
7383
type LanguageDef = GenLanguageDef String Identity
7484

7585
-- | The `GenLanguageDef` type is a record that contains all parameterizable

test/Main.purs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@ import Data.Tuple (Tuple(..))
1818
import Effect (Effect)
1919
import Effect.Console (logShow)
2020
import Test.Assert (assert')
21-
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, region, runParser)
22-
import Text.Parsing.Parser.Combinators (between, chainl, chainl1Rec, chainlRec, chainr1Rec, chainrRec, endBy1, endBy1Rec, endByRec, many1Rec, many1TillRec, manyTillRec, optionMaybe, sepBy1, sepBy1Rec, sepByRec, sepEndBy1Rec, sepEndByRec, skipMany1Rec, skipManyRec, try)
21+
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, position, region, runParser)
22+
import Text.Parsing.Parser.Combinators (between, chainl, chainl1Rec, chainlRec, chainr1Rec, chainrRec, endBy1, endBy1Rec, endByRec, many1Rec, many1TillRec, manyTillRec, notFollowedBy, optionMaybe, sepBy1, sepBy1Rec, sepByRec, sepEndBy1Rec, sepEndByRec, skipMany1Rec, skipManyRec, try)
2323
import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
2424
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
2525
import Text.Parsing.Parser.Pos (Position(..), initialPos)
26-
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, satisfy, string, whiteSpace)
26+
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, rest, satisfy, string, takeN, whiteSpace)
2727
import Text.Parsing.Parser.Token (TokenParser, letter, makeTokenParser, match, token, when)
28+
import Text.Parsing.Parser.Token as Parser.Token
2829

2930
parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a
3031
parens = between (string "(") (string ")")
@@ -574,6 +575,17 @@ main = do
574575
parseTest "1*2+3/4-5" (-3) exprTest
575576
parseTest "ab?" "ab" manySatisfyTest
576577

578+
parseTest "ab" unit (char 'a' *> notFollowedBy (char 'a'))
579+
580+
parseTest "rest" "rest" rest
581+
parseTest "rest" unit (rest *> eof)
582+
parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position)
583+
584+
parseErrorTestPosition
585+
(rest *> notFollowedBy eof)
586+
"aa\naa"
587+
(Position { column: 3, line: 2 })
588+
577589
parseErrorTestPosition
578590
anyChar
579591
"𝅘𝅥𝅯"
@@ -591,6 +603,11 @@ main = do
591603
one <- Array.many $ oneOfCodePoints $ SCP.toCodePointArray "🤔💯✅"
592604
pure $ SCP.fromCodePointArray <$> [ none, one ]
593605

606+
parseTest "abcd" "ab" $ takeN 2
607+
parseTest "abcd" "" $ takeN 0
608+
parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 })
609+
parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 })
610+
594611
parseErrorTestMessage
595612
(noneOfCodePoints $ SCP.toCodePointArray "❓✅")
596613
""
@@ -617,6 +634,8 @@ main = do
617634
parseTest (fromFoldable [ B ]) B (match tokpos B)
618635
parseTest (fromFoldable [ A, B ]) A (match tokpos A)
619636

637+
parseTest (fromFoldable []) unit Parser.Token.eof
638+
620639
parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 })
621640
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 })
622641
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })

0 commit comments

Comments
 (0)