Skip to content

Commit 1ffa3f8

Browse files
committed
takeWhile
1 parent 66b5222 commit 1ffa3f8

File tree

6 files changed

+96
-36
lines changed

6 files changed

+96
-36
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Breaking changes:
1010

1111
New features:
1212

13+
- Add `Parsing.String.Basic.takeWhile`, `takeWhile1` (#218 by @jamesdbrock)
14+
1315
Other improvements:
1416

1517
## [v10.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v10.1.0) - 2022-11-10

src/Parsing.purs

+1-2
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,7 @@ failWithPosition message pos = throwError (ParseError message pos)
430430
-- |
431431
-- | lmap (parseErrorHuman input 30) $ runParser input do
432432
-- | inContext ("Megacity list: " <> _) do
433-
-- | cityname <- inContext ("city name: " <> _) do
434-
-- | fst <$> match (skipMany letter)
433+
-- | cityname <- inContext ("city name: " <> _) (takeWhile isLetter)
435434
-- | skipSpaces
436435
-- | population <- inContext ("population: " <> _) intDecimal
437436
-- | pure $ Tuple cityname population

src/Parsing/Combinators.purs

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ infixl 4 withErrorMessage as <?>
113113
-- |
114114
-- |```purescript
115115
-- |parseBang :: Parser Char
116-
-- |parseBang = char '!' <~?> \_ -> "Expected a bang"
116+
-- |parseBang = char '!' <~?> \_ -> "a bang"
117117
-- |```
118118
withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a
119119
withLazyErrorMessage p msg = p <|> defer \_ -> fail ("Expected " <> msg unit)

src/Parsing/String.purs

+2-18
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
-- |
44
-- | All of these primitive parsers will consume when they succeed.
55
-- |
6-
-- | All of these primitive parsers will not consume when they
7-
-- | fail.
6+
-- | All of these primitive parsers will not consume and will automatically
7+
-- | backtrack when they fail.
88
-- |
99
-- | The behavior of these primitive parsers is based on the behavior of the
1010
-- | `Data.String` module in the __strings__ package.
@@ -181,22 +181,6 @@ updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp o
181181

182182
-- | Combinator which returns both the result of a parse and the slice of
183183
-- | the input that was consumed while it was being parsed.
184-
-- |
185-
-- | Because `String`s are not `Char` arrays in PureScript, `many` and `some`
186-
-- | on `Char` parsers need to
187-
-- | be used with `Data.String.CodeUnits.fromCharArray` to
188-
-- | construct a `String`.
189-
-- |
190-
-- | ```
191-
-- | fromCharArray <$> Data.Array.many (char 'x')
192-
-- | ```
193-
-- |
194-
-- | It’s more efficient to achieve the same result by using this `match` combinator
195-
-- | instead of `fromCharArray`.
196-
-- |
197-
-- | ```
198-
-- | fst <$> match (Combinators.skipMany (char 'x'))
199-
-- | ```
200184
match :: forall m a. ParserT String m a -> ParserT String m (Tuple String a)
201185
match p = do
202186
ParseState input1 _ _ <- getParserT

src/Parsing/String/Basic.purs

+75-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
-- |
33
-- | #### unicode dependency
44
-- |
5-
-- | Some of the parsers in this module depend on the __unicode__ package.
5+
-- | Some of the parsers in this module depend on the
6+
-- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7+
-- | package.
68
-- | The __unicode__ package is large; about half a megabyte unminified.
79
-- | If code which depends on __parsing__ is “tree-shaken”
810
-- | “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
2426
, alphaNum
2527
, intDecimal
2628
, number
29+
, takeWhile
30+
, takeWhile1
2731
, whiteSpace
2832
, skipSpaces
2933
, oneOf
@@ -41,13 +45,13 @@ import Data.Int as Data.Int
4145
import Data.Maybe (Maybe(..))
4246
import Data.Number (infinity, nan)
4347
import Data.Number as Data.Number
44-
import Data.String (CodePoint, singleton, takeWhile)
48+
import Data.String (CodePoint, singleton)
49+
import Data.String as String
4550
import Data.String.CodePoints (codePointFromChar)
4651
import Data.String.CodeUnits as SCU
47-
import Data.Tuple (fst)
4852
import Parsing (ParserT, fail)
4953
import Parsing.Combinators (choice, tryRethrow, (<?>), (<|>), (<~?>))
50-
import Parsing.String (consumeWith, match, regex, satisfy, satisfyCodePoint, string)
54+
import Parsing.String (consumeWith, regex, satisfy, satisfyCodePoint, string)
5155
import Partial.Unsafe (unsafeCrashWith)
5256

5357
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
@@ -112,7 +116,7 @@ number =
112116
section <- numberRegex
113117
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114118
case Data.Number.fromString section of
115-
Nothing -> fail $ "Number.fromString failed"
119+
Nothing -> fail "Expected Number"
116120
Just x -> pure x
117121
] <|> fail "Expected Number"
118122

@@ -134,7 +138,7 @@ intDecimal :: forall m. ParserT String m Int
134138
intDecimal = tryRethrow do
135139
section <- intDecimalRegex <|> fail "Expected Int"
136140
case Data.Int.fromString section of
137-
Nothing -> fail $ "Int.fromString failed"
141+
Nothing -> fail "Expected Int"
138142
Just x -> pure x
139143

140144
-- Non-exported regex is compiled at startup time.
@@ -153,17 +157,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153157
-- | Always succeeds. Will consume only when matched whitespace string
154158
-- | is non-empty.
155159
whiteSpace :: forall m. ParserT String m String
156-
whiteSpace = fst <$> match skipSpaces
160+
whiteSpace = takeWhile isSpace
157161

158162
-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
159163
-- | and throw them away.
160164
-- |
161165
-- | Always succeeds. Will only consume when some characters are skipped.
162166
skipSpaces :: forall m. ParserT String m Unit
163-
skipSpaces = consumeWith \input -> do
164-
let consumed = takeWhile isSpace input
165-
let remainder = SCU.drop (SCU.length consumed) input
166-
Right { value: unit, consumed, remainder }
167+
skipSpaces = void whiteSpace
167168

168169
-- | Match one of the BMP `Char`s in the array.
169170
oneOf :: forall m. Array Char -> ParserT String m Char
@@ -180,3 +181,66 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180181
-- | Match any Unicode character not in the array.
181182
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
182183
noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <> show (singleton <$> ss)
184+
185+
-- | Take the longest `String` for which the characters satisfy the
186+
-- | predicate.
187+
-- |
188+
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
189+
-- | for useful predicates.
190+
-- |
191+
-- | Example:
192+
-- |
193+
-- | ```
194+
-- | runParser "Tackling the Awkward Squad" do
195+
-- | takeWhile Data.CodePoint.Unicode.isLetter
196+
-- | ```
197+
-- | ---
198+
-- | ```
199+
-- | Right "Tackling"
200+
-- | ```
201+
-- |
202+
-- | You should prefer `takeWhile isLetter` to
203+
-- | `fromCharArray <$> Data.Array.many letter`.
204+
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
205+
takeWhile predicate =
206+
consumeWith \s ->
207+
let
208+
value = String.takeWhile predicate s
209+
in
210+
Right
211+
{ consumed: value
212+
, remainder: SCU.drop (SCU.length value) s
213+
, value
214+
}
215+
216+
-- | Take the longest `String` for which the characters satisfy the
217+
-- | predicate. Require at least 1 character. You should supply an
218+
-- | expectation description for the error
219+
-- | message for when the predicate fails on the first character.
220+
-- |
221+
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
222+
-- | for useful predicates.
223+
-- |
224+
-- | Example:
225+
-- |
226+
-- | ```
227+
-- | runParser "Tackling the Awkward Squad" do
228+
-- | takeWhile1 Data.CodePoint.Unicode.isLetter <?> "a letter"
229+
-- | ```
230+
-- | ---
231+
-- | ```
232+
-- | Right "Tackling"
233+
-- | ```
234+
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
235+
takeWhile1 predicate =
236+
consumeWith \s ->
237+
let
238+
value = String.takeWhile predicate s
239+
len = SCU.length value
240+
in
241+
if len > 0 then Right
242+
{ consumed: value
243+
, remainder: SCU.drop (SCU.length value) s
244+
, value
245+
}
246+
else Left "Expected character satisfying predicate"

test/Main.purs

+15-4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import Control.Monad.State (State, lift, modify, runState)
1313
import Data.Array (some, toUnfoldable)
1414
import Data.Array as Array
1515
import Data.Bifunctor (lmap, rmap)
16+
import Data.CodePoint.Unicode as CodePoint.Unicode
1617
import Data.Either (Either(..), either, fromLeft, hush)
1718
import Data.Foldable (oneOf)
1819
import Data.List (List(..), fromFoldable, (:))
@@ -36,12 +37,11 @@ import Effect.Unsafe (unsafePerformEffect)
3637
import Node.Process (lookupEnv)
3738
import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorMessage, parseErrorPosition, position, region, runParser)
3839
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>), (<~?>))
39-
import Parsing.Combinators as Combinators
4040
import Parsing.Combinators.Array as Combinators.Array
4141
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
4242
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
4343
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, parseErrorHuman, regex, rest, satisfy, string, takeN)
44-
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, whiteSpace)
44+
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, takeWhile, takeWhile1, whiteSpace)
4545
import Parsing.String.Basic as String.Basic
4646
import Parsing.String.Replace (breakCap, replace, replaceT, splitCap, splitCapT)
4747
import Parsing.Token (TokenParser, makeTokenParser, token, when)
@@ -712,8 +712,7 @@ main = do
712712
assertEqual' "region 1"
713713
{ actual: runParser input do
714714
inContext ("Megacity list: " <> _) do
715-
cityname <- inContext ("city name: " <> _) do
716-
fst <$> match (Combinators.skipMany letter)
715+
cityname <- inContext ("city name: " <> _) (takeWhile CodePoint.Unicode.isLetter)
717716
skipSpaces
718717
population <- inContext ("population: " <> _) intDecimal
719718
pure $ Tuple cityname population
@@ -725,6 +724,18 @@ main = do
725724
, expected: Left $ ParseError "Expected 'c'" (Position { index: 1, column: 2, line: 1 })
726725
}
727726

727+
assertEqual' "takeWhile 1"
728+
{ actual: runParser "Tackling the Awkward" do
729+
takeWhile CodePoint.Unicode.isLetter <* string " the Awkward"
730+
, expected: Right "Tackling"
731+
}
732+
733+
assertEqual' "takeWhile1 1"
734+
{ actual: runParser "3ackling the Awkward" do
735+
takeWhile1 CodePoint.Unicode.isLetter <* string " the Awkward" <?> "letter"
736+
, expected: Left $ ParseError "Expected letter" (Position { index: 0, line: 1, column: 1 })
737+
}
738+
728739
log "\nTESTS number\n"
729740

730741
-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"

0 commit comments

Comments
 (0)