Skip to content

Commit c4579ca

Browse files
committed
takeWhile
1 parent 66b5222 commit c4579ca

File tree

6 files changed

+97
-33
lines changed

6 files changed

+97
-33
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Breaking changes:
1010

1111
New features:
1212

13+
- Add `Parsing.String.Basic.takeWhile`, `takeWhile1` (#218 by @jamesdbrock)
14+
1315
Other improvements:
1416

1517
## [v10.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v10.1.0) - 2022-11-10

src/Parsing.purs

+1-2
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,7 @@ failWithPosition message pos = throwError (ParseError message pos)
430430
-- |
431431
-- | lmap (parseErrorHuman input 30) $ runParser input do
432432
-- | inContext ("Megacity list: " <> _) do
433-
-- | cityname <- inContext ("city name: " <> _) do
434-
-- | fst <$> match (skipMany letter)
433+
-- | cityname <- inContext ("city name: " <> _) (takeWhile isLetter)
435434
-- | skipSpaces
436435
-- | population <- inContext ("population: " <> _) intDecimal
437436
-- | pure $ Tuple cityname population

src/Parsing/Combinators.purs

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ infixl 4 withErrorMessage as <?>
113113
-- |
114114
-- |```purescript
115115
-- |parseBang :: Parser Char
116-
-- |parseBang = char '!' <~?> \_ -> "Expected a bang"
116+
-- |parseBang = char '!' <~?> \_ -> "a bang"
117117
-- |```
118118
withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a
119119
withLazyErrorMessage p msg = p <|> defer \_ -> fail ("Expected " <> msg unit)

src/Parsing/String.purs

+2-18
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
-- |
44
-- | All of these primitive parsers will consume when they succeed.
55
-- |
6-
-- | All of these primitive parsers will not consume when they
7-
-- | fail.
6+
-- | All of these primitive parsers will not consume and will automatically
7+
-- | backtrack when they fail.
88
-- |
99
-- | The behavior of these primitive parsers is based on the behavior of the
1010
-- | `Data.String` module in the __strings__ package.
@@ -181,22 +181,6 @@ updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp o
181181

182182
-- | Combinator which returns both the result of a parse and the slice of
183183
-- | the input that was consumed while it was being parsed.
184-
-- |
185-
-- | Because `String`s are not `Char` arrays in PureScript, `many` and `some`
186-
-- | on `Char` parsers need to
187-
-- | be used with `Data.String.CodeUnits.fromCharArray` to
188-
-- | construct a `String`.
189-
-- |
190-
-- | ```
191-
-- | fromCharArray <$> Data.Array.many (char 'x')
192-
-- | ```
193-
-- |
194-
-- | It’s more efficient to achieve the same result by using this `match` combinator
195-
-- | instead of `fromCharArray`.
196-
-- |
197-
-- | ```
198-
-- | fst <$> match (Combinators.skipMany (char 'x'))
199-
-- | ```
200184
match :: forall m a. ParserT String m a -> ParserT String m (Tuple String a)
201185
match p = do
202186
ParseState input1 _ _ <- getParserT

src/Parsing/String/Basic.purs

+76-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
-- |
33
-- | #### unicode dependency
44
-- |
5-
-- | Some of the parsers in this module depend on the __unicode__ package.
5+
-- | Some of the parsers in this module depend on the
6+
-- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7+
-- | package.
68
-- | The __unicode__ package is large; about half a megabyte unminified.
79
-- | If code which depends on __parsing__ is “tree-shaken”
810
-- | “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
2426
, alphaNum
2527
, intDecimal
2628
, number
29+
, takeWhile
30+
, takeWhile1
2731
, whiteSpace
2832
, skipSpaces
2933
, oneOf
@@ -41,7 +45,8 @@ import Data.Int as Data.Int
4145
import Data.Maybe (Maybe(..))
4246
import Data.Number (infinity, nan)
4347
import Data.Number as Data.Number
44-
import Data.String (CodePoint, singleton, takeWhile)
48+
import Data.String (CodePoint, singleton)
49+
import Data.String as String
4550
import Data.String.CodePoints (codePointFromChar)
4651
import Data.String.CodeUnits as SCU
4752
import Data.Tuple (fst)
@@ -112,7 +117,7 @@ number =
112117
section <- numberRegex
113118
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114119
case Data.Number.fromString section of
115-
Nothing -> fail $ "Number.fromString failed"
120+
Nothing -> fail "Expected Number"
116121
Just x -> pure x
117122
] <|> fail "Expected Number"
118123

@@ -134,7 +139,7 @@ intDecimal :: forall m. ParserT String m Int
134139
intDecimal = tryRethrow do
135140
section <- intDecimalRegex <|> fail "Expected Int"
136141
case Data.Int.fromString section of
137-
Nothing -> fail $ "Int.fromString failed"
142+
Nothing -> fail "Expected Int"
138143
Just x -> pure x
139144

140145
-- Non-exported regex is compiled at startup time.
@@ -153,17 +158,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153158
-- | Always succeeds. Will consume only when matched whitespace string
154159
-- | is non-empty.
155160
whiteSpace :: forall m. ParserT String m String
156-
whiteSpace = fst <$> match skipSpaces
161+
whiteSpace = takeWhile isSpace
157162

158163
-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
159164
-- | and throw them away.
160165
-- |
161166
-- | Always succeeds. Will only consume when some characters are skipped.
162167
skipSpaces :: forall m. ParserT String m Unit
163-
skipSpaces = consumeWith \input -> do
164-
let consumed = takeWhile isSpace input
165-
let remainder = SCU.drop (SCU.length consumed) input
166-
Right { value: unit, consumed, remainder }
168+
skipSpaces = void whiteSpace
167169

168170
-- | Match one of the BMP `Char`s in the array.
169171
oneOf :: forall m. Array Char -> ParserT String m Char
@@ -180,3 +182,68 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180182
-- | Match any Unicode character not in the array.
181183
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
182184
noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <> show (singleton <$> ss)
185+
186+
-- | Take the longest `String` for which the characters satisfy the
187+
-- | predicate.
188+
-- |
189+
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
190+
-- | for useful predicates.
191+
-- |
192+
-- | Example:
193+
-- |
194+
-- | ```
195+
-- | runParser "Tackling the Awkward Squad" do
196+
-- | takeWhile Data.CodePoint.Unicode.isLetter
197+
-- | ```
198+
-- | ---
199+
-- | ```
200+
-- | Right "Tackling"
201+
-- | ```
202+
-- |
203+
-- | You should prefer `takeWhile isLetter` to
204+
-- | `fromCharArray <$> Data.Array.many letter`.
205+
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
206+
takeWhile predicate =
207+
consumeWith \s ->
208+
let
209+
value = String.takeWhile predicate s
210+
in
211+
Right
212+
{ consumed: value
213+
, remainder: SCU.drop (SCU.length value) s
214+
, value
215+
}
216+
217+
218+
-- | Take the longest `String` for which the characters satisfy the
219+
-- | predicate. Require at least 1 character. You should supply an
220+
-- | expectation description for the error
221+
-- | message for when the predicate fails on the first character.
222+
-- |
223+
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
224+
-- | for useful predicates.
225+
-- |
226+
-- | Example:
227+
-- |
228+
-- | ```
229+
-- | runParser "Tackling the Awkward Squad" do
230+
-- | takeWhile1 Data.CodePoint.Unicode.isLetter <?> "letter"
231+
-- | ```
232+
-- | ---
233+
-- | ```
234+
-- | Right "Tackling"
235+
-- | ```
236+
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
237+
takeWhile1 predicate =
238+
consumeWith \s ->
239+
let
240+
value = String.takeWhile predicate s
241+
len = SCU.length value
242+
in
243+
if len > 0
244+
then Right
245+
{ consumed: value
246+
, remainder: SCU.drop (SCU.length value) s
247+
, value
248+
}
249+
else Left "character predicate"

test/Main.purs

+15-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import Control.Monad.State (State, lift, modify, runState)
1313
import Data.Array (some, toUnfoldable)
1414
import Data.Array as Array
1515
import Data.Bifunctor (lmap, rmap)
16+
import Data.CodePoint.Unicode as CodePoint.Unicode
1617
import Data.Either (Either(..), either, fromLeft, hush)
1718
import Data.Foldable (oneOf)
1819
import Data.List (List(..), fromFoldable, (:))
@@ -41,7 +42,7 @@ import Parsing.Combinators.Array as Combinators.Array
4142
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
4243
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
4344
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, parseErrorHuman, regex, rest, satisfy, string, takeN)
44-
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, whiteSpace)
45+
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, takeWhile, takeWhile1, whiteSpace)
4546
import Parsing.String.Basic as String.Basic
4647
import Parsing.String.Replace (breakCap, replace, replaceT, splitCap, splitCapT)
4748
import Parsing.Token (TokenParser, makeTokenParser, token, when)
@@ -712,8 +713,7 @@ main = do
712713
assertEqual' "region 1"
713714
{ actual: runParser input do
714715
inContext ("Megacity list: " <> _) do
715-
cityname <- inContext ("city name: " <> _) do
716-
fst <$> match (Combinators.skipMany letter)
716+
cityname <- inContext ("city name: " <> _) (takeWhile CodePoint.Unicode.isLetter)
717717
skipSpaces
718718
population <- inContext ("population: " <> _) intDecimal
719719
pure $ Tuple cityname population
@@ -725,6 +725,18 @@ main = do
725725
, expected: Left $ ParseError "Expected 'c'" (Position { index: 1, column: 2, line: 1 })
726726
}
727727

728+
assertEqual' "takeWhile 1"
729+
{ actual: runParser "Tackling the Awkward" do
730+
takeWhile CodePoint.Unicode.isLetter <* string " the Awkward"
731+
, expected: Right "Tackling"
732+
}
733+
734+
assertEqual' "takeWhile1 1"
735+
{ actual: runParser "3ackling the Awkward" do
736+
takeWhile1 CodePoint.Unicode.isLetter <* string " the Awkward" <?> "letter"
737+
, expected: Left $ ParseError "Expected letter" (Position {index:0,line:1,column:1})
738+
}
739+
728740
log "\nTESTS number\n"
729741

730742
-- assert' "Number.fromString" $ Just infinity == Data.Number.fromString "Infinity"

0 commit comments

Comments
 (0)