22-- |
33-- | #### unicode dependency
44-- |
5- -- | Some of the parsers in this module depend on the __unicode__ package.
5+ -- | Some of the parsers in this module depend on the
6+ -- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7+ -- | package.
68-- | The __unicode__ package is large; about half a megabyte unminified.
79-- | If code which depends on __parsing__ is “tree-shaken”
810-- | or “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
2426 , alphaNum
2527 , intDecimal
2628 , number
29+ , takeWhile
30+ , takeWhile1
2731 , whiteSpace
2832 , skipSpaces
2933 , oneOf
@@ -41,7 +45,8 @@ import Data.Int as Data.Int
4145import Data.Maybe (Maybe (..))
4246import Data.Number (infinity , nan )
4347import Data.Number as Data.Number
44- import Data.String (CodePoint , singleton , takeWhile )
48+ import Data.String (CodePoint , singleton )
49+ import Data.String as String
4550import Data.String.CodePoints (codePointFromChar )
4651import Data.String.CodeUnits as SCU
4752import Data.Tuple (fst )
@@ -112,7 +117,7 @@ number =
112117 section <- numberRegex
113118 -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114119 case Data.Number .fromString section of
115- Nothing -> fail $ " Number.fromString failed "
120+ Nothing -> fail " Expected Number "
116121 Just x -> pure x
117122 ] <|> fail " Expected Number"
118123
@@ -134,7 +139,7 @@ intDecimal :: forall m. ParserT String m Int
134139intDecimal = tryRethrow do
135140 section <- intDecimalRegex <|> fail " Expected Int"
136141 case Data.Int .fromString section of
137- Nothing -> fail $ " Int.fromString failed "
142+ Nothing -> fail " Expected Int "
138143 Just x -> pure x
139144
140145-- Non-exported regex is compiled at startup time.
@@ -153,17 +158,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153158-- | Always succeeds. Will consume only when matched whitespace string
154159-- | is non-empty.
155160whiteSpace :: forall m . ParserT String m String
156- whiteSpace = fst <$> match skipSpaces
161+ whiteSpace = takeWhile isSpace
157162
158163-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
159164-- | and throw them away.
160165-- |
161166-- | Always succeeds. Will only consume when some characters are skipped.
162167skipSpaces :: forall m . ParserT String m Unit
163- skipSpaces = consumeWith \input -> do
164- let consumed = takeWhile isSpace input
165- let remainder = SCU .drop (SCU .length consumed) input
166- Right { value: unit, consumed, remainder }
168+ skipSpaces = void whiteSpace
167169
168170-- | Match one of the BMP `Char`s in the array.
169171oneOf :: forall m . Array Char -> ParserT String m Char
@@ -180,3 +182,68 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180182-- | Match any Unicode character not in the array.
181183noneOfCodePoints :: forall m . Array CodePoint -> ParserT String m CodePoint
182184noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> " none of " <> show (singleton <$> ss)
185+
-- | Parse the longest prefix of the input whose characters all satisfy
-- | the predicate, and return that prefix as a `String`.
-- |
-- | The callback given to `consumeWith` always returns `Right`, so an
-- | input whose first character fails the predicate yields `""`.
-- |
-- | Useful predicates can be found in
-- | [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode).
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile Data.CodePoint.Unicode.isLetter
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
-- |
-- | Prefer `takeWhile isLetter` over
-- | `fromCharArray <$> Data.Array.many letter`.
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile predicate = consumeWith \input ->
  let
    -- Longest leading run of code points accepted by the predicate.
    matched = String.takeWhile predicate input
    -- The prefix is measured in code units because `SCU.drop` counts
    -- code units.
    rest = SCU.drop (SCU.length matched) input
  in
    Right { value: matched, consumed: matched, remainder: rest }
216+
217+
-- | Take the longest `String` for which the characters satisfy the
-- | predicate. Require at least 1 character.
-- |
-- | When the first character does not satisfy the predicate (this
-- | includes empty input), the parser fails with the message
-- | `"Expected character satisfying predicate"`. You should supply your
-- | own expectation description with `<?>` so that the error message
-- | says what was actually expected.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile1 Data.CodePoint.Unicode.isLetter <?> "letter"
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile1 predicate =
  consumeWith \s ->
    let
      value = String.takeWhile predicate s
      -- Measured in code units because `SCU.drop` counts code units.
      len = SCU.length value
    in
      if len > 0 then
        Right
          { consumed: value
          , remainder: SCU.drop len s
          , value
          }
      else
        -- Same "Expected …" wording as the other failures in this module.
        Left "Expected character satisfying predicate"
0 commit comments