2
2
-- |
3
3
-- | #### unicode dependency
4
4
-- |
5
- -- | Some of the parsers in this module depend on the __unicode__ package.
5
+ -- | Some of the parsers in this module depend on the
6
+ -- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7
+ -- | package.
6
8
-- | The __unicode__ package is large; about half a megabyte unminified.
7
9
-- | If code which depends on __parsing__ is “tree-shaken”
8
10
-- | or “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
24
26
, alphaNum
25
27
, intDecimal
26
28
, number
29
+ , takeWhile
30
+ , takeWhile1
27
31
, whiteSpace
28
32
, skipSpaces
29
33
, oneOf
@@ -41,7 +45,8 @@ import Data.Int as Data.Int
41
45
import Data.Maybe (Maybe (..))
42
46
import Data.Number (infinity , nan )
43
47
import Data.Number as Data.Number
44
- import Data.String (CodePoint , singleton , takeWhile )
48
+ import Data.String (CodePoint , singleton )
49
+ import Data.String as String
45
50
import Data.String.CodePoints (codePointFromChar )
46
51
import Data.String.CodeUnits as SCU
47
52
import Data.Tuple (fst )
@@ -112,7 +117,7 @@ number =
112
117
section <- numberRegex
113
118
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114
119
case Data.Number .fromString section of
115
- Nothing -> fail $ " Number.fromString failed "
120
+ Nothing -> fail " Expected Number "
116
121
Just x -> pure x
117
122
] <|> fail " Expected Number"
118
123
@@ -134,7 +139,7 @@ intDecimal :: forall m. ParserT String m Int
134
139
intDecimal = tryRethrow do
135
140
section <- intDecimalRegex <|> fail " Expected Int"
136
141
case Data.Int .fromString section of
137
- Nothing -> fail $ " Int.fromString failed "
142
+ Nothing -> fail " Expected Int "
138
143
Just x -> pure x
139
144
140
145
-- Non-exported regex is compiled at startup time.
@@ -153,17 +158,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153
158
-- | Always succeeds. Will consume only when matched whitespace string
154
159
-- | is non-empty.
155
160
whiteSpace :: forall m . ParserT String m String
156
- whiteSpace = fst <$> match skipSpaces
161
+ whiteSpace = takeWhile isSpace
157
162
158
163
-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
159
164
-- | and throw them away.
160
165
-- |
161
166
-- | Always succeeds. Will only consume when some characters are skipped.
-- |
-- | The result is `Unit`; use `whiteSpace` instead when the skipped text
-- | itself is needed.
162
167
skipSpaces :: forall m . ParserT String m Unit
163
- skipSpaces = consumeWith \input -> do
164
- let consumed = takeWhile isSpace input
165
- let remainder = SCU .drop (SCU .length consumed) input
166
- Right { value: unit, consumed, remainder }
168
+ skipSpaces = void whiteSpace
167
169
168
170
-- | Match one of the BMP `Char`s in the array.
169
171
oneOf :: forall m . Array Char -> ParserT String m Char
@@ -180,3 +182,68 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180
182
-- | Match any Unicode character not in the array.
-- |
-- | The description passed to `<~?>` is built from the excluded set: each
-- | `CodePoint` in `ss` is turned into a `String` with `singleton` and the
-- | resulting array is rendered with `show`.
-- | NOTE(review): presumably `<~?>` reports that description when the
-- | parser fails — confirm against its definition.
181
183
noneOfCodePoints :: forall m . Array CodePoint -> ParserT String m CodePoint
182
184
noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> " none of " <> show (singleton <$> ss)
185
+
186
-- | Parse the longest prefix of the input whose code points all satisfy
-- | the given predicate, and return it as a `String`.
-- |
-- | This parser never fails: when the first code point does not satisfy
-- | the predicate, the result is the empty `String`.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile Data.CodePoint.Unicode.isLetter
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
-- |
-- | Prefer `takeWhile isLetter` over
-- | `fromCharArray <$> Data.Array.many letter`.
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile predicate = consumeWith step
  where
  step input = Right { value: prefix, consumed: prefix, remainder: rest }
    where
    -- Leading run of code points accepted by the predicate.
    prefix = String.takeWhile predicate input
    -- The remainder is cut in code units, matching `SCU.length`.
    rest = SCU.drop (SCU.length prefix) input
216
+
217
+
218
-- | Take the longest `String` for which the characters satisfy the
-- | predicate. Require at least 1 character.
-- |
-- | When the predicate rejects the first character (or the input is empty)
-- | the parser fails with the message
-- | `Expected character satisfying predicate`. You should supply a more
-- | specific expectation description for the error message with `<?>`.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile1 Data.CodePoint.Unicode.isLetter <?> "letter"
-- | ```
-- | ---
-- | ```
-- | Right "Tackling"
-- | ```
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile1 predicate =
  consumeWith \s ->
    let
      value = String.takeWhile predicate s
      -- Measured in code units because `SCU.drop` below counts code units.
      len = SCU.length value
    in
      if len > 0 then
        Right
          { consumed: value
          , remainder: SCU.drop len s
          , value
          }
      else
        -- Phrased like the other "Expected …" failures in this module.
        Left "Expected character satisfying predicate"
0 commit comments