2
2
-- |
3
3
-- | #### unicode dependency
4
4
-- |
5
- -- | Some of the parsers in this module depend on the __unicode__ package.
5
+ -- | Some of the parsers in this module depend on the
6
+ -- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7
+ -- | package.
6
8
-- | The __unicode__ package is large; about half a megabyte unminified.
7
9
-- | If code which depends on __parsing__ is “tree-shaken”
8
10
-- | or “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
24
26
, alphaNum
25
27
, intDecimal
26
28
, number
29
+ , takeWhile
30
+ , takeWhile1
27
31
, whiteSpace
28
32
, skipSpaces
29
33
, oneOf
@@ -41,13 +45,13 @@ import Data.Int as Data.Int
41
45
import Data.Maybe (Maybe (..))
42
46
import Data.Number (infinity , nan )
43
47
import Data.Number as Data.Number
44
- import Data.String (CodePoint , singleton , takeWhile )
48
+ import Data.String (CodePoint , singleton )
49
+ import Data.String as String
45
50
import Data.String.CodePoints (codePointFromChar )
46
51
import Data.String.CodeUnits as SCU
47
- import Data.Tuple (fst )
48
52
import Parsing (ParserT , fail )
49
53
import Parsing.Combinators (choice , tryRethrow , (<?>), (<|>), (<~?>))
50
- import Parsing.String (consumeWith , match , regex , satisfy , satisfyCodePoint , string )
54
+ import Parsing.String (consumeWith , regex , satisfy , satisfyCodePoint , string )
51
55
import Partial.Unsafe (unsafeCrashWith )
52
56
53
57
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
@@ -112,7 +116,7 @@ number =
112
116
section <- numberRegex
113
117
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114
118
case Data.Number .fromString section of
115
- Nothing -> fail $ " Number.fromString failed "
119
+ Nothing -> fail " Expected Number "
116
120
Just x -> pure x
117
121
] <|> fail " Expected Number"
118
122
@@ -134,7 +138,7 @@ intDecimal :: forall m. ParserT String m Int
134
138
intDecimal = tryRethrow do
135
139
section <- intDecimalRegex <|> fail " Expected Int"
136
140
case Data.Int .fromString section of
137
- Nothing -> fail $ " Int.fromString failed "
141
+ Nothing -> fail " Expected Int "
138
142
Just x -> pure x
139
143
140
144
-- Non-exported regex is compiled at startup time.
@@ -153,17 +157,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153
157
-- | Always succeeds. Will consume only when matched whitespace string
154
158
-- | is non-empty.
155
159
whiteSpace :: forall m . ParserT String m String
156
- whiteSpace = fst <$> match skipSpaces
160
+ whiteSpace = takeWhile isSpace
157
161
158
162
-- | Skip over leading characters that satisfy
-- | `Data.CodePoint.Unicode.isSpace`, discarding them.
-- |
-- | Always succeeds. Will only consume when some characters are skipped.
skipSpaces :: forall m. ParserT String m Unit
skipSpaces = do
  -- Run `whiteSpace` only for its effect on the input; the matched text
  -- itself is thrown away.
  _ <- whiteSpace
  pure unit
167
168
168
169
-- | Match one of the BMP `Char`s in the array.
169
170
oneOf :: forall m . Array Char -> ParserT String m Char
@@ -180,3 +181,66 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180
181
-- | Match any Unicode character not in the array.
-- |
-- | The expectation text handed to `<~?>` is `"none of "` followed by the
-- | shown array of excluded characters (each rendered as a `String` via
-- | `singleton`). It carries no leading space, matching the `"one of "`
-- | text used by `oneOfCodePoints`.
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints ss =
  satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <> show (singleton <$> ss)
184
+
185
-- | Parse the longest leading `String` whose characters all satisfy the
-- | predicate. The result is the empty `String` when the very first
-- | character does not satisfy it, so the supplied function never
-- | reports an error.
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile Data.CodePoint.Unicode.isLetter
-- | ```
-- |
-- | Result:
-- |
-- | ```
-- | Right "Tackling"
-- | ```
-- |
-- | Prefer `takeWhile isLetter` over
-- | `fromCharArray <$> Data.Array.many letter`.
takeWhile :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile predicate = consumeWith splitPrefix
  where
  -- Split the input into the matching prefix and everything after it.
  splitPrefix input =
    let
      matched = String.takeWhile predicate input
    in
      Right
        { value: matched
        , consumed: matched
        -- The prefix is selected by code point, but the remainder is cut
        -- by code-unit length so both halves line up on the same boundary.
        , remainder: SCU.drop (SCU.length matched) input
        }
215
+
216
-- | Parse the longest leading `String` whose characters all satisfy the
-- | predicate, requiring at least 1 character. When the first character
-- | does not satisfy the predicate, the parser reports a generic
-- | "expected character" message, so you should supply your own
-- | expectation description (for example with `<?>`).
-- |
-- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
-- | for useful predicates.
-- |
-- | Example:
-- |
-- | ```
-- | runParser "Tackling the Awkward Squad" do
-- |   takeWhile1 Data.CodePoint.Unicode.isLetter <?> "a letter"
-- | ```
-- |
-- | Result:
-- |
-- | ```
-- | Right "Tackling"
-- | ```
takeWhile1 :: forall m. (CodePoint -> Boolean) -> ParserT String m String
takeWhile1 predicate = consumeWith \input ->
  case String.takeWhile predicate input of
    -- An empty prefix means the first character was rejected (or the
    -- input was empty): report failure instead of succeeding with "".
    "" -> Left " Expected character satisfying predicate"
    matched -> Right
      { value: matched
      , consumed: matched
      -- The prefix is selected by code point, but the remainder is cut
      -- by code-unit length so both halves line up on the same boundary.
      , remainder: SCU.drop (SCU.length matched) input
      }
0 commit comments