@@ -36,19 +36,27 @@ module Text.Parsing.Parser.String
36
36
, noneOf
37
37
, noneOfCodePoints
38
38
, match
39
+ , regex
40
+ , RegexFlagsRow
39
41
) where
40
42
41
43
import Prelude hiding (between )
42
44
43
45
import Control.Monad.State (get , put )
44
46
import Data.Array (notElem )
47
+ import Data.Array.NonEmpty as NonEmptyArray
45
48
import Data.Char (fromCharCode )
46
49
import Data.CodePoint.Unicode (isSpace )
50
+ import Data.Either (Either (..))
47
51
import Data.Foldable (elem )
48
52
import Data.Maybe (Maybe (..))
49
53
import Data.String (CodePoint , Pattern (..), length , null , singleton , splitAt , stripPrefix , uncons )
50
54
import Data.String.CodeUnits as SCU
55
+ import Data.String.Regex as Regex
56
+ import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
51
57
import Data.Tuple (Tuple (..), fst )
58
+ import Prim.Row (class Nub , class Union )
59
+ import Record (merge )
52
60
import Text.Parsing.Parser (ParseState (..), ParserT , consume , fail )
53
61
import Text.Parsing.Parser.Combinators (skipMany , tryRethrow , (<?>), (<~?>))
54
62
import Text.Parsing.Parser.Pos (Position (..))
@@ -208,3 +216,82 @@ match p = do
208
216
-- | to something other than `newtype CodePoint = CodePoint Int`.
209
217
unCodePoint :: CodePoint -> Int
210
218
unCodePoint = unsafeCoerce
219
+
220
+ -- | Parser which uses the `Data.String.Regex` module to match the regular
221
+ -- | expression pattern passed as the `String`
222
+ -- | argument to the parser.
223
+ -- |
224
+ -- | This parser will try to match the regular expression pattern starting
225
+ -- | at the current parser position. On success, it will return the matched
226
+ -- | substring.
227
+ -- |
228
+ -- | This parser may be useful for quickly consuming a large section of the
229
+ -- | input `String`, because in a JavaScript runtime environment the `RegExp`
230
+ -- | runtime is a lot faster than primitive parsers.
231
+ -- |
232
+ -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
233
+ -- |
234
+ -- | The `Record flags` argument to the parser is for `Regex` flags. Here are
235
+ -- | the default flags.
236
+ -- |
237
+ -- | ```purescript
238
+ -- | { dotAll: true
239
+ -- | ignoreCase: false
240
+ -- | unicode: true
241
+ -- | }
242
+ -- | ```
243
+ -- |
244
+ -- | If you want to use the defaults then pass
245
+ -- | `{}` as the flags argument. For case-insensitive pattern matching, pass
246
+ -- | `{ignoreCase: true}` as the flags argument.
247
+ -- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
248
+ -- | nonsense in the context of parsing
249
+ -- | and use of the other flags may cause strange behavior in the parser.
250
+ -- |
251
+ -- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
252
+ -- |
253
+ -- | If the `Regex` pattern string fails to compile then this parser will fail.
254
+ -- | (Note: It’s not possible to use a precompiled `Regex` because this parser
255
+ -- | must set flags and make adjustments to the `Regex` pattern string.)
256
+ regex
257
+ :: forall m flags f_
258
+ . Monad m
259
+ => Union flags RegexFlagsRow f_
260
+ => Nub f_ RegexFlagsRow
261
+ => Record flags
262
+ -> String
263
+ -> ParserT String m String
264
+ regex flags pattern =
265
+ -- Prefix a ^ to ensure the pattern only matches the current position in the parse
266
+ case Regex .regex (" ^(" <> pattern <> " )" ) flags' of
267
+ Left paterr ->
268
+ fail $ " Regex pattern error " <> paterr
269
+ Right regexobj -> do
270
+ ParseState input position _ <- get
271
+ case NonEmptyArray .head <$> Regex .match regexobj input of
272
+ Just (Just matched) -> do
273
+ let remainder = SCU .drop (SCU .length matched) input
274
+ put $ ParseState remainder (updatePosString position matched) true
275
+ pure matched
276
+ _ -> fail $ " No Regex pattern match"
277
+ where
278
+ flags' = RegexFlags
279
+ ( merge flags
280
+ { dotAll: true
281
+ , global: false
282
+ , ignoreCase: false
283
+ , multiline: false
284
+ , sticky: false
285
+ , unicode: true
286
+ } :: RegexFlagsRec
287
+ )
288
+
289
+ -- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
290
+ type RegexFlagsRow =
291
+ ( dotAll :: Boolean
292
+ , global :: Boolean
293
+ , ignoreCase :: Boolean
294
+ , multiline :: Boolean
295
+ , sticky :: Boolean
296
+ , unicode :: Boolean
297
+ )
0 commit comments