20
20
21
21
//! A regex parser yielding an AST.
22
22
23
+ use regex:: escape;
23
24
use bit_set:: BitSet ;
24
25
use std:: str:: FromStr ;
25
26
use std:: usize;
@@ -199,7 +200,13 @@ impl<'a> Parser<'a> {
199
200
}
200
201
) ) ,
201
202
b'(' => self . parse_group ( ix, depth) ,
202
- b'\\' => self . parse_escape ( ix) ,
203
+ b'\\' => {
204
+ let ( next, expr) = try!( self . parse_escape ( ix) ) ;
205
+ if let Expr :: Backref ( group) = expr {
206
+ self . backrefs . insert ( group) ;
207
+ }
208
+ Ok ( ( next, expr) )
209
+ } ,
203
210
b'+' | b'*' | b'?' | b'|' | b')' =>
204
211
Ok ( ( ix, Expr :: Empty ) ) ,
205
212
b'[' => self . parse_class ( ix) ,
@@ -221,7 +228,7 @@ impl<'a> Parser<'a> {
221
228
}
222
229
223
230
// `ix` points to the `\` character that begins the escape sequence
224
- fn parse_escape ( & mut self , ix : usize ) -> Result < ( usize , Expr ) > {
231
+ fn parse_escape ( & self , ix : usize ) -> Result < ( usize , Expr ) > {
225
232
if ix + 1 == self . re . len ( ) {
226
233
return Err ( Error :: TrailingBackslash ) ;
227
234
}
@@ -233,7 +240,6 @@ impl<'a> Parser<'a> {
233
240
if let Some ( ( end, group) ) = parse_decimal ( self . re , ix + 1 ) {
234
241
// protect BitSet against unreasonably large value
235
242
if group < self . re . len ( ) / 2 {
236
- self . backrefs . insert ( group) ;
237
243
return Ok ( ( end, Expr :: Backref ( group) ) ) ;
238
244
}
239
245
}
@@ -331,9 +337,9 @@ impl<'a> Parser<'a> {
331
337
fn parse_class ( & self , ix : usize ) -> Result < ( usize , Expr ) > {
332
338
let bytes = self . re . as_bytes ( ) ;
333
339
let mut ix = ix + 1 ; // skip opening '['
334
- let mut inner = String :: new ( ) ;
340
+ let mut class = String :: new ( ) ;
335
341
let mut nest = 1 ;
336
- inner . push ( '[' ) ;
342
+ class . push ( '[' ) ;
337
343
loop {
338
344
ix = self . optional_whitespace ( ix) ;
339
345
if ix == self . re . len ( ) {
@@ -344,27 +350,46 @@ impl<'a> Parser<'a> {
344
350
if ix + 1 == self . re . len ( ) {
345
351
return Err ( Error :: InvalidClass ) ;
346
352
}
347
- ix + 1 + codepoint_len ( bytes[ ix + 1 ] )
353
+
354
+ // We support more escapes than the regex crate does, so parse the escape ourselves before delegating.
355
+ let ( end, expr) = try!( self . parse_escape ( ix) ) ;
356
+ match expr {
357
+ Expr :: Literal { val, .. } => {
358
+ class. push_str ( & escape ( & val) ) ;
359
+ }
360
+ Expr :: Delegate { inner, .. } => {
361
+ class. push_str ( & inner) ;
362
+ }
363
+ _ => {
364
+ return Err ( Error :: InvalidClass ) ;
365
+ }
366
+ }
367
+ end
348
368
}
349
369
b'[' => {
350
370
nest += 1 ;
371
+ class. push ( '[' ) ;
351
372
ix + 1
352
373
}
353
374
b']' => {
354
375
nest -= 1 ;
355
376
if nest == 0 {
356
377
break ;
357
378
}
379
+ class. push ( ']' ) ;
358
380
ix + 1
359
381
}
360
- b => ix + codepoint_len ( b)
382
+ b => {
383
+ let end = ix + codepoint_len ( b) ;
384
+ class. push_str ( & self . re [ ix..end] ) ;
385
+ end
386
+ }
361
387
} ;
362
- inner. push_str ( & self . re [ ix..end] ) ;
363
388
ix = end;
364
389
}
365
- inner . push ( ']' ) ;
390
+ class . push ( ']' ) ;
366
391
let ix = ix + 1 ; // skip closing ']'
367
- Ok ( ( ix, Expr :: Delegate { inner : inner , size : 1 } ) )
392
+ Ok ( ( ix, Expr :: Delegate { inner : class , size : 1 } ) )
368
393
}
369
394
370
395
fn parse_group ( & mut self , ix : usize , depth : usize ) -> Result < ( usize , Expr ) > {
0 commit comments