@@ -176,27 +176,31 @@ impl<W: fmt::Write> Visitor for Writer<W> {
176
176
| HirKind :: Concat ( _)
177
177
| HirKind :: Alternation ( _) => { }
178
178
HirKind :: Repetition ( ref x) => {
179
- match x . kind {
180
- hir :: RepetitionKind :: ZeroOrOne => {
179
+ match ( x . min , x . max ) {
180
+ ( 0 , Some ( 1 ) ) => {
181
181
self . wtr . write_str ( "?" ) ?;
182
182
}
183
- hir :: RepetitionKind :: ZeroOrMore => {
183
+ ( 0 , None ) => {
184
184
self . wtr . write_str ( "*" ) ?;
185
185
}
186
- hir :: RepetitionKind :: OneOrMore => {
186
+ ( 1 , None ) => {
187
187
self . wtr . write_str ( "+" ) ?;
188
188
}
189
- hir:: RepetitionKind :: Range ( ref x) => match * x {
190
- hir:: RepetitionRange :: Exactly ( m) => {
191
- write ! ( self . wtr, "{{{}}}" , m) ?;
192
- }
193
- hir:: RepetitionRange :: AtLeast ( m) => {
194
- write ! ( self . wtr, "{{{},}}" , m) ?;
195
- }
196
- hir:: RepetitionRange :: Bounded ( m, n) => {
197
- write ! ( self . wtr, "{{{},{}}}" , m, n) ?;
198
- }
199
- } ,
189
+ ( 1 , Some ( 1 ) ) => {
190
+ // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'.
191
+ return Ok ( ( ) ) ;
192
+ }
193
+ ( m, None ) => {
194
+ write ! ( self . wtr, "{{{},}}" , m) ?;
195
+ }
196
+ ( m, Some ( n) ) if m == n => {
197
+ write ! ( self . wtr, "{{{}}}" , m) ?;
198
+ // a{m} and a{m}? are always exactly equivalent.
199
+ return Ok ( ( ) ) ;
200
+ }
201
+ ( m, Some ( n) ) => {
202
+ write ! ( self . wtr, "{{{},{}}}" , m, n) ?;
203
+ }
200
204
}
201
205
if !x. greedy {
202
206
self . wtr . write_str ( "?" ) ?;
@@ -241,7 +245,10 @@ impl<W: fmt::Write> Writer<W> {
241
245
242
246
#[ cfg( test) ]
243
247
mod tests {
244
- use alloc:: string:: String ;
248
+ use alloc:: {
249
+ boxed:: Box ,
250
+ string:: { String , ToString } ,
251
+ } ;
245
252
246
253
use crate :: ParserBuilder ;
247
254
@@ -338,14 +345,17 @@ mod tests {
338
345
roundtrip ( "a+?" , "a+?" ) ;
339
346
roundtrip ( "(?U)a+" , "a+?" ) ;
340
347
341
- roundtrip ( "a{1}" , "a{1}" ) ;
342
- roundtrip ( "a{1,}" , "a{1,}" ) ;
348
+ roundtrip ( "a{1}" , "a" ) ;
349
+ roundtrip ( "a{2}" , "a{2}" ) ;
350
+ roundtrip ( "a{1,}" , "a+" ) ;
343
351
roundtrip ( "a{1,5}" , "a{1,5}" ) ;
344
- roundtrip ( "a{1}?" , "a{1}?" ) ;
345
- roundtrip ( "a{1,}?" , "a{1,}?" ) ;
352
+ roundtrip ( "a{1}?" , "a" ) ;
353
+ roundtrip ( "a{2}?" , "a{2}" ) ;
354
+ roundtrip ( "a{1,}?" , "a+?" ) ;
346
355
roundtrip ( "a{1,5}?" , "a{1,5}?" ) ;
347
- roundtrip ( "(?U)a{1}" , "a{1}?" ) ;
348
- roundtrip ( "(?U)a{1,}" , "a{1,}?" ) ;
356
+ roundtrip ( "(?U)a{1}" , "a" ) ;
357
+ roundtrip ( "(?U)a{2}" , "a{2}" ) ;
358
+ roundtrip ( "(?U)a{1,}" , "a+?" ) ;
349
359
roundtrip ( "(?U)a{1,5}" , "a{1,5}?" ) ;
350
360
}
351
361
@@ -371,4 +381,85 @@ mod tests {
371
381
roundtrip ( "a|b|c" , "a|b|c" ) ;
372
382
roundtrip ( "foo|bar|quux" , "foo|bar|quux" ) ;
373
383
}
384
+
385
+ // This is a regression test that stresses a peculiarity of how the HIR
386
+ // is both constructed and printed. Namely, it is legal for a repetition
387
+ // to directly contain a concatenation. This particular construct isn't
388
+ // really possible to build from the concrete syntax directly, since you'd
389
+ // be forced to put the concatenation into (at least) a non-capturing
390
+ // group. Concurrently, the printer doesn't consider this case and just
391
+ // kind of naively prints the child expression and tacks on the repetition
392
+ // operator.
393
+ //
394
+ // As a result, if you attached '+' to a 'concat(a, b)', the printer gives
395
+ // you 'ab+', but clearly it really should be '(?:ab)+'.
396
+ //
397
+ // This bug isn't easy to surface because most ways of building an HIR
398
+ // come directly from the concrete syntax, and as mentioned above, it just
399
+ // isn't possible to build this kind of HIR from the concrete syntax.
400
+ // Nevertheless, this is definitely a bug.
401
+ //
402
+ // See: https://github.com/rust-lang/regex/issues/731
403
#[test]
fn regression_repetition_concat() {
    // Shorthand for a single Unicode literal node.
    let lit = |c: char| Hir::literal(hir::Literal::Unicode(c));

    // A bare `concat(a, b)` with no group around it; it becomes the direct
    // child of the repetition below.
    let ab = Hir::concat(alloc::vec![lit('a'), lit('b')]);

    // Greedy one-or-more repetition applied directly to the concatenation.
    let one_or_more = Hir::repetition(hir::Repetition {
        hir: Box::new(ab),
        greedy: true,
        min: 1,
        max: None,
    });

    let expr = Hir::concat(alloc::vec![lit('x'), one_or_more, lit('y')]);

    // The printed form must group the repeated concatenation; without the
    // group the output would read as 'xab+y'.
    assert_eq!(r"x(?:ab)+y", expr.to_string());
}
420
+
421
+ // Just like regression_repetition_concat, but with the repetition using
422
+ // an alternation as a child expression instead.
423
+ //
424
+ // See: https://github.com/rust-lang/regex/issues/731
425
#[test]
fn regression_repetition_alternation() {
    // Shorthand for a single Unicode literal node.
    let lit = |c: char| Hir::literal(hir::Literal::Unicode(c));

    // A bare `alt(a, b)` with no group around it; it becomes the direct
    // child of the repetition below.
    let a_or_b = Hir::alternation(alloc::vec![lit('a'), lit('b')]);

    // Greedy one-or-more repetition applied directly to the alternation.
    let one_or_more = Hir::repetition(hir::Repetition {
        hir: Box::new(a_or_b),
        greedy: true,
        min: 1,
        max: None,
    });

    let expr = Hir::concat(alloc::vec![lit('x'), one_or_more, lit('y')]);

    // The printed form must group the repeated alternation.
    assert_eq!(r"x(?:a|b)+y", expr.to_string());
}
442
+
443
+ // This regression test is very similar in flavor to
444
+ // regression_repetition_concat in that the root of the issue lies in a
445
+ // peculiarity of how the HIR is represented and how the printer writes it
446
+ // out. Like the other regression, this one is also rooted in the fact that
447
+ // you can't produce the peculiar HIR from the concrete syntax. Namely, you
448
+ // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally
449
+ // be in (at least) a non-capturing group. Why? Because the '|' has very
450
+ // low precedence (lower than concatenation), and so something like 'ab|c'
451
+ // is actually 'alt(ab, c)'.
452
+ //
453
+ // See: https://github.com/rust-lang/regex/issues/516
454
#[test]
fn regression_alternation_concat() {
    // Shorthand for a single Unicode literal node.
    let lit = |c: char| Hir::literal(hir::Literal::Unicode(c));

    // An alternation placed directly inside a concatenation, with no group
    // node wrapping it.
    let b_or_c = Hir::alternation(alloc::vec![lit('b'), lit('c')]);
    let expr = Hir::concat(alloc::vec![lit('a'), b_or_c]);

    // The printed form must group the alternation so that it stays scoped
    // to 'b|c' rather than printing as 'ab|c'.
    assert_eq!(r"a(?:b|c)", expr.to_string());
}
374
465
}
0 commit comments