@@ -348,6 +348,40 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
348
348
// Impl for char
349
349
/////////////////////////////////////////////////////////////////////////////
350
350
351
/// The number of bytes a `char` occupies when encoded as UTF-8.
///
/// The discriminant of each variant is the byte length itself (`1..=4`), so
/// `size as usize` can be used directly as a length, e.g. for slicing the
/// encoded buffer or for stepping a byte offset back to the start of a
/// character. Use [`Utf8Size::index`] to read the *last* encoded byte; it
/// performs the `- 1` adjustment internally.
#[derive(Clone, Copy, Debug)]
enum Utf8Size {
    // Values are byte lengths, NOT indexes: callers cast with `as usize` and
    // use the result as a length.
    One = 1,
    Two = 2,
    Three = 3,
    Four = 4,
}

impl Utf8Size {
    /// Converts a byte length into a `Utf8Size`.
    ///
    /// Returns `None` unless `size` is one of `1`, `2`, `3` or `4`.
    fn new(size: usize) -> Option<Self> {
        match size {
            1 => Some(Self::One),
            2 => Some(Self::Two),
            3 => Some(Self::Three),
            4 => Some(Self::Four),
            _ => None,
        }
    }

    /// Converts a byte length into a `Utf8Size` without checking the range.
    ///
    /// # Safety
    ///
    /// `size` must be more than `0` and less than `5`.
    unsafe fn new_unchecked(size: usize) -> Self {
        // SAFETY: the caller guarantees `size` is in `1..=4`, and `new`
        // returns `Some` for exactly those values.
        unsafe { Self::new(size).unwrap_unchecked() }
    }

    /// Returns a reference to the last byte of a UTF-8 sequence of this
    /// length stored at the start of `arr`, i.e. `arr[len - 1]`.
    fn index(self, arr: &[u8; 4]) -> &u8 {
        // The discriminant is the length, so the last byte lives one below it.
        let last = self as usize - 1;
        // SAFETY: the discriminant is in `1..=4`, so `last` is in `0..=3`,
        // which is always in bounds for a `[u8; 4]`.
        unsafe { arr.get_unchecked(last) }
    }
}
384
+
351
385
/// Associated type for `<char as Pattern<'a>>::Searcher`.
352
386
#[ derive( Clone , Debug ) ]
353
387
pub struct CharSearcher < ' a > {
@@ -368,9 +402,8 @@ pub struct CharSearcher<'a> {
368
402
/// The character being searched for
369
403
needle : char ,
370
404
371
- // safety invariant: `utf8_size` must be less than 5
372
405
/// The number of bytes `needle` takes up when encoded in utf8.
373
- utf8_size : usize ,
406
+ utf8_size : Utf8Size ,
374
407
/// A utf8 encoded copy of the `needle`
375
408
utf8_encoded : [ u8 ; 4 ] ,
376
409
}
@@ -413,8 +446,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
413
446
// get the haystack after the last character found
414
447
let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
415
448
// the last byte of the utf8 encoded needle
416
- // SAFETY: we have an invariant that `utf8_size < 5`
417
- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
449
+ let last_byte = * self . utf8_size . index ( & self . utf8_encoded ) ;
418
450
if let Some ( index) = memchr:: memchr ( last_byte, bytes) {
419
451
// The new finger is the index of the byte we found,
420
452
// plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +466,12 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
434
466
// find something. When we find something the `finger` will be set
435
467
// to a UTF8 boundary.
436
468
self . finger += index + 1 ;
437
- if self . finger >= self . utf8_size {
438
- let found_char = self . finger - self . utf8_size ;
469
+
470
+ let utf8_size = self . utf8_size as usize ;
471
+ if self . finger >= utf8_size {
472
+ let found_char = self . finger - utf8_size;
439
473
if let Some ( slice) = self . haystack . as_bytes ( ) . get ( found_char..self . finger ) {
440
- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
474
+ if slice == & self . utf8_encoded [ 0 ..utf8_size] {
441
475
return Some ( ( found_char, self . finger ) ) ;
442
476
}
443
477
}
@@ -481,8 +515,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
481
515
// get the haystack up to but not including the last character searched
482
516
let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
483
517
// the last byte of the utf8 encoded needle
484
- // SAFETY: we have an invariant that `utf8_size < 5`
485
- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
518
+ let last_byte = * self . utf8_size . index ( & self . utf8_encoded ) ;
486
519
if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
487
520
// we searched a slice that was offset by self.finger,
488
521
// add self.finger to recoup the original index
@@ -493,14 +526,15 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
493
526
// char in the paradigm of reverse iteration). For
494
527
// multibyte chars we need to skip down by the number of more
495
528
// bytes they have than ASCII
496
- let shift = self . utf8_size - 1 ;
529
+ let utf8_size = self . utf8_size as usize ;
530
+ let shift = utf8_size - 1 ;
497
531
if index >= shift {
498
532
let found_char = index - shift;
499
- if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ) ) {
500
- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
533
+ if let Some ( slice) = haystack. get ( found_char..( found_char + utf8_size) ) {
534
+ if slice == & self . utf8_encoded [ 0 ..utf8_size] {
501
535
// move finger to before the character found (i.e., at its start index)
502
536
self . finger_back = found_char;
503
- return Some ( ( self . finger_back , self . finger_back + self . utf8_size ) ) ;
537
+ return Some ( ( self . finger_back , self . finger_back + utf8_size) ) ;
504
538
}
505
539
}
506
540
}
@@ -543,6 +577,9 @@ impl<'a> Pattern<'a> for char {
543
577
fn into_searcher ( self , haystack : & ' a str ) -> Self :: Searcher {
544
578
let mut utf8_encoded = [ 0 ; 4 ] ;
545
579
let utf8_size = self . encode_utf8 ( & mut utf8_encoded) . len ( ) ;
580
+
581
+ // SAFETY: `encode_utf8` always writes between 1 and 4 bytes, so `utf8_size` is more than 0 and less than 5
582
+ let utf8_size = unsafe { Utf8Size :: new_unchecked ( utf8_size) } ;
546
583
CharSearcher {
547
584
haystack,
548
585
finger : 0 ,
0 commit comments