@@ -90,6 +90,13 @@ pub enum ErrorKind {
90
90
__Nonexhaustive,
91
91
}
92
92
93
+ // BREADCRUMBS:
94
+ //
95
+ // Remove EmptyClassNotAllowed
96
+ // Make errors non_exhaustive
97
+ // Simplify repetitions (get rid of ZeroOrOne, OneOrMore etc)
98
+ // Get rid of deprecated things
99
+
93
100
impl ErrorKind {
94
101
// TODO: Remove this method entirely on the next breaking semver release.
95
102
#[ allow( deprecated) ]
@@ -1013,12 +1020,12 @@ impl fmt::Debug for ClassUnicodeRange {
1013
1020
{
1014
1021
self . start . to_string ( )
1015
1022
} else {
1016
- format ! ( "0x{:X}" , self . start as u32 )
1023
+ format ! ( "0x{:X}" , u32 :: from ( self . start) )
1017
1024
} ;
1018
1025
let end = if !self . end . is_whitespace ( ) && !self . end . is_control ( ) {
1019
1026
self . end . to_string ( )
1020
1027
} else {
1021
- format ! ( "0x{:X}" , self . end as u32 )
1028
+ format ! ( "0x{:X}" , u32 :: from ( self . end) )
1022
1029
} ;
1023
1030
f. debug_struct ( "ClassUnicodeRange" )
1024
1031
. field ( "start" , & start)
@@ -1058,10 +1065,9 @@ impl Interval for ClassUnicodeRange {
1058
1065
if !unicode:: contains_simple_case_mapping ( self . start , self . end ) ? {
1059
1066
return Ok ( ( ) ) ;
1060
1067
}
1061
- let start = self . start as u32 ;
1062
- let end = ( self . end as u32 ) . saturating_add ( 1 ) ;
1068
+ let ( start, end) = ( u32:: from ( self . start ) , u32:: from ( self . end ) ) ;
1063
1069
let mut next_simple_cp = None ;
1064
- for cp in ( start..end) . filter_map ( char:: from_u32) {
1070
+ for cp in ( start..= end) . filter_map ( char:: from_u32) {
1065
1071
if next_simple_cp. map_or ( false , |next| cp < next) {
1066
1072
continue ;
1067
1073
}
@@ -1104,6 +1110,18 @@ impl ClassUnicodeRange {
1104
1110
pub fn end ( & self ) -> char {
1105
1111
self . end
1106
1112
}
1113
+
1114
+ /// Returns the number of codepoints in this range.
1115
+ pub fn len ( & self ) -> usize {
1116
+ let diff = 1 + u32:: from ( self . end ) - u32:: from ( self . start ) ;
1117
+ // This is likely to panic in 16-bit targets since a usize can only fit
1118
+ // 2^16. It's not clear what to do here, other than to return an error
1119
+ // when building a Unicode class that contains a range whose length
1120
+ // overflows usize. (Which, to be honest, is probably quite common on
1121
+ // 16-bit targets. For example, this would imply that '.' and '\p{any}'
1122
+ // would be impossible to build.)
1123
+ usize:: try_from ( diff) . expect ( "char class len fits in usize" )
1124
+ }
1107
1125
}
1108
1126
1109
1127
/// A set of characters represented by arbitrary bytes (where one byte
@@ -1291,18 +1309,27 @@ impl ClassBytesRange {
1291
1309
pub fn end ( & self ) -> u8 {
1292
1310
self . end
1293
1311
}
1312
+
1313
+ /// Returns the number of bytes in this range.
1314
+ pub fn len ( & self ) -> usize {
1315
+ usize:: from ( self . end . checked_sub ( self . start ) . unwrap ( ) )
1316
+ . checked_add ( 1 )
1317
+ . unwrap ( )
1318
+ }
1294
1319
}
1295
1320
1296
1321
impl fmt:: Debug for ClassBytesRange {
1297
1322
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
1298
1323
let mut debug = f. debug_struct ( "ClassBytesRange" ) ;
1299
1324
if self . start <= 0x7F {
1300
- debug. field ( "start" , & ( self . start as char ) ) ;
1325
+ let ch = char:: try_from ( self . start ) . unwrap ( ) ;
1326
+ debug. field ( "start" , & ch) ;
1301
1327
} else {
1302
1328
debug. field ( "start" , & self . start ) ;
1303
1329
}
1304
1330
// ASCII end bytes (<= 0x7F) are shown as a `char`; any other byte is
// shown as its raw `u8` value.
if self.end <= 0x7F {
    // Convert `self.end` here (not `self.start`): this field describes
    // the upper endpoint of the range.
    let ch = char::try_from(self.end).unwrap();
    debug.field("end", &ch);
} else {
    debug.field("end", &self.end);
}
0 commit comments