@@ -402,131 +402,10 @@ macro_rules! utf8_acc_cont_byte(
402
402
( $ch: expr, $byte: expr) => ( ( $ch << 6 ) | ( $byte & 63u8 ) as u32 )
403
403
)
404
404
405
- static TAG_CONT_U8 : u8 = 128u8 ;
406
-
407
- /// Converts a vector of bytes to a new utf-8 string.
408
- /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
409
- ///
410
- /// # Example
411
- ///
412
- /// ```rust
413
- /// let input = b"Hello \xF0\x90\x80World";
414
- /// let output = std::str::from_utf8_lossy(input);
415
- /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
416
- /// ```
405
+ /// Deprecated. Use `String::from_utf8_lossy`.
406
+ #[ deprecated = "Replaced by String::from_utf8_lossy" ]
417
407
pub fn from_utf8_lossy < ' a > ( v : & ' a [ u8 ] ) -> MaybeOwned < ' a > {
418
- if is_utf8 ( v) {
419
- return Slice ( unsafe { mem:: transmute ( v) } )
420
- }
421
-
422
- static REPLACEMENT : & ' static [ u8 ] = b"\xEF \xBF \xBD " ; // U+FFFD in UTF-8
423
- let mut i = 0 ;
424
- let total = v. len ( ) ;
425
- fn unsafe_get ( xs : & [ u8 ] , i : uint ) -> u8 {
426
- unsafe { * xs. unsafe_ref ( i) }
427
- }
428
- fn safe_get ( xs : & [ u8 ] , i : uint , total : uint ) -> u8 {
429
- if i >= total {
430
- 0
431
- } else {
432
- unsafe_get ( xs, i)
433
- }
434
- }
435
-
436
- let mut res = String :: with_capacity ( total) ;
437
-
438
- if i > 0 {
439
- unsafe {
440
- res. push_bytes ( v. slice_to ( i) )
441
- } ;
442
- }
443
-
444
- // subseqidx is the index of the first byte of the subsequence we're looking at.
445
- // It's used to copy a bunch of contiguous good codepoints at once instead of copying
446
- // them one by one.
447
- let mut subseqidx = 0 ;
448
-
449
- while i < total {
450
- let i_ = i;
451
- let byte = unsafe_get ( v, i) ;
452
- i += 1 ;
453
-
454
- macro_rules! error( ( ) => ( {
455
- unsafe {
456
- if subseqidx != i_ {
457
- res. push_bytes( v. slice( subseqidx, i_) ) ;
458
- }
459
- subseqidx = i;
460
- res. push_bytes( REPLACEMENT ) ;
461
- }
462
- } ) )
463
-
464
- if byte < 128u8 {
465
- // subseqidx handles this
466
- } else {
467
- let w = utf8_char_width ( byte) ;
468
-
469
- match w {
470
- 2 => {
471
- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
472
- error ! ( ) ;
473
- continue ;
474
- }
475
- i += 1 ;
476
- }
477
- 3 => {
478
- match ( byte, safe_get ( v, i, total) ) {
479
- ( 0xE0 , 0xA0 .. 0xBF ) => ( ) ,
480
- ( 0xE1 .. 0xEC , 0x80 .. 0xBF ) => ( ) ,
481
- ( 0xED , 0x80 .. 0x9F ) => ( ) ,
482
- ( 0xEE .. 0xEF , 0x80 .. 0xBF ) => ( ) ,
483
- _ => {
484
- error ! ( ) ;
485
- continue ;
486
- }
487
- }
488
- i += 1 ;
489
- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
490
- error ! ( ) ;
491
- continue ;
492
- }
493
- i += 1 ;
494
- }
495
- 4 => {
496
- match ( byte, safe_get ( v, i, total) ) {
497
- ( 0xF0 , 0x90 .. 0xBF ) => ( ) ,
498
- ( 0xF1 .. 0xF3 , 0x80 .. 0xBF ) => ( ) ,
499
- ( 0xF4 , 0x80 .. 0x8F ) => ( ) ,
500
- _ => {
501
- error ! ( ) ;
502
- continue ;
503
- }
504
- }
505
- i += 1 ;
506
- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
507
- error ! ( ) ;
508
- continue ;
509
- }
510
- i += 1 ;
511
- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
512
- error ! ( ) ;
513
- continue ;
514
- }
515
- i += 1 ;
516
- }
517
- _ => {
518
- error ! ( ) ;
519
- continue ;
520
- }
521
- }
522
- }
523
- }
524
- if subseqidx < total {
525
- unsafe {
526
- res. push_bytes ( v. slice ( subseqidx, total) )
527
- } ;
528
- }
529
- Owned ( res. into_string ( ) )
408
+ String :: from_utf8_lossy ( v)
530
409
}
531
410
532
411
/*
@@ -2052,41 +1931,6 @@ String::from_str("\u1111\u1171\u11b6"));
2052
1931
assert_eq ! ( from_utf8( xs) , None ) ;
2053
1932
}
2054
1933
2055
- #[ test]
2056
- fn test_str_from_utf8_lossy ( ) {
2057
- let xs = b"hello" ;
2058
- assert_eq ! ( from_utf8_lossy( xs) , Slice ( "hello" ) ) ;
2059
-
2060
- let xs = "ศไทย中华Việt Nam" . as_bytes ( ) ;
2061
- assert_eq ! ( from_utf8_lossy( xs) , Slice ( "ศไทย中华Việt Nam" ) ) ;
2062
-
2063
- let xs = b"Hello\xC2 There\xFF Goodbye" ;
2064
- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "Hello\uFFFD There\uFFFD Goodbye" ) ) ) ;
2065
-
2066
- let xs = b"Hello\xC0 \x80 There\xE6 \x83 Goodbye" ;
2067
- assert_eq ! ( from_utf8_lossy( xs) ,
2068
- Owned ( String :: from_str( "Hello\uFFFD \uFFFD There\uFFFD Goodbye" ) ) ) ;
2069
-
2070
- let xs = b"\xF5 foo\xF5 \x80 bar" ;
2071
- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD foo\uFFFD \uFFFD bar" ) ) ) ;
2072
-
2073
- let xs = b"\xF1 foo\xF1 \x80 bar\xF1 \x80 \x80 baz" ;
2074
- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD foo\uFFFD bar\uFFFD baz" ) ) ) ;
2075
-
2076
- let xs = b"\xF4 foo\xF4 \x80 bar\xF4 \xBF baz" ;
2077
- assert_eq ! ( from_utf8_lossy( xs) ,
2078
- Owned ( String :: from_str( "\uFFFD foo\uFFFD bar\uFFFD \uFFFD baz" ) ) ) ;
2079
-
2080
- let xs = b"\xF0 \x80 \x80 \x80 foo\xF0 \x90 \x80 \x80 bar" ;
2081
- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD \uFFFD \uFFFD \uFFFD \
2082
- foo\U 00010000bar") ) ) ;
2083
-
2084
- // surrogates
2085
- let xs = b"\xED \xA0 \x80 foo\xED \xBF \xBF bar" ;
2086
- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD \uFFFD \uFFFD foo\
2087
- \uFFFD \uFFFD \uFFFD bar") ) ) ;
2088
- }
2089
-
2090
1934
#[ test]
2091
1935
fn test_maybe_owned_traits ( ) {
2092
1936
let s = Slice ( "abcde" ) ;
@@ -2296,42 +2140,6 @@ mod bench {
2296
2140
} ) ;
2297
2141
}
2298
2142
2299
- #[ bench]
2300
- fn from_utf8_lossy_100_ascii ( b : & mut Bencher ) {
2301
- let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2302
- Lorem ipsum dolor sit amet, consectetur. ";
2303
-
2304
- assert_eq ! ( 100 , s. len( ) ) ;
2305
- b. iter ( || {
2306
- let _ = from_utf8_lossy ( s) ;
2307
- } ) ;
2308
- }
2309
-
2310
- #[ bench]
2311
- fn from_utf8_lossy_100_multibyte ( b : & mut Bencher ) {
2312
- let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰" . as_bytes ( ) ;
2313
- assert_eq ! ( 100 , s. len( ) ) ;
2314
- b. iter ( || {
2315
- let _ = from_utf8_lossy ( s) ;
2316
- } ) ;
2317
- }
2318
-
2319
- #[ bench]
2320
- fn from_utf8_lossy_invalid ( b : & mut Bencher ) {
2321
- let s = b"Hello\xC0 \x80 There\xE6 \x83 Goodbye" ;
2322
- b. iter ( || {
2323
- let _ = from_utf8_lossy ( s) ;
2324
- } ) ;
2325
- }
2326
-
2327
- #[ bench]
2328
- fn from_utf8_lossy_100_invalid ( b : & mut Bencher ) {
2329
- let s = Vec :: from_elem ( 100 , 0xF5u8 ) ;
2330
- b. iter ( || {
2331
- let _ = from_utf8_lossy ( s. as_slice ( ) ) ;
2332
- } ) ;
2333
- }
2334
-
2335
2143
#[ bench]
2336
2144
fn bench_connect ( b : & mut Bencher ) {
2337
2145
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb" ;
0 commit comments