@@ -117,6 +117,23 @@ impl String {
117
117
}
118
118
Some ( s)
119
119
}
120
+
121
+ /// Decode a UTF-16 encoded vector `v` into a string, replacing
122
+ /// invalid data with the replacement character (U+FFFD).
123
+ ///
124
+ /// # Example
125
+ /// ```rust
126
+ /// // ð„žmus<invalid>ic<invalid>
127
+ /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
128
+ /// 0x0073, 0xDD1E, 0x0069, 0x0063,
129
+ /// 0xD834];
130
+ ///
131
+ /// assert_eq!(String::from_utf16_lossy(v),
132
+ /// "ð„žmus\uFFFDic\uFFFD".to_string());
133
+ /// ```
134
+ pub fn from_utf16_lossy ( v : & [ u16 ] ) -> String {
135
+ str:: utf16_items ( v) . map ( |c| c. to_char_lossy ( ) ) . collect ( )
136
+ }
120
137
121
138
/// Convert a vector of chars to a string
122
139
///
@@ -431,6 +448,7 @@ mod tests {
431
448
use test:: Bencher ;
432
449
433
450
use Mutable ;
451
+ use str;
434
452
use str:: { Str , StrSlice } ;
435
453
use super :: String ;
436
454
@@ -439,6 +457,95 @@ mod tests {
439
457
let owned: Option < :: std:: string:: String > = from_str ( "string" ) ;
440
458
assert_eq ! ( owned. as_ref( ) . map( |s| s. as_slice( ) ) , Some ( "string" ) ) ;
441
459
}
460
+
461
+ #[ test]
462
+ fn test_from_utf16 ( ) {
463
+ let pairs =
464
+ [ ( String :: from_str ( "ð…ðŒ¿ðŒ»ð†ðŒ¹ðŒ»ðŒ°\n " ) ,
465
+ vec ! [ 0xd800_u16 , 0xdf45_u16 , 0xd800_u16 , 0xdf3f_u16 ,
466
+ 0xd800_u16 , 0xdf3b_u16 , 0xd800_u16 , 0xdf46_u16 ,
467
+ 0xd800_u16 , 0xdf39_u16 , 0xd800_u16 , 0xdf3b_u16 ,
468
+ 0xd800_u16 , 0xdf30_u16 , 0x000a_u16 ] ) ,
469
+
470
+ ( String :: from_str ( "ð’ð‘‰ð®ð‘€ð²ð‘‹ ðð²ð‘\n " ) ,
471
+ vec ! [ 0xd801_u16 , 0xdc12_u16 , 0xd801_u16 ,
472
+ 0xdc49_u16 , 0xd801_u16 , 0xdc2e_u16 , 0xd801_u16 ,
473
+ 0xdc40_u16 , 0xd801_u16 , 0xdc32_u16 , 0xd801_u16 ,
474
+ 0xdc4b_u16 , 0x0020_u16 , 0xd801_u16 , 0xdc0f_u16 ,
475
+ 0xd801_u16 , 0xdc32_u16 , 0xd801_u16 , 0xdc4d_u16 ,
476
+ 0x000a_u16 ] ) ,
477
+
478
+ ( String :: from_str ( "ðŒ€ðŒ–ðŒ‹ðŒ„ðŒ‘ðŒ‰Â·ðŒŒðŒ„ðŒ•ðŒ„ðŒ‹ðŒ‰ðŒ‘\n " ) ,
479
+ vec ! [ 0xd800_u16 , 0xdf00_u16 , 0xd800_u16 , 0xdf16_u16 ,
480
+ 0xd800_u16 , 0xdf0b_u16 , 0xd800_u16 , 0xdf04_u16 ,
481
+ 0xd800_u16 , 0xdf11_u16 , 0xd800_u16 , 0xdf09_u16 ,
482
+ 0x00b7_u16 , 0xd800_u16 , 0xdf0c_u16 , 0xd800_u16 ,
483
+ 0xdf04_u16 , 0xd800_u16 , 0xdf15_u16 , 0xd800_u16 ,
484
+ 0xdf04_u16 , 0xd800_u16 , 0xdf0b_u16 , 0xd800_u16 ,
485
+ 0xdf09_u16 , 0xd800_u16 , 0xdf11_u16 , 0x000a_u16 ] ) ,
486
+
487
+ ( String :: from_str ( "ð’‹ð’˜ð’ˆð’‘ð’›ð’’ ð’•ð’“ ð’ˆð’šð’ ð’ð’œð’’ð’–ð’† ð’•ð’†\n " ) ,
488
+ vec ! [ 0xd801_u16 , 0xdc8b_u16 , 0xd801_u16 , 0xdc98_u16 ,
489
+ 0xd801_u16 , 0xdc88_u16 , 0xd801_u16 , 0xdc91_u16 ,
490
+ 0xd801_u16 , 0xdc9b_u16 , 0xd801_u16 , 0xdc92_u16 ,
491
+ 0x0020_u16 , 0xd801_u16 , 0xdc95_u16 , 0xd801_u16 ,
492
+ 0xdc93_u16 , 0x0020_u16 , 0xd801_u16 , 0xdc88_u16 ,
493
+ 0xd801_u16 , 0xdc9a_u16 , 0xd801_u16 , 0xdc8d_u16 ,
494
+ 0x0020_u16 , 0xd801_u16 , 0xdc8f_u16 , 0xd801_u16 ,
495
+ 0xdc9c_u16 , 0xd801_u16 , 0xdc92_u16 , 0xd801_u16 ,
496
+ 0xdc96_u16 , 0xd801_u16 , 0xdc86_u16 , 0x0020_u16 ,
497
+ 0xd801_u16 , 0xdc95_u16 , 0xd801_u16 , 0xdc86_u16 ,
498
+ 0x000a_u16 ] ) ,
499
+ // Issue #12318, even-numbered non-BMP planes
500
+ ( String :: from_str ( "\U 00020000" ) ,
501
+ vec ! [ 0xD840 , 0xDC00 ] ) ] ;
502
+
503
+ for p in pairs. iter ( ) {
504
+ let ( s, u) = ( * p) . clone ( ) ;
505
+ let s_as_utf16 = s. as_slice ( ) . utf16_units ( ) . collect :: < Vec < u16 > > ( ) ;
506
+ let u_as_string = String :: from_utf16 ( u. as_slice ( ) ) . unwrap ( ) ;
507
+
508
+ assert ! ( str :: is_utf16( u. as_slice( ) ) ) ;
509
+ assert_eq ! ( s_as_utf16, u) ;
510
+
511
+ assert_eq ! ( u_as_string, s) ;
512
+ assert_eq ! ( String :: from_utf16_lossy( u. as_slice( ) ) , s) ;
513
+
514
+ assert_eq ! ( String :: from_utf16( s_as_utf16. as_slice( ) ) . unwrap( ) , s) ;
515
+ assert_eq ! ( u_as_string. as_slice( ) . utf16_units( ) . collect:: <Vec <u16 >>( ) , u) ;
516
+ }
517
+ }
518
+
519
#[test]
fn test_utf16_invalid() {
    // Well-formed input is exercised by `test_from_utf16`; every input
    // here is expected to make `from_utf16` yield `None`.

    // A lead surrogate immediately followed by end of input.
    let lead_then_eof = [0xD800_u16];
    assert_eq!(String::from_utf16(lead_then_eof), None);

    // A lead surrogate followed by another lead surrogate.
    let lead_then_lead = [0xD800_u16, 0xD800];
    assert_eq!(String::from_utf16(lead_then_lead), None);

    // A trail surrogate that has no lead surrogate before it.
    let isolated_trail = [0x0061_u16, 0xDC00];
    assert_eq!(String::from_utf16(isolated_trail), None);

    // A valid pair with an unpaired lead surrogate on either side.
    let general = [0xD800_u16, 0xd801, 0xdc8b, 0xD800];
    assert_eq!(String::from_utf16(general), None);
}
533
+
534
#[test]
fn test_from_utf16_lossy() {
    // Well-formed input is exercised by `test_from_utf16`; these cases
    // check that each unpaired surrogate turns into exactly one U+FFFD
    // and that surrounding valid data is kept.

    // lead + eof
    assert_eq!(String::from_utf16_lossy([0xD800]), String::from_str("\uFFFD"));
    // lead + lead
    assert_eq!(String::from_utf16_lossy([0xD800, 0xD800]), String::from_str("\uFFFD\uFFFD"));

    // isolated trail
    assert_eq!(String::from_utf16_lossy([0x0061, 0xDC00]), String::from_str("a\uFFFD"));

    // general: 0xd801 0xdc8b is a valid pair (U+1048B) between two
    // unpaired lead surrogates.
    assert_eq!(String::from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
               String::from_str("\uFFFD𐒋\uFFFD"));
}
442
549
443
550
#[ bench]
444
551
fn bench_with_capacity ( b : & mut Bencher ) {
0 commit comments