From 79cda0135f2a23d575f110ad73432e4f3b705b41 Mon Sep 17 00:00:00 2001
From: M Farkas-Dyck <strake888@gmail.com>
Date: Fri, 20 Jul 2018 15:15:26 -0800
Subject: [PATCH 1/5] Define non-panicking UTF encoding methods on `char`

---
 src/libcore/char/methods.rs | 62 +++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index eee78de90362..a39d8283abde 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -467,6 +467,37 @@ impl char {
         }
     }
 
+    /// Encodes this character as UTF-8 into the provided byte buffer,
+    /// and then returns the subslice of the buffer that contains the encoded character.
+    /// Returns `None` if the buffer is too short to hold the encoded character.
+    ///
+    /// # Examples
+    ///
+    /// In both of these examples, 'ß' takes two bytes to encode.
+    ///
+    /// ```
+    /// let mut b = [0; 2];
+    ///
+    /// let result = 'ß'.encode_utf8(&mut b).unwrap();
+    ///
+    /// assert_eq!(result, "ß");
+    ///
+    /// assert_eq!(result.len(), 2);
+    /// ```
+    ///
+    /// A buffer that's too small:
+    ///
+    /// ```
+    /// let mut b = [0; 1];
+    ///
+    /// assert_eq!(None, 'ß'.encode_utf8(&mut b));
+    /// ```
+    #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
+    #[inline]
+    pub fn try_encode_utf8(self, dst: &mut [u8]) -> Option<&mut str> {
+        if dst.len() < self.len_utf8() { None } else { Some(self.encode_utf8(dst)) }
+    }
+
     /// Encodes this character as UTF-16 into the provided `u16` buffer,
     /// and then returns the subslice of the buffer that contains the encoded character.
     ///
@@ -525,6 +556,37 @@ impl char {
         }
     }
 
+    /// Encodes this character as UTF-16 into the provided `u16` buffer,
+    /// and then returns the subslice of the buffer that contains the encoded character.
+    /// Returns `None` if the buffer is too short to hold the encoded character.
+    ///
+    /// # Examples
+    ///
+    /// In both of these examples, '𝕊' takes two `u16`s to encode.
+    ///
+    /// ```
+    /// let mut b = [0; 2];
+    ///
+    /// let result = '𝕊'.encode_utf16(&mut b).unwrap();
+    ///
+    /// assert_eq!(result, "𝕊");
+    ///
+    /// assert_eq!(result.len(), 2);
+    /// ```
+    ///
+    /// A buffer that's too small:
+    ///
+    /// ```
+    /// let mut b = [0; 1];
+    ///
+    /// assert_eq!(None, '𝕊'.encode_utf16(&mut b));
+    /// ```
+    #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
+    #[inline]
+    pub fn try_encode_utf16(self, dst: &mut [u16]) -> Option<&mut [u16]> {
+        if dst.len() < self.len_utf16() { None } else { Some(self.encode_utf16(dst)) }
+    }
+
     /// Returns true if this `char` is an alphabetic code point, and false if not.
     ///
     /// # Examples

From fc9e1900137cf7bf169afab2615518fc148d97f2 Mon Sep 17 00:00:00 2001
From: M Farkas-Dyck <strake888@gmail.com>
Date: Sun, 22 Jul 2018 12:12:06 -0800
Subject: [PATCH 2/5] Do not check UTF length twice

---
 src/libcore/char/methods.rs | 100 +++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 48 deletions(-)

diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index a39d8283abde..bf0b42784456 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -436,34 +436,11 @@ impl char {
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
     #[inline]
     pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
-        let code = self as u32;
-        unsafe {
-            let len =
-            if code < MAX_ONE_B && !dst.is_empty() {
-                *dst.get_unchecked_mut(0) = code as u8;
-                1
-            } else if code < MAX_TWO_B && dst.len() >= 2 {
-                *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-                *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
-                2
-            } else if code < MAX_THREE_B && dst.len() >= 3  {
-                *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *dst.get_unchecked_mut(1) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
-                3
-            } else if dst.len() >= 4 {
-                *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-                *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
-                4
-            } else {
-                panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf8(),
-                    code,
-                    dst.len())
-            };
-            from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
+        let l = dst.len();
+        match self.try_encode_utf8(dst) {
+            Some(s) => s,
+            None => panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+                           self.len_utf8(), self as u32, l),
         }
     }
 
@@ -495,7 +472,32 @@ impl char {
     #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
     #[inline]
     pub fn try_encode_utf8(self, dst: &mut [u8]) -> Option<&mut str> {
-        if dst.len() < self.len_utf8() { None } else { Some(self.encode_utf8(dst)) }
+        let code = self as u32;
+        unsafe {
+            let len =
+            if code < MAX_ONE_B && !dst.is_empty() {
+                *dst.get_unchecked_mut(0) = code as u8;
+                1
+            } else if code < MAX_TWO_B && dst.len() >= 2 {
+                *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+                *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
+                2
+            } else if code < MAX_THREE_B && dst.len() >= 3  {
+                *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+                *dst.get_unchecked_mut(1) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+                *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
+                3
+            } else if dst.len() >= 4 {
+                *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+                *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+                *dst.get_unchecked_mut(2) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+                *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
+                4
+            } else {
+                return None;
+            };
+            Some(from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)))
+        }
     }
 
     /// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -535,24 +537,11 @@ impl char {
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
     #[inline]
     pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
-        let mut code = self as u32;
-        unsafe {
-            if (code & 0xFFFF) == code && !dst.is_empty() {
-                // The BMP falls through (assuming non-surrogate, as it should)
-                *dst.get_unchecked_mut(0) = code as u16;
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
-            } else if dst.len() >= 2 {
-                // Supplementary planes break into surrogates.
-                code -= 0x1_0000;
-                *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
-                *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
-            } else {
-                panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf16(),
-                    code,
-                    dst.len())
-            }
+        let l = dst.len();
+        match self.try_encode_utf16(dst) {
+            Some(s) => s,
+            None => panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
+                           self.len_utf16(), self as u32, l),
         }
     }
 
@@ -584,7 +573,22 @@ impl char {
     #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
     #[inline]
     pub fn try_encode_utf16(self, dst: &mut [u16]) -> Option<&mut [u16]> {
-        if dst.len() < self.len_utf16() { None } else { Some(self.encode_utf16(dst)) }
+        let mut code = self as u32;
+        unsafe {
+            if (code & 0xFFFF) == code && !dst.is_empty() {
+                // The BMP falls through (assuming non-surrogate, as it should)
+                *dst.get_unchecked_mut(0) = code as u16;
+                Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 1))
+            } else if dst.len() >= 2 {
+                // Supplementary planes break into surrogates.
+                code -= 0x1_0000;
+                *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
+                *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
+                Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 2))
+            } else {
+                None
+            }
+        }
     }
 
     /// Returns true if this `char` is an alphabetic code point, and false if not.

From 9ba4046aa6369e0e74ff681c05cf24ff3aee06c1 Mon Sep 17 00:00:00 2001
From: M Farkas-Dyck <strake888@gmail.com>
Date: Sun, 22 Jul 2018 12:14:04 -0800
Subject: [PATCH 3/5] unbreak doctests

---
 src/libcore/char/methods.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index bf0b42784456..ac5c4104049d 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -455,7 +455,7 @@ impl char {
     /// ```
     /// let mut b = [0; 2];
     ///
-    /// let result = 'ß'.encode_utf8(&mut b).unwrap();
+    /// let result = 'ß'.try_encode_utf8(&mut b).unwrap();
     ///
     /// assert_eq!(result, "ß");
     ///
@@ -467,7 +467,7 @@ impl char {
     /// ```
     /// let mut b = [0; 1];
     ///
-    /// assert_eq!(None, 'ß'.encode_utf8(&mut b));
+    /// assert_eq!(None, 'ß'.try_encode_utf8(&mut b));
     /// ```
     #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
     #[inline]
@@ -556,9 +556,7 @@ impl char {
     /// ```
     /// let mut b = [0; 2];
     ///
-    /// let result = '𝕊'.encode_utf16(&mut b).unwrap();
-    ///
-    /// assert_eq!(result, "𝕊");
+    /// let result = '𝕊'.try_encode_utf16(&mut b).unwrap();
     ///
     /// assert_eq!(result.len(), 2);
     /// ```
@@ -568,7 +566,7 @@ impl char {
     /// ```
     /// let mut b = [0; 1];
     ///
-    /// assert_eq!(None, '𝕊'.encode_utf16(&mut b));
+    /// assert_eq!(None, '𝕊'.try_encode_utf16(&mut b));
     /// ```
     #[unstable(feature = "try_unicode_encode_char", issue = "52579")]
     #[inline]

From a4b17ae3dfc80b07c0a9af4e7ca2758bace5c404 Mon Sep 17 00:00:00 2001
From: M Farkas-Dyck <strake888@gmail.com>
Date: Sun, 29 Jul 2018 20:27:06 -0800
Subject: [PATCH 4/5] feature(try_unicode_encode_char)

---
 src/libcore/lib.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs
index bbe6ae8619fe..cd21250c8753 100644
--- a/src/libcore/lib.rs
+++ b/src/libcore/lib.rs
@@ -122,6 +122,7 @@
 #![feature(const_slice_len)]
 #![feature(const_str_as_bytes)]
 #![feature(const_str_len)]
+#![feature(try_unicode_encode_char)]
 
 #[prelude_import]
 #[allow(unused)]

From 5ceeccdfd59ed11e7a6febeba5bb1a355eb92ff4 Mon Sep 17 00:00:00 2001
From: M Farkas-Dyck <strake888@gmail.com>
Date: Wed, 8 Aug 2018 03:56:14 -0800
Subject: [PATCH 5/5] unbreak docs

---
 src/libcore/char/methods.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index ac5c4104049d..919793aff879 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -453,6 +453,8 @@ impl char {
     /// In both of these examples, 'ß' takes two bytes to encode.
     ///
     /// ```
+    /// #![feature(try_unicode_encode_char)]
+    ///
     /// let mut b = [0; 2];
     ///
     /// let result = 'ß'.try_encode_utf8(&mut b).unwrap();
@@ -465,6 +467,8 @@ impl char {
     /// A buffer that's too small:
     ///
     /// ```
+    /// #![feature(try_unicode_encode_char)]
+    ///
     /// let mut b = [0; 1];
     ///
     /// assert_eq!(None, 'ß'.try_encode_utf8(&mut b));
@@ -549,6 +553,8 @@ impl char {
     /// In both of these examples, '𝕊' takes two `u16`s to encode.
     ///
     /// ```
+    /// #![feature(try_unicode_encode_char)]
+    ///
     /// let mut b = [0; 2];
     ///
     /// let result = '𝕊'.try_encode_utf16(&mut b).unwrap();
@@ -559,6 +565,8 @@ impl char {
     /// A buffer that's too small:
     ///
     /// ```
+    /// #![feature(try_unicode_encode_char)]
+    ///
     /// let mut b = [0; 1];
     ///
     /// assert_eq!(None, '𝕊'.try_encode_utf16(&mut b));