From c5d0e2a0733b5271d1044979cf824dccb2a3c3d5 Mon Sep 17 00:00:00 2001 From: marcell Date: Tue, 9 Jun 2015 23:58:00 +0200 Subject: [PATCH 1/2] Add missing #[inline] to methods related to char. --- src/libcore/char.rs | 4 ++++ src/librustc_unicode/char.rs | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 48d9869f72ae4..9938c2996158a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -152,10 +152,12 @@ pub trait CharExt { } impl CharExt for char { + #[inline] fn is_digit(self, radix: u32) -> bool { self.to_digit(radix).is_some() } + #[inline] fn to_digit(self, radix: u32) -> Option<u32> { if radix > 36 { panic!("to_digit: radix is too high (maximum 36)"); @@ -170,10 +172,12 @@ impl CharExt for char { else { None } } + #[inline] fn escape_unicode(self) -> EscapeUnicode { EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash } } + #[inline] fn escape_default(self) -> EscapeDefault { let init_state = match self { '\t' => EscapeDefaultState::Backslash('t'), diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index 9dfd172707d7b..4e0d196e2f83e 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -119,6 +119,7 @@ impl char { /// assert_eq!('f'.to_digit(16), Some(15)); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn to_digit(self, radix: u32) -> Option<u32> { C::to_digit(self, radix) } /// Returns an iterator that yields the hexadecimal Unicode escape of a @@ -157,6 +158,7 @@ impl char { /// assert_eq!(heart, r"\u{2764}"); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn escape_unicode(self) -> EscapeUnicode { C::escape_unicode(self) } /// Returns an iterator that yields the 'default' ASCII and @@ -195,6 +197,7 @@ impl char { /// assert_eq!(quote, "\\\""); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn escape_default(self) -> EscapeDefault { C::escape_default(self) } /// Returns the 
number of bytes this character would need if encoded in @@ -208,6 +211,7 @@ impl char { /// assert_eq!(n, 2); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn len_utf8(self) -> usize { C::len_utf8(self) } /// Returns the number of 16-bit code units this character would need if @@ -221,6 +225,7 @@ impl char { /// assert_eq!(n, 1); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn len_utf16(self) -> usize { C::len_utf16(self) } /// Encodes this character as UTF-8 into the provided byte buffer, and then @@ -255,6 +260,7 @@ impl char { /// ``` #[unstable(feature = "unicode", reason = "pending decision about Iterator/Writer/Reader")] + #[inline] pub fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> { C::encode_utf8(self, dst) } /// Encodes this character as UTF-16 into the provided `u16` buffer, and @@ -289,6 +295,7 @@ impl char { /// ``` #[unstable(feature = "unicode", reason = "pending decision about Iterator/Writer/Reader")] + #[inline] pub fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> { C::encode_utf16(self, dst) } /// Returns whether the specified character is considered a Unicode @@ -451,5 +458,6 @@ impl char { since = "1.0.0")] #[unstable(feature = "unicode", reason = "needs expert opinion. 
is_cjk flag stands out as ugly")] + #[inline] pub fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) } } From e87c62fb12e6b02cfc39fc2a16c315615714757a Mon Sep 17 00:00:00 2001 From: marcell Date: Wed, 10 Jun 2015 02:03:56 +0200 Subject: [PATCH 2/2] Modify String::push to reallocate more conservatively when the character's UTF-8 representation is longer than 1 byte --- src/libcollections/string.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 7ede6545b9fb2..6717f2f45fab5 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -468,24 +468,24 @@ impl String { #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn push(&mut self, ch: char) { - if (ch as u32) < 0x80 { - self.vec.push(ch as u8); - return; - } - - let cur_len = self.len(); - // This may use up to 4 bytes. - self.vec.reserve(4); + match ch.len_utf8() { + 1 => self.vec.push(ch as u8), + ch_len => { + let cur_len = self.len(); + // This may use up to 4 bytes. + self.vec.reserve(ch_len); - unsafe { - // Attempt to not use an intermediate buffer by just pushing bytes - // directly onto this string. - let slice = slice::from_raw_parts_mut ( - self.vec.as_mut_ptr().offset(cur_len as isize), - 4 - ); - let used = ch.encode_utf8(slice).unwrap_or(0); - self.vec.set_len(cur_len + used); + unsafe { + // Attempt to not use an intermediate buffer by just pushing bytes + // directly onto this string. + let slice = slice::from_raw_parts_mut ( + self.vec.as_mut_ptr().offset(cur_len as isize), + ch_len + ); + let used = ch.encode_utf8(slice).unwrap_or(0); + self.vec.set_len(cur_len + used); + } + } } }