Skip to content

Commit 9c627c3

Browse files
committed
also expose and use encode_utf16_raw for wtf8
1 parent 3182cdf commit 9c627c3

File tree

3 files changed

+40
-24
lines changed

3 files changed

+40
-24
lines changed

src/libcore/char/methods.rs

+37-22
Original file line numberDiff line numberDiff line change
@@ -701,28 +701,7 @@ impl char {
701701
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
702702
#[inline]
703703
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
704-
let mut code = self as u32;
705-
// SAFETY: each arm checks whether there are enough bits to write into
706-
unsafe {
707-
if (code & 0xFFFF) == code && !dst.is_empty() {
708-
// The BMP falls through (assuming non-surrogate, as it should)
709-
*dst.get_unchecked_mut(0) = code as u16;
710-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
711-
} else if dst.len() >= 2 {
712-
// Supplementary planes break into surrogates.
713-
code -= 0x1_0000;
714-
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
715-
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
716-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
717-
} else {
718-
panic!(
719-
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
720-
from_u32_unchecked(code).len_utf16(),
721-
code,
722-
dst.len(),
723-
)
724-
}
725-
}
704+
encode_utf16_raw(self as u32, dst)
726705
}
727706

728707
/// Returns `true` if this `char` has the `Alphabetic` property.
@@ -1692,3 +1671,39 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut str {
16921671
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
16931672
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
16941673
}
1674+
1675+
/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1676+
/// and then returns the subslice of the buffer that contains the encoded character.
1677+
///
1678+
/// Unlike `char::encode_utf16`, this method can be called on codepoints in the surrogate range.
1679+
///
1680+
/// # Panics
1681+
///
1682+
/// Panics if the buffer is not large enough.
1683+
/// A buffer of length 2 is large enough to encode any `char`.
1684+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1685+
#[doc(hidden)]
1686+
#[inline]
1687+
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
1688+
// SAFETY: each arm checks whether there are enough bits to write into
1689+
unsafe {
1690+
if (code & 0xFFFF) == code && !dst.is_empty() {
1691+
// The BMP falls through (assuming non-surrogate, as it should)
1692+
*dst.get_unchecked_mut(0) = code as u16;
1693+
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
1694+
} else if dst.len() >= 2 {
1695+
// Supplementary planes break into surrogates.
1696+
code -= 0x1_0000;
1697+
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
1698+
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
1699+
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
1700+
} else {
1701+
panic!(
1702+
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
1703+
from_u32_unchecked(code).len_utf16(),
1704+
code,
1705+
dst.len(),
1706+
)
1707+
}
1708+
}
1709+
}

src/libcore/char/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ pub use crate::unicode::UNICODE_VERSION;
3939

4040
// perma-unstable re-exports
4141
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
42+
pub use self::methods::encode_utf16_raw;
43+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
4244
pub use self::methods::encode_utf8_raw;
4345

4446
use crate::fmt::{self, Write};

src/libstd/sys_common/wtf8.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -828,8 +828,7 @@ impl<'a> Iterator for EncodeWide<'a> {
828828

829829
let mut buf = [0; 2];
830830
self.code_points.next().map(|code_point| {
831-
let c = unsafe { char::from_u32_unchecked(code_point.value) };
832-
let n = c.encode_utf16(&mut buf).len();
831+
let n = char::encode_utf16_raw(code_point.value, &mut buf).len();
833832
if n == 2 {
834833
self.extra = buf[1];
835834
}

0 commit comments

Comments
 (0)