Skip to content

Commit e1681df

Browse files
committed
auto merge of #12759 : lucab/rust/char-doc, r=alexcrichton
This is mostly a reaction to #12730. If we are going to keep calling them `char`, at least make it clear that they aren't characters but codepoint/scalar.
2 parents 0017056 + 331f907 commit e1681df

File tree

1 file changed

+60
-29
lines changed

1 file changed

+60
-29
lines changed

src/libstd/char.rs

+60-29
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,21 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
//! Unicode characters manipulation (`char` type)
11+
//! Character manipulation (`char` type, Unicode Scalar Value)
12+
//!
13+
//! This module provides the `Char` trait, as well as its implementation
14+
//! for the primitive `char` type, in order to allow basic character manipulation.
15+
//!
16+
//! A `char` actually represents a
17+
//! *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*,
18+
//! as it can contain any Unicode code point except high-surrogate and
19+
//! low-surrogate code points.
20+
//!
21+
//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
22+
//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
23+
//! however the converse is not always true due to the above range limits
24+
//! and, as such, should be performed via the `from_u32` function.
25+
1226

1327
use cast::transmute;
1428
use option::{None, Option, Some};
@@ -66,7 +80,7 @@ static TAG_FOUR_B: uint = 240u;
6680
/// The highest valid code point
6781
pub static MAX: char = '\U0010ffff';
6882

69-
/// Convert from `u32` to a character.
83+
/// Converts from `u32` to a `char`
7084
#[inline]
7185
pub fn from_u32(i: u32) -> Option<char> {
7286
// catch out-of-bounds and surrogates
@@ -77,31 +91,44 @@ pub fn from_u32(i: u32) -> Option<char> {
7791
}
7892
}
7993

80-
/// Returns whether the specified character is considered a unicode alphabetic
81-
/// character
94+
/// Returns whether the specified `char` is considered a Unicode alphabetic
95+
/// code point
8296
pub fn is_alphabetic(c: char) -> bool { derived_property::Alphabetic(c) }
83-
#[allow(missing_doc)]
97+
98+
/// Returns whether the specified `char` satisfies the 'XID_Start' Unicode property
99+
///
100+
/// 'XID_Start' is a Unicode Derived Property specified in
101+
/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
102+
/// mostly similar to 'ID_Start' but modified for closure under NFKx.
84103
pub fn is_XID_start(c: char) -> bool { derived_property::XID_Start(c) }
85-
#[allow(missing_doc)]
104+
105+
/// Returns whether the specified `char` satisfies the 'XID_Continue' Unicode property
106+
///
107+
/// 'XID_Continue' is a Unicode Derived Property specified in
108+
/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
109+
/// mostly similar to 'ID_Continue' but modified for closure under NFKx.
86110
pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
87111

88112
///
89-
/// Indicates whether a character is in lower case, defined
90-
/// in terms of the Unicode Derived Core Property 'Lowercase'.
113+
/// Indicates whether a `char` is in lower case
114+
///
115+
/// This is defined according to the terms of the Unicode Derived Core Property 'Lowercase'.
91116
///
92117
#[inline]
93118
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }
94119

95120
///
96-
/// Indicates whether a character is in upper case, defined
97-
/// in terms of the Unicode Derived Core Property 'Uppercase'.
121+
/// Indicates whether a `char` is in upper case
122+
///
123+
/// This is defined according to the terms of the Unicode Derived Core Property 'Uppercase'.
98124
///
99125
#[inline]
100126
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }
101127

102128
///
103-
/// Indicates whether a character is whitespace. Whitespace is defined in
104-
/// terms of the Unicode Property 'White_Space'.
129+
/// Indicates whether a `char` is whitespace
130+
///
131+
/// Whitespace is defined in terms of the Unicode Property 'White_Space'.
105132
///
106133
#[inline]
107134
pub fn is_whitespace(c: char) -> bool {
@@ -112,9 +139,10 @@ pub fn is_whitespace(c: char) -> bool {
112139
}
113140

114141
///
115-
/// Indicates whether a character is alphanumeric. Alphanumericness is
116-
/// defined in terms of the Unicode General Categories 'Nd', 'Nl', 'No'
117-
/// and the Derived Core Property 'Alphabetic'.
142+
/// Indicates whether a `char` is alphanumeric
143+
///
144+
/// Alphanumericness is defined in terms of the Unicode General Categories
145+
/// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
118146
///
119147
#[inline]
120148
pub fn is_alphanumeric(c: char) -> bool {
@@ -125,14 +153,15 @@ pub fn is_alphanumeric(c: char) -> bool {
125153
}
126154

127155
///
128-
/// Indicates whether a character is a control character. Control
129-
/// characters are defined in terms of the Unicode General Category
156+
/// Indicates whether a `char` is a control code point
157+
///
158+
/// Control code points are defined in terms of the Unicode General Category
130159
/// 'Cc'.
131160
///
132161
#[inline]
133162
pub fn is_control(c: char) -> bool { general_category::Cc(c) }
134163

135-
/// Indicates whether the character is numeric (Nd, Nl, or No)
164+
/// Indicates whether the `char` is numeric (Nd, Nl, or No)
136165
#[inline]
137166
pub fn is_digit(c: char) -> bool {
138167
general_category::Nd(c)
@@ -141,7 +170,8 @@ pub fn is_digit(c: char) -> bool {
141170
}
142171

143172
///
144-
/// Checks if a character parses as a numeric digit in the given radix.
173+
/// Checks if a `char` parses as a numeric digit in the given radix
174+
///
145175
/// Compared to `is_digit()`, this function only recognizes the
146176
/// characters `0-9`, `a-z` and `A-Z`.
147177
///
@@ -167,13 +197,13 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool {
167197
}
168198

169199
///
170-
/// Convert a char to the corresponding digit.
200+
/// Converts a `char` to the corresponding digit
171201
///
172202
/// # Return value
173203
///
174204
/// If `c` is between '0' and '9', the corresponding value
175205
/// between 0 and 9. If `c` is 'a' or 'A', 10. If `c` is
176-
/// 'b' or 'B', 11, etc. Returns none if the char does not
206+
/// 'b' or 'B', 11, etc. Returns `None` if the `char` does not
177207
/// refer to a digit in the given radix.
178208
///
179209
/// # Failure
@@ -196,7 +226,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
196226
}
197227

198228
///
199-
/// Converts a number to the character representing it.
229+
/// Converts a number to the character representing it
200230
///
201231
/// # Return value
202232
///
@@ -254,7 +284,7 @@ fn decompose_hangul(s: char, f: |char|) {
254284
}
255285
}
256286

257-
/// Returns the canonical decomposition of a character.
287+
/// Returns the canonical decomposition of a character
258288
pub fn decompose_canonical(c: char, f: |char|) {
259289
if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
260290
decompose::canonical(c, f);
@@ -263,7 +293,7 @@ pub fn decompose_canonical(c: char, f: |char|) {
263293
}
264294
}
265295

266-
/// Returns the compatibility decomposition of a character.
296+
/// Returns the compatibility decomposition of a character
267297
pub fn decompose_compatible(c: char, f: |char|) {
268298
if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
269299
decompose::compatibility(c, f);
@@ -273,7 +303,7 @@ pub fn decompose_compatible(c: char, f: |char|) {
273303
}
274304

275305
///
276-
/// Return the hexadecimal unicode escape of a char.
306+
/// Returns the hexadecimal Unicode escape of a `char`
277307
///
278308
/// The rules are as follows:
279309
///
@@ -301,7 +331,7 @@ pub fn escape_unicode(c: char, f: |char|) {
301331
}
302332

303333
///
304-
/// Return a 'default' ASCII and C++11-like char-literal escape of a char.
334+
/// Returns a 'default' ASCII and C++11-like literal escape of a `char`
305335
///
306336
/// The default is chosen with a bias toward producing literals that are
307337
/// legal in a variety of languages, including C++11 and similar C-family
@@ -325,7 +355,7 @@ pub fn escape_default(c: char, f: |char|) {
325355
}
326356
}
327357

328-
/// Returns the amount of bytes this character would need if encoded in utf8
358+
/// Returns the amount of bytes this `char` would need if encoded in UTF-8
329359
pub fn len_utf8_bytes(c: char) -> uint {
330360
static MAX_ONE_B: uint = 128u;
331361
static MAX_TWO_B: uint = 2048u;
@@ -360,8 +390,9 @@ pub trait Char {
360390
fn escape_default(&self, f: |char|);
361391
fn len_utf8_bytes(&self) -> uint;
362392

363-
/// Encodes this character as utf-8 into the provided byte-buffer. The
364-
/// buffer must be at least 4 bytes long or a runtime failure will occur.
393+
/// Encodes this `char` as UTF-8 into the provided byte-buffer
394+
///
395+
/// The buffer must be at least 4 bytes long or a runtime failure will occur.
365396
///
366397
/// This will then return the number of bytes written to the slice.
367398
fn encode_utf8(&self, dst: &mut [u8]) -> uint;

0 commit comments

Comments
 (0)