8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
- //! Unicode characters manipulation (`char` type)
11
+ //! Character manipulation (`char` type, Unicode Scalar Value)
12
+ //!
13
+ //! This module provides the `Char` trait, as well as its implementation
14
+ //! for the primitive `char` type, in order to allow basic character manipulation.
15
+ //!
16
+ //! A `char` actually represents a
17
+ //! *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*,
18
+ //! as it can contain any Unicode code point except high-surrogate and
19
+ //! low-surrogate code points.
20
+ //!
21
+ //! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
22
+ //! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
23
+ //! however the converse is not always true due to the above range limits
24
+ //! and, as such, should be performed via the `from_u32` function.
25
+
12
26
13
27
use cast:: transmute;
14
28
use option:: { None , Option , Some } ;
@@ -66,7 +80,7 @@ static TAG_FOUR_B: uint = 240u;
66
80
/// The highest valid code point
67
81
pub static MAX : char = ' \U 0010 ffff' ;
68
82
69
- /// Convert from `u32` to a character.
83
+ /// Converts from `u32` to a `char`
70
84
#[ inline]
71
85
pub fn from_u32 ( i : u32 ) -> Option < char > {
72
86
// catch out-of-bounds and surrogates
@@ -77,31 +91,44 @@ pub fn from_u32(i: u32) -> Option<char> {
77
91
}
78
92
}
79
93
80
- /// Returns whether the specified character is considered a unicode alphabetic
81
- /// character
94
+ /// Returns whether the specified `char` is considered a Unicode alphabetic
95
+ /// code point
82
96
// Thin wrapper: forwards `c` to `derived_property::Alphabetic` and returns its result unchanged.
pub fn is_alphabetic ( c : char ) -> bool { derived_property:: Alphabetic ( c) }
83
- #[ allow( missing_doc) ]
97
+
98
+ /// Returns whether the specified `char` satisfies the 'XID_Start' Unicode property
99
+ ///
100
+ /// 'XID_Start' is a Unicode Derived Property specified in
101
+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
102
+ /// mostly similar to 'ID_Start' but modified for closure under NFKx.
84
103
// Thin wrapper: forwards `c` to `derived_property::XID_Start` and returns its result unchanged.
pub fn is_XID_start ( c : char ) -> bool { derived_property:: XID_Start ( c) }
85
- #[ allow( missing_doc) ]
104
+
105
+ /// Returns whether the specified `char` satisfies the 'XID_Continue' Unicode property
106
+ ///
107
+ /// 'XID_Continue' is a Unicode Derived Property specified in
108
+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
109
+ /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
86
110
// Thin wrapper: forwards `c` to `derived_property::XID_Continue` and returns its result unchanged.
pub fn is_XID_continue ( c : char ) -> bool { derived_property:: XID_Continue ( c) }
87
111
88
112
///
89
- /// Indicates whether a character is in lower case, defined
90
- /// in terms of the Unicode Derived Core Property 'Lowercase'.
113
+ /// Indicates whether a `char` is in lower case
114
+ ///
115
+ /// This is defined according to the terms of the Unicode Derived Core Property 'Lowercase'.
91
116
///
92
117
#[ inline]
93
118
// Thin wrapper: forwards `c` to `derived_property::Lowercase` and returns its result unchanged.
pub fn is_lowercase ( c : char ) -> bool { derived_property:: Lowercase ( c) }
94
119
95
120
///
96
- /// Indicates whether a character is in upper case, defined
97
- /// in terms of the Unicode Derived Core Property 'Uppercase'.
121
+ /// Indicates whether a `char` is in upper case
122
+ ///
123
+ /// This is defined according to the terms of the Unicode Derived Core Property 'Uppercase'.
98
124
///
99
125
#[ inline]
100
126
// Thin wrapper: forwards `c` to `derived_property::Uppercase` and returns its result unchanged.
pub fn is_uppercase ( c : char ) -> bool { derived_property:: Uppercase ( c) }
101
127
102
128
///
103
- /// Indicates whether a character is whitespace. Whitespace is defined in
104
- /// terms of the Unicode Property 'White_Space'.
129
+ /// Indicates whether a `char` is whitespace
130
+ ///
131
+ /// Whitespace is defined in terms of the Unicode Property 'White_Space'.
105
132
///
106
133
#[ inline]
107
134
pub fn is_whitespace ( c : char ) -> bool {
@@ -112,9 +139,10 @@ pub fn is_whitespace(c: char) -> bool {
112
139
}
113
140
114
141
///
115
- /// Indicates whether a character is alphanumeric. Alphanumericness is
116
- /// defined in terms of the Unicode General Categories 'Nd', 'Nl', 'No'
117
- /// and the Derived Core Property 'Alphabetic'.
142
+ /// Indicates whether a `char` is alphanumeric
143
+ ///
144
+ /// Alphanumericness is defined in terms of the Unicode General Categories
145
+ /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
118
146
///
119
147
#[ inline]
120
148
pub fn is_alphanumeric ( c : char ) -> bool {
@@ -125,14 +153,15 @@ pub fn is_alphanumeric(c: char) -> bool {
125
153
}
126
154
127
155
///
128
- /// Indicates whether a character is a control character. Control
129
- /// characters are defined in terms of the Unicode General Category
156
+ /// Indicates whether a `char` is a control code point
157
+ ///
158
+ /// Control code points are defined in terms of the Unicode General Category
130
159
/// 'Cc'.
131
160
///
132
161
#[ inline]
133
162
// Thin wrapper: forwards `c` to `general_category::Cc` and returns its result unchanged.
pub fn is_control ( c : char ) -> bool { general_category:: Cc ( c) }
134
163
135
- /// Indicates whether the character is numeric (Nd, Nl, or No)
164
+ /// Indicates whether the `char` is numeric (Nd, Nl, or No)
136
165
#[ inline]
137
166
pub fn is_digit ( c : char ) -> bool {
138
167
general_category:: Nd ( c)
@@ -141,7 +170,8 @@ pub fn is_digit(c: char) -> bool {
141
170
}
142
171
143
172
///
144
- /// Checks if a character parses as a numeric digit in the given radix.
173
+ /// Checks if a `char` parses as a numeric digit in the given radix
174
+ ///
145
175
/// Compared to `is_digit()`, this function only recognizes the
146
176
/// characters `0-9`, `a-z` and `A-Z`.
147
177
///
@@ -167,13 +197,13 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool {
167
197
}
168
198
169
199
///
170
- /// Convert a char to the corresponding digit.
200
+ /// Converts a `char` to the corresponding digit
171
201
///
172
202
/// # Return value
173
203
///
174
204
/// If `c` is between '0' and '9', the corresponding value
175
205
/// between 0 and 9. If `c` is 'a' or 'A', 10. If `c` is
176
- /// 'b' or 'B', 11, etc. Returns none if the char does not
206
+ /// 'b' or 'B', 11, etc. Returns `None` if the `char` does not
177
207
/// refer to a digit in the given radix.
178
208
///
179
209
/// # Failure
@@ -196,7 +226,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
196
226
}
197
227
198
228
///
199
- /// Converts a number to the character representing it.
229
+ /// Converts a number to the character representing it
200
230
///
201
231
/// # Return value
202
232
///
@@ -254,7 +284,7 @@ fn decompose_hangul(s: char, f: |char|) {
254
284
}
255
285
}
256
286
257
- /// Returns the canonical decomposition of a character.
287
+ /// Returns the canonical decomposition of a character
258
288
pub fn decompose_canonical ( c : char , f: |char|) {
259
289
if ( c as uint ) < S_BASE || ( c as uint ) >= ( S_BASE + S_COUNT ) {
260
290
decompose:: canonical ( c, f) ;
@@ -263,7 +293,7 @@ pub fn decompose_canonical(c: char, f: |char|) {
263
293
}
264
294
}
265
295
266
- /// Returns the compatibility decomposition of a character.
296
+ /// Returns the compatibility decomposition of a character
267
297
pub fn decompose_compatible ( c : char , f: |char|) {
268
298
if ( c as uint ) < S_BASE || ( c as uint ) >= ( S_BASE + S_COUNT ) {
269
299
decompose:: compatibility ( c, f) ;
@@ -273,7 +303,7 @@ pub fn decompose_compatible(c: char, f: |char|) {
273
303
}
274
304
275
305
///
276
- /// Return the hexadecimal unicode escape of a char.
306
+ /// Returns the hexadecimal Unicode escape of a `char`
277
307
///
278
308
/// The rules are as follows:
279
309
///
@@ -301,7 +331,7 @@ pub fn escape_unicode(c: char, f: |char|) {
301
331
}
302
332
303
333
///
304
- /// Return a 'default' ASCII and C++11-like char- literal escape of a char.
334
+ /// Returns a 'default' ASCII and C++11-like literal escape of a `char`
305
335
///
306
336
/// The default is chosen with a bias toward producing literals that are
307
337
/// legal in a variety of languages, including C++11 and similar C-family
@@ -325,7 +355,7 @@ pub fn escape_default(c: char, f: |char|) {
325
355
}
326
356
}
327
357
328
- /// Returns the amount of bytes this character would need if encoded in utf8
358
+ /// Returns the amount of bytes this `char` would need if encoded in UTF-8
329
359
pub fn len_utf8_bytes ( c : char ) -> uint {
330
360
static MAX_ONE_B : uint = 128 u;
331
361
static MAX_TWO_B : uint = 2048 u;
@@ -360,8 +390,9 @@ pub trait Char {
360
390
fn escape_default ( & self , f: |char|) ;
361
391
fn len_utf8_bytes ( & self ) -> uint ;
362
392
363
- /// Encodes this character as utf-8 into the provided byte-buffer. The
364
- /// buffer must be at least 4 bytes long or a runtime failure will occur.
393
+ /// Encodes this `char` as UTF-8 into the provided byte-buffer
394
+ ///
395
+ /// The buffer must be at least 4 bytes long or a runtime failure will occur.
365
396
///
366
397
/// This will then return the number of bytes written to the slice.
367
398
fn encode_utf8 ( & self , dst : & mut [ u8 ] ) -> uint ;
0 commit comments