Skip to content

Commit efe6564

Browse files
committed
Code modernisation and cleanup
1 parent d73f31b commit efe6564

File tree

1 file changed

+100
-64
lines changed

1 file changed

+100
-64
lines changed

src/libcore/char.rs

+100-64
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,50 @@
1010

1111
//! Utilities for manipulating the char type
1212
13-
#[cfg(not(test))]
14-
use cmp::Ord;
1513
use option::{None, Option, Some};
1614
use str;
15+
#[cfg(stage0)]
16+
use str::StrSlice;
17+
#[cfg(not(stage0))]
18+
use str::{StrSlice, OwnedStr};
1719
use u32;
1820
use uint;
1921
use unicode::{derived_property, general_category};
2022

21-
#[cfg(not(test))] use cmp::Eq;
23+
#[cfg(not(test))]
24+
use cmp::{Eq, Ord};
2225

2326
/*
24-
Lu Uppercase_Letter an uppercase letter
25-
Ll Lowercase_Letter a lowercase letter
26-
Lt Titlecase_Letter a digraphic character, with first part uppercase
27-
Lm Modifier_Letter a modifier letter
28-
Lo Other_Letter other letters, including syllables and ideographs
29-
Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
30-
Mc Spacing_Mark a spacing combining mark (positive advance width)
31-
Me Enclosing_Mark an enclosing combining mark
32-
Nd Decimal_Number a decimal digit
33-
Nl Letter_Number a letterlike numeric character
34-
No Other_Number a numeric character of other type
27+
Lu Uppercase_Letter an uppercase letter
28+
Ll Lowercase_Letter a lowercase letter
29+
Lt Titlecase_Letter a digraphic character, with first part uppercase
30+
Lm Modifier_Letter a modifier letter
31+
Lo Other_Letter other letters, including syllables and ideographs
32+
Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
33+
Mc Spacing_Mark a spacing combining mark (positive advance width)
34+
Me Enclosing_Mark an enclosing combining mark
35+
Nd Decimal_Number a decimal digit
36+
Nl Letter_Number a letterlike numeric character
37+
No Other_Number a numeric character of other type
3538
Pc Connector_Punctuation a connecting punctuation mark, like a tie
36-
Pd Dash_Punctuation a dash or hyphen punctuation mark
37-
Ps Open_Punctuation an opening punctuation mark (of a pair)
38-
Pe Close_Punctuation a closing punctuation mark (of a pair)
39+
Pd Dash_Punctuation a dash or hyphen punctuation mark
40+
Ps Open_Punctuation an opening punctuation mark (of a pair)
41+
Pe Close_Punctuation a closing punctuation mark (of a pair)
3942
Pi Initial_Punctuation an initial quotation mark
40-
Pf Final_Punctuation a final quotation mark
41-
Po Other_Punctuation a punctuation mark of other type
42-
Sm Math_Symbol a symbol of primarily mathematical use
43-
Sc Currency_Symbol a currency sign
44-
Sk Modifier_Symbol a non-letterlike modifier symbol
45-
So Other_Symbol a symbol of other type
46-
Zs Space_Separator a space character (of various non-zero widths)
47-
Zl Line_Separator U+2028 LINE SEPARATOR only
43+
Pf Final_Punctuation a final quotation mark
44+
Po Other_Punctuation a punctuation mark of other type
45+
Sm Math_Symbol a symbol of primarily mathematical use
46+
Sc Currency_Symbol a currency sign
47+
Sk Modifier_Symbol a non-letterlike modifier symbol
48+
So Other_Symbol a symbol of other type
49+
Zs Space_Separator a space character (of various non-zero widths)
50+
Zl Line_Separator U+2028 LINE SEPARATOR only
4851
Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
49-
Cc Control a C0 or C1 control code
50-
Cf Format a format control character
51-
Cs Surrogate a surrogate code point
52-
Co Private_Use a private-use character
53-
Cn Unassigned a reserved unassigned code point or a noncharacter
52+
Cc Control a C0 or C1 control code
53+
Cf Format a format control character
54+
Cs Surrogate a surrogate code point
55+
Co Private_Use a private-use character
56+
Cn Unassigned a reserved unassigned code point or a noncharacter
5457
*/
5558

5659
pub fn is_alphabetic(c: char) -> bool { derived_property::Alphabetic(c) }
@@ -62,18 +65,14 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
6265
* in terms of the Unicode General Category 'Ll'.
6366
*/
6467
#[inline(always)]
65-
pub fn is_lowercase(c: char) -> bool {
66-
return general_category::Ll(c);
67-
}
68+
pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) }
6869

6970
/**
7071
* Indicates whether a character is in upper case, defined
7172
* in terms of the Unicode General Category 'Lu'.
7273
*/
7374
#[inline(always)]
74-
pub fn is_uppercase(c: char) -> bool {
75-
return general_category::Lu(c);
76-
}
75+
pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) }
7776

7877
/**
7978
* Indicates whether a character is whitespace. Whitespace is defined in
@@ -82,10 +81,10 @@ pub fn is_uppercase(c: char) -> bool {
8281
*/
8382
#[inline(always)]
8483
pub fn is_whitespace(c: char) -> bool {
85-
return ('\x09' <= c && c <= '\x0d')
84+
('\x09' <= c && c <= '\x0d')
8685
|| general_category::Zs(c)
8786
|| general_category::Zl(c)
88-
|| general_category::Zp(c);
87+
|| general_category::Zp(c)
8988
}
9089

9190
/**
@@ -95,18 +94,18 @@ pub fn is_whitespace(c: char) -> bool {
9594
*/
9695
#[inline(always)]
9796
pub fn is_alphanumeric(c: char) -> bool {
98-
return derived_property::Alphabetic(c) ||
99-
general_category::Nd(c) ||
100-
general_category::Nl(c) ||
101-
general_category::No(c);
97+
derived_property::Alphabetic(c)
98+
|| general_category::Nd(c)
99+
|| general_category::Nl(c)
100+
|| general_category::No(c)
102101
}
103102

104103
/// Indicates whether the character is numeric (Nd, Nl, or No)
105104
#[inline(always)]
106105
pub fn is_digit(c: char) -> bool {
107-
return general_category::Nd(c) ||
108-
general_category::Nl(c) ||
109-
general_category::No(c);
106+
general_category::Nd(c)
107+
|| general_category::Nl(c)
108+
|| general_category::No(c)
110109
}
111110

112111
/**
@@ -125,7 +124,7 @@ pub fn is_digit(c: char) -> bool {
125124
pub fn is_digit_radix(c: char, radix: uint) -> bool {
126125
match to_digit(c, radix) {
127126
Some(_) => true,
128-
None => false
127+
None => false,
129128
}
130129
}
131130

@@ -151,7 +150,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
151150
'0' .. '9' => c as uint - ('0' as uint),
152151
'a' .. 'z' => c as uint + 10u - ('a' as uint),
153152
'A' .. 'Z' => c as uint + 10u - ('A' as uint),
154-
_ => return None
153+
_ => return None,
155154
};
156155
if val < radix { Some(val) }
157156
else { None }
@@ -181,6 +180,21 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
181180
}
182181
}
183182

183+
#[cfg(stage0)]
184+
pub fn escape_unicode(c: char) -> ~str {
185+
let s = u32::to_str_radix(c as u32, 16u);
186+
let (c, pad) = (if c <= '\xff' { ('x', 2u) }
187+
else if c <= '\uffff' { ('u', 4u) }
188+
else { ('U', 8u) });
189+
assert!(str::len(s) <= pad);
190+
let mut out = ~"\\";
191+
str::push_str(&mut out, str::from_char(c));
192+
for uint::range(str::len(s), pad) |_i|
193+
{ str::push_str(&mut out, ~"0"); }
194+
str::push_str(&mut out, s);
195+
out
196+
}
197+
184198
/**
185199
* Return the hexadecimal unicode escape of a char.
186200
*
@@ -190,17 +204,21 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
190204
* - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`
191205
* - chars at or above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`
192206
*/
207+
#[cfg(not(stage0))]
193208
pub fn escape_unicode(c: char) -> ~str {
194209
let s = u32::to_str_radix(c as u32, 16u);
195-
let (c, pad) = (if c <= '\xff' { ('x', 2u) }
196-
else if c <= '\uffff' { ('u', 4u) }
197-
else { ('U', 8u) });
198-
assert!(str::len(s) <= pad);
210+
let (c, pad) = cond!(
211+
(c <= '\xff') { ('x', 2u) }
212+
(c <= '\uffff') { ('u', 4u) }
213+
_ { ('U', 8u) }
214+
);
215+
assert!(s.len() <= pad);
199216
let mut out = ~"\\";
200-
str::push_str(&mut out, str::from_char(c));
201-
for uint::range(str::len(s), pad) |_i|
202-
{ str::push_str(&mut out, ~"0"); }
203-
str::push_str(&mut out, s);
217+
out.push_str(str::from_char(c));
218+
for uint::range(s.len(), pad) |_| {
219+
out.push_str("0");
220+
}
221+
out.push_str(s);
204222
out
205223
}
206224

@@ -218,18 +236,18 @@ pub fn escape_unicode(c: char) -> ~str {
218236
*/
219237
pub fn escape_default(c: char) -> ~str {
220238
match c {
221-
'\t' => ~"\\t",
222-
'\r' => ~"\\r",
223-
'\n' => ~"\\n",
224-
'\\' => ~"\\\\",
225-
'\'' => ~"\\'",
226-
'"' => ~"\\\"",
227-
'\x20' .. '\x7e' => str::from_char(c),
228-
_ => escape_unicode(c)
239+
'\t' => ~"\\t",
240+
'\r' => ~"\\r",
241+
'\n' => ~"\\n",
242+
'\\' => ~"\\\\",
243+
'\'' => ~"\\'",
244+
'"' => ~"\\\"",
245+
'\x20' .. '\x7e' => str::from_char(c),
246+
_ => c.escape_unicode(),
229247
}
230248
}
231249

232-
/// Returns the amount of bytes this character would need if encoded in utf8
250+
#[cfg(stage0)]
233251
pub fn len_utf8_bytes(c: char) -> uint {
234252
static max_one_b: uint = 128u;
235253
static max_two_b: uint = 2048u;
@@ -244,6 +262,24 @@ pub fn len_utf8_bytes(c: char) -> uint {
244262
else { fail!("invalid character!") }
245263
}
246264

265+
/// Returns the number of bytes this character would need if encoded in UTF-8
266+
#[cfg(not(stage0))]
267+
pub fn len_utf8_bytes(c: char) -> uint {
268+
static MAX_ONE_B: uint = 128u;
269+
static MAX_TWO_B: uint = 2048u;
270+
static MAX_THREE_B: uint = 65536u;
271+
static MAX_FOUR_B: uint = 2097152u;
272+
273+
let code = c as uint;
274+
cond!(
275+
(code < MAX_ONE_B) { 1u }
276+
(code < MAX_TWO_B) { 2u }
277+
(code < MAX_THREE_B) { 3u }
278+
(code < MAX_FOUR_B) { 4u }
279+
_ { fail!("invalid character!") }
280+
)
281+
}
282+
247283
pub trait Char {
248284
fn is_alphabetic(&self) -> bool;
249285
fn is_XID_start(&self) -> bool;

0 commit comments

Comments
 (0)