diff --git a/doc/rust.md b/doc/rust.md index b1eb5521b398f..0173f61e7307d 100644 --- a/doc/rust.md +++ b/doc/rust.md @@ -802,7 +802,7 @@ An example of `use` declarations: ~~~~ use core::float::sin; -use core::str::{slice, to_upper}; +use core::str::{slice, contains}; use core::option::Some; fn main() { @@ -813,8 +813,8 @@ fn main() { info!(Some(1.0)); // Equivalent to - // 'info!(core::str::to_upper(core::str::slice("foo", 0, 1)));' - info!(to_upper(slice("foo", 0, 1))); + // 'info!(core::str::contains(core::str::slice("foo", 0, 1), "oo"));' + info!(contains(slice("foo", 0, 1), "oo")); } ~~~~ diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index 5a5c091d9570a..3aa88523a873a 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -50,7 +50,11 @@ fn parse_expected(line_num: uint, line: ~str) -> ~[ExpectedError] { while idx < len && line[idx] == (' ' as u8) { idx += 1u; } let start_kind = idx; while idx < len && line[idx] != (' ' as u8) { idx += 1u; } - let kind = str::to_lower(str::slice(line, start_kind, idx).to_owned()); + + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy.
+ let kind = str::slice(line, start_kind, idx); + let kind = kind.to_ascii().to_lower().to_str_ascii(); // Extract msg: while idx < len && line[idx] == (' ' as u8) { idx += 1u; } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index c07a31490c346..8af61dcb86197 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -100,12 +100,6 @@ pub fn is_alphanumeric(c: char) -> bool { unicode::general_category::No(c); } -/// Indicates whether the character is an ASCII character -#[inline(always)] -pub fn is_ascii(c: char) -> bool { - c - ('\x7F' & c) == '\x00' -} - /// Indicates whether the character is numeric (Nd, Nl, or No) #[inline(always)] pub fn is_digit(c: char) -> bool { @@ -116,7 +110,7 @@ pub fn is_digit(c: char) -> bool { /** * Checks if a character parses as a numeric digit in the given radix. - * Compared to `is_digit()`, this function only recognizes the ascii + * Compared to `is_digit()`, this function only recognizes the * characters `0-9`, `a-z` and `A-Z`. * * Returns `true` if `c` is a valid digit under `radix`, and `false` @@ -163,7 +157,7 @@ pub fn to_digit(c: char, radix: uint) -> Option { } /** - * Converts a number to the ascii character representing it. + * Converts a number to the character representing it. * * Returns `Some(char)` if `num` represents one digit under `radix`, * using one character of `0-9` or `a-z`, or `None` if it doesn't. @@ -316,12 +310,6 @@ fn test_to_digit() { assert!(to_digit('$', 36u).is_none()); } -#[test] -fn test_is_ascii() { - assert!(str::all(~"banana", is_ascii)); - assert!(! str::all(~"ประเทศไทย中华Việt Nam", is_ascii)); -} - #[test] fn test_is_digit() { assert!(is_digit('2')); diff --git a/src/libcore/num/uint-template/u8.rs b/src/libcore/num/uint-template/u8.rs index ce23bebacdad1..5c548d7209330 100644 --- a/src/libcore/num/uint-template/u8.rs +++ b/src/libcore/num/uint-template/u8.rs @@ -10,16 +10,9 @@ //! 
Operations and constants for `u8` -pub use self::inst::is_ascii; - mod inst { pub type T = u8; #[allow(non_camel_case_types)] pub type T_SIGNED = i8; pub static bits: uint = 8; - - // Type-specific functions here. These must be reexported by the - // parent module so that they appear in core::u8 and not core::u8::u8; - - pub fn is_ascii(x: T) -> bool { return 0 as T == x & 128 as T; } } diff --git a/src/libcore/path.rs b/src/libcore/path.rs index 8328d42c35e9a..edc61299af96d 100644 --- a/src/libcore/path.rs +++ b/src/libcore/path.rs @@ -19,6 +19,7 @@ use libc; use option::{None, Option, Some}; use str; use to_str::ToStr; +use ascii::{AsciiCast, AsciiStr}; #[deriving(Clone, Eq)] pub struct WindowsPath { @@ -753,7 +754,9 @@ impl GenericPath for WindowsPath { fn is_restricted(&self) -> bool { match self.filestem() { Some(stem) => { - match stem.to_lower() { + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy. + match stem.to_ascii().to_lower().to_str_ascii() { ~"con" | ~"aux" | ~"com1" | ~"com2" | ~"com3" | ~"com4" | ~"lpt1" | ~"lpt2" | ~"lpt3" | ~"prn" | ~"nul" => true, _ => false @@ -809,7 +812,10 @@ impl GenericPath for WindowsPath { host: copy self.host, device: match self.device { None => None, - Some(ref device) => Some(device.to_upper()) + + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy.
+ Some(ref device) => Some(device.to_ascii().to_upper().to_str_ascii()) }, is_absolute: self.is_absolute, components: normalize(self.components) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 9590f148e3046..92c965256ceaf 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -27,7 +27,6 @@ use option::{None, Option, Some}; use iterator::Iterator; use ptr; use str; -use u8; use uint; use vec; use to_str::ToStr; @@ -787,22 +786,6 @@ pub fn each_split_within<'a>(ss: &'a str, } } -/// Convert a string to lowercase. ASCII only -pub fn to_lower(s: &str) -> ~str { - do map(s) |c| { - assert!(char::is_ascii(c)); - (unsafe{libc::tolower(c as libc::c_char)}) as char - } -} - -/// Convert a string to uppercase. ASCII only -pub fn to_upper(s: &str) -> ~str { - do map(s) |c| { - assert!(char::is_ascii(c)); - (unsafe{libc::toupper(c as libc::c_char)}) as char - } -} - /** * Replace all occurrences of one string with another * @@ -1610,13 +1593,6 @@ pub fn ends_with<'a,'b>(haystack: &'a str, needle: &'b str) -> bool { Section: String properties */ -/// Determines if a string contains only ASCII characters -pub fn is_ascii(s: &str) -> bool { - let mut i: uint = len(s); - while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } } - return true; -} - /// Returns true if the string has length 0 pub fn is_empty(s: &str) -> bool { len(s) == 0u } @@ -2403,8 +2379,6 @@ pub trait StrSlice<'self> { fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool); fn starts_with<'a>(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; - fn to_lower(&self) -> ~str; - fn to_upper(&self) -> ~str; fn escape_default(&self) -> ~str; fn escape_unicode(&self) -> ~str; fn trim(&self) -> &'self str; @@ -2565,12 +2539,6 @@ impl<'self> StrSlice<'self> for &'self str { fn substr(&self, begin: uint, n: uint) -> &'self str { substr(*self, begin, n) } - /// Convert a string to lowercase - #[inline] - fn to_lower(&self) -> ~str { 
to_lower(*self) } - /// Convert a string to uppercase - #[inline] - fn to_upper(&self) -> ~str { to_upper(*self) } /// Escape each char in `s` with char::escape_default. #[inline] fn escape_default(&self) -> ~str { escape_default(*self) } @@ -3084,27 +3052,6 @@ mod tests { assert!(repeat(~"hi", 0) == ~""); } - #[test] - fn test_to_upper() { - // libc::toupper, and hence str::to_upper - // are culturally insensitive: they only work for ASCII - // (see Issue #1347) - let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works - let input = ~"abcDEF" + unicode + ~"xyz:.;"; - let expected = ~"ABCDEF" + unicode + ~"XYZ:.;"; - let actual = to_upper(input); - assert!(expected == actual); - } - - #[test] - fn test_to_lower() { - // libc::tolower, and hence str::to_lower - // are culturally insensitive: they only work for ASCII - // (see Issue #1347) - assert!(~"" == to_lower("")); - assert!(~"ymca" == to_lower("YMCA")); - } - #[test] fn test_unsafe_slice() { assert!("ab" == unsafe {raw::slice_bytes("abc", 0, 2)}); @@ -3337,13 +3284,6 @@ mod tests { assert!((!is_whitespace(~" _ "))); } - #[test] - fn test_is_ascii() { - assert!((is_ascii(~""))); - assert!((is_ascii(~"a"))); - assert!((!is_ascii(~"\u2009"))); - } - #[test] fn test_shift_byte() { let mut s = ~"ABC"; diff --git a/src/libcore/str/ascii.rs b/src/libcore/str/ascii.rs index f6c0176eafc63..9180c995ca28c 100644 --- a/src/libcore/str/ascii.rs +++ b/src/libcore/str/ascii.rs @@ -199,6 +199,7 @@ impl ToStrConsume for ~[Ascii] { #[cfg(test)] mod tests { use super::*; + use str; macro_rules! v2ascii ( ( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]); @@ -221,6 +222,9 @@ mod tests { assert_eq!('['.to_ascii().to_lower().to_char(), '['); assert_eq!('`'.to_ascii().to_upper().to_char(), '`'); assert_eq!('{'.to_ascii().to_upper().to_char(), '{'); + + assert!(str::all(~"banana", |c| c.is_ascii())); + assert!(! 
str::all(~"ประเทศไทย中华Việt Nam", |c| c.is_ascii())); } #[test] @@ -234,6 +238,15 @@ mod tests { assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#"); assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#"); + + assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~""); + assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca"); + assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;"); + + assert!("".is_ascii()); + assert!("a".is_ascii()); + assert!(!"\u2009".is_ascii()); + } #[test] diff --git a/src/libcore/unstable/extfmt.rs b/src/libcore/unstable/extfmt.rs index ee33e2ed20bba..b812be5575a4a 100644 --- a/src/libcore/unstable/extfmt.rs +++ b/src/libcore/unstable/extfmt.rs @@ -520,7 +520,13 @@ pub mod rt { match cv.ty { TyDefault => uint_to_str_prec(u, 10, prec), TyHexLower => uint_to_str_prec(u, 16, prec), - TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)), + + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy. + TyHexUpper => { + let s = uint_to_str_prec(u, 16, prec); + s.to_ascii().to_upper().to_str_ascii() + } TyBits => uint_to_str_prec(u, 2, prec), TyOctal => uint_to_str_prec(u, 8, prec) }; diff --git a/src/librustc/driver/driver.rs b/src/librustc/driver/driver.rs index 7110382bb55ed..f33bb878ee4ba 100644 --- a/src/librustc/driver/driver.rs +++ b/src/librustc/driver/driver.rs @@ -546,7 +546,11 @@ pub fn build_session_options(binary: @~str, let lint_dict = lint::get_lint_dict(); for lint_levels.each |level| { let level_name = lint::level_to_str(*level); - let level_short = level_name.substr(0,1).to_upper(); + + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy.
+ let level_short = level_name.substr(0,1); + let level_short = level_short.to_ascii().to_upper().to_str_ascii(); let flags = vec::append(getopts::opt_strs(matches, level_short), getopts::opt_strs(matches, level_name)); for flags.each |lint_name| { diff --git a/src/librustdoc/markdown_index_pass.rs b/src/librustdoc/markdown_index_pass.rs index 866fbba2be87a..631c86b74f1e9 100644 --- a/src/librustdoc/markdown_index_pass.rs +++ b/src/librustdoc/markdown_index_pass.rs @@ -157,7 +157,9 @@ pub fn pandoc_header_id(header: &str) -> ~str { let s = str::replace(s, ~" ", ~"-"); return s; } - fn convert_to_lowercase(s: &str) -> ~str { str::to_lower(s) } + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy. + fn convert_to_lowercase(s: &str) -> ~str { s.to_ascii().to_lower().to_str_ascii() } fn remove_up_to_first_letter(s: &str) -> ~str { s.to_str() } fn maybe_use_section_id(s: &str) -> ~str { s.to_str() } } diff --git a/src/libstd/semver.rs b/src/libstd/semver.rs index 83fab365d8d62..f6d1b1ba2ecd4 100644 --- a/src/libstd/semver.rs +++ b/src/libstd/semver.rs @@ -220,7 +220,7 @@ fn parse_reader(rdr: @io::Reader) -> Version { pub fn parse(s: &str) -> Option { - if ! str::is_ascii(s) { + if !s.is_ascii() { return None; } let s = s.trim(); diff --git a/src/libstd/sort.rs b/src/libstd/sort.rs index db6efdf3f52aa..cc002bc8305a2 100644 --- a/src/libstd/sort.rs +++ b/src/libstd/sort.rs @@ -885,8 +885,12 @@ mod tests { // tjc: funny that we have to use parens fn ile(x: &(&'static str), y: &(&'static str)) -> bool { - let x = x.to_lower(); - let y = y.to_lower(); + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy.
+ // (Actually, could just remove the to_str_* call, but needs a deriving(Ord) on + // Ascii) + let x = x.to_ascii().to_lower().to_str_ascii(); + let y = y.to_ascii().to_lower().to_str_ascii(); x <= y } diff --git a/src/test/bench/shootout-k-nucleotide-pipes.rs b/src/test/bench/shootout-k-nucleotide-pipes.rs index c8b13a6e27fa3..4c8c984cd07c9 100644 --- a/src/test/bench/shootout-k-nucleotide-pipes.rs +++ b/src/test/bench/shootout-k-nucleotide-pipes.rs @@ -59,7 +59,10 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str { for pairs_sorted.each |kv| { let (k,v) = copy *kv; unsafe { - buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v)); + let b = str::raw::from_bytes(k); + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy. + buffer += (fmt!("%s %0.3f\n", b.to_ascii().to_upper().to_str_ascii(), v)); } } @@ -68,7 +71,9 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str { // given a map, search for the frequency of a pattern fn find(mm: &HashMap<~[u8], uint>, key: ~str) -> uint { - match mm.find(&str::to_bytes(str::to_lower(key))) { + // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use + // to_ascii_consume and to_str_consume to not do an unnecessary copy. + match mm.find(&str::to_bytes(key.to_ascii().to_lower().to_str_ascii())) { option::None => { return 0u; } option::Some(&num) => { return num; } }