diff --git a/src/libstd/char.rs b/src/libstd/char.rs index abb1ac5ace85b..59c2844f32379 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -17,6 +17,7 @@ use str::StrSlice; use unicode::{derived_property, general_category, decompose}; use to_str::ToStr; use str; +use vec::ImmutableVector; #[cfg(test)] use str::OwnedStr; @@ -79,6 +80,67 @@ pub fn from_u32(i: u32) -> Option<char> { } } +/// Something that can be used to compare against a character +pub trait CharEq { + /// Determine if `self` is equal to the given `char` + fn matches(&self, char) -> bool; + /// Indicate if this is only concerned about ASCII characters, + /// which can allow for a faster implementation. + fn only_ascii(&self) -> bool; +} + +impl CharEq for char { + #[inline] + fn matches(&self, c: char) -> bool { *self == c } + + #[inline] + fn only_ascii(&self) -> bool { (*self as uint) < 128 } +} + +impl<'self> CharEq for &'self fn(char) -> bool { + #[inline] + fn matches(&self, c: char) -> bool { (*self)(c) } + + #[inline] + fn only_ascii(&self) -> bool { false } +} + +impl CharEq for extern "Rust" fn(char) -> bool { + #[inline] + fn matches(&self, c: char) -> bool { (*self)(c) } + + #[inline] + fn only_ascii(&self) -> bool { false } +} + +impl<'self, C: CharEq> CharEq for &'self [C] { + #[inline] + fn matches(&self, c: char) -> bool { + self.iter().any(|m| m.matches(c)) + } + + #[inline] + fn only_ascii(&self) -> bool { + self.iter().all(|m| m.only_ascii()) + } +} + +/// Trait for converting `self` into a `char`. +pub trait ToChar { + /// Converts self into a `char`. 
+ fn to_char(&self) -> char; +} + +impl ToChar for char { + #[inline] + fn to_char(&self) -> char { *self } +} + +impl ToChar for u8 { + #[inline] + fn to_char(&self) -> char { *self as char } +} + /// Returns whether the specified character is considered a unicode alphabetic /// character pub fn is_alphabetic(c: char) -> bool { derived_property::Alphabetic(c) } diff --git a/src/libstd/prelude.rs b/src/libstd/prelude.rs index 96ade70f007b5..e988e1f3ade8a 100644 --- a/src/libstd/prelude.rs +++ b/src/libstd/prelude.rs @@ -47,7 +47,7 @@ pub use from_str::from_str; pub use c_str::ToCStr; pub use clone::{Clone, DeepClone}; pub use cmp::{Eq, ApproxEq, Ord, TotalEq, TotalOrd, Ordering, Less, Equal, Greater, Equiv}; -pub use char::Char; +pub use char::{Char, ToChar}; pub use container::{Container, Mutable, Map, MutableMap, Set, MutableSet}; pub use hash::Hash; pub use num::Times; diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 931bddbdf9306..d3b8f2ce2e4c1 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -93,9 +93,10 @@ use at_vec; use cast; use cast::transmute; use char; -use char::Char; +use char::{Char, ToChar, CharEq}; use clone::{Clone, DeepClone}; use container::{Container, Mutable}; +use iter; use iter::{Iterator, FromIterator, Extendable, range}; use iter::{Filter, AdditiveIterator, Map}; use iter::{Invert, DoubleEndedIterator, ExactSize}; @@ -295,47 +296,6 @@ impl<'self, S: Str> StrVector for &'self [S] { } } -/// Something that can be used to compare against a character -pub trait CharEq { - /// Determine if the splitter should split at the given character - fn matches(&self, char) -> bool; - /// Indicate if this is only concerned about ASCII characters, - /// which can allow for a faster implementation. 
- fn only_ascii(&self) -> bool; -} - -impl CharEq for char { - #[inline] - fn matches(&self, c: char) -> bool { *self == c } - - fn only_ascii(&self) -> bool { (*self as uint) < 128 } -} - -impl<'self> CharEq for &'self fn(char) -> bool { - #[inline] - fn matches(&self, c: char) -> bool { (*self)(c) } - - fn only_ascii(&self) -> bool { false } -} - -impl CharEq for extern "Rust" fn(char) -> bool { - #[inline] - fn matches(&self, c: char) -> bool { (*self)(c) } - - fn only_ascii(&self) -> bool { false } -} - -impl<'self, C: CharEq> CharEq for &'self [C] { - #[inline] - fn matches(&self, c: char) -> bool { - self.iter().any(|m| m.matches(c)) - } - - fn only_ascii(&self) -> bool { - self.iter().all(|m| m.only_ascii()) - } -} - /* Section: Iterators */ @@ -738,6 +698,66 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> { } } +/// An Iterator over the byte indices of all matches of `C` +/// in a string, in reverse. +pub type RevFindIterator<'self, C> = iter::Invert<FindIterator<'self, C>>; + +/// An Iterator over the byte indices of all matches of `C` +/// in a string. 
+pub struct FindIterator<'self, C> { + priv pred: C, + priv iter: FindIterEither<'self>, +} + +type ByteOffsetIterator<'self> = iter::Enumerate<ByteIterator<'self>>; + +enum FindIterEither<'self> { + FindIterC(CharOffsetIterator<'self>), + FindIterB(ByteOffsetIterator<'self>) +} + +impl<'self, C: CharEq> Iterator<uint> for FindIterator<'self, C> { + fn next(&mut self) -> Option<uint> { + #[inline] + fn find<C: CharEq, T: ToChar, I: Iterator<(uint, T)>> + (pred: &C, iter: &mut I) -> Option<uint> { + loop { + match iter.next() { + None => return None, + Some((i, ref t)) if pred.matches(t.to_char()) => return Some(i), + _ => loop, + } + } + } + + match self.iter { + FindIterC(ref mut iter) => find(&self.pred, iter), + FindIterB(ref mut iter) => find(&self.pred, iter), + } + } +} + +impl<'self, C: CharEq> DoubleEndedIterator<uint> for FindIterator<'self, C> { + fn next_back(&mut self) -> Option<uint> { + #[inline] + fn find_back<C: CharEq, T: ToChar, I: DoubleEndedIterator<(uint, T)>> + (pred: &C, iter: &mut I) -> Option<uint> { + loop { + match iter.next_back() { + None => return None, + Some((i, ref t)) if pred.matches(t.to_char()) => return Some(i), + _ => loop, + } + } + } + + match self.iter { + FindIterC(ref mut iter) => find_back(&self.pred, iter), + FindIterB(ref mut iter) => find_back(&self.pred, iter), + } + } +} + /// Replace all occurrences of one string with another /// /// # Arguments @@ -1548,6 +1568,12 @@ pub trait StrSlice<'self> { /// An Iterator over the string in Unicode Normalization Form KD (compatibility decomposition) fn nfkd_iter(&self) -> NormalizationIterator<'self>; + /// An Iterator over all indices for which `pred` matches. + fn find_iter<C: CharEq>(&self, pred: C) -> FindIterator<'self, C>; + + /// An Iterator over all indices for which `pred` matches, in reverse. 
+ fn find_iter_rev<C: CharEq>(&self, pred: C) -> RevFindIterator<'self, C>; + /// Returns true if the string contains only whitespace /// /// Whitespace characters are determined by `char::is_whitespace` @@ -1957,6 +1983,22 @@ impl<'self> StrSlice<'self> for &'self str { } } + fn find_iter<C: CharEq>(&self, pred: C) -> FindIterator<'self, C> { + if pred.only_ascii() { + FindIterator { pred: pred, iter: FindIterB(self.byte_iter().enumerate()) } + } else { + FindIterator { pred: pred, iter: FindIterC(self.char_offset_iter()) } + } + } + + fn find_iter_rev<C: CharEq>(&self, pred: C) -> RevFindIterator<'self, C>{ + if pred.only_ascii() { + FindIterator { pred: pred, iter: FindIterB(self.byte_iter().enumerate()) }.invert() + } else { + FindIterator { pred: pred, iter: FindIterC(self.char_offset_iter()) }.invert() + } + } + #[inline] fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) } @@ -2587,6 +2629,7 @@ mod tests { use vec::{Vector, ImmutableVector, CopyableVector}; use cmp::{TotalOrd, Less, Equal, Greater}; use send_str::{SendStrOwned, SendStrStatic}; + use char::CharEq; #[test] fn test_eq() { @@ -2650,6 +2693,42 @@ mod tests { assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u)); } + #[test] + fn test_find_iter() { + assert_eq!("hello".find_iter('l').next(), Some(2u)); + assert_eq!("hello".find_iter(|c:char| c == 'o').next(), Some(4u)); + assert!("hello".find_iter('x').next().is_none()); + assert!("hello".find_iter(|c:char| c == 'x').next().is_none()); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter('华').next(), Some(30u)); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == '华').next(), Some(30u)); + + assert_eq!("hello".find_iter('l').to_owned_vec(), ~[2, 3]); + assert_eq!("hello".find_iter(|c:char| c == 'o').to_owned_vec(), ~[4]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter('华').to_owned_vec(), ~[30]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == '华').to_owned_vec(), ~[30]); + assert_eq!("ประเทศไทย中华Việt 
Nam".find_iter('ท').to_owned_vec(), ~[12,21]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == 'ท').to_owned_vec(), ~[12,21]); + } + + #[test] + fn test_find_iter_rev() { + assert_eq!("hello".find_iter_rev('l').next(), Some(3u)); + assert_eq!("hello".find_iter_rev(|c:char| c == 'o').next(), Some(4u)); + assert!("hello".find_iter_rev('x').next().is_none()); + assert!("hello".find_iter_rev(|c:char| c == 'x').next().is_none()); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('华').next(), Some(30u)); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == '华').next(), Some(30u)); + + assert_eq!("hello".find_iter_rev('l').to_owned_vec(), ~[3, 2]); + assert_eq!("hello".find_iter_rev(|c:char| c == 'o').to_owned_vec(), ~[4]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('华').to_owned_vec(), ~[30]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == '华').to_owned_vec(), ~[30]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('ท').to_owned_vec(), + ~[21,12]); + assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == 'ท').to_owned_vec(), + ~[21,12]); + } + #[test] fn test_push_str() { let mut s = ~""; @@ -3896,6 +3975,7 @@ mod bench { use extra::test::BenchHarness; use super::*; use prelude::*; + use char::CharEq; #[bench] fn char_iterator(bh: &mut BenchHarness) { diff --git a/src/libstd/str/ascii.rs b/src/libstd/str/ascii.rs index ec2d756617756..d105fa05f5ae7 100644 --- a/src/libstd/str/ascii.rs +++ b/src/libstd/str/ascii.rs @@ -16,6 +16,7 @@ use str::StrSlice; use str::OwnedStr; use container::Container; use cast; +use char::{ToChar, CharEq}; use iter::Iterator; use vec::{CopyableVector, ImmutableVector, MutableVector}; use to_bytes::IterBytes; @@ -32,12 +33,6 @@ impl Ascii { self.chr } - /// Converts a ascii character into a `char`. - #[inline] - pub fn to_char(self) -> char { - self.chr as char - } - /// Convert to lowercase. 
#[inline] pub fn to_lower(self) -> Ascii { @@ -65,6 +60,19 @@ impl ToStr for Ascii { } } +impl ToChar for Ascii { + #[inline] + fn to_char(&self) -> char { self.chr as char } +} + +impl CharEq for Ascii { + #[inline] + fn matches(&self, c: char) -> bool { self.chr as char == c } + + #[inline] + fn only_ascii(&self) -> bool { true } +} + /// Trait for converting into an ascii type. pub trait AsciiCast { /// Convert to an ascii type