Skip to content

RFC: Added a FindIterator to str #9442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/libstd/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use str::StrSlice;
use unicode::{derived_property, general_category, decompose};
use to_str::ToStr;
use str;
use vec::ImmutableVector;

#[cfg(test)] use str::OwnedStr;

Expand Down Expand Up @@ -79,6 +80,67 @@ pub fn from_u32(i: u32) -> Option<char> {
}
}

/// Something that can be used to compare against a character.
///
/// Implemented below for a single `char`, for closures and functions of
/// type `fn(char) -> bool`, and for slices of other `CharEq` values.
pub trait CharEq {
/// Determine if `self` is equal to the given `char`
fn matches(&self, char) -> bool;
/// Indicate if this is only concerned about ASCII characters,
/// which can allow for a faster implementation.
///
/// Implementors should return `true` only when `matches` can never
/// accept a character outside the ASCII range.
fn only_ascii(&self) -> bool;
}

// A single `char` matches exactly itself.
impl CharEq for char {
    /// True when `c` is the same character as `self`.
    #[inline]
    fn matches(&self, c: char) -> bool { c == *self }

    /// True when `self` lies in the ASCII range (code point below 128).
    #[inline]
    fn only_ascii(&self) -> bool {
        let code_point = *self as uint;
        code_point < 128
    }
}

// A borrowed closure acts as a predicate: it matches every character for
// which the closure returns `true`.
impl<'self> CharEq for &'self fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }

// An arbitrary predicate may accept non-ASCII characters, so this never
// claims to be ASCII-only.
#[inline]
fn only_ascii(&self) -> bool { false }
}

// A plain Rust function acts as a predicate: it matches every character for
// which the function returns `true`.
impl CharEq for extern "Rust" fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }

// An arbitrary predicate may accept non-ASCII characters, so this never
// claims to be ASCII-only.
#[inline]
fn only_ascii(&self) -> bool { false }
}

// A slice of matchers behaves as their union.
impl<'self, C: CharEq> CharEq for &'self [C] {
// Matches when at least one element matches `c`; an empty slice matches nothing.
#[inline]
fn matches(&self, c: char) -> bool {
self.iter().any(|m| m.matches(c))
}

// ASCII-only exactly when every element is ASCII-only; an empty slice
// therefore reports `true`.
#[inline]
fn only_ascii(&self) -> bool {
self.iter().all(|m| m.only_ascii())
}
}

/// Trait for converting `self` into a `char`.
pub trait ToChar {
/// Converts `self` into a `char`.
fn to_char(&self) -> char;
}

// Identity conversion: a `char` is returned unchanged.
impl ToChar for char {
#[inline]
fn to_char(&self) -> char { *self }
}

// Converts a byte with a plain `as` cast: the byte's numeric value becomes
// the character's code point. No UTF-8 decoding takes place.
impl ToChar for u8 {
#[inline]
fn to_char(&self) -> char { *self as char }
}

/// Returns whether the specified character is considered a Unicode alphabetic
/// character, as reported by `derived_property::Alphabetic`.
pub fn is_alphabetic(c: char) -> bool { derived_property::Alphabetic(c) }
Expand Down
2 changes: 1 addition & 1 deletion src/libstd/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub use from_str::from_str;
pub use c_str::ToCStr;
pub use clone::{Clone, DeepClone};
pub use cmp::{Eq, ApproxEq, Ord, TotalEq, TotalOrd, Ordering, Less, Equal, Greater, Equiv};
pub use char::Char;
pub use char::{Char, ToChar};
pub use container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
pub use hash::Hash;
pub use num::Times;
Expand Down
164 changes: 122 additions & 42 deletions src/libstd/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ use at_vec;
use cast;
use cast::transmute;
use char;
use char::Char;
use char::{Char, ToChar, CharEq};
use clone::{Clone, DeepClone};
use container::{Container, Mutable};
use iter;
use iter::{Iterator, FromIterator, Extendable, range};
use iter::{Filter, AdditiveIterator, Map};
use iter::{Invert, DoubleEndedIterator, ExactSize};
Expand Down Expand Up @@ -295,47 +296,6 @@ impl<'self, S: Str> StrVector for &'self [S] {
}
}

/// Something that can be used to compare against a character
pub trait CharEq {
/// Determine if the splitter should split at the given character
fn matches(&self, char) -> bool;
/// Indicate if this is only concerned about ASCII characters,
/// which can allow for a faster implementation.
fn only_ascii(&self) -> bool;
}

impl CharEq for char {
#[inline]
fn matches(&self, c: char) -> bool { *self == c }

fn only_ascii(&self) -> bool { (*self as uint) < 128 }
}

impl<'self> CharEq for &'self fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }

fn only_ascii(&self) -> bool { false }
}

impl CharEq for extern "Rust" fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }

fn only_ascii(&self) -> bool { false }
}

impl<'self, C: CharEq> CharEq for &'self [C] {
#[inline]
fn matches(&self, c: char) -> bool {
self.iter().any(|m| m.matches(c))
}

fn only_ascii(&self) -> bool {
self.iter().all(|m| m.only_ascii())
}
}

/*
Section: Iterators
*/
Expand Down Expand Up @@ -738,6 +698,66 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> {
}
}

/// An Iterator over the byte indices of all matches of `C`
/// in a string, in reverse.
///
/// This is a `FindIterator` wrapped in `iter::Invert`.
pub type RevFindIterator<'self, C> = iter::Invert<FindIterator<'self, C>>;

/// An Iterator over the byte indices of all matches of `C`
/// in a string.
pub struct FindIterator<'self, C> {
// The matcher deciding which characters count as hits.
priv pred: C,
// The underlying scan over the string, either char-wise or byte-wise.
priv iter: FindIterEither<'self>,
}

// Pairs produced by enumerating a `ByteIterator`: a running count followed
// by the byte itself.
type ByteOffsetIterator<'self> = iter::Enumerate<ByteIterator<'self>>;

// The two scanning strategies a `FindIterator` can wrap.
enum FindIterEither<'self> {
// Walks the string character by character.
FindIterC(CharOffsetIterator<'self>),
// Walks the string byte by byte.
FindIterB(ByteOffsetIterator<'self>)
}

impl<'self, C: CharEq> Iterator<uint> for FindIterator<'self, C> {
    /// Advances to the next match and returns its index, or `None` once the
    /// underlying iterator is exhausted.
    fn next(&mut self) -> Option<uint> {
        // One scanning loop shared by both the char-wise and the byte-wise
        // representation; only the element type `T` differs.
        #[inline]
        fn scan<T: ToChar, C: CharEq, I: Iterator<(uint, T)>>
               (pred: &C, iter: &mut I) -> Option<uint> {
            loop {
                let (idx, item) = match iter.next() {
                    Some(pair) => pair,
                    None => return None,
                };
                if pred.matches(item.to_char()) {
                    return Some(idx);
                }
            }
        }

        match self.iter {
            FindIterC(ref mut chars) => scan(&self.pred, chars),
            FindIterB(ref mut bytes) => scan(&self.pred, bytes),
        }
    }
}

impl<'self, C: CharEq> DoubleEndedIterator<uint> for FindIterator<'self, C> {
    /// Steps backwards from the end to the previous match and returns its
    /// index, or `None` once the underlying iterator is exhausted.
    fn next_back(&mut self) -> Option<uint> {
        // One backwards scanning loop shared by both the char-wise and the
        // byte-wise representation; only the element type `T` differs.
        #[inline]
        fn scan_back<T: ToChar, C: CharEq, I: DoubleEndedIterator<(uint, T)>>
                    (pred: &C, iter: &mut I) -> Option<uint> {
            loop {
                let (idx, item) = match iter.next_back() {
                    Some(pair) => pair,
                    None => return None,
                };
                if pred.matches(item.to_char()) {
                    return Some(idx);
                }
            }
        }

        match self.iter {
            FindIterC(ref mut chars) => scan_back(&self.pred, chars),
            FindIterB(ref mut bytes) => scan_back(&self.pred, bytes),
        }
    }
}

/// Replace all occurrences of one string with another
///
/// # Arguments
Expand Down Expand Up @@ -1548,6 +1568,12 @@ pub trait StrSlice<'self> {
/// An Iterator over the string in Unicode Normalization Form KD (compatibility decomposition)
fn nfkd_iter(&self) -> NormalizationIterator<'self>;

/// An Iterator over the byte indices of all characters for which `pred` matches,
/// from the start of the string to the end.
fn find_iter<C: CharEq>(&self, pred: C) -> FindIterator<'self, C>;

/// An Iterator over the byte indices of all characters for which `pred` matches,
/// in reverse (from the end of the string to the start).
fn find_iter_rev<C: CharEq>(&self, pred: C) -> RevFindIterator<'self, C>;

/// Returns true if the string contains only whitespace
///
/// Whitespace characters are determined by `char::is_whitespace`
Expand Down Expand Up @@ -1957,6 +1983,22 @@ impl<'self> StrSlice<'self> for &'self str {
}
}

// Builds a `FindIterator` over `self`. An ASCII-only matcher gets the
// byte-wise scan; any other matcher gets the char-wise scan.
fn find_iter<C: CharEq>(&self, pred: C) -> FindIterator<'self, C> {
    let scan = if pred.only_ascii() {
        FindIterB(self.byte_iter().enumerate())
    } else {
        FindIterC(self.char_offset_iter())
    };
    FindIterator { pred: pred, iter: scan }
}

// Same matches as `find_iter`, yielded from the end of the string towards
// the start.
fn find_iter_rev<C: CharEq>(&self, pred: C) -> RevFindIterator<'self, C> {
    // Delegate to `find_iter` so the choice between the byte-wise and the
    // char-wise scan lives in exactly one place, then flip the direction.
    self.find_iter(pred).invert()
}

#[inline]
fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }

Expand Down Expand Up @@ -2587,6 +2629,7 @@ mod tests {
use vec::{Vector, ImmutableVector, CopyableVector};
use cmp::{TotalOrd, Less, Equal, Greater};
use send_str::{SendStrOwned, SendStrStatic};
use char::CharEq;

#[test]
fn test_eq() {
Expand Down Expand Up @@ -2650,6 +2693,42 @@ mod tests {
assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
}

// Forward search: checks the first hit and the full list of hits, for both
// `char` matchers and closure matchers.
#[test]
fn test_find_iter() {
assert_eq!("hello".find_iter('l').next(), Some(2u));
assert_eq!("hello".find_iter(|c:char| c == 'o').next(), Some(4u));
assert!("hello".find_iter('x').next().is_none());
assert!("hello".find_iter(|c:char| c == 'x').next().is_none());
// Expected values are byte offsets, not character counts: the nine Thai
// characters and 中 take three bytes each in UTF-8, so 华 starts at byte 30.
assert_eq!("ประเทศไทย中华Việt Nam".find_iter('华').next(), Some(30u));
assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == '华').next(), Some(30u));

assert_eq!("hello".find_iter('l').to_owned_vec(), ~[2, 3]);
assert_eq!("hello".find_iter(|c:char| c == 'o').to_owned_vec(), ~[4]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter('华').to_owned_vec(), ~[30]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == '华').to_owned_vec(), ~[30]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter('ท').to_owned_vec(), ~[12,21]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter(|c: char| c == 'ท').to_owned_vec(), ~[12,21]);
}

// Reverse search: the same hits as the forward test, expected in descending
// order of byte offset.
#[test]
fn test_find_iter_rev() {
assert_eq!("hello".find_iter_rev('l').next(), Some(3u));
assert_eq!("hello".find_iter_rev(|c:char| c == 'o').next(), Some(4u));
assert!("hello".find_iter_rev('x').next().is_none());
assert!("hello".find_iter_rev(|c:char| c == 'x').next().is_none());
// Expected values are byte offsets, not character counts: the nine Thai
// characters and 中 take three bytes each in UTF-8, so 华 starts at byte 30.
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('华').next(), Some(30u));
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == '华').next(), Some(30u));

assert_eq!("hello".find_iter_rev('l').to_owned_vec(), ~[3, 2]);
assert_eq!("hello".find_iter_rev(|c:char| c == 'o').to_owned_vec(), ~[4]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('华').to_owned_vec(), ~[30]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == '华').to_owned_vec(), ~[30]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev('ท').to_owned_vec(),
~[21,12]);
assert_eq!("ประเทศไทย中华Việt Nam".find_iter_rev(|c: char| c == 'ท').to_owned_vec(),
~[21,12]);
}

#[test]
fn test_push_str() {
let mut s = ~"";
Expand Down Expand Up @@ -3896,6 +3975,7 @@ mod bench {
use extra::test::BenchHarness;
use super::*;
use prelude::*;
use char::CharEq;

#[bench]
fn char_iterator(bh: &mut BenchHarness) {
Expand Down
20 changes: 14 additions & 6 deletions src/libstd/str/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use str::StrSlice;
use str::OwnedStr;
use container::Container;
use cast;
use char::{ToChar, CharEq};
use iter::Iterator;
use vec::{CopyableVector, ImmutableVector, MutableVector};
use to_bytes::IterBytes;
Expand All @@ -32,12 +33,6 @@ impl Ascii {
self.chr
}

/// Converts a ascii character into a `char`.
#[inline]
pub fn to_char(self) -> char {
self.chr as char
}

/// Convert to lowercase.
#[inline]
pub fn to_lower(self) -> Ascii {
Expand Down Expand Up @@ -65,6 +60,19 @@ impl ToStr for Ascii {
}
}

// Converts an ascii character into a `char` by casting the stored `chr` value.
impl ToChar for Ascii {
#[inline]
fn to_char(&self) -> char { self.chr as char }
}

// An `Ascii` value matches the `char` equal to its stored `chr` value.
impl CharEq for Ascii {
#[inline]
fn matches(&self, c: char) -> bool { self.chr as char == c }

// Always reports ASCII-only.
#[inline]
fn only_ascii(&self) -> bool { true }
}

/// Trait for converting into an ascii type.
pub trait AsciiCast<T> {
/// Convert to an ascii type
Expand Down