Skip to content

implement rfc 1054: split_whitespace() fn, deprecate words() #24563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/libcollections/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#![feature(unsafe_no_drop_flag, filling_drop)]
#![feature(step_by)]
#![feature(str_char)]
#![feature(str_words)]
#![feature(slice_patterns)]
#![feature(debug_builders)]
#![feature(utf8_error)]
Expand Down
29 changes: 23 additions & 6 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub use core::str::{Matches, RMatches};
pub use core::str::{MatchIndices, RMatchIndices};
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, ParseBoolError};
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
pub use core::str::pattern;

/*
Expand Down Expand Up @@ -1737,27 +1737,44 @@ impl str {
UnicodeStr::grapheme_indices(&self[..], is_extended)
}

/// An iterator over the non-empty words of `self`.
///
/// A 'word' is a subsequence separated by any sequence of whitespace.
/// Sequences of whitespace
/// are collapsed, so empty "words" are not included.
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
/// and which are separated by any amount of whitespace.
///
/// # Examples
///
/// ```
/// # #![feature(str_words)]
/// # #![allow(deprecated)]
/// let some_words = " Mary had\ta little \n\t lamb";
/// let v: Vec<&str> = some_words.words().collect();
///
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
/// ```
#[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
since = "1.1.0")]
#[unstable(feature = "str_words",
reason = "the precise algorithm to use is unclear")]
#[allow(deprecated)]
pub fn words(&self) -> Words {
UnicodeStr::words(&self[..])
}

/// An iterator over the non-empty substrings of `self` which contain no whitespace,
/// and which are separated by any amount of whitespace.
///
/// Runs of consecutive whitespace act as a single separator, and leading or
/// trailing whitespace does not produce empty substrings.
///
/// # Examples
///
/// ```
/// let some_words = " Mary had\ta little \n\t lamb";
/// let v: Vec<&str> = some_words.split_whitespace().collect();
///
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
/// ```
#[stable(feature = "split_whitespace", since = "1.1.0")]
pub fn split_whitespace(&self) -> SplitWhitespace {
// Call the `UnicodeStr` trait method explicitly by path; the splitting itself
// is implemented there for `str`.
UnicodeStr::split_whitespace(&self[..])
}

/// Returns a string's displayed width in columns.
///
/// Control characters have zero width.
Expand Down
1 change: 0 additions & 1 deletion src/libcollectionstest/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#![feature(hash)]
#![feature(rand)]
#![feature(rustc_private)]
#![feature(str_words)]
#![feature(test)]
#![feature(unboxed_closures)]
#![feature(unicode)]
Expand Down
4 changes: 2 additions & 2 deletions src/libcollectionstest/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,9 +939,9 @@ fn test_rsplitn() {
}

#[test]
fn test_words() {
fn test_split_whitespace() {
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
let words: Vec<&str> = data.words().collect();
let words: Vec<&str> = data.split_whitespace().collect();
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}

Expand Down
3 changes: 1 addition & 2 deletions src/libgetopts/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@

#![deny(missing_docs)]
#![feature(staged_api)]
#![feature(str_words)]
#![feature(str_char)]
#![cfg_attr(test, feature(rustc_private))]

Expand Down Expand Up @@ -771,7 +770,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String {

// Normalize desc to contain words separated by one space character
let mut desc_normalized_whitespace = String::new();
for word in desc.words() {
for word in desc.split_whitespace() {
desc_normalized_whitespace.push_str(word);
desc_normalized_whitespace.push(' ');
}
Expand Down
1 change: 0 additions & 1 deletion src/librustc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#![feature(staged_api)]
#![feature(std_misc)]
#![feature(path_ext)]
#![feature(str_words)]
#![feature(str_char)]
#![feature(into_cow)]
#![feature(slice_patterns)]
Expand Down
4 changes: 2 additions & 2 deletions src/librustc/session/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ macro_rules! options {
-> bool {
match v {
Some(s) => {
for s in s.words() {
for s in s.split_whitespace() {
slot.push(s.to_string());
}
true
Expand All @@ -431,7 +431,7 @@ macro_rules! options {
-> bool {
match v {
Some(s) => {
let v = s.words().map(|s| s.to_string()).collect();
let v = s.split_whitespace().map(|s| s.to_string()).collect();
*slot = Some(v);
true
},
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_unicode/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ mod u_str;
pub mod char;

pub mod str {
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
pub use u_str::{utf16_items, Utf16Encoder};
}
27 changes: 21 additions & 6 deletions src/librustc_unicode/u_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,16 @@ use core::str::Split;

use tables::grapheme::GraphemeCat;

/// An iterator over the words of a string, separated by a sequence of whitespace
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Words<'a> {
#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace",
since = "1.1.0")]
#[unstable(feature = "str_words",
reason = "words() will be replaced by split_whitespace() in 1.1.0")]
pub type Words<'a> = SplitWhitespace<'a>;

/// An iterator over the non-whitespace substrings of a string,
/// separated by any amount of whitespace.
///
/// Created by `split_whitespace`. Yields `&'a str` items and can also be
/// iterated from the back.
#[stable(feature = "split_whitespace", since = "1.1.0")]
pub struct SplitWhitespace<'a> {
// A `Split` over a `fn(char) -> bool` separator predicate, wrapped in a
// `Filter` over the resulting pieces; both predicates are plain fn pointers.
inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
}

Expand All @@ -36,7 +43,9 @@ pub struct Words<'a> {
pub trait UnicodeStr {
fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
#[allow(deprecated)]
fn words<'a>(&'a self) -> Words<'a>;
fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
fn is_whitespace(&self) -> bool;
fn is_alphanumeric(&self) -> bool;
fn width(&self, is_cjk: bool) -> usize;
Expand All @@ -56,15 +65,21 @@ impl UnicodeStr for str {
GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
}

// Deprecated name: `Words` is a type alias for `SplitWhitespace`, so this
// simply forwards to `split_whitespace`. The `allow` is presumably needed
// because the signature itself names the deprecated alias.
#[allow(deprecated)]
#[inline]
fn words(&self) -> Words {
self.split_whitespace()
}

#[inline]
fn split_whitespace(&self) -> SplitWhitespace {
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer

fn is_whitespace(c: char) -> bool { c.is_whitespace() }
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer

Words { inner: self.split(is_whitespace).filter(is_not_empty) }
SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
}

#[inline]
Expand Down Expand Up @@ -545,11 +560,11 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
}
}

impl<'a> Iterator for Words<'a> {
impl<'a> Iterator for SplitWhitespace<'a> {
type Item = &'a str;

fn next(&mut self) -> Option<&'a str> { self.inner.next() }
}
impl<'a> DoubleEndedIterator for Words<'a> {
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
}
2 changes: 1 addition & 1 deletion src/librustdoc/html/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
};

// Transform the contents of the header into a hyphenated string
let id = s.words().map(|s| s.to_ascii_lowercase())
let id = s.split_whitespace().map(|s| s.to_ascii_lowercase())
.collect::<Vec<String>>().connect("-");

// This is a terrible hack working around how hoedown gives us rendered
Expand Down
7 changes: 3 additions & 4 deletions src/librustdoc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#![feature(std_misc)]
#![feature(test)]
#![feature(unicode)]
#![feature(str_words)]
#![feature(path_ext)]
#![feature(path_relative_from)]
#![feature(slice_patterns)]
Expand Down Expand Up @@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {

let test_args = matches.opt_strs("test-args");
let test_args: Vec<String> = test_args.iter()
.flat_map(|s| s.words())
.flat_map(|s| s.split_whitespace())
.map(|s| s.to_string())
.collect();

Expand Down Expand Up @@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche
}
clean::NameValue(ref x, ref value)
if "passes" == *x => {
for pass in value.words() {
for pass in value.split_whitespace() {
passes.push(pass.to_string());
}
}
clean::NameValue(ref x, ref value)
if "plugins" == *x => {
for p in value.words() {
for p in value.split_whitespace() {
plugins.push(p.to_string());
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/test/run-pass/drop-with-type-ascription-1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
// except according to those terms.


#![feature(str_words)]

fn main() {
let foo = "hello".to_string();
let foo: Vec<&str> = foo.words().collect();
let foo: Vec<&str> = foo.split_whitespace().collect();
let invalid_string = &foo[0];
assert_eq!(*invalid_string, "hello");
}