Skip to content

Commit 700b4c1

Browse files
committed
Auto merge of #25028 - bluss:drain-string, r=alexcrichton
collections: Implement String::drain(range) according to RFC 574. `.drain(range)` is unstable and under feature(collections_drain). This adds a safe way to remove any range of a String as efficiently as possible. As noted in the code, this drain iterator has none of the memory safety issues of the vector version. The RFC tracking issue is #23055.
2 parents b858b7f + da03c9d commit 700b4c1

File tree

5 files changed

+131
-9
lines changed

5 files changed

+131
-9
lines changed

src/libcollections/string.rs

+107-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ use rustc_unicode::str as unicode_str;
2626
use rustc_unicode::str::Utf16Item;
2727

2828
use borrow::{Cow, IntoCow};
29-
use str::{self, FromStr, Utf8Error};
29+
use range::RangeArgument;
30+
use str::{self, FromStr, Utf8Error, Chars};
3031
use vec::{DerefVec, Vec, as_vec};
3132

3233
/// A growable string stored as a UTF-8 encoded buffer.
@@ -695,6 +696,59 @@ impl String {
695696
pub fn clear(&mut self) {
696697
self.vec.clear()
697698
}
699+
700+
/// Create a draining iterator that removes the specified range in the string
701+
/// and yields the removed chars from start to end. The element range is
702+
/// removed even if the iterator is not consumed until the end.
703+
///
704+
/// # Panics
705+
///
706+
/// Panics if the starting point or end point are not on character boundaries,
707+
/// or if they are out of bounds.
708+
///
709+
/// # Examples
710+
///
711+
/// ```
712+
/// # #![feature(collections_drain)]
713+
///
714+
/// let mut s = String::from("α is alpha, β is beta");
715+
/// let beta_offset = s.find('β').unwrap_or(s.len());
716+
///
717+
/// // Remove the range up until the β from the string
718+
/// let t: String = s.drain(..beta_offset).collect();
719+
/// assert_eq!(t, "α is alpha, ");
720+
/// assert_eq!(s, "β is beta");
721+
///
722+
/// // A full range clears the string
723+
/// s.drain(..);
724+
/// assert_eq!(s, "");
725+
/// ```
726+
#[unstable(feature = "collections_drain",
727+
reason = "recently added, matches RFC")]
728+
pub fn drain<R>(&mut self, range: R) -> Drain where R: RangeArgument<usize> {
729+
// Memory safety
730+
//
731+
// The String version of Drain does not have the memory safety issues
732+
// of the vector version. The data is just plain bytes.
733+
// Because the range removal happens in Drop, if the Drain iterator is leaked,
734+
// the removal will not happen.
735+
let len = self.len();
736+
let start = *range.start().unwrap_or(&0);
737+
let end = *range.end().unwrap_or(&len);
738+
739+
// Take out two simultaneous borrows. The &mut String won't be accessed
740+
// until iteration is over, in Drop.
741+
let self_ptr = self as *mut _;
742+
// slicing does the appropriate bounds checks
743+
let chars_iter = self[start..end].chars();
744+
745+
Drain {
746+
start: start,
747+
end: end,
748+
iter: chars_iter,
749+
string: self_ptr,
750+
}
751+
}
698752
}
699753

700754
impl FromUtf8Error {
@@ -1075,3 +1129,55 @@ impl fmt::Write for String {
10751129
Ok(())
10761130
}
10771131
}
1132+
1133+
/// A draining iterator for `String`.
1134+
#[unstable(feature = "collections_drain", reason = "recently added")]
1135+
pub struct Drain<'a> {
1136+
/// Will be used as &'a mut String in the destructor
1137+
string: *mut String,
1138+
/// Start of part to remove
1139+
start: usize,
1140+
/// End of part to remove
1141+
end: usize,
1142+
/// Current remaining range to remove
1143+
iter: Chars<'a>,
1144+
}
1145+
1146+
unsafe impl<'a> Sync for Drain<'a> {}
1147+
unsafe impl<'a> Send for Drain<'a> {}
1148+
1149+
#[unstable(feature = "collections_drain", reason = "recently added")]
1150+
impl<'a> Drop for Drain<'a> {
1151+
fn drop(&mut self) {
1152+
unsafe {
1153+
// Use Vec::drain. "Reaffirm" the bounds checks to avoid
1154+
// panic code being inserted again.
1155+
let self_vec = (*self.string).as_mut_vec();
1156+
if self.start <= self.end && self.end <= self_vec.len() {
1157+
self_vec.drain(self.start..self.end);
1158+
}
1159+
}
1160+
}
1161+
}
1162+
1163+
#[unstable(feature = "collections_drain", reason = "recently added")]
1164+
impl<'a> Iterator for Drain<'a> {
1165+
type Item = char;
1166+
1167+
#[inline]
1168+
fn next(&mut self) -> Option<char> {
1169+
self.iter.next()
1170+
}
1171+
1172+
fn size_hint(&self) -> (usize, Option<usize>) {
1173+
self.iter.size_hint()
1174+
}
1175+
}
1176+
1177+
#[unstable(feature = "collections_drain", reason = "recently added")]
1178+
impl<'a> DoubleEndedIterator for Drain<'a> {
1179+
#[inline]
1180+
fn next_back(&mut self) -> Option<char> {
1181+
self.iter.next_back()
1182+
}
1183+
}

src/libcollectionstest/string.rs

+17
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,23 @@ fn test_from_iterator() {
348348
assert_eq!(s, d);
349349
}
350350

351+
#[test]
352+
fn test_drain() {
353+
let mut s = String::from("αβγ");
354+
assert_eq!(s.drain(2..4).collect::<String>(), "β");
355+
assert_eq!(s, "αγ");
356+
357+
let mut t = String::from("abcd");
358+
t.drain(..0);
359+
assert_eq!(t, "abcd");
360+
t.drain(..1);
361+
assert_eq!(t, "bcd");
362+
t.drain(3..);
363+
assert_eq!(t, "bcd");
364+
t.drain(..);
365+
assert_eq!(t, "");
366+
}
367+
351368
#[bench]
352369
fn bench_with_capacity(b: &mut Bencher) {
353370
b.iter(|| {

src/libsyntax/codemap.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -543,21 +543,17 @@ impl CodeMap {
543543
}
544544
}
545545

546-
pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
546+
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
547547
let mut files = self.files.borrow_mut();
548548
let start_pos = match files.last() {
549549
None => 0,
550550
Some(last) => last.end_pos.to_usize(),
551551
};
552552

553553
// Remove utf-8 BOM if any.
554-
// FIXME #12884: no efficient/safe way to remove from the start of a string
555-
// and reuse the allocation.
556-
let mut src = if src.starts_with("\u{feff}") {
557-
String::from(&src[3..])
558-
} else {
559-
String::from(&src[..])
560-
};
554+
if src.starts_with("\u{feff}") {
555+
src.drain(..3);
556+
}
561557

562558
// Append '\n' in case it's not already there.
563559
// This is a workaround to prevent CodeMap.lookup_filemap_idx from

src/libsyntax/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#![feature(associated_consts)]
2929
#![feature(collections)]
30+
#![feature(collections_drain)]
3031
#![feature(core)]
3132
#![feature(libc)]
3233
#![feature(rustc_private)]

src/test/run-pass/sync-send-iterators-in-libcollections.rs

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use collections::{BitSet, BitVec};
2121
use collections::{BTreeMap, BTreeSet};
2222
use collections::EnumSet;
2323
use collections::LinkedList;
24+
use collections::String;
2425
use collections::Vec;
2526
use collections::VecDeque;
2627
use collections::VecMap;
@@ -99,4 +100,5 @@ fn main() {
99100

100101
all_sync_send!(Vec::<usize>::new(), into_iter);
101102
is_sync_send!(Vec::<usize>::new(), drain(..));
103+
is_sync_send!(String::new(), drain(..));
102104
}

0 commit comments

Comments
 (0)