Skip to content

Commit 15df006

Browse files
author
Ulrik Sverdrup
committed
collections: Implement String::drain(range) according to RFC 574
`.drain(range)` is unstable and under feature(collections_drain). This adds a safe way to remove any range of a String as efficiently as possible. As noted in the code, this drain iterator has none of the memory safety issues of the vector version. RFC tracking issue is rust-lang#23055
1 parent 42bfeec commit 15df006

File tree

3 files changed

+134
-1
lines changed

3 files changed

+134
-1
lines changed

src/libcollections/string.rs

+115-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use core::prelude::*;
1717
use core::fmt;
1818
use core::hash;
1919
use core::iter::FromIterator;
20+
use core::marker::PhantomData;
2021
use core::mem;
2122
use core::ops::{self, Deref, Add, Index};
2223
use core::ptr;
@@ -26,7 +27,8 @@ use rustc_unicode::str as unicode_str;
2627
use rustc_unicode::str::Utf16Item;
2728

2829
use borrow::{Cow, IntoCow};
29-
use str::{self, FromStr, Utf8Error};
30+
use range::RangeArgument;
31+
use str::{self, FromStr, Utf8Error, Chars};
3032
use vec::{DerefVec, Vec, as_vec};
3133

3234
/// A growable string stored as a UTF-8 encoded buffer.
@@ -695,6 +697,60 @@ impl String {
695697
pub fn clear(&mut self) {
696698
self.vec.clear()
697699
}
700+
701+
/// Create a draining iterator that removes the specified range in the string
702+
/// and yields the removed chars from start to end. The element range is
703+
/// removed even if the iterator is not consumed until the end.
/// The range is expressed in byte offsets into the string. The removal itself
/// is performed when the returned iterator is dropped; if the iterator is
/// leaked instead of dropped, the range is not removed.
704+
///
705+
/// # Panics
706+
///
707+
/// Panics if the starting point or end point are not on character boundaries,
708+
/// or if they are out of bounds.
709+
///
710+
/// # Examples
711+
///
712+
/// ```
713+
/// # #![feature(collections_drain)]
714+
///
715+
/// let mut s = String::from("α is alpha, β is beta");
716+
/// let beta_offset = s.find('β').unwrap_or(s.len());
717+
///
718+
/// // Remove the range up until the β from the string
719+
/// let t: String = s.drain(..beta_offset).collect();
720+
/// assert_eq!(t, "α is alpha, ");
721+
/// assert_eq!(s, "β is beta");
722+
///
723+
/// // A full range clears the string
724+
/// s.drain(..);
725+
/// assert_eq!(s, "");
726+
/// ```
727+
#[unstable(feature = "collections_drain",
728+
reason = "recently added, matches RFC")]
729+
pub fn drain<R>(&mut self, range: R) -> Drain where R: RangeArgument<usize> {
730+
// Memory safety
731+
//
732+
// The String version of Drain does not have the memory safety issues
733+
// of the vector version. The data is just plain bytes.
734+
// Because the range removal happens in Drop, if the Drain iterator is leaked,
735+
// the removal will not happen.
736+
let len = self.len();
737+
// An unbounded start defaults to 0.
let start = *range.start().unwrap_or(&0);
738+
// An unbounded end defaults to the current length of the string.
let end = *range.end().unwrap_or(&len);
739+
740+
// Take out two simultaneous borrows. The &mut String won't be accessed
741+
// until iteration is over, in Drop.
742+
let self_ptr = self as *mut _;
743+
// slicing does the appropriate bounds checks
// (so an invalid range panics here, before any Drain value exists and
// therefore before its Drop could run with bad offsets)
744+
let chars_iter = self[start..end].chars();
745+
746+
// `start` and `end` are recorded so Drop knows which bytes to remove and
// where the preserved tail begins.
Drain {
747+
start: start,
748+
tail_start: end,
749+
iter: chars_iter,
750+
string: self_ptr,
751+
_marker: PhantomData,
752+
}
753+
}
698754
}
699755

700756
impl FromUtf8Error {
@@ -1072,3 +1128,61 @@ impl fmt::Write for String {
10721128
Ok(())
10731129
}
10741130
}
1131+
1132+
/// A draining iterator for `String`.
///
/// Yields the chars of the drained range; the range is removed from the
/// underlying `String` when this value is dropped.
1133+
#[unstable(feature = "collections_drain", reason = "recently added")]
1134+
pub struct Drain<'a> {
1135+
/// Raw pointer to the string being drained. It is only dereferenced in
/// `Drop`, once iteration is over.
string: *mut String,
1136+
/// Start of part to remove
1137+
start: usize,
1138+
/// Index of tail to preserve
1139+
tail_start: usize,
1140+
/// Current remaining range to remove
1141+
iter: Chars<'a>,
1142+
/// Zero-sized marker tying `Drain` to lifetime `'a` as if it held a
/// `&'a mut String`, since the raw pointer above carries no lifetime.
_marker: PhantomData<&'a mut String>,
1143+
}
1144+
1145+
// `Drain` holds a raw `*mut String`, which suppresses the automatic
// `Sync`/`Send` impls, so they are asserted by hand here.
// NOTE(review): soundness presumably rests on `Drain` behaving like a
// `&'a mut String` plus a `Chars<'a>` over plain bytes — confirm.
unsafe impl<'a> Sync for Drain<'a> {}
1146+
unsafe impl<'a> Send for Drain<'a> {}
1147+
1148+
#[unstable(feature = "collections_drain", reason = "recently added")]
1149+
// Dropping the iterator performs the actual removal of the drained byte
// range, regardless of how many chars were yielded.
impl<'a> Drop for Drain<'a> {
1150+
fn drop(&mut self) {
1151+
// SAFETY (review): relies on `self.string` still pointing at the String
// that `drain` borrowed, and on `start <= tail_start <= len`, which
// `drain` checked by slicing `self[start..end]`. Assumes nothing else
// touched the String while the Drain was alive — confirm.
unsafe {
1152+
// memmove back untouched tail, then truncate & reset length
1153+
let self_vec = (*self.string).as_mut_vec();
1154+
// Number of bytes after the drained range that must be kept.
let tail_len = self_vec.len() - self.tail_start;
1155+
if tail_len > 0 {
1156+
let src = self_vec.as_ptr().offset(self.tail_start as isize);
1157+
let dst = self_vec.as_mut_ptr().offset(self.start as isize);
1158+
// Source and destination may overlap; `ptr::copy` is the
// overlap-tolerant (memmove-style) copy.
ptr::copy(src, dst, tail_len);
1159+
}
1160+
// New length = bytes before the drained range + preserved tail.
self_vec.set_len(self.start + tail_len);
1161+
}
1162+
}
1163+
}
1164+
1165+
#[unstable(feature = "collections_drain", reason = "recently added")]
1166+
// Forward iteration simply delegates to the `Chars` iterator created over
// the drained slice; no bytes are moved until `Drop`.
impl<'a> Iterator for Drain<'a> {
1167+
type Item = char;
1168+
1169+
#[inline]
1170+
// Yields the next char from the front of the drained range.
fn next(&mut self) -> Option<char> {
1171+
self.iter.next()
1172+
}
1173+
1174+
// Reports whatever bounds the underlying `Chars` iterator reports.
fn size_hint(&self) -> (usize, Option<usize>) {
1175+
self.iter.size_hint()
1176+
}
1177+
}
1178+
1179+
#[unstable(feature = "collections_drain", reason = "recently added")]
1180+
// Backward iteration also delegates to the underlying `Chars` iterator.
impl<'a> DoubleEndedIterator for Drain<'a> {
1181+
#[inline]
1182+
// Yields the next char from the back of the drained range.
fn next_back(&mut self) -> Option<char> {
1183+
self.iter.next_back()
1184+
}
1185+
}
1186+
1187+
#[unstable(feature = "collections_drain", reason = "recently added")]
1188+
// NOTE(review): `Drain::size_hint` forwards to `Chars::size_hint`. Chars are
// variable-width in UTF-8, so `Chars` presumably cannot report an exact
// remaining count (lower and upper bound differ). If so, this impl violates
// the `ExactSizeIterator` contract and should be removed — confirm against
// the `Chars` implementation.
impl<'a> ExactSizeIterator for Drain<'a> { }

src/libcollectionstest/string.rs

+17
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,23 @@ fn test_from_iterator() {
348348
assert_eq!(s, d);
349349
}
350350

351+
#[test]
352+
// Exercises `String::drain` on a multi-byte range and on the open-ended
// range forms, checking both the yielded chars and the remaining string.
fn test_drain() {
353+
let mut s = String::from("αβγ");
354+
// Ranges are byte offsets: bytes 2..4 cover exactly the char 'β'.
assert_eq!(s.drain(2..4).collect::<String>(), "β");
355+
assert_eq!(s, "αγ");
356+
357+
let mut t = String::from("abcd");
358+
// Empty range at the front: nothing is removed. The iterator is dropped
// immediately, which is what triggers the removal in each case below.
t.drain(..0);
359+
assert_eq!(t, "abcd");
360+
// Open start: removes the first byte.
t.drain(..1);
361+
assert_eq!(t, "bcd");
362+
// Open end starting at the string's length: empty range, nothing removed.
t.drain(3..);
363+
assert_eq!(t, "bcd");
364+
// Full range: clears the string.
t.drain(..);
365+
assert_eq!(t, "");
366+
}
367+
351368
#[bench]
352369
fn bench_with_capacity(b: &mut Bencher) {
353370
b.iter(|| {

src/test/run-pass/sync-send-iterators-in-libcollections.rs

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use collections::{BitSet, BitVec};
2121
use collections::{BTreeMap, BTreeSet};
2222
use collections::EnumSet;
2323
use collections::LinkedList;
24+
use collections::String;
2425
use collections::Vec;
2526
use collections::VecDeque;
2627
use collections::VecMap;
@@ -99,4 +100,5 @@ fn main() {
99100

100101
all_sync_send!(Vec::<usize>::new(), into_iter);
101102
is_sync_send!(Vec::<usize>::new(), drain(..));
103+
is_sync_send!(String::new(), drain(..));
102104
}

0 commit comments

Comments
 (0)