Skip to content

Commit dcd0392

Browse files
paste: permit the delimiter list to be empty
Also: refactored the delimiter processing logic
1 parent b4383f9 commit dcd0392

File tree

2 files changed

+167
-25
lines changed

2 files changed

+167
-25
lines changed

src/uu/paste/src/paste.rs

Lines changed: 146 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use clap::{crate_version, Arg, ArgAction, Command};
99
use std::fs::File;
1010
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
1111
use std::path::Path;
12+
use std::slice::Iter;
1213
use uucore::error::{FromIo, UResult, USimpleError};
1314
use uucore::line_ending::LineEnding;
1415
use uucore::{format_usage, help_about, help_usage};
@@ -118,39 +119,119 @@ fn paste(
118119
));
119120
}
120121

121-
let delimiters: Vec<char> = unescape(delimiters).chars().collect();
122-
let mut delim_count = 0;
123-
let mut delim_length = 1;
122+
struct DelimiterData<'a> {
123+
current_delimiter_length: usize,
124+
delimiters_encoded: &'a [Box<[u8]>],
125+
delimiters_encoded_iter: Iter<'a, Box<[u8]>>,
126+
}
127+
128+
// Precompute instead of doing this inside the loops
129+
let mut delimiters_encoded_option = {
130+
let delimiters_unescaped = unescape(delimiters).chars().collect::<Vec<_>>();
131+
132+
let number_of_delimiters = delimiters_unescaped.len();
133+
134+
if number_of_delimiters > 0 {
135+
let mut vec = Vec::<Box<[u8]>>::with_capacity(number_of_delimiters);
136+
137+
{
138+
// a buffer of length four is large enough to encode any char
139+
let mut buffer = [0_u8; 4_usize];
140+
141+
for ch in delimiters_unescaped {
142+
let delimiter_encoded = ch.encode_utf8(&mut buffer);
143+
144+
vec.push(Box::from(delimiter_encoded.as_bytes()));
145+
}
146+
}
147+
148+
Some(vec.into_boxed_slice())
149+
} else {
150+
None
151+
}
152+
};
153+
154+
let mut delimiter_data_option = match &mut delimiters_encoded_option {
155+
&mut Some(ref mut delimiters_encoded) => {
156+
// TODO
157+
// Is this initial value correct?
158+
let current_delimiter_length = delimiters_encoded.first().unwrap().len();
159+
160+
Some(DelimiterData {
161+
delimiters_encoded,
162+
delimiters_encoded_iter: delimiters_encoded.iter(),
163+
current_delimiter_length,
164+
})
165+
}
166+
None => None,
167+
};
168+
124169
let stdout = stdout();
125170
let mut stdout = stdout.lock();
126171

127172
let mut output = Vec::new();
173+
128174
if serial {
129175
for file in &mut files {
130176
output.clear();
177+
131178
loop {
179+
let current_delimiter_option = match &mut delimiter_data_option {
180+
Some(DelimiterData {
181+
current_delimiter_length,
182+
delimiters_encoded,
183+
delimiters_encoded_iter,
184+
}) => {
185+
let current_delimiter = if let Some(delimiter_from_current_iter) =
186+
delimiters_encoded_iter.next()
187+
{
188+
delimiter_from_current_iter
189+
} else {
190+
// Reset iter after hitting the end
191+
*delimiters_encoded_iter = delimiters_encoded.iter();
192+
193+
// Unwrapping because:
194+
// 1) `delimiters_encoded` is non-empty
195+
// 2) `delimiters_encoded_iter` is a newly constructed Iter
196+
// So `next` should always return an element
197+
delimiters_encoded_iter.next().unwrap()
198+
};
199+
200+
*current_delimiter_length = current_delimiter.len();
201+
202+
Some(current_delimiter)
203+
}
204+
None => None,
205+
};
206+
132207
match read_until(file.as_mut(), line_ending as u8, &mut output) {
133208
Ok(0) => break,
134209
Ok(_) => {
135210
if output.ends_with(&[line_ending as u8]) {
136211
output.pop();
137212
}
138-
// a buffer of length four is large enough to encode any char
139-
let mut buffer = [0; 4];
140-
let ch =
141-
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
142-
delim_length = ch.len();
143-
144-
for byte in buffer.iter().take(delim_length) {
145-
output.push(*byte);
213+
214+
// Write delimiter, if one exists, to output
215+
if let Some(current_delimiter) = current_delimiter_option {
216+
output.extend_from_slice(current_delimiter);
146217
}
147218
}
148219
Err(e) => return Err(e.map_err_context(String::new)),
149220
}
150-
delim_count += 1;
151221
}
152-
// remove final delimiter
153-
output.truncate(output.len() - delim_length);
222+
223+
if let Some(DelimiterData {
224+
current_delimiter_length,
225+
..
226+
}) = &mut delimiter_data_option
227+
{
228+
// Remove trailing delimiter, if there is a delimiter
229+
// It's safe to truncate to zero (or to a length greater than the length of the Vec), so as long as
230+
// the subtraction didn't overflow (`checked_sub` returned `Some`), this should be fine
231+
if let Some(us) = output.len().checked_sub(*current_delimiter_length) {
232+
output.truncate(us);
233+
}
234+
}
154235

155236
write!(
156237
stdout,
@@ -161,10 +242,39 @@ fn paste(
161242
}
162243
} else {
163244
let mut eof = vec![false; files.len()];
245+
164246
loop {
165247
output.clear();
248+
166249
let mut eof_count = 0;
250+
167251
for (i, file) in files.iter_mut().enumerate() {
252+
let current_delimiter_option = if let Some(DelimiterData {
253+
current_delimiter_length,
254+
delimiters_encoded,
255+
delimiters_encoded_iter,
256+
}) = &mut delimiter_data_option
257+
{
258+
let current_delimiter = if let Some(bo) = delimiters_encoded_iter.next() {
259+
bo
260+
} else {
261+
// Reset iter after hitting the end
262+
*delimiters_encoded_iter = delimiters_encoded.iter();
263+
264+
// Unwrapping because:
265+
// 1) `delimiters_encoded` is non-empty
266+
// 2) `delimiters_encoded_iter` is a newly constructed Iter
267+
// So `next` should always return an element
268+
delimiters_encoded_iter.next().unwrap()
269+
};
270+
271+
*current_delimiter_length = current_delimiter.len();
272+
273+
Some(current_delimiter)
274+
} else {
275+
None
276+
};
277+
168278
if eof[i] {
169279
eof_count += 1;
170280
} else {
@@ -181,32 +291,43 @@ fn paste(
181291
Err(e) => return Err(e.map_err_context(String::new)),
182292
}
183293
}
184-
// a buffer of length four is large enough to encode any char
185-
let mut buffer = [0; 4];
186-
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
187-
delim_length = ch.len();
188294

189-
for byte in buffer.iter().take(delim_length) {
190-
output.push(*byte);
295+
// Write delimiter, if one exists, to output
296+
if let Some(current_delimiter) = current_delimiter_option {
297+
output.extend_from_slice(current_delimiter);
191298
}
192-
193-
delim_count += 1;
194299
}
300+
195301
if files.len() == eof_count {
196302
break;
197303
}
198-
// Remove final delimiter
199-
output.truncate(output.len() - delim_length);
304+
305+
if let Some(DelimiterData {
306+
current_delimiter_length,
307+
delimiters_encoded,
308+
delimiters_encoded_iter,
309+
}) = &mut delimiter_data_option
310+
{
311+
// Reset iter after file is processed
312+
*delimiters_encoded_iter = delimiters_encoded.iter();
313+
314+
// Remove trailing delimiter, if there is a delimiter
315+
// It's safe to truncate to zero (or to a length greater than the length of the Vec), so as long as
316+
// the subtraction didn't overflow (`checked_sub` returned `Some`), this should be fine
317+
if let Some(us) = output.len().checked_sub(*current_delimiter_length) {
318+
output.truncate(us);
319+
}
320+
}
200321

201322
write!(
202323
stdout,
203324
"{}{}",
204325
String::from_utf8_lossy(&output),
205326
line_ending
206327
)?;
207-
delim_count = 0;
208328
}
209329
}
330+
210331
Ok(())
211332
}
212333

tests/by-util/test_paste.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,27 @@ fn test_delimiter_list_ending_with_unescaped_backslash() {
193193
}
194194
}
195195

196+
#[test]
197+
fn test_delimiter_list_empty() {
198+
for st in ["-d", "--delimiters"] {
199+
new_ucmd!()
200+
.args(&[st, "", "-s", "--", "-"])
201+
.pipe_in(
202+
"\
203+
A ALPHA 1 _
204+
B BRAVO 2 _
205+
C CHARLIE 3 _
206+
",
207+
)
208+
.succeeds()
209+
.stdout_only(
210+
"\
211+
A ALPHA 1 _B BRAVO 2 _C CHARLIE 3 _
212+
",
213+
);
214+
}
215+
}
216+
196217
#[test]
197218
fn test_data() {
198219
for example in EXAMPLE_DATA {

0 commit comments

Comments
 (0)