Skip to content

Commit 8bb8a26

Browse files
Fix additional paste bugs
1 parent f096a21 commit 8bb8a26

File tree

2 files changed

+91
-36
lines changed

2 files changed

+91
-36
lines changed

src/uu/paste/src/paste.rs

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -227,66 +227,67 @@ fn paste(
227227
Ok(())
228228
}
229229

230-
/// Unescape all special characters
231-
fn unescape(input: &str) -> String {
230+
fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
232231
/// A single backslash char
233232
const BACKSLASH: char = '\\';
234233

235-
let mut string = String::with_capacity(input.len());
234+
fn add_one_byte_single_char_delimiter(vec: &mut Vec<Box<[u8]>>, byte: u8) {
235+
vec.push(Box::new([byte]));
236+
}
237+
238+
// a buffer of length four is large enough to encode any char
239+
let mut buffer = [0; 4];
240+
241+
let mut add_single_char_delimiter = |vec: &mut Vec<Box<[u8]>>, ch: char| {
242+
let delimiter_encoded = ch.encode_utf8(&mut buffer);
243+
244+
vec.push(Box::from(delimiter_encoded.as_bytes()));
245+
};
236246

237-
let mut chars = input.chars();
247+
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters.len());
238248

249+
let mut chars = delimiters.chars();
250+
251+
// Unescape all special characters
239252
while let Some(char) = chars.next() {
240253
match char {
254+
// A backslash starts an escape sequence; the character after it selects the delimiter
241255
BACKSLASH => match chars.next() {
242-
// Keep "\" if it is the last char
256+
// "Empty string (not a null character)"
257+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
258+
Some('0') => {
259+
vec.push(Box::<[u8; 0]>::new([]));
260+
}
243261
// "\\" to "\"
244-
None | Some(BACKSLASH) => {
245-
string.push(BACKSLASH);
262+
Some(BACKSLASH) => {
263+
add_one_byte_single_char_delimiter(&mut vec, b'\\');
246264
}
247265
// "\n" to U+000A
248266
Some('n') => {
249-
string.push('\n');
267+
add_one_byte_single_char_delimiter(&mut vec, b'\n');
250268
}
251269
// "\t" to U+0009
252270
Some('t') => {
253-
string.push('\t');
271+
add_one_byte_single_char_delimiter(&mut vec, b'\t');
254272
}
255273
Some(other_char) => {
256-
string.push(BACKSLASH);
257-
string.push(other_char);
274+
// "If any other characters follow the <backslash>, the results are unspecified."
275+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
276+
// However, other implementations remove the backslash
277+
// See "test_posix_unspecified_delimiter"
278+
add_single_char_delimiter(&mut vec, other_char);
279+
}
280+
None => {
281+
unreachable!("Delimiter list cannot end with an unescaped backslash");
258282
}
259283
},
260284
non_backslash_char => {
261-
string.push(non_backslash_char);
285+
add_single_char_delimiter(&mut vec, non_backslash_char);
262286
}
263287
}
264288
}
265289

266-
string
267-
}
268-
269-
fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
270-
let delimiters_unescaped = unescape(delimiters).chars().collect::<Vec<_>>();
271-
272-
let delimiters_unescaped_len = delimiters_unescaped.len();
273-
274-
if delimiters_unescaped_len > 0 {
275-
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters_unescaped_len);
276-
277-
// a buffer of length four is large enough to encode any char
278-
let mut buffer = [0; 4];
279-
280-
for delimiter in delimiters_unescaped {
281-
let delimiter_encoded = delimiter.encode_utf8(&mut buffer);
282-
283-
vec.push(Box::from(delimiter_encoded.as_bytes()));
284-
}
285-
286-
vec.into_boxed_slice()
287-
} else {
288-
Box::new([])
289-
}
290+
vec.into_boxed_slice()
290291
}
291292

292293
enum DelimiterState<'a> {

tests/by-util/test_paste.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,60 @@ fn test_three_trailing_backslashes_delimiter() {
289289
}
290290
}
291291

292+
// "If any other characters follow the <backslash>, the results are unspecified."
293+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
294+
// However, other implementations remove the backslash
295+
#[test]
296+
fn test_posix_unspecified_delimiter() {
297+
for option_style in ["-d", "--delimiters"] {
298+
new_ucmd!()
299+
// This is not "\\z", but "\z"
300+
.args(&[option_style, "\\z", "-s"])
301+
.pipe_in(
302+
"\
303+
1
304+
2
305+
3
306+
4
307+
",
308+
)
309+
.succeeds()
310+
.stdout_only(
311+
"\
312+
1z2z3z4
313+
",
314+
);
315+
}
316+
}
317+
318+
// "\0" denotes the "Empty string (not a null character)", so it contributes no delimiter bytes
319+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
320+
// The delimiter list "\0z\0" therefore cycles through: empty, "z", empty
321+
#[test]
322+
fn test_slash_zero_delimiter() {
323+
for option_style in ["-d", "--delimiters"] {
324+
new_ucmd!()
325+
// This is "\0z\0"
326+
.args(&[option_style, "\\0z\\0", "-s"])
327+
.pipe_in(
328+
"\
329+
1
330+
2
331+
3
332+
4
333+
5
334+
6
335+
",
336+
)
337+
.succeeds()
338+
.stdout_only(
339+
"\
340+
12z345z6
341+
",
342+
);
343+
}
344+
}
345+
292346
#[test]
293347
fn test_data() {
294348
for example in EXAMPLE_DATA {

0 commit comments

Comments
 (0)