@@ -227,66 +227,67 @@ fn paste(
227227 Ok ( ( ) )
228228}
229229
/// Splits a delimiter list into its individual delimiters, resolving
/// backslash escapes, and returns each delimiter as its UTF-8 bytes.
///
/// Escape handling:
/// - `\0` yields an empty delimiter (an empty string, not a NUL byte)
/// - `\\` yields a single backslash
/// - `\n` yields U+000A
/// - `\t` yields U+0009
/// - a backslash followed by any other character yields that character
///   with the backslash dropped
///
/// Every character that is not part of an escape becomes its own delimiter.
///
/// # Panics
///
/// Panics if `delimiters` ends with an unescaped backslash.
fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
    /// The character that introduces an escape sequence
    const ESCAPE: char = '\\';

    /// Returns the UTF-8 encoding of `ch` as an owned byte slice.
    fn encode_char(ch: char) -> Box<[u8]> {
        // Four bytes are always enough to hold one UTF-8 encoded char
        let mut utf8 = [0_u8; 4];

        Box::from(ch.encode_utf8(&mut utf8).as_bytes())
    }

    // The byte length of the input is an upper bound on the delimiter count
    let mut parsed: Vec<Box<[u8]>> = Vec::with_capacity(delimiters.len());

    let mut remaining = delimiters.chars();

    // Not a `for` loop: an escape consumes a second char from the iterator
    while let Some(current) = remaining.next() {
        let delimiter: Box<[u8]> = if current == ESCAPE {
            match remaining.next() {
                // "Empty string (not a null character)"
                // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
                Some('0') => Box::<[u8; 0]>::new([]),
                Some(ESCAPE) => Box::new([b'\\']),
                Some('n') => Box::new([b'\n']),
                Some('t') => Box::new([b'\t']),
                // "If any other characters follow the <backslash>, the results are unspecified."
                // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
                // However, other implementations remove the backslash
                // See "test_posix_unspecified_delimiter"
                Some(escaped) => encode_char(escaped),
                None => {
                    unreachable!("Delimiter list cannot end with an unescaped backslash");
                }
            }
        } else {
            encode_char(current)
        };

        parsed.push(delimiter);
    }

    parsed.into_boxed_slice()
}
291292
292293enum DelimiterState < ' a > {
0 commit comments