Skip to content

Commit a75b8a0

Browse files
authored
Merge pull request #6199 from BenWiederhake/dev-wc-ascii-control-is-word
wc: count ASCII control characters as word characters
2 parents abdeead + d5e7f9a commit a75b8a0

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

src/uu/wc/src/wc.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,9 +580,8 @@ fn process_chunk<
580580
if SHOW_WORDS {
581581
if ch.is_whitespace() {
582582
*in_word = false;
583-
} else if ch.is_ascii_control() {
584-
// These count as characters but do not affect the word state
585583
} else if !(*in_word) {
584+
// This also counts control characters! (As of GNU coreutils 9.5)
586585
*in_word = true;
587586
total.words += 1;
588587
}

tests/by-util/test_wc.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ fn test_utf8_words() {
7171
.arg("-w")
7272
.pipe_in_fixture("UTF_8_weirdchars.txt")
7373
.run()
74-
.stdout_is("87\n");
74+
.stdout_is("89\n");
7575
}
7676

7777
#[test]
@@ -80,7 +80,7 @@ fn test_utf8_line_length_words() {
8080
.arg("-Lw")
8181
.pipe_in_fixture("UTF_8_weirdchars.txt")
8282
.run()
83-
.stdout_is(" 87 48\n");
83+
.stdout_is(" 89 48\n");
8484
}
8585

8686
#[test]
@@ -98,7 +98,7 @@ fn test_utf8_line_length_chars_words() {
9898
.arg("-Lmw")
9999
.pipe_in_fixture("UTF_8_weirdchars.txt")
100100
.run()
101-
.stdout_is(" 87 442 48\n");
101+
.stdout_is(" 89 442 48\n");
102102
}
103103

104104
#[test]
@@ -143,7 +143,7 @@ fn test_utf8_chars_words() {
143143
.arg("-mw")
144144
.pipe_in_fixture("UTF_8_weirdchars.txt")
145145
.run()
146-
.stdout_is(" 87 442\n");
146+
.stdout_is(" 89 442\n");
147147
}
148148

149149
#[test]
@@ -161,7 +161,7 @@ fn test_utf8_line_length_lines_words() {
161161
.arg("-Llw")
162162
.pipe_in_fixture("UTF_8_weirdchars.txt")
163163
.run()
164-
.stdout_is(" 25 87 48\n");
164+
.stdout_is(" 25 89 48\n");
165165
}
166166

167167
#[test]
@@ -179,7 +179,7 @@ fn test_utf8_lines_words_chars() {
179179
.arg("-mlw")
180180
.pipe_in_fixture("UTF_8_weirdchars.txt")
181181
.run()
182-
.stdout_is(" 25 87 442\n");
182+
.stdout_is(" 25 89 442\n");
183183
}
184184

185185
#[test]
@@ -197,7 +197,17 @@ fn test_utf8_all() {
197197
.arg("-lwmcL")
198198
.pipe_in_fixture("UTF_8_weirdchars.txt")
199199
.run()
200-
.stdout_is(" 25 87 442 513 48\n");
200+
.stdout_is(" 25 89 442 513 48\n");
201+
}
202+
203+
#[test]
204+
fn test_ascii_control() {
205+
// GNU coreutils "d1" test
206+
new_ucmd!()
207+
.arg("-w")
208+
.pipe_in(*b"\x01\n")
209+
.run()
210+
.stdout_is("1\n");
201211
}
202212

203213
#[test]

0 commit comments

Comments
 (0)