Skip to content

Commit d2d0cbb

Browse files
committed
don't recycle the linebuf immediately.
1 parent c9305aa commit d2d0cbb

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

src/parser/mod.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ where
136136
self.total_size = end;
137137

138138
let mut ix = 0;
139+
140+
// linebuf is necessary entirely to do spec-compliant NUL handling in
141+
// one place. If the input document contains no NUL bytes, we will never
142+
// use linebuf. Our re2c scanners presume there are no NUL bytes in
143+
// the subject, and use 0 as the sentinel result when !(cursor < len).
139144
let mut linebuf = String::new();
140145

141146
while ix < end {
@@ -163,10 +168,13 @@ where
163168
if ate_line_end > 0 || eol == end {
164169
if !linebuf.is_empty() {
165170
linebuf.push_str(&s[ix..eol]);
166-
let line = mem::take(&mut linebuf);
167-
self.process_line(line.into());
171+
// Keep one active linebuf allocation.
172+
let mut cow = Cow::Owned(mem::take(&mut linebuf));
173+
self.process_line(&mut cow, eol == end);
174+
mem::swap(&mut cow.into_owned(), &mut linebuf);
175+
linebuf.clear();
168176
} else {
169-
self.process_line(s[ix..eol].into());
177+
self.process_line(&mut s[ix..eol].into(), eol == end);
170178
}
171179
} else {
172180
assert_eq!(sb[eol], b'\0');
@@ -179,7 +187,8 @@ where
179187
}
180188

181189
if !linebuf.is_empty() {
182-
self.process_line(linebuf.into());
190+
// Reached only if the input ends with a NUL byte.
191+
self.process_line(&mut linebuf.into(), true);
183192
}
184193

185194
self.finalize_document();
@@ -218,9 +227,13 @@ where
218227
self.line_number += lines;
219228
}
220229

221-
fn process_line(&mut self, mut line: Cow<str>) {
230+
fn process_line(&mut self, line: &mut Cow<str>, at_eof: bool) {
231+
// Most scanners depend on seeing a \r or \n to end the line, even
232+
// though the end of the document suffices per spec. Synthesise a
233+
// final EOL if there isn't one so these scanners work.
222234
let &last_byte = line.as_bytes().last().unwrap();
223235
if !strings::is_line_end_char(last_byte) {
236+
assert!(at_eof); // This case should only ever occur at EOF, once per document.
224237
line.to_mut().push('\n');
225238
}
226239

0 commit comments

Comments
 (0)