@@ -136,6 +136,11 @@ where
136136 self . total_size = end;
137137
138138 let mut ix = 0 ;
139+
140+ // linebuf is necessary entirely to do spec-compliant NUL handling in
141+ // one place. If the input document contains no NUL bytes, we will never
142+ // use linebuf. Our re2c scanners presume there are no NUL bytes in
143+ // the subject, and use 0 as the sentinel result when !(cursor < len).
139144 let mut linebuf = String :: new ( ) ;
140145
141146 while ix < end {
@@ -163,10 +168,13 @@ where
163168 if ate_line_end > 0 || eol == end {
164169 if !linebuf. is_empty ( ) {
165170 linebuf. push_str ( & s[ ix..eol] ) ;
166- let line = mem:: take ( & mut linebuf) ;
167- self . process_line ( line. into ( ) ) ;
171+ // Keep one active linebuf allocation.
172+ let mut cow = Cow :: Owned ( mem:: take ( & mut linebuf) ) ;
173+ self . process_line ( & mut cow, eol == end) ;
174+ mem:: swap ( & mut cow. into_owned ( ) , & mut linebuf) ;
175+ linebuf. clear ( ) ;
168176 } else {
169- self . process_line ( s[ ix..eol] . into ( ) ) ;
177+ self . process_line ( & mut s[ ix..eol] . into ( ) , eol == end ) ;
170178 }
171179 } else {
172180 assert_eq ! ( sb[ eol] , b'\0' ) ;
@@ -179,7 +187,8 @@ where
179187 }
180188
181189 if !linebuf. is_empty ( ) {
182- self . process_line ( linebuf. into ( ) ) ;
190+ // Reached only if the input ends with a NUL byte.
191+ self . process_line ( & mut linebuf. into ( ) , true ) ;
183192 }
184193
185194 self . finalize_document ( ) ;
@@ -218,9 +227,13 @@ where
218227 self . line_number += lines;
219228 }
220229
221- fn process_line ( & mut self , mut line : Cow < str > ) {
230+ fn process_line ( & mut self , line : & mut Cow < str > , at_eof : bool ) {
231+ // Most scanners depend on seeing a \r or \n to end the line, even
232+ // though the end of the document suffices per spec. Synthesise a
233+ // final EOL if there isn't one so these scanners work.
222234 let & last_byte = line. as_bytes ( ) . last ( ) . unwrap ( ) ;
223235 if !strings:: is_line_end_char ( last_byte) {
236+ assert ! ( at_eof) ; // This case should only ever occur at EOF, once per document.
224237 line. to_mut ( ) . push ( '\n' ) ;
225238 }
226239
0 commit comments