Skip to content

Commit c9305aa

Browse files
committed
simplify and normalise newline handling.
1 parent e306ca1 commit c9305aa

File tree

6 files changed: 81 additions and 140 deletions.

src/parser/inlines.rs

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -358,12 +358,14 @@ impl<'a, 'r, 'o, 'd, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'c, 'p> {
358358

359359
fn handle_newline(&mut self) -> Node<'a> {
360360
let nlpos = self.scanner.pos;
361-
if self.input.as_bytes()[self.scanner.pos] == b'\r' {
361+
if self.peek_byte() == Some(b'\r') {
362362
self.scanner.pos += 1;
363363
}
364-
if self.input.as_bytes()[self.scanner.pos] == b'\n' {
364+
if self.peek_byte() == Some(b'\n') {
365365
self.scanner.pos += 1;
366366
}
367+
assert_ne!(nlpos, self.scanner.pos);
368+
367369
let inl = if nlpos > 1
368370
&& self.input.as_bytes()[nlpos - 1] == b' '
369371
&& self.input.as_bytes()[nlpos - 2] == b' '
@@ -1012,10 +1014,22 @@ impl<'a, 'r, 'o, 'd, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'c, 'p> {
10121014
);
10131015
// Build line_offsets by scanning for newlines in the content
10141016
para_ast.line_offsets = vec![0];
1015-
for (i, &byte) in content.as_bytes().iter().enumerate() {
1016-
if byte == b'\n' {
1017-
para_ast.line_offsets.push(i + 1);
1017+
1018+
let mut i = 0;
1019+
let bytes = content.as_bytes();
1020+
let len = content.len();
1021+
while i < len {
1022+
match bytes[i] {
1023+
b'\r' if i + 1 < len && bytes[i + 1] == b'\n' => {
1024+
i += 1;
1025+
para_ast.line_offsets.push(i + 1);
1026+
}
1027+
b'\n' | b'\r' => {
1028+
para_ast.line_offsets.push(i + 1);
1029+
}
1030+
_ => {}
10181031
}
1032+
i += 1;
10191033
}
10201034

10211035
let para_node = self.arena.alloc(para_ast.into());
@@ -2264,7 +2278,7 @@ pub(crate) fn manual_scan_link_url(input: &str) -> Option<(&str, usize)> {
22642278
break;
22652279
} else if b == b'\\' {
22662280
i += 2;
2267-
} else if b == b'\n' || b == b'<' {
2281+
} else if b == b'\n' || b == b'\r' || b == b'<' {
22682282
return None;
22692283
} else {
22702284
i += 1;

src/parser/mod.rs

Lines changed: 22 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -140,21 +140,27 @@ where
140140

141141
while ix < end {
142142
let mut eol = ix;
143-
let mut ate_line_end = false;
143+
let mut ate_line_end = 0;
144144

145145
while eol < end {
146-
if strings::is_line_end_char(sb[eol]) {
147-
ate_line_end = true;
148-
eol += 1;
149-
break;
150-
}
151-
if sb[eol] == 0 {
152-
break;
146+
match sb[eol] {
147+
b'\r' if eol + 1 < end && sb[eol + 1] == b'\n' => {
148+
ate_line_end = 2;
149+
eol += 2;
150+
break;
151+
}
152+
b'\n' | b'\r' => {
153+
ate_line_end = 1;
154+
eol += 1;
155+
break;
156+
}
157+
0 => break,
158+
_ => {}
153159
}
154160
eol += 1;
155161
}
156162

157-
if ate_line_end || eol == end {
163+
if ate_line_end > 0 || eol == end {
158164
if !linebuf.is_empty() {
159165
linebuf.push_str(&s[ix..eol]);
160166
let line = mem::take(&mut linebuf);
@@ -166,24 +172,10 @@ where
166172
assert_eq!(sb[eol], b'\0');
167173
linebuf.push_str(&s[ix..eol]);
168174
linebuf.push('\u{fffd}');
175+
eol += 1;
169176
}
170177

171178
ix = eol;
172-
if ix < end {
173-
if sb[ix] == b'\0' {
174-
ix += 1;
175-
} else {
176-
if ate_line_end {
177-
ix -= 1;
178-
}
179-
if sb[ix] == b'\r' {
180-
ix += 1;
181-
}
182-
if ix < end && sb[ix] == b'\n' {
183-
ix += 1;
184-
}
185-
}
186-
}
187179
}
188180

189181
if !linebuf.is_empty() {
@@ -227,14 +219,11 @@ where
227219
}
228220

229221
fn process_line(&mut self, mut line: Cow<str>) {
230-
let last_byte = line.as_bytes().last();
231-
if last_byte.map_or(true, |&b| !strings::is_line_end_char(b)) {
222+
let &last_byte = line.as_bytes().last().unwrap();
223+
if !strings::is_line_end_char(last_byte) {
232224
line.to_mut().push('\n');
233-
} else if last_byte == Some(&b'\r') {
234-
let line_mut = line.to_mut();
235-
line_mut.pop();
236-
line_mut.push('\n');
237-
};
225+
}
226+
238227
let line = line.as_ref();
239228
let bytes = line.as_bytes();
240229

@@ -1639,7 +1628,7 @@ where
16391628
if content.as_bytes()[pos] == b'\r' {
16401629
pos += 1;
16411630
}
1642-
if content.as_bytes()[pos] == b'\n' {
1631+
if content.as_bytes().get(pos) == Some(&b'\n') {
16431632
pos += 1;
16441633
}
16451634

@@ -2163,7 +2152,7 @@ fn parse_list_marker(
21632152
while strings::is_space_or_tab(bytes[i]) {
21642153
i += 1;
21652154
}
2166-
if bytes[i] == b'\n' {
2155+
if strings::is_line_end_char(bytes[i]) {
21672156
return None;
21682157
}
21692158
}

src/parser/table.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ use std::mem;
55
use crate::nodes::{Ast, Node, NodeTable, NodeValue, TableAlignment};
66
use crate::parser::Parser;
77
use crate::scanners;
8-
use crate::strings::{count_newlines, trim_cow};
8+
use crate::strings::{count_newlines, is_line_end_char, trim_cow};
99

1010
// Limit to prevent a malicious input from causing a denial of service.
1111
// See get_num_autocompleted_cells.
@@ -312,7 +312,7 @@ fn try_inserting_table_header_paragraph<'a>(
312312
.iter()
313313
.rev()
314314
.skip(1)
315-
.take_while(|&&c| c != b'\n')
315+
.take_while(|&&c| !is_line_end_char(c))
316316
.count();
317317

318318
container_ast.sourcepos.start.line += newlines;

src/scanners.re

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,7 @@ pub fn ipv6_relaxed_url_start(s: &str) -> Option<usize> {
360360
361361
table_spoiler = ['|']['|'];
362362
table_spacechar = [ \t\v\f];
363-
table_newline = [\r]?[\n];
363+
table_newline = [\r\n];
364364
365365
table_delimiter = (table_spacechar*[:]?[-]+[:]?table_spacechar*);
366366
table_cell = (escaped_char|[^\x00|\r\n])+;

src/scanners.rs

Lines changed: 14 additions & 98 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)