From 873e1d90000ee37470d2779b57632d68c47e7a24 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Tue, 27 Mar 2018 10:04:03 -0700 Subject: [PATCH] Fix a corner case in the ATX header parser The parser would previously crash in some cases of empty ATX headers with trailing hashes. This case isn't captured by the CommonMark spec. The bad cases look like this: ``` ### #### ``` (note the leading space here is not part of the bug - it's just so `git commit` doesn't throw these lines away as comments) In these cases the line parser advances `first_nonspace` all the way to the second set of hashes, while the `chop_trailing_hashtags` routine for stripping the trailing hashes also trims leading whitespace, leading to a buffer overrun as `add_text_to_container` thinks the content of the line is only the leading "###" but that the text begins at the start of "####". This hasn't been detected previously because the spec only contains a test case for ``` ### ### ``` and standard CommonMark always eats exactly one space after the initial ATX hashes, leaving the line parser's `offset` equal to `first_nonspace`, and correctly adding empty text to the header container. The spec might want an additional test case containing multiple spaces in an empty bookended ATX header. 
--- src/parser/mod.rs | 21 +++++++++++++++++++-- src/tests.rs | 8 ++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6a09349f..71971b9d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -949,8 +949,25 @@ impl<'a, 'o> Parser<'a, 'o> { } }; let count = self.first_nonspace - self.offset; - self.advance_offset(&line, count, false); - self.add_line(container, &line); + + // In a rare case the above `chop` operation can leave + // the line shorter than the recorded `first_nonspace` + // This happens with ATX headers containing no header + // text, multiple spaces and trailing hashes, e.g + // + // ### ### + // + // In this case `first_nonspace` indexes into the second + // set of hashes, while `chop_trailing_hashtags` truncates + // `line` to just `###` (the first three hashes). + // In this case there's no text to add, and no further + // processing to be done. + let have_line_text = self.first_nonspace <= line.len(); + + if have_line_text { + self.advance_offset(&line, count, false); + self.add_line(container, &line); + } } else { let start_column = self.first_nonspace + 1; container = self.add_child(container, NodeValue::Paragraph, start_column); diff --git a/src/tests.rs b/src/tests.rs index b4f05b3f..813c3945 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -665,3 +665,11 @@ fn pathological_emphases() { timeout_ms(move || html(&s, &exp), 4000); } + +#[test] +fn no_panic_on_empty_bookended_atx_headers() { + html( + "# #", + "

\n" + ); +}