From c5f5f2d004a9a67b28d2ba78c60ef826e98ca072 Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 19:12:49 +0300 Subject: [PATCH 1/2] Revert "html: don't report sourcepos on inlines." This reverts commit cc17182c99da32c284ebe6aeb4db54b9710c185a. --- src/html.rs | 76 +++++++++++++++++++-------------- src/tests/escaped_char_spans.rs | 13 ++++-- src/tests/fuzz.rs | 2 +- src/tests/wikilinks.rs | 13 +++--- 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/src/html.rs b/src/html.rs index 6c521720..55bf5bc0 100644 --- a/src/html.rs +++ b/src/html.rs @@ -725,31 +725,33 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Text(ref literal) => { - // No sourcepos. if entering { self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { - // No sourcepos. if entering { - self.output.write_all(b"
\n")?; + self.output.write_all(b"\n")?; } } NodeValue::SoftBreak => { - // No sourcepos. if entering { if self.options.render.hardbreaks { - self.output.write_all(b"
\n")?; + self.output.write_all(b"\n")?; } else { self.output.write_all(b"\n")?; } } } NodeValue::Code(NodeCode { ref literal, .. }) => { - // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; self.escape(literal.as_bytes())?; self.output.write_all(b"")?; } @@ -771,47 +773,52 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Strong => { - // No sourcepos. let parent_node = node.parent(); if !self.options.render.gfm_quirks || (parent_node.is_none() || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong)) { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } } NodeValue::Emph => { - // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Strikethrough => { - // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Superscript => { - // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Link(ref nl) => { - // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Image(ref nl) => { - // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteDefinition(ref nfd) => { - // No sourcepos. if entering { if self.footnote_ix == 0 { + self.output.write_all(b"\n
    \n")?; + .write_all(b" class=\"footnotes\" data-footnotes>\n
      \n")?; } self.footnote_ix += 1; - self.output.write_all(b"
    1. ")?; } else { @@ -959,14 +970,18 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteReference(ref nfr) => { - // No sourcepos. if entering { let mut ref_id = format!("fnref-{}", nfr.name); + + self.output.write_all(b" 1 { ref_id = format!("{}-{}", ref_id, nfr.ref_num); } + self.output - .write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Escaped => { - // No sourcepos. if self.options.render.escaped_char_spans { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } @@ -1024,9 +1040,10 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::WikiLink(ref nl) => { - // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Underline => { - // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1046,7 +1062,6 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::SpoileredText => { - // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1054,7 +1069,6 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::EscapedTag(ref net) => { - // No sourcepos. self.output.write_all(net.as_bytes())?; } } diff --git a/src/tests/escaped_char_spans.rs b/src/tests/escaped_char_spans.rs index 5b8bbf33..7e2d747b 100644 --- a/src/tests/escaped_char_spans.rs +++ b/src/tests/escaped_char_spans.rs @@ -1,10 +1,17 @@ use super::*; use ntest::test_case; -#[test_case("\\@user", "

      @user

      \n")] -#[test_case("This\\@that", "

      This@that

      \n")] +// html_opts! does a roundtrip check unless sourcepos is set. +// These cases don't work roundtrip, because converting to commonmark +// automatically escapes certain characters. +#[test_case("\\@user", "

      @user

      \n")] +#[test_case("This\\@that", "

      This@that

      \n")] fn escaped_char_spans(markdown: &str, html: &str) { - html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip); + html_opts!( + [render.escaped_char_spans, render.sourcepos], + markdown, + html + ); } #[test_case("\\@user", "

      @user

      \n")] diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 21223729..47cd1fb6 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -47,7 +47,7 @@ fn footnote_def() { render.hardbreaks ], "\u{15}\u{b}\r[^ ]:", - "

      \u{15}\u{b}
      \n[^ ]:

      \n", + "

      \u{15}\u{b}
      \n[^ ]:

      \n", ); } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index afcaa07e..19b14445 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,19 +1,20 @@ use super::*; +// html_opts! does a roundtrip check unless sourcepos is set. +// These cases don't work roundtrip, because converting to commonmark +// automatically escapes certain characters. #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks_title_after_pipe, render.sourcepos], concat!("This is [[<script>alert(0)</script>|a <link]]",), - concat!("

      This is a <link

      \n"), - no_roundtrip, + concat!("

      This is a <link

      \n"), ); html_opts!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks_title_before_pipe, render.sourcepos], concat!("This is [[a <link|<script>alert(0)</script>]]",), - concat!("

      This is a <link

      \n"), - no_roundtrip, + concat!("

      This is a <link

      \n"), ); } From 1dc3fed40bb1b7a817ff92c4608059b768dbedc4 Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Fri, 12 Jul 2024 19:13:06 +0300 Subject: [PATCH 2/2] html: only show inline sourcepos when asked for. --- src/html.rs | 84 ++++++++++++++++++++++++++------- src/parser/mod.rs | 24 ++++++++-- src/tests/escaped_char_spans.rs | 13 ++--- src/tests/fuzz.rs | 2 +- src/tests/wikilinks.rs | 13 +++-- 5 files changed, 96 insertions(+), 40 deletions(-) diff --git a/src/html.rs b/src/html.rs index 55bf5bc0..abe29be1 100644 --- a/src/html.rs +++ b/src/html.rs @@ -725,22 +725,29 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Text(ref literal) => { + // Nowhere to put sourcepos. if entering { self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { + // Unreliable sourcepos. if entering { self.output.write_all(b"\n")?; } } NodeValue::SoftBreak => { + // Unreliable sourcepos. if entering { if self.options.render.hardbreaks { self.output.write_all(b"\n")?; } else { self.output.write_all(b"\n")?; @@ -748,9 +755,12 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Code(NodeCode { ref literal, .. }) => { + // Unreliable sourcepos. if entering { self.output.write_all(b"")?; self.escape(literal.as_bytes())?; self.output.write_all(b"")?; @@ -773,6 +783,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Strong => { + // Unreliable sourcepos. let parent_node = node.parent(); if !self.options.render.gfm_quirks || (parent_node.is_none() @@ -780,7 +791,9 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; @@ -788,36 +801,48 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Emph => { + // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Strikethrough => { + // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Superscript => { + // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Link(ref nl) => { + // Unreliable sourcepos. if entering { self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Image(ref nl) => { + // Unreliable sourcepos. if entering { self.output.write_all(b" HtmlFormatter<'o, 'c> { } #[cfg(feature = "shortcodes")] NodeValue::ShortCode(ref nsc) => { + // Nowhere to put sourcepos. if entering { self.output.write_all(nsc.emoji.as_bytes())?; } @@ -970,16 +999,17 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteReference(ref nfr) => { + // Unreliable sourcepos. if entering { let mut ref_id = format!("fnref-{}", nfr.name); - - self.output.write_all(b" 1 { ref_id = format!("{}-{}", ref_id, nfr.ref_num); } + self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Escaped => { + // Unreliable sourcepos. if self.options.render.escaped_char_spans { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; @@ -1040,9 +1073,12 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::WikiLink(ref nl) => { + // Unreliable sourcepos. if entering { self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Underline => { + // Unreliable sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::SpoileredText => { + // Unreliable sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::EscapedTag(ref net) => { + // Nowhere to put sourcepos. self.output.write_all(net.as_bytes())?; } } @@ -1128,7 +1175,8 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { tag_attributes.push((String::from("data-math-style"), String::from(style_attr))); - if self.options.render.sourcepos { + // Unreliable sourcepos. + if self.options.render.experimental_inline_sourcepos && self.options.render.sourcepos { let ast = node.data.borrow(); tag_attributes.push(("data-sourcepos".to_string(), ast.sourcepos.to_string())); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b6987a97..fed397bc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -761,19 +761,35 @@ pub struct RenderOptions { /// extensions. The description lists extension still has issues; see /// . /// - /// Sourcepos information is **not** reliable for inlines. See - /// for a discussion. + /// Sourcepos information is **not** reliable for inlines, and is not + /// included in HTML without also setting [`experimental_inline_sourcepos`]. + /// See for a discussion. /// /// ```rust /// # use comrak::{markdown_to_commonmark_xml, Options}; /// let mut options = Options::default(); /// options.render.sourcepos = true; - /// let input = "Hello *world*!"; + /// let input = "## Hello world!"; /// let xml = markdown_to_commonmark_xml(input, &options); - /// assert!(xml.contains("")); + /// assert!(xml.contains("")); /// ``` pub sourcepos: bool, + /// Include inline sourcepos in HTML output, which is known to have issues. + /// See for a discussion. + /// ```rust + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// options.render.sourcepos = true; + /// let input = "Hello *world*!"; + /// assert_eq!(markdown_to_html(input, &options), + /// "

      Hello world!

      \n"); + /// options.render.experimental_inline_sourcepos = true; + /// assert_eq!(markdown_to_html(input, &options), + /// "

      Hello world!

      \n"); + /// ``` + pub experimental_inline_sourcepos: bool, + /// Wrap escaped characters in a `` to allow any /// post-processing to recognize them. /// diff --git a/src/tests/escaped_char_spans.rs b/src/tests/escaped_char_spans.rs index 7e2d747b..5b8bbf33 100644 --- a/src/tests/escaped_char_spans.rs +++ b/src/tests/escaped_char_spans.rs @@ -1,17 +1,10 @@ use super::*; use ntest::test_case; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. -#[test_case("\\@user", "

      @user

      \n")] -#[test_case("This\\@that", "

      This@that

      \n")] +#[test_case("\\@user", "

      @user

      \n")] +#[test_case("This\\@that", "

      This@that

      \n")] fn escaped_char_spans(markdown: &str, html: &str) { - html_opts!( - [render.escaped_char_spans, render.sourcepos], - markdown, - html - ); + html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip); } #[test_case("\\@user", "

      @user

      \n")] diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 47cd1fb6..21223729 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -47,7 +47,7 @@ fn footnote_def() { render.hardbreaks ], "\u{15}\u{b}\r[^ ]:", - "

      \u{15}\u{b}
      \n[^ ]:

      \n", + "

      \u{15}\u{b}
      \n[^ ]:

      \n", ); } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 19b14445..afcaa07e 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,20 +1,19 @@ use super::*; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( - [extension.wikilinks_title_after_pipe, render.sourcepos], + [extension.wikilinks_title_after_pipe], concat!("This is [[<script>alert(0)</script>|a <link]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); html_opts!( - [extension.wikilinks_title_before_pipe, render.sourcepos], + [extension.wikilinks_title_before_pipe], concat!("This is [[a <link|<script>alert(0)</script>]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); }