Skip to content

Commit 7573c0a

Browse files
committed
restore text w/ sourcepos of broken footnote references with smart chars.
1 parent ec13f5a commit 7573c0a

File tree

4 files changed

+81
-10
lines changed

4 files changed

+81
-10
lines changed

src/nodes.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ pub enum NodeValue {
173173
Image(Box<NodeLink>),
174174

175175
/// **Inline**. A footnote reference.
176-
FootnoteReference(NodeFootnoteReference),
176+
FootnoteReference(Box<NodeFootnoteReference>),
177177

178178
#[cfg(feature = "shortcodes")]
179179
/// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
@@ -459,6 +459,9 @@ pub struct NodeFootnoteReference {
459459
/// The name of the footnote.
460460
pub name: String,
461461

462+
/// The original text elements of the footnote reference, each paired with the column width of its source position span.
463+
pub texts: Vec<(String, usize)>,
464+
462465
/// The index of reference to the same footnote
463466
pub ref_num: u32,
464467

src/parser/inlines.rs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -994,11 +994,12 @@ impl<'a, 'r, 'o, 'd, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'c, 'p> {
994994

995995
// Create the footnote reference node
996996
let ref_node = self.make_inline(
997-
NodeValue::FootnoteReference(NodeFootnoteReference {
997+
NodeValue::FootnoteReference(Box::new(NodeFootnoteReference {
998998
name: name.clone(),
999+
texts: vec![], // Unused.
9991000
ref_num: 0,
10001001
ix: 0,
1001-
}),
1002+
})),
10021003
startpos,
10031004
endpos,
10041005
);
@@ -1734,13 +1735,33 @@ impl<'a, 'r, 'o, 'd, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'c, 'p> {
17341735
// do anything fancy here at all.
17351736
let mut sussy = false;
17361737

1738+
let mut texts = vec![];
1739+
17371740
for sibling in sibling_iterator {
1738-
match sibling.data().value {
1741+
let sibling_ast = sibling.data();
1742+
if sibling_ast.sourcepos.start.line != sibling_ast.sourcepos.end.line
1743+
|| sibling_ast.sourcepos.end.column < sibling_ast.sourcepos.start.column
1744+
{
1745+
sussy = true;
1746+
break;
1747+
}
1748+
1749+
match sibling_ast.value {
17391750
NodeValue::Text(ref literal) => {
17401751
text.push_str(literal);
1752+
texts.push((
1753+
literal.to_string(),
1754+
sibling_ast.sourcepos.end.column - sibling_ast.sourcepos.start.column
1755+
+ 1,
1756+
));
17411757
}
17421758
NodeValue::HtmlInline(ref literal) => {
17431759
text.push_str(literal);
1760+
texts.push((
1761+
literal.to_string(),
1762+
sibling_ast.sourcepos.end.column - sibling_ast.sourcepos.start.column
1763+
+ 1,
1764+
));
17441765
}
17451766
_ => {
17461767
sussy = true;
@@ -1751,11 +1772,12 @@ impl<'a, 'r, 'o, 'd, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'c, 'p> {
17511772

17521773
if !sussy && text.len() > 1 {
17531774
let inl = self.make_inline(
1754-
NodeValue::FootnoteReference(NodeFootnoteReference {
1775+
NodeValue::FootnoteReference(Box::new(NodeFootnoteReference {
17551776
name: text[1..].to_string(),
1777+
texts,
17561778
ref_num: 0,
17571779
ix: 0,
1758-
}),
1780+
})),
17591781
// Overridden immediately below.
17601782
self.scanner.pos,
17611783
self.scanner.pos,

src/parser/mod.rs

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,7 @@ where
17401740
Self::find_footnote_definitions(self.root, &mut fd_map);
17411741

17421742
let mut next_ix = 0;
1743-
Self::find_footnote_references(self.root, &mut fd_map, &mut next_ix);
1743+
self.find_footnote_references(&mut fd_map, &mut next_ix);
17441744

17451745
let mut fds = fd_map.into_values().collect::<Vec<_>>();
17461746
fds.sort_unstable_by(|a, b| a.ix.cmp(&b.ix));
@@ -1786,13 +1786,14 @@ where
17861786
}
17871787

17881788
fn find_footnote_references(
1789-
root: Node<'a>,
1789+
&mut self,
17901790
map: &mut HashMap<String, FootnoteDefinition>,
17911791
ixp: &mut u32,
17921792
) {
1793-
let mut stack = vec![root];
1793+
let mut stack = vec![self.root];
17941794
while let Some(node) = stack.pop() {
17951795
let mut ast = node.data_mut();
1796+
let sp = ast.sourcepos;
17961797
match ast.value {
17971798
NodeValue::FootnoteReference(ref mut nfr) => {
17981799
let normalized = strings::normalize_label(&nfr.name, Case::Fold);
@@ -1810,7 +1811,41 @@ where
18101811
nfr.ix = ix;
18111812
nfr.name = strings::normalize_label(&footnote.name, Case::Preserve);
18121813
} else {
1813-
ast.value = NodeValue::Text(format!("[^{}]", nfr.name).into());
1814+
// Restore the nodes as they were-ish. We restore each
1815+
// Text node as it was found, preserving the sourcepos
1816+
// spans. This is important for accurate sourcepos
1817+
// tracking; we assert when 'consuming' sourcepos
1818+
// lengths in post-processing that either the span
1819+
// length matches the byte count of the string (meaning
1820+
// we can reliably subset them both), or that we're
1821+
// consuming a whole span. Trying to consume part of a
1822+
// span without a matching length is undefined, and we
1823+
// will crash; see Spx::consume.
1824+
//
1825+
// See HACK comment in
1826+
// `inlines::Subject::handle_close_bracket` for the
1827+
// producer of these values.
1828+
assert!(!nfr.texts.is_empty());
1829+
let mut lc = sp.start;
1830+
let mut target = node;
1831+
1832+
let mut texts = mem::take(&mut nfr.texts);
1833+
texts.insert(0, ("[".into(), 1));
1834+
texts.push(("]".into(), 1));
1835+
1836+
for (text, span) in &mut texts {
1837+
let inl = self.arena.alloc(
1838+
Ast::new_with_sourcepos(
1839+
NodeValue::Text(mem::take(text).into()),
1840+
(lc, lc.column_add(*span as isize - 1)).into(),
1841+
)
1842+
.into(),
1843+
);
1844+
target.insert_after(inl);
1845+
target = inl;
1846+
lc = lc.column_add(*span as isize);
1847+
}
1848+
node.detach();
18141849
}
18151850
}
18161851
_ => {

src/tests/fuzz.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,3 +540,14 @@ fn oye_siri_prende_las_luces() {
540540
])
541541
);
542542
}
543+
544+
#[test]
545+
fn cursed_lands() {
546+
let mut opts = Options::default();
547+
opts.extension.autolink = true;
548+
opts.extension.footnotes = true;
549+
opts.parse.smart = true;
550+
opts.parse.relaxed_autolinks = true;
551+
552+
markdown_to_html("[^[email protected]]", &opts);
553+
}

0 commit comments

Comments
 (0)