Skip to content

Commit 72b2dd3

Browse files
authored
Merge pull request #619 from sheremetyev/push-opxuuppvplyu
Add support for inline footnotes
2 parents 6d22e82 + caa75d6 commit 72b2dd3

File tree

6 files changed

+732
-12
lines changed

6 files changed

+732
-12
lines changed

fuzz/fuzz_targets/all_options.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fuzz_target!(|s: &str| {
1919
superscript: true,
2020
header_ids: Some("user-content-".to_string()),
2121
footnotes: true,
22+
inline_footnotes: true,
2223
description_lists: true,
2324
front_matter_delimiter: Some("---".to_string()),
2425
multiline_block_quotes: true,

src/main.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ enum Extension {
178178
Tasklist,
179179
Superscript,
180180
Footnotes,
181+
InlineFootnotes,
181182
DescriptionLists,
182183
MultilineBlockQuotes,
183184
MathDollars,
@@ -264,6 +265,7 @@ fn main() -> Result<(), Box<dyn Error>> {
264265
.superscript(exts.contains(&Extension::Superscript))
265266
.maybe_header_ids(cli.header_ids)
266267
.footnotes(exts.contains(&Extension::Footnotes))
268+
.inline_footnotes(exts.contains(&Extension::InlineFootnotes))
267269
.description_lists(exts.contains(&Extension::DescriptionLists))
268270
.multiline_block_quotes(exts.contains(&Extension::MultilineBlockQuotes))
269271
.math_dollars(exts.contains(&Extension::MathDollars))

src/parser/inlines.rs

Lines changed: 185 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use crate::arena_tree::Node;
22
use crate::ctype::{isdigit, ispunct, isspace};
33
use crate::entity;
44
use crate::nodes::{
5-
Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, NodeWikiLink,
6-
Sourcepos,
5+
Ast, AstNode, NodeCode, NodeFootnoteDefinition, NodeFootnoteReference, NodeLink, NodeMath,
6+
NodeValue, NodeWikiLink, Sourcepos,
77
};
88
use crate::parser::autolink;
99
#[cfg(feature = "shortcodes")]
@@ -131,7 +131,7 @@ impl FlankingCheckHelper for char {
131131
}
132132
}
133133

134-
pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c> {
134+
pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c, 'p> {
135135
pub arena: &'a Arena<AstNode<'a>>,
136136
options: &'o Options<'c>,
137137
pub input: &'i [u8],
@@ -141,6 +141,7 @@ pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c> {
141141
line_offset: usize,
142142
flags: Flags,
143143
pub refmap: &'r mut RefMap,
144+
footnote_defs: &'p FootnoteDefs<'a>,
144145
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
145146
last_delimiter: Option<&'d Delimiter<'a, 'd>>,
146147
brackets: Vec<Bracket<'a>>,
@@ -192,6 +193,34 @@ impl RefMap {
192193
}
193194
}
194195

196+
pub struct FootnoteDefs<'a> {
197+
defs: RefCell<Vec<&'a AstNode<'a>>>,
198+
counter: RefCell<usize>,
199+
}
200+
201+
impl<'a> FootnoteDefs<'a> {
202+
pub fn new() -> Self {
203+
Self {
204+
defs: RefCell::new(Vec::new()),
205+
counter: RefCell::new(0),
206+
}
207+
}
208+
209+
pub fn next_name(&self) -> String {
210+
let mut counter = self.counter.borrow_mut();
211+
*counter += 1;
212+
format!("__inline_{}", *counter)
213+
}
214+
215+
pub fn add_definition(&self, def: &'a AstNode<'a>) {
216+
self.defs.borrow_mut().push(def);
217+
}
218+
219+
pub fn definitions(&self) -> std::cell::Ref<'_, Vec<&'a AstNode<'a>>> {
220+
self.defs.borrow()
221+
}
222+
}
223+
195224
pub struct Delimiter<'a: 'd, 'd> {
196225
inl: &'a AstNode<'a>,
197226
position: usize,
@@ -231,13 +260,14 @@ struct WikilinkComponents<'i> {
231260
link_label: Option<(&'i [u8], usize, usize)>,
232261
}
233262

234-
impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
263+
impl<'a, 'r, 'o, 'd, 'i, 'c, 'p> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'p> {
235264
pub fn new(
236265
arena: &'a Arena<AstNode<'a>>,
237266
options: &'o Options<'c>,
238267
input: &'i [u8],
239268
line: usize,
240269
refmap: &'r mut RefMap,
270+
footnote_defs: &'p FootnoteDefs<'a>,
241271
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
242272
) -> Self {
243273
let mut s = Subject {
@@ -250,6 +280,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
250280
line_offset: 0,
251281
flags: Flags::default(),
252282
refmap,
283+
footnote_defs,
253284
delimiter_arena,
254285
last_delimiter: None,
255286
brackets: vec![],
@@ -272,7 +303,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
272303
s.special_chars[b'~' as usize] = true;
273304
s.skip_chars[b'~' as usize] = true;
274305
}
275-
if options.extension.superscript {
306+
if options.extension.superscript || options.extension.inline_footnotes {
276307
s.special_chars[b'^' as usize] = true;
277308
}
278309
#[cfg(feature = "shortcodes")]
@@ -402,8 +433,24 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
402433
'~' if self.options.extension.strikethrough || self.options.extension.subscript => {
403434
Some(self.handle_delim(b'~'))
404435
}
405-
'^' if self.options.extension.superscript && !self.within_brackets => {
406-
Some(self.handle_delim(b'^'))
436+
'^' => {
437+
// Check for inline footnote first
438+
if self.options.extension.footnotes
439+
&& self.options.extension.inline_footnotes
440+
&& self.peek_char_n(1) == Some(&(b'['))
441+
{
442+
self.handle_inline_footnote()
443+
} else if self.options.extension.superscript && !self.within_brackets {
444+
Some(self.handle_delim(b'^'))
445+
} else {
446+
// Just regular text
447+
self.pos += 1;
448+
Some(self.make_inline(
449+
NodeValue::Text("^".to_string()),
450+
self.pos - 1,
451+
self.pos - 1,
452+
))
453+
}
407454
}
408455
'$' => Some(self.handle_dollars(&node_ast.line_offsets)),
409456
'|' if self.options.extension.spoiler => Some(self.handle_delim(b'|')),
@@ -430,11 +477,17 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
430477
startpos += n;
431478
}
432479

433-
Some(self.make_inline(
434-
NodeValue::Text(String::from_utf8(contents).unwrap()),
435-
startpos,
436-
endpos - 1,
437-
))
480+
// Don't create empty text nodes - this can happen after trimming trailing
481+
// whitespace and would cause sourcepos underflow in endpos - 1
482+
if !contents.is_empty() {
483+
Some(self.make_inline(
484+
NodeValue::Text(String::from_utf8(contents).unwrap()),
485+
startpos,
486+
endpos - 1,
487+
))
488+
} else {
489+
None
490+
}
438491
}
439492
};
440493

@@ -1895,6 +1948,126 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> {
18951948
}
18961949
}
18971950

1951+
fn handle_inline_footnote(&mut self) -> Option<&'a AstNode<'a>> {
1952+
let startpos = self.pos;
1953+
1954+
// We're at ^, next should be [
1955+
self.pos += 2; // Skip ^[
1956+
1957+
// Find the closing ]
1958+
let mut depth = 1;
1959+
let mut endpos = self.pos;
1960+
while endpos < self.input.len() && depth > 0 {
1961+
match self.input[endpos] {
1962+
b'[' => depth += 1,
1963+
b']' => depth -= 1,
1964+
b'\\' if endpos + 1 < self.input.len() => {
1965+
endpos += 1; // Skip escaped character
1966+
}
1967+
_ => {}
1968+
}
1969+
endpos += 1;
1970+
}
1971+
1972+
if depth != 0 {
1973+
// No matching closing bracket, treat as regular text
1974+
self.pos = startpos + 1;
1975+
return Some(self.make_inline(NodeValue::Text("^".to_string()), startpos, startpos));
1976+
}
1977+
1978+
// endpos is now one past the ], so adjust
1979+
endpos -= 1;
1980+
1981+
// Extract the content
1982+
let content = &self.input[self.pos..endpos];
1983+
1984+
// Empty inline footnote should not parse
1985+
if content.is_empty() {
1986+
self.pos = startpos + 1;
1987+
return Some(self.make_inline(NodeValue::Text("^".to_string()), startpos, startpos));
1988+
}
1989+
1990+
// Generate unique name
1991+
let name = self.footnote_defs.next_name();
1992+
1993+
// Create the footnote reference node
1994+
let ref_node = self.make_inline(
1995+
NodeValue::FootnoteReference(NodeFootnoteReference {
1996+
name: name.clone(),
1997+
ref_num: 0,
1998+
ix: 0,
1999+
}),
2000+
startpos,
2001+
endpos,
2002+
);
2003+
2004+
// Parse the content as inlines
2005+
let def_node = self.arena.alloc(Node::new(RefCell::new(Ast::new(
2006+
NodeValue::FootnoteDefinition(NodeFootnoteDefinition {
2007+
name: name.clone(),
2008+
total_references: 0,
2009+
}),
2010+
(self.line, 1).into(),
2011+
))));
2012+
2013+
// Create a paragraph to hold the inline content
2014+
let mut para_ast = Ast::new(
2015+
NodeValue::Paragraph,
2016+
(1, 1).into(), // Use line 1 as base
2017+
);
2018+
// Build line_offsets by scanning for newlines in the content
2019+
let mut line_offsets = vec![0];
2020+
for (i, &byte) in content.iter().enumerate() {
2021+
if byte == b'\n' {
2022+
line_offsets.push(i + 1);
2023+
}
2024+
}
2025+
para_ast.line_offsets = line_offsets;
2026+
let para_node = self.arena.alloc(Node::new(RefCell::new(para_ast)));
2027+
def_node.append(para_node);
2028+
2029+
// Parse the content recursively as inlines
2030+
let delimiter_arena = Arena::new();
2031+
let mut subj = Subject::new(
2032+
self.arena,
2033+
self.options,
2034+
content,
2035+
1, // Use line 1 to match the paragraph's sourcepos
2036+
self.refmap,
2037+
self.footnote_defs,
2038+
&delimiter_arena,
2039+
);
2040+
2041+
while subj.parse_inline(para_node) {}
2042+
subj.process_emphasis(0);
2043+
while subj.pop_bracket() {}
2044+
2045+
// Check if the parsed content is empty or contains only whitespace
2046+
// This handles whitespace-only content, null bytes, etc. generically
2047+
let has_non_whitespace_content = para_node.children().any(|child| {
2048+
let child_data = child.data.borrow();
2049+
match &child_data.value {
2050+
NodeValue::Text(text) => !text.trim().is_empty(),
2051+
NodeValue::SoftBreak | NodeValue::LineBreak => false,
2052+
_ => true, // Any other node type (link, emphasis, etc.) counts as content
2053+
}
2054+
});
2055+
2056+
if !has_non_whitespace_content {
2057+
// Content is empty or whitespace-only after parsing, treat as literal text
2058+
self.pos = startpos + 1;
2059+
return Some(self.make_inline(NodeValue::Text("^".to_string()), startpos, startpos));
2060+
}
2061+
2062+
// Store the footnote definition
2063+
self.footnote_defs.add_definition(def_node);
2064+
2065+
// Move position past the closing ]
2066+
self.pos = endpos + 1;
2067+
2068+
Some(ref_node)
2069+
}
2070+
18982071
pub fn link_label(&mut self) -> Option<&str> {
18992072
let startpos = self.pos;
19002073

src/parser/mod.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ pub struct BrokenLinkReference<'l> {
141141
pub struct Parser<'a, 'o, 'c> {
142142
arena: &'a Arena<AstNode<'a>>,
143143
refmap: RefMap,
144+
footnote_defs: inlines::FootnoteDefs<'a>,
144145
root: &'a AstNode<'a>,
145146
current: &'a AstNode<'a>,
146147
line_number: usize,
@@ -327,6 +328,25 @@ pub struct ExtensionOptions<'c> {
327328
#[cfg_attr(feature = "bon", builder(default))]
328329
pub footnotes: bool,
329330

331+
/// Enables the inline footnotes extension.
332+
///
333+
/// Allows inline footnote syntax `^[content]` where the content can include
334+
/// inline markup. Inline footnotes are automatically converted to regular
335+
/// footnotes with auto-generated names and share the same numbering sequence.
336+
///
337+
/// Requires `footnotes` to be enabled as well.
338+
///
339+
/// ```
340+
/// # use comrak::{markdown_to_html, Options};
341+
/// let mut options = Options::default();
342+
/// options.extension.footnotes = true;
343+
/// options.extension.inline_footnotes = true;
344+
/// assert_eq!(markdown_to_html("Hi^[An inline note].\n", &options),
345+
/// "<p>Hi<sup class=\"footnote-ref\"><a href=\"#fn-__inline_1\" id=\"fnref-__inline_1\" data-footnote-ref>1</a></sup>.</p>\n<section class=\"footnotes\" data-footnotes>\n<ol>\n<li id=\"fn-__inline_1\">\n<p>An inline note <a href=\"#fnref-__inline_1\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n</li>\n</ol>\n</section>\n");
346+
/// ```
347+
#[cfg_attr(feature = "bon", builder(default))]
348+
pub inline_footnotes: bool,
349+
330350
/// Enables the description lists extension.
331351
///
332352
/// Each term must be defined in one paragraph, followed by a blank line,
@@ -1192,6 +1212,7 @@ where
11921212
Parser {
11931213
arena,
11941214
refmap: RefMap::new(),
1215+
footnote_defs: inlines::FootnoteDefs::new(),
11951216
root,
11961217
current: root,
11971218
line_number: 0,
@@ -2656,6 +2677,15 @@ where
26562677
};
26572678

26582679
self.process_inlines();
2680+
2681+
// Append auto-generated inline footnote definitions
2682+
if self.options.extension.footnotes && self.options.extension.inline_footnotes {
2683+
let inline_defs = self.footnote_defs.definitions();
2684+
for def in inline_defs.iter() {
2685+
self.root.append(*def);
2686+
}
2687+
}
2688+
26592689
if self.options.extension.footnotes {
26602690
self.process_footnotes();
26612691
}
@@ -2817,6 +2847,7 @@ where
28172847
content,
28182848
node_data.sourcepos.start.line,
28192849
&mut self.refmap,
2850+
&self.footnote_defs,
28202851
&delimiter_arena,
28212852
);
28222853

@@ -3114,6 +3145,7 @@ where
31143145
content,
31153146
0, // XXX -1 in upstream; never used?
31163147
&mut self.refmap,
3148+
&self.footnote_defs,
31173149
&delimiter_arena,
31183150
);
31193151

src/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod greentext;
2020
mod header_ids;
2121
#[path = "tests/html.rs"]
2222
mod html_;
23+
mod inline_footnotes;
2324
mod math;
2425
mod multiline_block_quotes;
2526
mod options;

0 commit comments

Comments
 (0)