Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ mod tests;

pub use cm::format_document as format_commonmark;
pub use html::format_document as format_html;
pub use parser::{parse_document, ComrakOptions};
pub use parser::{parse_document, parse_document_with_broken_link_callback, ComrakOptions};
pub use typed_arena::Arena;
pub use html::Anchorizer;

Expand Down
26 changes: 22 additions & 4 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use unicode_categories::UnicodeCategories;
const MAXBACKTICKS: usize = 80;
const MAX_LINK_LABEL_LENGTH: usize = 1000;

pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i> {
pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i, 'c, 'subj> {
pub arena: &'a Arena<AstNode<'a>>,
options: &'o ComrakOptions,
pub input: &'i [u8],
Expand All @@ -29,6 +29,10 @@ pub struct Subject<'a: 'd, 'r, 'o, 'd, 'i> {
special_chars: [bool; 256],
skip_chars: [bool; 256],
smart_chars: [bool; 256],
// Need to borrow the callback from the parser only for the lifetime of the Subject, 'subj, and
// then give it back when the Subject goes out of scope. Needs to be a mutable reference so we
// can call the FnMut and let it mutate its captured variables.
callback: Option<&'subj mut &'c mut dyn FnMut(&[u8]) -> Option<(Vec<u8>, Vec<u8>)>>,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

jaw drop

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heh. I totally agree that this is super ugly! I spent a long time staring at it trying to figure out if it could be made nicer. Unfortunately, it can't because of two things:

  1. we need a reference to the closure because you can only have one instance of a mutable reference at any given time. You can further borrow that reference (as we have done here); the only other alternative is moving the reference around everywhere, which is far more ugly!
  2. you can only use a mutable reference through another mutable reference. You can't use it through an immutable reference. So &'a &'b mut would not work.

It's ugly, but it gives us memory safety so I guess it's okay! 😭 😆

}

pub struct Delimiter<'a: 'd, 'd> {
Expand All @@ -50,13 +54,14 @@ struct Bracket<'a: 'd, 'd> {
bracket_after: bool,
}

impl<'a, 'r, 'o, 'd, 'i> Subject<'a, 'r, 'o, 'd, 'i> {
impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
pub fn new(
arena: &'a Arena<AstNode<'a>>,
options: &'o ComrakOptions,
input: &'i [u8],
refmap: &'r mut HashMap<Vec<u8>, Reference>,
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
callback: Option<&'subj mut &'c mut dyn FnMut(&[u8]) -> Option<(Vec<u8>, Vec<u8>)>>,
) -> Self {
let mut s = Subject {
arena: arena,
Expand All @@ -72,6 +77,7 @@ impl<'a, 'r, 'o, 'd, 'i> Subject<'a, 'r, 'o, 'd, 'i> {
special_chars: [false; 256],
skip_chars: [false; 256],
smart_chars: [false; 256],
callback: callback,
};
for &c in &[
b'\n', b'\r', b'_', b'*', b'"', b'`', b'\\', b'&', b'<', b'[', b']', b'!',
Expand Down Expand Up @@ -893,6 +899,8 @@ impl<'a, 'r, 'o, 'd, 'i> Subject<'a, 'r, 'o, 'd, 'i> {
let is_image = self.brackets[brackets_len - 1].image;
let after_link_text_pos = self.pos;

// Try to find a link destination within parentheses

let mut sps = 0;
let mut url: &[u8] = &[];
let mut n: usize = 0;
Expand Down Expand Up @@ -925,6 +933,8 @@ impl<'a, 'r, 'o, 'd, 'i> Subject<'a, 'r, 'o, 'd, 'i> {
}
}

// Try to see if this is a reference link

let (mut lab, mut found_label) = match self.link_label() {
Some(lab) => (lab.to_vec(), true),
None => (vec![], false),
Expand All @@ -939,13 +949,21 @@ impl<'a, 'r, 'o, 'd, 'i> Subject<'a, 'r, 'o, 'd, 'i> {
found_label = true;
}

let reff: Option<Reference> = if found_label {
lab = strings::normalize_label(&lab);
// Need to normalize both to lookup in refmap and to call callback
lab = strings::normalize_label(&lab);
let mut reff = if found_label {
self.refmap.get(&lab).cloned()
} else {
None
};

// Attempt to use the provided broken link callback if a reference cannot be resolved
if reff.is_none() {
if let Some(ref mut callback) = self.callback {
reff = callback(&lab).map(|(url, title)| Reference {url: url, title: title});
}
}

if let Some(reff) = reff {
self.close_bracket_match(is_image, reff.url.clone(), reff.title.clone());
return None;
Expand Down
63 changes: 59 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,53 @@ pub fn parse_document<'a>(
arena: &'a Arena<AstNode<'a>>,
buffer: &str,
options: &ComrakOptions,
) -> &'a AstNode<'a> {
parse_document_with_broken_link_callback(arena, buffer, options, None)
}

/// Parse a Markdown document to an AST.
///
/// In case the parser encounters any potential links that have a broken reference (e.g. `[foo]`
/// when there is no `[foo]: url` entry at the bottom) the provided callback will be called with
/// the reference name. If the callback returns `Some`, the returned pair is used as the link
/// destination and title; if it returns `None`, the reference is left unresolved.
///
/// ```
/// extern crate comrak;
/// use comrak::{Arena, parse_document_with_broken_link_callback, format_html, ComrakOptions};
/// use comrak::nodes::{AstNode, NodeValue};
///
/// # fn main() -> std::io::Result<()> {
/// // The returned nodes are created in the supplied Arena, and are bound by its lifetime.
/// let arena = Arena::new();
///
/// let root = parse_document_with_broken_link_callback(
/// &arena,
/// "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.",
/// &ComrakOptions::default(),
/// Some(&mut |link_ref: &[u8]| match link_ref {
/// b"foo" => Some((
/// b"https://www.rust-lang.org/".to_vec(),
/// b"The Rust Language".to_vec(),
/// )),
/// _ => None,
/// }),
/// );
///
/// let mut output = Vec::new();
/// format_html(root, &ComrakOptions::default(), &mut output)?;
/// let output_str = std::str::from_utf8(&output).expect("invalid UTF-8");
/// assert_eq!(output_str, "<h1>Cool input!</h1>\n<p>Wow look at this cool \
/// <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \
/// A [broken link] renders as text.</p>\n");
/// # Ok(())
/// # }
/// ```
pub fn parse_document_with_broken_link_callback<'a, 'c>(
arena: &'a Arena<AstNode<'a>>,
buffer: &str,
options: &ComrakOptions,
callback: Option<&'c mut dyn FnMut(&[u8]) -> Option<(Vec<u8>, Vec<u8>)>>,
) -> &'a AstNode<'a> {
let root: &'a AstNode<'a> = arena.alloc(Node::new(RefCell::new(Ast {
value: NodeValue::Document,
Expand All @@ -47,12 +94,12 @@ pub fn parse_document<'a>(
open: true,
last_line_blank: false,
})));
let mut parser = Parser::new(arena, root, options);
let mut parser = Parser::new(arena, root, options, callback);
parser.feed(buffer);
parser.finish()
}

pub struct Parser<'a, 'o> {
pub struct Parser<'a, 'o, 'c> {
arena: &'a Arena<AstNode<'a>>,
refmap: HashMap<Vec<u8>, Reference>,
root: &'a AstNode<'a>,
Expand All @@ -67,6 +114,7 @@ pub struct Parser<'a, 'o> {
partially_consumed_tab: bool,
last_line_length: usize,
options: &'o ComrakOptions,
callback: Option<&'c mut dyn FnMut(&[u8]) -> Option<(Vec<u8>, Vec<u8>)>>,
}

#[derive(Default, Debug, Clone)]
Expand Down Expand Up @@ -340,12 +388,13 @@ struct FootnoteDefinition<'a> {
node: &'a AstNode<'a>,
}

impl<'a, 'o> Parser<'a, 'o> {
impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
fn new(
arena: &'a Arena<AstNode<'a>>,
root: &'a AstNode<'a>,
options: &'o ComrakOptions,
) -> Parser<'a, 'o> {
callback: Option<&'c mut dyn FnMut(&[u8]) -> Option<(Vec<u8>, Vec<u8>)>>,
) -> Self {
Parser {
arena: arena,
refmap: HashMap::new(),
Expand All @@ -361,6 +410,7 @@ impl<'a, 'o> Parser<'a, 'o> {
partially_consumed_tab: false,
last_line_length: 0,
options: options,
callback: callback,
}
}

Expand Down Expand Up @@ -1262,6 +1312,7 @@ impl<'a, 'o> Parser<'a, 'o> {
content,
&mut self.refmap,
&delimiter_arena,
self.callback.as_mut(),
);

while subj.parse_inline(node) {}
Expand Down Expand Up @@ -1355,10 +1406,12 @@ impl<'a, 'o> Parser<'a, 'o> {
let mut this_bracket = false;
loop {
match n.data.borrow_mut().value {
// Join adjacent text nodes together
NodeValue::Text(ref mut root) => {
let ns = match n.next_sibling() {
Some(ns) => ns,
_ => {
// Post-process once we are finished joining text nodes
self.postprocess_text_node(n, root);
break;
}
Expand All @@ -1370,6 +1423,7 @@ impl<'a, 'o> Parser<'a, 'o> {
ns.detach();
}
_ => {
// Post-process once we are finished joining text nodes
self.postprocess_text_node(n, root);
break;
}
Expand Down Expand Up @@ -1456,6 +1510,7 @@ impl<'a, 'o> Parser<'a, 'o> {
content,
&mut self.refmap,
&delimiter_arena,
self.callback.as_mut(),
);

let mut lab: Vec<u8> = match subj.link_label() {
Expand Down
12 changes: 6 additions & 6 deletions src/parser/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use std::cell::RefCell;
use std::cmp::min;
use strings::trim;

pub fn try_opening_block<'a, 'o>(
parser: &mut Parser<'a, 'o>,
pub fn try_opening_block<'a, 'o, 'c>(
parser: &mut Parser<'a, 'o, 'c>,
container: &'a AstNode<'a>,
line: &[u8],
) -> Option<(&'a AstNode<'a>, bool)> {
Expand All @@ -23,8 +23,8 @@ pub fn try_opening_block<'a, 'o>(
}
}

fn try_opening_header<'a, 'o>(
parser: &mut Parser<'a, 'o>,
fn try_opening_header<'a, 'o, 'c>(
parser: &mut Parser<'a, 'o, 'c>,
container: &'a AstNode<'a>,
line: &[u8],
) -> Option<(&'a AstNode<'a>, bool)> {
Expand Down Expand Up @@ -74,8 +74,8 @@ fn try_opening_header<'a, 'o>(
Some((table, true))
}

fn try_opening_row<'a, 'o>(
parser: &mut Parser<'a, 'o>,
fn try_opening_row<'a, 'o, 'c>(
parser: &mut Parser<'a, 'o, 'c>,
container: &'a AstNode<'a>,
alignments: &[TableAlignment],
line: &[u8],
Expand Down