Skip to content

Commit 878e601

Browse files
authored
Merge pull request #425 from yannham/task/document-validation
AST validation
2 parents 683af1d + 7460a2f commit 878e601

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed

src/cm.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ pub fn format_document<'a>(
2020
options: &Options,
2121
output: &mut dyn Write,
2222
) -> io::Result<()> {
23+
// Formatting an ill-formed AST might lead to invalid output. However, we don't want to pay for
24+
// validation in normal workflow. As a middleground, we validate the AST in debug builds. See
25+
// https://github.com/kivikakk/comrak/issues/371.
26+
#[cfg(debug_assertions)]
27+
root.validate().unwrap_or_else(|e| {
28+
panic!("The document to format is ill-formed: {:?}", e);
29+
});
30+
2331
format_document_with_plugins(root, options, output, &Plugins::default())
2432
}
2533

src/nodes.rs

Lines changed: 75 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ pub enum NodeValue {
108108
/// in a document will be contained in a `Text` node.
109109
Text(String),
110110

111-
/// **Inline**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
111+
/// **Block**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
112112
/// The value is the symbol that was used in the brackets to mark a task item as checked, or
113113
/// None if the item is unchecked.
114114
TaskItem(Option<char>),
@@ -663,6 +663,52 @@ impl<'a> From<Ast> for AstNode<'a> {
663663
}
664664
}
665665

666+
/// Validation errors produced by [Node::validate].
667+
#[derive(Debug, Clone)]
668+
pub enum ValidationError<'a> {
669+
/// The type of a child node is not allowed in the parent node. This can happen when an inline
670+
/// node is found in a block container, a block is found in an inline node, etc.
671+
InvalidChildType {
672+
/// The parent node.
673+
parent: &'a AstNode<'a>,
674+
/// The child node.
675+
child: &'a AstNode<'a>,
676+
},
677+
}
678+
679+
impl<'a> Node<'a, RefCell<Ast>> {
680+
/// The comrak representation of a markdown node in Rust isn't strict enough to rule out
681+
/// invalid trees according to the CommonMark specification. One simple example is that block
682+
/// containers, such as lists, should only contain blocks, but it's possible to put naked
683+
/// inline text in a list item. Such invalid trees can lead comrak to generate incorrect output
684+
/// if rendered.
685+
///
686+
/// This method performs additional structural checks to ensure that a markdown AST is valid
687+
/// according to the CommonMark specification.
688+
///
689+
/// Note that those invalid trees can only be generated programmatically. Parsing markdown with
690+
/// comrak, on the other hand, should always produce a valid tree.
691+
pub fn validate(&'a self) -> Result<(), ValidationError<'a>> {
692+
let mut stack = vec![self];
693+
694+
while let Some(node) = stack.pop() {
695+
// Check that this node type is valid wrt to the type of its parent.
696+
if let Some(parent) = node.parent() {
697+
if !can_contain_type(parent, &node.data.borrow().value) {
698+
return Err(ValidationError::InvalidChildType {
699+
parent,
700+
child: node,
701+
});
702+
}
703+
}
704+
705+
stack.extend(node.children());
706+
}
707+
708+
Ok(())
709+
}
710+
}
711+
666712
pub(crate) fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
667713
node.last_child().map_or(false, |n| n.data.borrow().open)
668714
}
@@ -708,7 +754,16 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
708754
| NodeValue::Strong
709755
| NodeValue::Link(..)
710756
| NodeValue::Image(..)
711-
| NodeValue::WikiLink(..) => !child.block(),
757+
| NodeValue::WikiLink(..)
758+
| NodeValue::Strikethrough
759+
| NodeValue::Superscript
760+
| NodeValue::SpoileredText
761+
| NodeValue::Underline
762+
// XXX: this is quite a hack: the EscapedTag _contains_ whatever was
763+
// possibly going to fall into the spoiler. This should be fixed in
764+
// inlines.
765+
| NodeValue::EscapedTag(_)
766+
=> !child.block(),
712767

713768
NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)),
714769

@@ -727,22 +782,30 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
727782
| NodeValue::HtmlInline(..)
728783
| NodeValue::Math(..)
729784
| NodeValue::WikiLink(..)
785+
| NodeValue::FootnoteReference(..)
786+
| NodeValue::Superscript
787+
| NodeValue::SpoileredText
788+
| NodeValue::Underline
730789
),
731790

732791
#[cfg(feature = "shortcodes")]
733792
NodeValue::TableCell => matches!(
734793
*child,
735794
NodeValue::Text(..)
736-
| NodeValue::Code(..)
737-
| NodeValue::Emph
738-
| NodeValue::Strong
739-
| NodeValue::Link(..)
740-
| NodeValue::Image(..)
741-
| NodeValue::ShortCode(..)
742-
| NodeValue::Strikethrough
743-
| NodeValue::HtmlInline(..)
744-
| NodeValue::Math(..)
745-
| NodeValue::WikiLink(..)
795+
| NodeValue::Code(..)
796+
| NodeValue::Emph
797+
| NodeValue::Strong
798+
| NodeValue::Link(..)
799+
| NodeValue::Image(..)
800+
| NodeValue::Strikethrough
801+
| NodeValue::HtmlInline(..)
802+
| NodeValue::Math(..)
803+
| NodeValue::WikiLink(..)
804+
| NodeValue::FootnoteReference(..)
805+
| NodeValue::Superscript
806+
| NodeValue::SpoileredText
807+
| NodeValue::Underline
808+
| NodeValue::ShortCode(..)
746809
),
747810

748811
NodeValue::MultilineBlockQuote(_) => {

src/tests/commonmark.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,9 @@ fn commonmark_avoids_spurious_backslash() {
2626

2727
let p1 = ast(NodeValue::Paragraph);
2828
p1.append(ast(NodeValue::Text("Line 1".to_owned())));
29+
p1.append(ast(NodeValue::LineBreak));
2930
root.append(p1);
3031

31-
root.append(ast(NodeValue::LineBreak));
32-
3332
let p2 = ast(NodeValue::Paragraph);
3433
p2.append(ast(NodeValue::Text("Line 2".to_owned())));
3534
root.append(p2);

0 commit comments

Comments
 (0)