Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ FLAGS:
--github-pre-lang Use GitHub-style <pre lang> for code blocks
--hardbreaks Treat newlines as hard line breaks
-h, --help Prints help information
--safe Suppress raw HTML and dangerous URLs
--smart Use smart punctuation
-V, --version Prints version information

Expand Down Expand Up @@ -113,6 +114,8 @@ assert_eq!(

As with [`cmark-gfm`](https://github.com/github/cmark#security), Comrak will pass through inline HTML, dangerous links, anything you can imagine — it only performs Markdown to HTML conversion per the CommonMark/GFM spec. We recommend the use of a sanitisation library like [`ammonia`](https://github.com/notriddle/ammonia) configured specific to your needs.

You can also disable this potentially unsafe feature by using the `safe` option (or `--safe` at the command-line).

## Extensions

Comrak supports the five extensions to CommonMark defined in the
Expand Down
21 changes: 17 additions & 4 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use ctype::isspace;
use nodes::{AstNode, ListType, NodeValue, TableAlignment};
use parser::ComrakOptions;
use regex::Regex;
use scanners;
use std::borrow::Cow;
use std::cell::Cell;
use std::collections::HashSet;
Expand Down Expand Up @@ -149,6 +150,10 @@ fn tagfilter_block(input: &[u8], o: &mut Write) -> io::Result<()> {
Ok(())
}

fn dangerous_url(input: &[u8]) -> bool {
scanners::dangerous_url(input).is_some()
}

impl<'o> HtmlFormatter<'o> {
fn new(options: &'o ComrakOptions, output: &'o mut WriteWithLast<'o>) -> Self {
HtmlFormatter {
Expand Down Expand Up @@ -416,7 +421,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::HtmlBlock(ref nhb) => if entering {
try!(self.cr());
if self.options.ext_tagfilter {
if self.options.safe {
try!(self.output.write_all(b"<!-- raw HTML omitted -->"));
} else if self.options.ext_tagfilter {
try!(tagfilter_block(&nhb.literal, &mut self.output));
} else {
try!(self.output.write_all(&nhb.literal));
Expand Down Expand Up @@ -472,7 +479,9 @@ impl<'o> HtmlFormatter<'o> {
try!(self.output.write_all(b"</code>"));
},
NodeValue::HtmlInline(ref literal) => if entering {
if self.options.ext_tagfilter && tagfilter(literal) {
if self.options.safe {
try!(self.output.write_all(b"<!-- raw HTML omitted -->"));
} else if self.options.ext_tagfilter && tagfilter(literal) {
try!(self.output.write_all(b"&lt;"));
try!(self.output.write_all(&literal[1..]));
} else {
Expand Down Expand Up @@ -501,7 +510,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::Link(ref nl) => if entering {
try!(self.output.write_all(b"<a href=\""));
try!(self.escape_href(&nl.url));
if !self.options.safe || !dangerous_url(&nl.url) {
try!(self.escape_href(&nl.url));
}
if !nl.title.is_empty() {
try!(self.output.write_all(b"\" title=\""));
try!(self.escape(&nl.title));
Expand All @@ -512,7 +523,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::Image(ref nl) => if entering {
try!(self.output.write_all(b"<img src=\""));
try!(self.escape_href(&nl.url));
if !self.options.safe || !dangerous_url(&nl.url) {
try!(self.escape_href(&nl.url));
}
try!(self.output.write_all(b"\" alt=\""));
return Ok(true);
} else {
Expand Down
2 changes: 2 additions & 0 deletions src/lexer.pest
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,5 @@ table_cell = { ( escaped_char | !("|" | "\r" | "\n") ~ any)* }
table_start = { "|"? ~ table_marker ~ ("|" ~ table_marker)* ~ "|"? ~ table_spacechar* ~ table_newline }
table_cell_end = { "|" ~ table_spacechar* ~ table_newline? }
table_row_end = { table_spacechar* ~ table_newline }

dangerous_url = { "data:" ~ !("png" | "gif" | "jpeg" | "webp") | "javascript:" | "vbscript:" | "file:" }
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ fn main() {
.value_name("INFO")
.takes_value(true),
)
.arg(
clap::Arg::with_name("safe")
.long("safe")
.help("Suppress raw HTML and dangerous URLs"),
)
.arg(
clap::Arg::with_name("extension")
.short("e")
Expand Down Expand Up @@ -132,6 +137,7 @@ fn main() {
default_info_string: matches
.value_of("default-info-string")
.map(|e| e.to_owned()),
safe: matches.is_present("safe"),
ext_strikethrough: exts.remove("strikethrough"),
ext_tagfilter: exts.remove("tagfilter"),
ext_table: exts.remove("table"),
Expand Down
25 changes: 25 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,31 @@ pub struct ComrakOptions {
/// ```
pub default_info_string: Option<String>,

/// Disable rendering of raw HTML and potentially dangerous links.
///
/// ```
/// # use comrak::{markdown_to_html, ComrakOptions};
/// let mut options = ComrakOptions::default();
/// let input = "<script>\nalert('xyz');\n</script>\n\n\
/// Possibly <marquee>annoying</marquee>.\n\n\
/// [Dangerous](javascript:alert(document.cookie)).\n\n\
/// [Safe](http://commonmark.org).\n";
///
/// assert_eq!(markdown_to_html(input, &options),
/// "<script>\nalert(\'xyz\');\n</script>\n\
/// <p>Possibly <marquee>annoying</marquee>.</p>\n\
/// <p><a href=\"javascript:alert(document.cookie)\">Dangerous</a>.</p>\n\
/// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
///
/// options.safe = true;
/// assert_eq!(markdown_to_html(input, &options),
/// "<!-- raw HTML omitted -->\n\
/// <p>Possibly <!-- raw HTML omitted -->annoying<!-- raw HTML omitted -->.</p>\n\
/// <p><a href=\"\">Dangerous</a>.</p>\n\
/// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
/// ```
pub safe: bool,

/// Enables the
/// [strikethrough extension](https://github.github.com/gfm/#strikethrough-extension-)
/// from the GFM spec.
Expand Down
5 changes: 5 additions & 0 deletions src/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,8 @@ pub fn table_cell_end(line: &[u8]) -> Option<usize> {
pub fn table_row_end(line: &[u8]) -> Option<usize> {
search(Rule::table_row_end, line)
}

#[inline(always)]
pub fn dangerous_url(line: &[u8]) -> Option<usize> {
search(Rule::dangerous_url, line)
}
27 changes: 27 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,30 @@ fn cm_autolink_regression() {
// Testing that the cm renderer handles this case without crashing
html("<a+c:dd>", "<p><a href=\"a+c:dd\">a+c:dd</a></p>\n");
}

#[test]
fn safe() {
html_opts(
concat!(
"[data:png](data:png/x)\n\n",
"[data:gif](data:gif/x)\n\n",
"[data:jpeg](data:jpeg/x)\n\n",
"[data:webp](data:webp/x)\n\n",
"[data:malicious](data:malicious/x)\n\n",
"[javascript:malicious](javascript:malicious)\n\n",
"[vbscript:malicious](vbscript:malicious)\n\n",
"[file:malicious](file:malicious)\n\n",
),
concat!(
"<p><a href=\"data:png/x\">data:png</a></p>\n",
"<p><a href=\"data:gif/x\">data:gif</a></p>\n",
"<p><a href=\"data:jpeg/x\">data:jpeg</a></p>\n",
"<p><a href=\"data:webp/x\">data:webp</a></p>\n",
"<p><a href=\"\">data:malicious</a></p>\n",
"<p><a href=\"\">javascript:malicious</a></p>\n",
"<p><a href=\"\">vbscript:malicious</a></p>\n",
"<p><a href=\"\">file:malicious</a></p>\n",
),
|opts| opts.safe = true,
)
}