Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ memchr = "2"
pest = "2"
pest_derive = "2"
shell-words = { version = "1.0", optional = true }
emojis = { version = "0.5.2", optional = true }

[dev-dependencies]
timebomb = "0.1.2"
Expand All @@ -41,6 +42,7 @@ propfuzz = "0.0.1"
[features]
default = ["cli", "syntect"]
cli = ["clap", "shell-words", "xdg"]
shortcodes = ["emojis"]

[target.'cfg(all(not(windows), not(target_arch="wasm32")))'.dependencies]
xdg = { version = "^2.1", optional = true }
Expand Down
16 changes: 16 additions & 0 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use nodes::{
AstNode, ListDelimType, ListType, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink,
NodeValue,
};
#[cfg(feature = "shortcodes")]
use parser::shortcodes::NodeShortCode;
use parser::ComrakOptions;
use scanners;
use std;
Expand Down Expand Up @@ -335,6 +337,8 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
NodeValue::Superscript => self.format_superscript(),
NodeValue::Link(ref nl) => return self.format_link(node, nl, entering),
NodeValue::Image(ref nl) => self.format_image(nl, allow_wrap, entering),
#[cfg(feature = "shortcodes")]
NodeValue::ShortCode(ref ne) => self.format_shortcode(ne, entering),
NodeValue::Table(..) => self.format_table(entering),
NodeValue::TableRow(..) => self.format_table_row(entering),
NodeValue::TableCell => self.format_table_cell(node, entering),
Expand Down Expand Up @@ -655,6 +659,18 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

/// Writes a shortcode node back out as `:name:` CommonMark text.
///
/// The opening colon is written on the entering visit; the name (when the
/// node has one) and the closing colon are written on the exiting visit.
#[cfg(feature = "shortcodes")]
fn format_shortcode(&mut self, ne: &NodeShortCode, entering: bool) {
    // Only the exit visit carries the name; it must precede the closing colon.
    if !entering {
        if let Some(name) = ne.shortcode() {
            self.output(name.as_bytes(), false, Escaping::Literal);
        }
    }
    // Both visits finish by emitting exactly one colon.
    write!(self, ":").unwrap();
}

fn format_table(&mut self, entering: bool) {
if entering {
self.custom_escape = Some(table_escape);
Expand Down
13 changes: 13 additions & 0 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ use std::io::{self, Write};
use std::str;
use strings::build_opening_tag;

#[cfg(feature = "shortcodes")]
extern crate emojis;

/// Formats an AST as HTML, modified by the given options.
pub fn format_document<'a>(
root: &'a AstNode<'a>,
Expand Down Expand Up @@ -692,6 +695,16 @@ impl<'o> HtmlFormatter<'o> {
self.output.write_all(b"\" />")?;
}
}
#[cfg(feature = "shortcodes")]
NodeValue::ShortCode(ref emoji) => {
if entering {
if self.options.extension.shortcodes {
if let Some(emoji) = emoji.emoji() {
self.output.write_all(emoji.as_bytes())?;
}
}
}
}
NodeValue::Table(..) => {
if entering {
self.cr()?;
Expand Down
2 changes: 2 additions & 0 deletions src/lexer.pest
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ scheme_rule = { scheme ~ ":" }
autolink_uri = { scheme ~ ":" ~ (!('\x00'..'\x20' | "<" | ">") ~ ANY)* ~ ">" }
autolink_email = { ('a'..'z' | 'A'..'Z' | '0'..'9' | "." | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~" | "-")+ ~ "@" ~ ('a'..'z' | 'A'..'Z' | '0'..'9') ~ (('a'..'z' | 'A'..'Z' | '0'..'9' | "-"){0,61} ~ ('a'..'z' | 'A'..'Z' | '0'..'9')?)? ~ ("." ~ (('a'..'z' | 'A'..'Z' | '0'..'9' | "-"){0,61} ~ ('a'..'z' | 'A'..'Z' | '0'..'9')?)?)* ~ ">" }

// An emoji shortcode such as `:smile:`, `:+1:` or `:100:`. Names may contain
// ASCII letters, digits, `-`, `_` and `+`; whether a matched name is a real
// emoji is decided by the caller, not by this rule.
shortcode_rule = { ":" ~ ('A'..'Z' | 'a'..'z' | '0'..'9' | "-" | "_" | "+")+ ~ ":" }

spacechars = { space_char+ }

escaped_char = _{ "\\" ~ ANY }
Expand Down
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ struct Cli {
#[arg(long = "unsafe")]
unsafe_: bool,

/// Translate gemojis into UTF8 characters
#[arg(long)]
gemojis: bool,

/// Escape raw HTML instead of clobbering it
#[arg(long)]
escape: bool,
Expand Down Expand Up @@ -203,6 +207,8 @@ fn main() -> Result<(), Box<dyn Error>> {
footnotes: exts.contains(&Extension::Footnotes),
description_lists: exts.contains(&Extension::DescriptionLists),
front_matter_delimiter: cli.front_matter_delimiter,
#[cfg(feature = "shortcodes")]
shortcodes: cli.gemojis,
},
parse: ComrakParseOptions {
smart: cli.smart,
Expand Down
25 changes: 25 additions & 0 deletions src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
use arena_tree::Node;
use std::cell::RefCell;

#[cfg(feature = "shortcodes")]
use parser::shortcodes::NodeShortCode;

/// The core AST node enum.
#[derive(Debug, Clone)]
pub enum NodeValue {
Expand Down Expand Up @@ -146,6 +149,10 @@ pub enum NodeValue {

/// **Inline**. A footnote reference; the `Vec<u8>` is the referent footnote's name.
FootnoteReference(Vec<u8>),

#[cfg(feature = "shortcodes")]
/// **Inline**. An emoji character generated from a shortcode. Enable with feature "shortcodes".
ShortCode(NodeShortCode),
}

/// Alignment of a single table cell.
Expand Down Expand Up @@ -449,6 +456,9 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
NodeValue::DescriptionTerm | NodeValue::DescriptionDetails
),

#[cfg(feature = "shortcodes")]
NodeValue::ShortCode(..) => !child.block(),

NodeValue::Paragraph
| NodeValue::Heading(..)
| NodeValue::Emph
Expand All @@ -460,6 +470,20 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {

NodeValue::TableRow(..) => matches!(*child, NodeValue::TableCell),

#[cfg(not(feature = "shortcodes"))]
NodeValue::TableCell => matches!(
*child,
NodeValue::Text(..)
| NodeValue::Code(..)
| NodeValue::Emph
| NodeValue::Strong
| NodeValue::Link(..)
| NodeValue::Image(..)
| NodeValue::Strikethrough
| NodeValue::HtmlInline(..)
),

#[cfg(feature = "shortcodes")]
NodeValue::TableCell => matches!(
*child,
NodeValue::Text(..)
Expand All @@ -468,6 +492,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
| NodeValue::Strong
| NodeValue::Link(..)
| NodeValue::Image(..)
| NodeValue::ShortCode(..)
| NodeValue::Strikethrough
| NodeValue::HtmlInline(..)
),
Expand Down
34 changes: 34 additions & 0 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use arena_tree::Node;
use ctype::{ispunct, isspace};
use entity;
use nodes::{Ast, AstNode, NodeCode, NodeLink, NodeValue};
#[cfg(feature = "shortcodes")]
use parser::shortcodes::NodeShortCode;
use parser::{unwrap_into_2, unwrap_into_copy, AutolinkType, Callback, ComrakOptions, Reference};
use scanners;
use std::cell::{Cell, RefCell};
Expand Down Expand Up @@ -91,6 +93,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
if options.extension.superscript {
s.special_chars[b'^' as usize] = true;
}
#[cfg(feature = "shortcodes")]
if options.extension.shortcodes {
s.special_chars[b':' as usize] = true;
}
for &c in &[b'"', b'\'', b'.', b'-'] {
s.smart_chars[c as usize] = true;
}
Expand All @@ -113,6 +119,8 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
'\\' => Some(self.handle_backslash()),
'&' => Some(self.handle_entity()),
'<' => Some(self.handle_pointy_brace()),
#[cfg(feature = "shortcodes")]
':' if self.options.extension.shortcodes => Some(self.handle_colons()),
'*' | '_' | '\'' | '"' => Some(self.handle_delim(c as u8)),
'-' => Some(self.handle_hyphen()),
'.' => Some(self.handle_period()),
Expand Down Expand Up @@ -849,6 +857,23 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
}
}

/// Handles a `:` found at the current position.
///
/// If the scanner reports a shortcode match here and the name between the
/// colons passes `NodeShortCode::is_valid`, the whole match is consumed and a
/// shortcode node is returned. Otherwise only the colon itself is consumed and
/// returned as a literal text node.
#[cfg(feature = "shortcodes")]
pub fn handle_colons(&mut self) -> &'a AstNode<'a> {
    let matched = scanners::shortcode(&self.input[self.pos..]);

    if let Some(matchlen) = matched {
        // Strip the leading and trailing colon to get the bare name.
        let name_start = self.pos + 1;
        let name_end = name_start + matchlen - 2;
        let name = &self.input[name_start..name_end];

        if NodeShortCode::is_valid(name.to_vec()) {
            let node = make_emoji(self.arena, name);
            self.pos += matchlen;
            return node;
        }
    }

    // No usable shortcode: step over the colon and emit it as plain text.
    self.pos += 1;
    make_inline(self.arena, NodeValue::Text(b":".to_vec()))
}

pub fn handle_pointy_brace(&mut self) -> &'a AstNode<'a> {
self.pos += 1;

Expand Down Expand Up @@ -1198,3 +1223,12 @@ fn make_autolink<'a>(
));
inl
}

/// Creates an inline `NodeValue::ShortCode` node (via `make_inline`) from the
/// given shortcode bytes, which are copied into the new node.
#[cfg(feature = "shortcodes")]
fn make_emoji<'a>(arena: &'a Arena<AstNode<'a>>, shortcode: &[u8]) -> &'a AstNode<'a> {
    let code = NodeShortCode::from(shortcode.to_vec());
    make_inline(arena, NodeValue::ShortCode(code))
}
18 changes: 18 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod autolink;
mod inlines;
#[cfg(feature = "shortcodes")]
pub mod shortcodes;
mod table;

use adapters::SyntaxHighlighterAdapter;
Expand Down Expand Up @@ -309,6 +311,22 @@ pub struct ComrakExtensionOptions {
/// assert_eq!(&String::from_utf8(buf).unwrap(), input);
/// ```
pub front_matter_delimiter: Option<String>,

#[cfg(feature = "shortcodes")]
/// Available if "shortcodes" feature is enabled. Phrases wrapped inside of ':' blocks will be
/// replaced with emojis.
///
/// ```
/// # use comrak::{markdown_to_html, ComrakOptions};
/// let mut options = ComrakOptions::default();
/// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options),
/// "<p>Happy Friday! :smile:</p>\n");
///
/// options.extension.shortcodes = true;
/// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options),
/// "<p>Happy Friday! 😄</p>\n");
/// ```
pub shortcodes: bool,
}

#[derive(Default, Debug, Clone)]
Expand Down
34 changes: 34 additions & 0 deletions src/parser/shortcodes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
extern crate emojis;

use std::str;

/// The details of an inline emoji shortcode node.
///
/// Holds the textual shortcode the node was built from; `emoji()` resolves that
/// name to the emoji string through the `emojis` crate.
#[derive(Debug, Clone)]
pub struct NodeShortCode {
/// The shortcode that is translated into an emoji, or `None` when no
/// shortcode is stored.
shortcode: Option<String>,
}

impl NodeShortCode {
    /// Returns `true` when `value`, converted into a `NodeShortCode`, resolves
    /// to an emoji.
    pub fn is_valid(value: Vec<u8>) -> bool {
        Self::from(value).emoji().is_some()
    }

    /// Returns an owned copy of the stored shortcode, if there is one.
    pub fn shortcode(&self) -> Option<String> {
        self.shortcode.as_ref().cloned()
    }

    /// Looks the stored shortcode up with `emojis::get_by_shortcode` and
    /// returns the matching emoji string, or `None` when there is no stored
    /// shortcode or the lookup finds nothing.
    pub fn emoji(&self) -> Option<&'static str> {
        let name = self.shortcode.as_deref()?;
        emojis::get_by_shortcode(name).map(|found| found.as_str())
    }
}

impl From<Vec<u8>> for NodeShortCode {
    /// Builds a node from raw shortcode bytes.
    ///
    /// Valid UTF-8 is stored as the shortcode. Bytes that are not valid UTF-8
    /// produce a node with no shortcode, so `emoji()` and `is_valid` report
    /// "no emoji" for them.
    fn from(value: Vec<u8>) -> Self {
        // This conversion is callable from safe code with arbitrary bytes, so
        // the input must be validated: building a `str` from invalid UTF-8
        // with `from_utf8_unchecked` is undefined behaviour.
        Self {
            shortcode: str::from_utf8(&value).ok().map(String::from),
        }
    }
}
6 changes: 6 additions & 0 deletions src/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ pub fn link_title(line: &[u8]) -> Option<usize> {
search(Rule::link_title, line)
}

/// Runs the `shortcode_rule` grammar rule against `line` and returns the
/// result of `search` unchanged.
///
/// The inline parser treats `Some(n)` as the byte length of a `:name:` match
/// beginning at the start of `line` (presumably `None` means no match —
/// confirm against `search`).
#[cfg(feature = "shortcodes")]
#[inline(always)]
pub fn shortcode(line: &[u8]) -> Option<usize> {
search(Rule::shortcode_rule, line)
}

#[inline(always)]
pub fn table_start(line: &[u8]) -> Option<usize> {
search(Rule::table_start, line)
Expand Down
Loading