Skip to content

Commit 1a38822

Browse files
authored
Merge pull request #687 from kivikakk/push-nzsmltxklmnr
Write out "%" as "%25" in hrefs when it is not part of a percent-encoded sequence.
2 parents 86661fe + 5cc8d40 commit 1a38822

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

src/html.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,8 +1699,8 @@ pub fn escape(output: &mut dyn Write, buffer: &str) -> fmt::Result {
16991699
/// * U+0027 APOSTROPHE ' is rendered as &#x27;
17001700
/// * Alphanumeric and a range of non-URL safe characters.
17011701
///
1702-
/// The inclusion of characters like "%" in those which are not escaped is
1703-
/// explained somewhat here:
1702+
/// Note that we leave "%" alone if it is followed by two hexdigits.
1703+
/// See:
17041704
///
17051705
/// <https://github.com/github/cmark-gfm/blob/c32ef78bae851cb83b7ad52d0fbff880acdcd44a/src/houdini_href_e.c#L7-L31>
17061706
///
@@ -1721,7 +1721,7 @@ pub fn escape(output: &mut dyn Write, buffer: &str) -> fmt::Result {
17211721
/// or `https` are permitted.
17221722
pub fn escape_href(output: &mut dyn Write, buffer: &str, relaxed_ipv6: bool) -> fmt::Result {
17231723
const HREF_SAFE: [bool; 256] = character_set!(
1724-
b"-_.+!*(),%#@?=;:/,+$~",
1724+
b"-_.+!*(),#@?=;:/,+$~",
17251725
b"abcdefghijklmnopqrstuvwxyz",
17261726
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
17271727
);
@@ -1761,6 +1761,16 @@ pub fn escape_href(output: &mut dyn Write, buffer: &str, relaxed_ipv6: bool) ->
17611761
b'\'' => {
17621762
output.write_str("&#x27;")?;
17631763
}
1764+
b'%' => {
1765+
if bytes.get(i + 1).map_or(false, |b| b.is_ascii_hexdigit())
1766+
&& bytes.get(i + 2).map_or(false, |b| b.is_ascii_hexdigit())
1767+
{
1768+
output.write_str(&buffer[i..=i + 2])?;
1769+
i += 2;
1770+
} else {
1771+
output.write_str("%25")?;
1772+
}
1773+
}
17641774
0 => {
17651775
// U+FFFD REPLACEMENT CHARACTER
17661776
output.write_str("%EF%BF%BD")?;

src/tests/escape.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ fn escape_inline_baseline() {
3939
/// [link destination]: https://spec.commonmark.org/0.31.2/#link-destination
4040
#[test]
4141
fn escape_link_target() {
42-
let url = "rabbits) <cup\rcakes\n> [hyacinth](";
43-
let escaped = r#"<rabbits) \<cup%0Dcakes%0A\> [hyacinth](>"#;
42+
let url = "rabbits) <cup\rcakes\n> [%7Bhya%cinth%7d](";
43+
let escaped = r#"<rabbits) \<cup%0Dcakes%0A\> [%7Bhya%cinth%7d](>"#;
44+
let decoded = "rabbits) <cup\rcakes\n> [{hya%cinth}](";
4445

4546
assert_eq!(escaped, escape_link_destination(url));
4647

@@ -55,9 +56,12 @@ fn escape_link_target() {
5556
.expect("html should be one anchor in a paragraph")
5657
.to_string();
5758

58-
assert_eq!("rabbits)%20%3Ccup%0Dcakes%0A%3E%20%5Bhyacinth%5D(", html);
5959
assert_eq!(
60-
url,
60+
"rabbits)%20%3Ccup%0Dcakes%0A%3E%20%5B%7Bhya%25cinth%7d%5D(",
61+
html
62+
);
63+
assert_eq!(
64+
decoded,
6165
percent_encoding_rfc3986::percent_decode_str(&html)
6266
.unwrap()
6367
.decode_utf8()

0 commit comments

Comments
 (0)