Skip to content

Commit d149324

Browse files
authored
Merge pull request #689 from kivikakk/push-nnqpnowtsnom
tasklists: don't munge first byte into unicode codepoint.
2 parents 988ebcd + c2509e5 commit d149324

File tree

4 files changed

+65
-16
lines changed

4 files changed

+65
-16
lines changed

src/parser/mod.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,12 +2019,21 @@ where
20192019
sourcepos: &mut Sourcepos,
20202020
spx: &mut Spx,
20212021
) {
2022-
let (end, symbol) = match scanners::tasklist(text) {
2022+
let (end, matched) = match scanners::tasklist(text) {
20232023
Some(p) => p,
20242024
None => return,
20252025
};
20262026

2027-
let symbol = symbol as char;
2027+
let mut chars = matched.chars();
2028+
let Some(symbol) = chars.next() else {
2029+
return;
2030+
};
2031+
2032+
// `matched` must hold exactly one `char`: if anything follows the first
2033+
// one, this is not a tasklist marker and we ignore it.
2034+
if !chars.next().is_none() {
2035+
return;
2036+
}
20282037

20292038
if !self.options.parse.relaxed_tasklist_matching && !matches!(symbol, ' ' | 'x' | 'X') {
20302039
return;

src/scanners.re

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -462,25 +462,28 @@ pub fn close_multiline_block_quote_fence(s: &str) -> Option<usize> {
462462
*/
463463
}
464464

465-
// Returns both the length of the match, and the tasklist character.
466-
pub fn tasklist(s: &str) -> Option<(usize, u8)> {
465+
// Returns both the length of the match, and the tasklist item contents.
466+
// It is not guaranteed to be one byte, or one "character" long; the caller must ascertain
467+
// its fitness for purpose.
468+
pub fn tasklist(s: &str) -> Option<(usize, &str)> {
467469
let mut cursor = 0;
468470
let mut marker = 0;
469471
let len = s.len();
470472

471-
let mut t1;
473+
let t1;
474+
let mut t2;
472475
/*!stags:re2c format = 'let mut @@{tag} = 0;'; */
473476

474477
/*!local:re2c
475478
re2c:define:YYSTAGP = "@@{tag} = cursor;";
476479
re2c:define:YYSHIFTSTAG = "@@{tag} = (@@{tag} as isize + @@{shift}) as usize;";
477-
re2c:tags = 1;
480+
re2c:tags = 2;
478481
479-
spacechar* [[] @t1 [^\xff\r\n] [\]] (spacechar | [\xff]) {
482+
spacechar* [[] @t1 [^\xff\r\n\]]+ @t2 [\]] (spacechar | [\xff]) {
480483
if cursor == len + 1 {
481484
cursor -= 1;
482485
}
483-
return Some((cursor, s.as_bytes()[t1]));
486+
return Some((cursor, &s[t1..t2]));
484487
}
485488
* { return None; }
486489
*/

src/scanners.rs

Lines changed: 20 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/tests/tasklist.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,3 +491,28 @@ fn sourcepos() {
491491
])
492492
);
493493
}
494+
495+
#[test]
496+
fn tasklist_relaxed_unicode() {
497+
assert_ast_match!(
498+
[extension.tasklist, parse.relaxed_tasklist_matching],
499+
"- [あ] xy\n" // U+3042
500+
" - [い] zw\n", // U+3044
501+
(document (1:1-2:12) [
502+
(list (1:1-2:12) [
503+
(taskitem (1:1-2:12) [
504+
(paragraph (1:9-1:10) [
505+
(text (1:9-1:10) "xy")
506+
])
507+
(list (2:3-2:12) [
508+
(taskitem (2:3-2:12) [
509+
(paragraph (2:11-2:12) [
510+
(text (2:11-2:12) "zw")
511+
])
512+
])
513+
])
514+
])
515+
])
516+
])
517+
);
518+
}

0 commit comments

Comments
 (0)