Skip to content

Commit ff0d224

Browse files
committed
Fix list tightness
- Set the end position precisely - Check list tightness by comparing line numbers - Remove `LAST_LINE_BLANK` flag See also commonmark/commonmark.js#269. Classification of end positions: - The end of the current line: - Thematic breaks - ATX headings - Setext headings - Fenced code blocks closed explicitly - HTML blocks (`pre`, comments, and others) - The end of the previous line: - Fenced code blocks closed by the end of the parent or EOF - HTML blocks (`div` and others) - HTML blocks closed by the end of the parent or EOF - Paragraphs - Block quotes - Empty list items - The end position of the last child: - Non-empty list items - Lists - The end position of the last non-blank line: - Indented code blocks The first two cases are handled by `finalize` and the `closed_explicitly` flag. Non-empty list items and lists are handled in `switch` statements in `finalize`. Indented code blocks are handled by setting the end position every time a non-blank line is added to the block.
1 parent 1e73dea commit ff0d224

File tree

2 files changed

+65
-75
lines changed

2 files changed

+65
-75
lines changed

src/blocks.c

Lines changed: 65 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,10 @@
3232

3333
#define peek_at(i, n) (i)->data[n]
3434

35-
static bool S_last_line_blank(const cmark_node *node) {
36-
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
37-
}
38-
3935
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
4036
return (cmark_node_type)node->type;
4137
}
4238

43-
static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
44-
if (is_blank)
45-
node->flags |= CMARK_NODE__LAST_LINE_BLANK;
46-
else
47-
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
48-
}
49-
5039
static CMARK_INLINE bool S_is_line_end_char(char c) {
5140
return (c == '\n' || c == '\r');
5241
}
@@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) {
124113
mem->free(parser);
125114
}
126115

127-
static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
128-
129116
// Returns true if line has only space characters, else false.
130117
static bool is_blank_raw(const unsigned char *ptr, const bufsize_t size,
131118
bufsize_t offset) {
@@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
209196
return;
210197
}
211198

199+
// Scan forward until line end to keep trailing spaces of the last line.
212200
for (; i < ln->size; ++i) {
213201
c = ln->ptr[i];
214202

215203
if (!S_is_line_end_char(c))
216204
continue;
217205

218-
cmark_strbuf_truncate(ln, i);
206+
if (c == '\r' && i + 1 < ln->size && ln->ptr[i + 1] == '\n') {
207+
i++;
208+
}
209+
210+
cmark_strbuf_truncate(ln, i + 1);
219211
break;
220212
}
221213
}
222214

223-
// Check to see if a node ends with a blank line, descending
224-
// if needed into lists and sublists.
225-
static bool S_ends_with_blank_line(cmark_node *node) {
226-
if ((S_type(node) == CMARK_NODE_LIST ||
227-
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
228-
return(S_ends_with_blank_line(node->last_child));
229-
} else {
230-
return (S_last_line_blank(node));
231-
}
215+
// Check to see if a node ends with a blank line.
216+
static CMARK_INLINE bool S_ends_with_blank_line(cmark_node *node) {
217+
return node->next && node->end_line != node->next->start_line - 1;
232218
}
233219

234220
// returns true if content remains after link defs are resolved.
@@ -336,7 +322,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) {
336322
cmark_iter_free(iter);
337323
}
338324

339-
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
325+
// `closed_explicitly` states that the node is closed by explicit markers, or
326+
// the node cannot span more than one line:
327+
//
328+
// - Close tag of HTML blocks
329+
// - Closing code fence
330+
// - ATX headings
331+
// - Thematic breaks
332+
static cmark_node *finalize(cmark_parser *parser, cmark_node *b,
333+
bool closed_explicitly) {
340334
bufsize_t pos;
341335
cmark_node *item;
342336
cmark_node *subitem;
@@ -347,22 +341,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
347341
CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
348342
b->flags &= ~CMARK_NODE__OPEN;
349343

350-
if (parser->curline.size == 0) {
351-
// end of input - line number has not been incremented
352-
b->end_line = parser->line_number;
353-
b->end_column = parser->last_line_length;
354-
} else if (S_type(b) == CMARK_NODE_DOCUMENT ||
355-
(S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
356-
(S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
357-
b->end_line = parser->line_number;
358-
b->end_column = parser->curline.size;
359-
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
360-
b->end_column -= 1;
361-
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
362-
b->end_column -= 1;
363-
} else {
364-
b->end_line = parser->line_number - 1;
365-
b->end_column = parser->last_line_length;
344+
if (S_type(b) != CMARK_NODE_CODE_BLOCK || b->as.code.fenced) {
345+
if (parser->curline.size == 0) {
346+
// end of input - line number has not been incremented
347+
b->end_line = parser->line_number;
348+
b->end_column = parser->last_line_length;
349+
} else if (closed_explicitly) {
350+
b->end_line = parser->line_number;
351+
b->end_column = parser->curline.size;
352+
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
353+
b->end_column -= 1;
354+
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
355+
b->end_column -= 1;
356+
} else {
357+
b->end_line = parser->line_number - 1;
358+
b->end_column = parser->last_line_length;
359+
}
366360
}
367361

368362
cmark_strbuf *node_content = &parser->content;
@@ -376,7 +370,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
376370
case CMARK_NODE_CODE_BLOCK:
377371
if (!b->as.code.fenced) { // indented code
378372
remove_trailing_blank_lines(node_content);
379-
cmark_strbuf_putc(node_content, '\n');
380373
} else {
381374
// first line of contents becomes info
382375
for (pos = 0; pos < node_content->size; ++pos) {
@@ -417,16 +410,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
417410

418411
while (item) {
419412
// check for non-final non-empty list item ending with blank line:
420-
if (S_last_line_blank(item) && item->next) {
413+
if (item->next && S_ends_with_blank_line(item)) {
421414
b->as.list.tight = false;
422415
break;
423416
}
424417
// recurse into children of list item, to see if there are
425418
// spaces between them:
426419
subitem = item->first_child;
427420
while (subitem) {
428-
if ((item->next || subitem->next) &&
429-
S_ends_with_blank_line(subitem)) {
421+
if (subitem->next && S_ends_with_blank_line(subitem)) {
430422
b->as.list.tight = false;
431423
break;
432424
}
@@ -437,9 +429,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
437429
}
438430
item = item->next;
439431
}
432+
b->end_line = b->last_child->end_line;
433+
b->end_column = b->last_child->end_column;
440434

441435
break;
442436

437+
case CMARK_NODE_ITEM:
438+
if (b->last_child) {
439+
b->end_line = b->last_child->end_line;
440+
b->end_column = b->last_child->end_column;
441+
}
442+
// If the item is empty, it is closed when the next line is processed and
443+
// the end position is set by the normal path. Note that if the first line
444+
// and second line of an item are blank, it is closed.
445+
break;
446+
443447
case CMARK_NODE_DOCUMENT:
444448
resolve_all_reference_link_definitions(parser);
445449
break;
@@ -459,7 +463,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
459463
// if 'parent' isn't the kind of node that can accept this child,
460464
// then back up til we hit a node that can.
461465
while (!can_contain(S_type(parent), block_type)) {
462-
parent = finalize(parser, parent);
466+
parent = finalize(parser, parent, false);
463467
}
464468

465469
cmark_node *child =
@@ -599,10 +603,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) {
599603

600604
static cmark_node *finalize_document(cmark_parser *parser) {
601605
while (parser->current != parser->root) {
602-
parser->current = finalize(parser, parser->current);
606+
parser->current = finalize(parser, parser->current, false);
603607
}
604608

605-
finalize(parser, parser->root);
609+
finalize(parser, parser->root, false);
606610

607611
// Limit total size of extra content created from reference links to
608612
// document size to avoid superlinear growth. Always allow 100KB.
@@ -922,7 +926,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
922926
// the end of a line, we can stop processing it:
923927
*should_continue = false;
924928
S_advance_offset(parser, input, matched, false);
925-
parser->current = finalize(parser, container);
929+
parser->current = finalize(parser, container, true);
926930
} else {
927931
// skip opt. spaces of fence parser->offset
928932
int i = container->as.code.fence_offset;
@@ -1126,6 +1130,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11261130
// it's only now that we know the line is not part of a setext heading:
11271131
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
11281132
parser->first_nonspace + 1);
1133+
*container = finalize(parser, *container, true);
11291134
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
11301135
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
11311136
parser->indent < 4 &&
@@ -1212,35 +1217,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12121217
static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12131218
cmark_node *last_matched_container,
12141219
cmark_chunk *input) {
1215-
cmark_node *tmp;
12161220
// what remains at parser->offset is a text line. add the text to the
12171221
// appropriate container.
12181222

12191223
S_find_first_nonspace(parser, input);
12201224

1221-
if (parser->blank && container->last_child)
1222-
S_set_last_line_blank(container->last_child, true);
1223-
1224-
// block quote lines are never blank as they start with >
1225-
// and we don't count blanks in fenced code for purposes of tight/loose
1226-
// lists or breaking out of lists. we also don't set last_line_blank
1227-
// on an empty list item.
1228-
const cmark_node_type ctype = S_type(container);
1229-
const bool last_line_blank =
1230-
(parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1231-
ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1232-
!(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
1233-
!(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
1234-
container->start_line == parser->line_number));
1235-
1236-
S_set_last_line_blank(container, last_line_blank);
1237-
1238-
tmp = container;
1239-
while (tmp->parent) {
1240-
S_set_last_line_blank(tmp->parent, false);
1241-
tmp = tmp->parent;
1242-
}
1243-
12441225
// If the last line processed belonged to a paragraph node,
12451226
// and we didn't match all of the line prefixes for the open containers,
12461227
// and we didn't start any new containers,
@@ -1254,7 +1235,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12541235
} else { // not a lazy continuation
12551236
// Finalize any blocks that were not matched and set cur to container:
12561237
while (parser->current != last_matched_container) {
1257-
parser->current = finalize(parser, parser->current);
1238+
parser->current = finalize(parser, parser->current, false);
12581239
assert(parser->current != NULL);
12591240
}
12601241

@@ -1296,7 +1277,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12961277
}
12971278

12981279
if (matches_end_condition) {
1299-
container = finalize(parser, container);
1280+
container = finalize(parser, container, true);
13001281
assert(parser->current != NULL);
13011282
}
13021283
} else if (parser->blank) {
@@ -1329,6 +1310,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13291310
bool all_matched = true;
13301311
cmark_node *container;
13311312
cmark_chunk input;
1313+
bool need_set_end_position = false;
13321314

13331315
if (parser->options & CMARK_OPT_VALIDATE_UTF8)
13341316
cmark_utf8proc_check(&parser->curline, buffer, bytes);
@@ -1366,6 +1348,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13661348

13671349
add_text_to_container(parser, container, last_matched_container, &input);
13681350

1351+
need_set_end_position = S_type(container) == CMARK_NODE_CODE_BLOCK &&
1352+
!container->as.code.fenced &&
1353+
!parser->blank;
1354+
13691355
finished:
13701356
parser->last_line_length = input.len;
13711357
if (parser->last_line_length &&
@@ -1375,6 +1361,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13751361
input.data[parser->last_line_length - 1] == '\r')
13761362
parser->last_line_length -= 1;
13771363

1364+
if (need_set_end_position) {
1365+
container->end_line = parser->line_number;
1366+
container->end_column = parser->last_line_length;
1367+
}
1368+
13781369
cmark_strbuf_clear(&parser->curline);
13791370
}
13801371

src/node.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ typedef struct {
4848

4949
enum cmark_node__internal_flags {
5050
CMARK_NODE__OPEN = (1 << 0),
51-
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
5251
};
5352

5453
struct cmark_node {

0 commit comments

Comments
 (0)