32
32
33
33
// Yields element `n` of the `data` member of the object that `i` points to.
// There must be no whitespace between the macro name and `(`: with a space the
// preprocessor defines an object-like macro and calls no longer expand.
#define peek_at(i, n) ((i)->data[(n)])
34
34
35
- static bool S_last_line_blank (const cmark_node * node ) {
36
- return (node -> flags & CMARK_NODE__LAST_LINE_BLANK ) != 0 ;
37
- }
38
-
39
35
// Reads the `type` field of `node` and returns it cast to cmark_node_type.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
  const cmark_node_type kind = (cmark_node_type)node->type;
  return kind;
}
42
38
43
- static void S_set_last_line_blank (cmark_node * node , bool is_blank ) {
44
- if (is_blank )
45
- node -> flags |= CMARK_NODE__LAST_LINE_BLANK ;
46
- else
47
- node -> flags &= ~CMARK_NODE__LAST_LINE_BLANK ;
48
- }
49
-
50
39
// Reports whether `c` is a line-ending character: line feed ('\n') or
// carriage return ('\r').
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
@@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) {
124
113
mem -> free (parser );
125
114
}
126
115
127
- static cmark_node * finalize (cmark_parser * parser , cmark_node * b );
128
-
129
116
// Returns true if line has only space characters, else false.
130
117
static bool is_blank_raw (const unsigned char * ptr , const bufsize_t size ,
131
118
bufsize_t offset ) {
@@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
209
196
return ;
210
197
}
211
198
199
+ // Scan forward until line end to keep trailing spaces of the last line.
212
200
for (; i < ln -> size ; ++ i ) {
213
201
c = ln -> ptr [i ];
214
202
215
203
if (!S_is_line_end_char (c ))
216
204
continue ;
217
205
218
- cmark_strbuf_truncate (ln , i );
206
+ if (c == '\r' && i + 1 < ln -> size && ln -> ptr [i + 1 ] == '\n' ) {
207
+ i ++ ;
208
+ }
209
+
210
+ cmark_strbuf_truncate (ln , i + 1 );
219
211
break ;
220
212
}
221
213
}
222
214
223
- // Check to see if a node ends with a blank line, descending
224
- // if needed into lists and sublists.
225
- static bool S_ends_with_blank_line (cmark_node * node ) {
226
- if ((S_type (node ) == CMARK_NODE_LIST ||
227
- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
228
- return (S_ends_with_blank_line (node -> last_child ));
229
- } else {
230
- return (S_last_line_blank (node ));
231
- }
215
+ // Check to see if a node ends with a blank line.
216
+ static CMARK_INLINE bool S_ends_with_blank_line (cmark_node * node ) {
217
+ return node -> next && node -> end_line != node -> next -> start_line - 1 ;
232
218
}
233
219
234
220
// returns true if content remains after link defs are resolved.
@@ -336,7 +322,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) {
336
322
cmark_iter_free (iter );
337
323
}
338
324
339
- static cmark_node * finalize (cmark_parser * parser , cmark_node * b ) {
325
+ // `closed_explicitly` is true when the node is closed by an explicit marker, or
326
+ // the node cannot span more than one line:
327
+ //
328
+ // - Close tag of HTML blocks
329
+ // - Closing code fence
330
+ // - ATX headings
331
+ // - Thematic breaks
332
+ static cmark_node * finalize (cmark_parser * parser , cmark_node * b ,
333
+ bool closed_explicitly ) {
340
334
bufsize_t pos ;
341
335
cmark_node * item ;
342
336
cmark_node * subitem ;
@@ -347,22 +341,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
347
341
CMARK_NODE__OPEN ); // shouldn't call finalize on closed blocks
348
342
b -> flags &= ~CMARK_NODE__OPEN ;
349
343
350
- if (parser -> curline . size == 0 ) {
351
- // end of input - line number has not been incremented
352
- b -> end_line = parser -> line_number ;
353
- b -> end_column = parser -> last_line_length ;
354
- } else if ( S_type ( b ) == CMARK_NODE_DOCUMENT ||
355
- ( S_type ( b ) == CMARK_NODE_CODE_BLOCK && b -> as . code . fenced ) ||
356
- ( S_type ( b ) == CMARK_NODE_HEADING && b -> as . heading . setext )) {
357
- b -> end_line = parser -> line_number ;
358
- b -> end_column = parser -> curline .size ;
359
- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\n' )
360
- b -> end_column -= 1 ;
361
- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
362
- b -> end_column -= 1 ;
363
- } else {
364
- b -> end_line = parser -> line_number - 1 ;
365
- b -> end_column = parser -> last_line_length ;
344
+ if (S_type ( b ) != CMARK_NODE_CODE_BLOCK || b -> as . code . fenced ) {
345
+ if ( parser -> curline . size == 0 ) {
346
+ // end of input - line number has not been incremented
347
+ b -> end_line = parser -> line_number ;
348
+ b -> end_column = parser -> last_line_length ;
349
+ } else if ( closed_explicitly ) {
350
+ b -> end_line = parser -> line_number ;
351
+ b -> end_column = parser -> curline . size ;
352
+ if ( b -> end_column && parser -> curline .ptr [ b -> end_column - 1 ] == '\n' )
353
+ b -> end_column -= 1 ;
354
+ if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
355
+ b -> end_column -= 1 ;
356
+ } else {
357
+ b -> end_line = parser -> line_number - 1 ;
358
+ b -> end_column = parser -> last_line_length ;
359
+ }
366
360
}
367
361
368
362
cmark_strbuf * node_content = & parser -> content ;
@@ -376,7 +370,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
376
370
case CMARK_NODE_CODE_BLOCK :
377
371
if (!b -> as .code .fenced ) { // indented code
378
372
remove_trailing_blank_lines (node_content );
379
- cmark_strbuf_putc (node_content , '\n' );
380
373
} else {
381
374
// first line of contents becomes info
382
375
for (pos = 0 ; pos < node_content -> size ; ++ pos ) {
@@ -417,16 +410,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
417
410
418
411
while (item ) {
419
412
// check for non-final non-empty list item ending with blank line:
420
- if (S_last_line_blank ( item ) && item -> next ) {
413
+ if (item -> next && S_ends_with_blank_line ( item ) ) {
421
414
b -> as .list .tight = false;
422
415
break ;
423
416
}
424
417
// recurse into children of list item, to see if there are
425
418
// spaces between them:
426
419
subitem = item -> first_child ;
427
420
while (subitem ) {
428
- if ((item -> next || subitem -> next ) &&
429
- S_ends_with_blank_line (subitem )) {
421
+ if (subitem -> next && S_ends_with_blank_line (subitem )) {
430
422
b -> as .list .tight = false;
431
423
break ;
432
424
}
@@ -437,9 +429,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
437
429
}
438
430
item = item -> next ;
439
431
}
432
+ b -> end_line = b -> last_child -> end_line ;
433
+ b -> end_column = b -> last_child -> end_column ;
440
434
441
435
break ;
442
436
437
+ case CMARK_NODE_ITEM :
438
+ if (b -> last_child ) {
439
+ b -> end_line = b -> last_child -> end_line ;
440
+ b -> end_column = b -> last_child -> end_column ;
441
+ }
442
+ // If the item is empty, it is closed when the next line is processed and
443
+ // the end position is set by the normal path. Note that if the first line
444
+ // and second line of an item are blank, it is closed.
445
+ break ;
446
+
443
447
case CMARK_NODE_DOCUMENT :
444
448
resolve_all_reference_link_definitions (parser );
445
449
break ;
@@ -459,7 +463,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
459
463
// if 'parent' isn't the kind of node that can accept this child,
460
464
// then back up til we hit a node that can.
461
465
while (!can_contain (S_type (parent ), block_type )) {
462
- parent = finalize (parser , parent );
466
+ parent = finalize (parser , parent , false );
463
467
}
464
468
465
469
cmark_node * child =
@@ -599,10 +603,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) {
599
603
600
604
static cmark_node * finalize_document (cmark_parser * parser ) {
601
605
while (parser -> current != parser -> root ) {
602
- parser -> current = finalize (parser , parser -> current );
606
+ parser -> current = finalize (parser , parser -> current , false );
603
607
}
604
608
605
- finalize (parser , parser -> root );
609
+ finalize (parser , parser -> root , false );
606
610
607
611
// Limit total size of extra content created from reference links to
608
612
// document size to avoid superlinear growth. Always allow 100KB.
@@ -922,7 +926,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
922
926
// the end of a line, we can stop processing it:
923
927
* should_continue = false;
924
928
S_advance_offset (parser , input , matched , false);
925
- parser -> current = finalize (parser , container );
929
+ parser -> current = finalize (parser , container , true );
926
930
} else {
927
931
// skip opt. spaces of fence parser->offset
928
932
int i = container -> as .code .fence_offset ;
@@ -1126,6 +1130,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1126
1130
// it's only now that we know the line is not part of a setext heading:
1127
1131
* container = add_child (parser , * container , CMARK_NODE_THEMATIC_BREAK ,
1128
1132
parser -> first_nonspace + 1 );
1133
+ * container = finalize (parser , * container , true);
1129
1134
S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1130
1135
} else if ((!indented || cont_type == CMARK_NODE_LIST ) &&
1131
1136
parser -> indent < 4 &&
@@ -1212,35 +1217,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1212
1217
static void add_text_to_container (cmark_parser * parser , cmark_node * container ,
1213
1218
cmark_node * last_matched_container ,
1214
1219
cmark_chunk * input ) {
1215
- cmark_node * tmp ;
1216
1220
// what remains at parser->offset is a text line. add the text to the
1217
1221
// appropriate container.
1218
1222
1219
1223
S_find_first_nonspace (parser , input );
1220
1224
1221
- if (parser -> blank && container -> last_child )
1222
- S_set_last_line_blank (container -> last_child , true);
1223
-
1224
- // block quote lines are never blank as they start with >
1225
- // and we don't count blanks in fenced code for purposes of tight/loose
1226
- // lists or breaking out of lists. we also don't set last_line_blank
1227
- // on an empty list item.
1228
- const cmark_node_type ctype = S_type (container );
1229
- const bool last_line_blank =
1230
- (parser -> blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1231
- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1232
- !(ctype == CMARK_NODE_CODE_BLOCK && container -> as .code .fenced ) &&
1233
- !(ctype == CMARK_NODE_ITEM && container -> first_child == NULL &&
1234
- container -> start_line == parser -> line_number ));
1235
-
1236
- S_set_last_line_blank (container , last_line_blank );
1237
-
1238
- tmp = container ;
1239
- while (tmp -> parent ) {
1240
- S_set_last_line_blank (tmp -> parent , false);
1241
- tmp = tmp -> parent ;
1242
- }
1243
-
1244
1225
// If the last line processed belonged to a paragraph node,
1245
1226
// and we didn't match all of the line prefixes for the open containers,
1246
1227
// and we didn't start any new containers,
@@ -1254,7 +1235,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1254
1235
} else { // not a lazy continuation
1255
1236
// Finalize any blocks that were not matched and set cur to container:
1256
1237
while (parser -> current != last_matched_container ) {
1257
- parser -> current = finalize (parser , parser -> current );
1238
+ parser -> current = finalize (parser , parser -> current , false );
1258
1239
assert (parser -> current != NULL );
1259
1240
}
1260
1241
@@ -1296,7 +1277,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1296
1277
}
1297
1278
1298
1279
if (matches_end_condition ) {
1299
- container = finalize (parser , container );
1280
+ container = finalize (parser , container , true );
1300
1281
assert (parser -> current != NULL );
1301
1282
}
1302
1283
} else if (parser -> blank ) {
@@ -1329,6 +1310,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1329
1310
bool all_matched = true;
1330
1311
cmark_node * container ;
1331
1312
cmark_chunk input ;
1313
+ bool need_set_end_position = false;
1332
1314
1333
1315
if (parser -> options & CMARK_OPT_VALIDATE_UTF8 )
1334
1316
cmark_utf8proc_check (& parser -> curline , buffer , bytes );
@@ -1366,6 +1348,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1366
1348
1367
1349
add_text_to_container (parser , container , last_matched_container , & input );
1368
1350
1351
+ need_set_end_position = S_type (container ) == CMARK_NODE_CODE_BLOCK &&
1352
+ !container -> as .code .fenced &&
1353
+ !parser -> blank ;
1354
+
1369
1355
finished :
1370
1356
parser -> last_line_length = input .len ;
1371
1357
if (parser -> last_line_length &&
@@ -1375,6 +1361,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1375
1361
input .data [parser -> last_line_length - 1 ] == '\r' )
1376
1362
parser -> last_line_length -= 1 ;
1377
1363
1364
+ if (need_set_end_position ) {
1365
+ container -> end_line = parser -> line_number ;
1366
+ container -> end_column = parser -> last_line_length ;
1367
+ }
1368
+
1378
1369
cmark_strbuf_clear (& parser -> curline );
1379
1370
}
1380
1371
0 commit comments