@@ -436,6 +436,71 @@ auto expand_string_literal(
436
436
return parts.generate ();
437
437
}
438
438
439
+ auto expand_raw_string_literal (
440
+ const std::string& opening_seq,
441
+ const std::string& closing_seq,
442
+ string_parts::adds_sequences closing_strategy,
443
+ std::string_view text,
444
+ std::vector<error>& errors,
445
+ source_position src_pos) -> string_parts
446
+ {
447
+ auto const length = std::ssize (text);
448
+ auto pos = 0 ;
449
+ auto first_quote_pos = pos;
450
+ auto current_start = pos; // the current offset before which the string has been added to ret
451
+ string_parts parts{opening_seq, closing_seq, closing_strategy};
452
+
453
+ // Now we're on the first character of the string itself
454
+ for ( ; pos < length; ++pos )
455
+ {
456
+ // Find the next )$
457
+ if (text[pos] == ' $' && text[pos-1 ] == ' )' )
458
+ {
459
+ // Scan back to find the matching (
460
+ auto paren_depth = 1 ;
461
+ auto open = pos - 2 ;
462
+
463
+ for ( ; open > current_start; --open )
464
+ {
465
+ if (text[open ] == ' )' ) {
466
+ ++paren_depth;
467
+ }
468
+ else if (text[open ] == ' (' ) {
469
+ --paren_depth;
470
+ if (paren_depth == 0 ) {
471
+ break ;
472
+ }
473
+ }
474
+ }
475
+ if (text[open ] != ' (' )
476
+ {
477
+ errors.emplace_back (
478
+ source_position ( src_pos.lineno , src_pos.colno + pos ),
479
+ " no matching ( for string interpolation ending in )$"
480
+ );
481
+ return parts;
482
+ }
483
+
484
+ // 'open' is now at the matching (
485
+
486
+ // Put the next non-empty non-interpolated chunk straight into ret
487
+ if (open != current_start) {
488
+ parts.add_string (text.substr (current_start, open - current_start));
489
+ }
490
+ // Then put interpolated chunk into ret
491
+ parts.add_code (" cpp2::to_string" + std::string{text.substr (open , pos - open )});
492
+
493
+ current_start = pos+1 ;
494
+ }
495
+ }
496
+
497
+ // Put the final non-interpolated chunk straight into ret
498
+ if (current_start < std::ssize (text)) {
499
+ parts.add_string (text.substr (current_start));
500
+ }
501
+
502
+ return parts;
503
+ }
439
504
440
505
// -----------------------------------------------------------------------
441
506
// lex: Tokenize a single line while maintaining inter-line state
@@ -455,6 +520,8 @@ auto expand_string_literal(
455
520
// -- this isn't about tokens generated later, that's tokens::generated_tokens
456
521
static auto generated_text = std::deque<std::string>{};
457
522
523
// Text of finalized multi-line raw string literals, one entry per literal.
// lex_line appends an entry when a multi-line raw string is closed and makes the
// StringLiteral token point at the stored text; each entry also records the
// position where the literal ended, which is read back when a line is re-lexed.
// NOTE(review): the token holds a pointer into the entry's text, so entries
// presumably must stay alive and unmoved for as long as the tokens are used - confirm.
+ static auto multiline_raw_strings = std::deque<multiline_raw_string>{};
524
+
458
525
auto lex_line (
459
526
std::string& mutable_line,
460
527
int const lineno,
@@ -889,6 +956,49 @@ auto lex_line(
889
956
return do_is_keyword (multi_keys);
890
957
};
891
958
959
+ auto reset_processing_of_the_line = [&]() {
960
+ // Redo processing of this whole line now that the string is expanded,
961
+ // which may have moved it in memory... move i back to the line start
962
+ // and discard any tokens we already tokenized for this line
963
+ i = colno_t {-1 };
964
+ while (
965
+ !tokens.empty ()
966
+ && tokens.back ().position ().lineno == lineno
967
+ )
968
+ {
969
+ tokens.pop_back ();
970
+ }
971
+ };
972
+
973
// Expand interpolations in one piece of a raw string literal and splice the result into the line.
//
// opening_seq, closing_seq, closing_strategy: forwarded to expand_raw_string_literal
// part:            the raw string text to scan for interpolations
// pos_to_replace:  offset in mutable_line where the replaced range starts; errors are
//                  reported relative to column pos_to_replace + 1 on the current line
// size_to_replace: number of characters of mutable_line that are replaced
//
// Returns true when parts.is_expanded() reports an expansion; in that case the tokens of
// this line have been discarded and the caller is expected to re-lex the line.
// Returns false otherwise. mutable_line is rewritten and i is advanced in both cases.
+ auto interpolate_raw_string = [&](
974
+ const std::string& opening_seq,
975
+ const std::string& closing_seq,
976
+ string_parts::adds_sequences closing_strategy,
977
+ std::string_view part,
978
+ int pos_to_replace,
979
+ int size_to_replace
980
+ ) -> bool {
981
+ auto parts = expand_raw_string_literal (opening_seq, closing_seq, closing_strategy, part, errors, source_position (lineno, pos_to_replace + 1 ));
982
+ auto new_part = parts.generate ();
983
// Replace the original range with the generated text, then advance i by the generated length minus one
+ mutable_line.replace ( pos_to_replace, size_to_replace, new_part );
984
+ i += std::ssize (new_part)-1 ;
985
+
986
+ if (parts.is_expanded ()) {
987
+ // raw string was expanded and we need to repeat the processing of this line
988
+ reset_processing_of_the_line ();
989
+
990
+ // but skipping end of potential multiline raw string that ends on this line
991
// If the most recently stored multi-line raw string ended on this line, resume at its
// recorded end column (overriding the rewind done by the reset above)
+ if (!multiline_raw_strings.empty () && multiline_raw_strings.back ().end .lineno == lineno) {
992
+ i = multiline_raw_strings.back ().end .colno ;
993
+ raw_string_multiline.reset ();
994
// Otherwise, if a multi-line raw string was started on this line, clear that state;
// i stays at the rewound position set by the reset above
+ } else if (raw_string_multiline && raw_string_multiline->start .lineno == lineno) {
995
+ raw_string_multiline.reset ();
996
+ }
997
+ return true ;
998
+ }
999
+ return false ;
1000
+ };
1001
+
892
1002
//
893
1003
// -----------------------------------------------------
894
1004
@@ -918,6 +1028,7 @@ auto lex_line(
918
1028
else if (peek1 == ' R' && peek2 == next) { return 3 ; } // LR"
919
1029
}
920
1030
else if (line[i] == ' R' && peek1 == next) { return 2 ; } // R"
1031
+ else if (line[i] == ' $' && peek1 == ' R' && peek2 == next) { return 3 ; } // $R"
921
1032
return 0 ;
922
1033
};
923
1034
@@ -947,22 +1058,36 @@ auto lex_line(
947
1058
auto end_pos = line.find (raw_string_multiline.value ().closing_seq , i);
948
1059
auto part = line.substr (i, end_pos-i);
949
1060
1061
+ if (const auto & rsm = raw_string_multiline.value (); rsm.should_interpolate ) {
1062
+
1063
+ auto closing_strategy = end_pos == line.npos ? string_parts::no_ends : string_parts::on_the_end;
1064
+ auto size_to_replace = end_pos == line.npos ? std::ssize (line) - i : end_pos - i + std::ssize (rsm.closing_seq );
1065
+
1066
+ if (interpolate_raw_string (rsm.opening_seq , rsm.closing_seq , closing_strategy, part, i, size_to_replace ) ) {
1067
+ continue ;
1068
+ }
1069
+ }
1070
+ // raw string was not expanded
1071
+
950
1072
raw_string_multiline.value ().text += part;
951
1073
if (end_pos == std::string::npos) {
952
1074
raw_string_multiline.value ().text += ' \n ' ;
953
1075
break ;
954
1076
}
955
1077
956
- // here we know that we are dealing with multiline raw string literal
957
- // token needs to use generated_text to store string that exists in multiple lines
958
- i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
1078
+ // here we know that we are dealing with finalized multiline raw string literal
1079
+ // token needs to use multiline_raw_strings to store string that exists in multiple lines
959
1080
raw_string_multiline.value ().text += raw_string_multiline.value ().closing_seq ;
960
1081
961
- generated_text.push_back (raw_string_multiline.value ().text );
1082
+ // and position where multiline_raw_string ends (needed for resetting line parsing)
1083
+ i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
1084
+
1085
+ const auto & text = raw_string_multiline.value ().should_interpolate ? raw_string_multiline.value ().text .substr (1 ) : raw_string_multiline.value ().text ;
1086
+ multiline_raw_strings.emplace_back (multiline_raw_string{ text, {lineno, i} });
962
1087
963
1088
tokens.push_back ({
964
- &generated_text .back ()[0 ],
965
- std::ssize (generated_text .back ()),
1089
+ &multiline_raw_strings .back (). text [0 ],
1090
+ std::ssize (multiline_raw_strings .back (). text ),
966
1091
raw_string_multiline.value ().start ,
967
1092
lexeme::StringLiteral
968
1093
});
@@ -1153,7 +1278,62 @@ auto lex_line(
1153
1278
store (1 , lexeme::QuestionMark);
1154
1279
1155
1280
break ;case ' $' :
1156
- store (1 , lexeme::Dollar);
1281
+ if (auto j = is_encoding_prefix_and (' \" ' ); peek (j-2 ) == ' R' ) {
1282
+ // if peek(j-2) is 'R' it means that we deal with raw-string literal
1283
+ auto R_pos = i + j - 2 ;
1284
+ auto seq_pos = i + j;
1285
+
1286
+ if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1287
+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1288
+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1289
+
1290
+ if (auto closing_pos = line.find (closing_seq, paren_pos+1 ); closing_pos != line.npos ) {
1291
+ if (interpolate_raw_string (
1292
+ opening_seq,
1293
+ closing_seq,
1294
+ string_parts::on_both_ends,
1295
+ std::string_view (&line[paren_pos+1 ], closing_pos-paren_pos-1 ), i, closing_pos-i+std::ssize (closing_seq))
1296
+ ) {
1297
+ continue ;
1298
+ }
1299
+
1300
+ tokens.push_back ({
1301
+ &line[R_pos],
1302
+ i - R_pos + 1 ,
1303
+ source_position (lineno, R_pos + 1 ),
1304
+ lexeme::StringLiteral
1305
+ });
1306
+ } else {
1307
+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq, true });
1308
+
1309
+ if (interpolate_raw_string (
1310
+ opening_seq,
1311
+ closing_seq,
1312
+ string_parts::on_the_begining,
1313
+ std::string_view (&line[paren_pos+1 ], std::ssize (line)-(paren_pos+1 )), i, std::ssize (line)-i)
1314
+ ) {
1315
+ continue ;
1316
+ }
1317
+ // skip entire raw string opening sequence R"
1318
+ i = paren_pos;
1319
+
1320
+ // if we are on the end of the line we need to add new line char
1321
+ if (i+1 == std::ssize (line)) {
1322
+ raw_string_multiline.value ().text += ' \n ' ;
1323
+ }
1324
+ }
1325
+ continue ;
1326
+ }
1327
+ else {
1328
+ errors.emplace_back (
1329
+ source_position (lineno, i + j - 2 ),
1330
+ " invalid new-line in raw string delimiter \" " + std::string (&line[i],j)
1331
+ + " \" - stray 'R' in program \" "
1332
+ );
1333
+ }
1334
+ } else {
1335
+ store (1 , lexeme::Dollar);
1336
+ }
1157
1337
1158
1338
// G
1159
1339
// G literal:
@@ -1313,13 +1493,13 @@ auto lex_line(
1313
1493
auto seq_pos = i + j;
1314
1494
1315
1495
if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1316
- auto raw_string_opening_seq = line.substr (i, paren_pos - i + 1 );
1317
- auto raw_string_closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1496
+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1497
+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1318
1498
1319
- if (auto closing_pos = line.find (raw_string_closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1320
- store (closing_pos+std::ssize (raw_string_closing_seq )-i, lexeme::StringLiteral);
1499
+ if (auto closing_pos = line.find (closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1500
+ store (closing_pos+std::ssize (closing_seq )-i, lexeme::StringLiteral);
1321
1501
} else {
1322
- raw_string_multiline.emplace (raw_string{source_position{lineno, i}, raw_string_opening_seq, raw_string_opening_seq, raw_string_closing_seq });
1502
+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq });
1323
1503
// skip entire raw string opening sequence R"
1324
1504
i = paren_pos;
1325
1505
@@ -1368,17 +1548,7 @@ auto lex_line(
1368
1548
}
1369
1549
mutable_line.replace ( i, j+1 , s );
1370
1550
1371
- // Redo processing of this whole line now that the string is expanded,
1372
- // which may have moved it in memory... move i back to the line start
1373
- // and discard any tokens we already tokenized for this line
1374
- i = colno_t {-1 };
1375
- while (
1376
- !tokens.empty ()
1377
- && tokens.back ().position ().lineno == lineno
1378
- )
1379
- {
1380
- tokens.pop_back ();
1381
- }
1551
+ reset_processing_of_the_line ();
1382
1552
}
1383
1553
}
1384
1554
}
0 commit comments