Skip to content

Commit 8345c2f

Browse files
authored
perf(lex): use linear search over regex (#514)
1 parent 91a4196 commit 8345c2f

File tree

1 file changed

+37
-35
lines changed

1 file changed

+37
-35
lines changed

source/lex.h

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -933,17 +933,19 @@ auto lex_line(
933933
//G any Cpp1-and-Cpp2 keyword
934934
//G one of: 'import' 'module' 'export' 'is' 'as'
935935
//G
936-
auto do_is_keyword = [&](std::regex const& r) {
937-
std::cmatch m;
938-
if (std::regex_search(&line[i], m, r)) {
939-
assert (m.position(0) == 0);
936+
auto do_is_keyword = [&](std::vector<std::string_view> const& r) {
937+
auto remaining_line = std::string_view(line).substr(unsafe_narrow<std::size_t>(i));
938+
auto m = std::find_if(r.begin(), r.end(), [&](std::string_view s) {
939+
return remaining_line.starts_with(s);
940+
});
941+
if (m != r.end()) {
940942
// If we matched and what's next is EOL or a non-identifier char, we matched!
941943
if (
942-
i+m[0].length() == std::ssize(line) // EOL
943-
|| !is_identifier_continue(line[i+m[0].length()]) // non-identifier char
944+
i+std::ssize(*m) == std::ssize(line) // EOL
945+
|| !is_identifier_continue(line[unsafe_narrow<std::size_t>(i)+std::size(*m)]) // non-identifier char
944946
)
945947
{
946-
return static_cast<int>(m[0].length());
948+
return static_cast<int>(std::ssize(*m));
947949
}
948950
}
949951
return 0;
@@ -955,46 +957,46 @@ auto lex_line(
955957
// reserve all the ones Cpp1 has both for compatibility and to not give up a keyword
956958
// Some keywords like "delete" and "union" are not in this list because we reject them elsewhere
957959
// Cpp2 also adds a couple, notably "is" and "as"
958-
const auto keys = std::regex(
959-
"^alignas|^alignof|^asm|^as|^auto|"
960-
"^bool|^break|"
961-
"^case|^catch|^char16_t|^char32_t|^char8_t|^char|^co_await|^co_return|"
962-
"^co_yield|^concept|^const_cast|^consteval|^constexpr|^constinit|^const|^continue|"
963-
"^decltype|^default|^double|^do|^dynamic_cast|"
964-
"^else|^enum|^explicit|^export|^extern|"
965-
"^float|^for|^friend|"
966-
"^goto|"
967-
"^if|^import|^inline|^int|^is|"
968-
"^long|"
969-
"^module|^mutable|"
970-
"^namespace|^noexcept|"
971-
"^operator|"
972-
"^private|^protected|^public|"
973-
"^register|^reinterpret_cast|^requires|^return|"
974-
"^short|^signed|^sizeof|^static_assert|^static_cast|^static|^switch|"
975-
"^template|^this|^thread_local|^throws|^throw|^try|^typedef|^typeid|^typename|"
976-
"^unsigned|^using|"
977-
"^virtual|^void|^volatile|"
978-
"^wchar_t|^while"
979-
);
960+
static const auto keys = std::vector<std::string_view>{
961+
"alignas", "alignof", "asm", "as", "auto",
962+
"bool", "break",
963+
"case", "catch", "char16_t", "char32_t", "char8_t", "char", "co_await", "co_return",
964+
"co_yield", "concept", "const_cast", "consteval", "constexpr", "constinit", "const", "continue",
965+
"decltype", "default", "double", "do", "dynamic_cast",
966+
"else", "enum", "explicit", "export", "extern",
967+
"float", "for", "friend",
968+
"goto",
969+
"if", "import", "inline", "int", "is",
970+
"long",
971+
"module", "mutable",
972+
"namespace", "noexcept",
973+
"operator",
974+
"private", "protected", "public",
975+
"register", "reinterpret_cast", "requires", "return",
976+
"short", "signed", "sizeof", "static_assert", "static_cast", "static", "switch",
977+
"template", "this", "thread_local", "throws", "throw", "try", "typedef", "typeid", "typename",
978+
"unsigned", "using",
979+
"virtual", "void", "volatile",
980+
"wchar_t", "while"
981+
};
980982

981983
return do_is_keyword(keys);
982984
};
983985

984986
auto peek_is_cpp2_fundamental_type_keyword = [&]
985987
{
986-
const auto keys = std::regex(
987-
"^i8|^i16|^i32|^i64|^longdouble|^longlong|^u8|^u16|^u32|^u64|^ulong|^ulonglong|^ushort"
988-
);
988+
static const auto keys = std::vector<std::string_view>{
989+
"i8", "i16", "i32", "i64", "longdouble", "longlong", "u8", "u16", "u32", "u64", "ulong", "ulonglong", "ushort"
990+
};
989991

990992
return do_is_keyword(keys);
991993
};
992994

993995
auto peek_is_cpp1_multi_token_fundamental_keyword = [&]
994996
{
995-
const auto multi_keys = std::regex(
996-
"^char16_t|^char32_t|^char8_t|^char|^double|^float|^int|^long|^short|^signed|^unsigned"
997-
);
997+
static const auto multi_keys = std::vector<std::string_view>{
998+
"char16_t", "char32_t", "char8_t", "char", "double", "float", "int", "long", "short", "signed", "unsigned"
999+
};
9981000
return do_is_keyword(multi_keys);
9991001
};
10001002

0 commit comments

Comments
 (0)