Skip to content

Commit e47dc44

Browse files
committed
[Parser] Do not eagerly lex keywords
Lex them on demand instead to avoid wasted work.
1 parent d1df91c commit e47dc44

File tree

2 files changed

+56
-85
lines changed

2 files changed

+56
-85
lines changed

src/parser/lexer.cpp

Lines changed: 51 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1069,6 +1069,57 @@ bool Lexer::takeRParen() {
10691069
return false;
10701070
}
10711071

1072+
std::optional<std::string_view> Lexer::takeKeyword() {
1073+
if (curr) {
1074+
return std::nullopt;
1075+
}
1076+
if (auto result = keyword(next())) {
1077+
index += result->span.size();
1078+
advance();
1079+
return result->span;
1080+
}
1081+
return std::nullopt;
1082+
}
1083+
1084+
bool Lexer::takeKeyword(std::string_view expected) {
1085+
if (auto result = keyword(next()); result && result->span == expected) {
1086+
index += expected.size();
1087+
advance();
1088+
return true;
1089+
}
1090+
return false;
1091+
}
1092+
1093+
std::optional<uint64_t> Lexer::takeOffset() {
1094+
if (auto result = keyword(next())) {
1095+
if (result->span.substr(0, 7) != "offset="sv) {
1096+
return std::nullopt;
1097+
}
1098+
Lexer subLexer(result->span.substr(7));
1099+
if (auto o = subLexer.takeU64()) {
1100+
index += result->span.size();
1101+
advance();
1102+
return o;
1103+
}
1104+
}
1105+
return std::nullopt;
1106+
}
1107+
1108+
std::optional<uint32_t> Lexer::takeAlign() {
1109+
if (auto result = keyword(next())) {
1110+
if (result->span.substr(0, 6) != "align="sv) {
1111+
return std::nullopt;
1112+
}
1113+
Lexer subLexer(result->span.substr(6));
1114+
if (auto o = subLexer.takeU32()) {
1115+
index += result->span.size();
1116+
advance();
1117+
return o;
1118+
}
1119+
}
1120+
return std::nullopt;
1121+
}
1122+
10721123
void Lexer::lexToken() {
10731124
// TODO: Ensure we're getting the longest possible match.
10741125
Token tok;
@@ -1080,8 +1131,6 @@ void Lexer::lexToken() {
10801131
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
10811132
} else if (auto t = str(next())) {
10821133
tok = Token{t->span, StringTok{t->str}};
1083-
} else if (auto t = keyword(next())) {
1084-
tok = Token{t->span, KeywordTok{}};
10851134
} else {
10861135
// TODO: Do something about lexing errors.
10871136
curr = std::nullopt;
@@ -1163,10 +1212,6 @@ std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
11631212
return os;
11641213
}
11651214

1166-
std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
1167-
return os << "keyword";
1168-
}
1169-
11701215
std::ostream& operator<<(std::ostream& os, const Token& tok) {
11711216
std::visit([&](const auto& t) { os << t; }, tok.data);
11721217
return os << " \"" << tok.span << "\"";

src/parser/lexer.h

Lines changed: 5 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -87,27 +87,15 @@ struct StringTok {
8787
friend std::ostream& operator<<(std::ostream&, const StringTok&);
8888
};
8989

90-
struct KeywordTok {
91-
bool operator==(const KeywordTok&) const { return true; }
92-
friend std::ostream& operator<<(std::ostream&, const KeywordTok&);
93-
};
94-
9590
struct Token {
96-
using Data = std::variant<IdTok, IntTok, FloatTok, StringTok, KeywordTok>;
91+
using Data = std::variant<IdTok, IntTok, FloatTok, StringTok>;
9792
std::string_view span;
9893
Data data;
9994

10095
// ====================
10196
// Token classification
10297
// ====================
10398

104-
std::optional<std::string_view> getKeyword() const {
105-
if (std::get_if<KeywordTok>(&data)) {
106-
return span;
107-
}
108-
return {};
109-
}
110-
11199
template<typename T> std::optional<T> getU() const;
112100
template<typename T> std::optional<T> getS() const;
113101
template<typename T> std::optional<T> getI() const;
@@ -187,77 +175,15 @@ struct Lexer {
187175
return {};
188176
}
189177

190-
std::optional<std::string_view> takeKeyword() {
191-
if (curr) {
192-
if (auto keyword = curr->getKeyword()) {
193-
advance();
194-
return *keyword;
195-
}
196-
}
197-
return {};
198-
}
178+
std::optional<std::string_view> takeKeyword();
179+
bool takeKeyword(std::string_view expected);
199180

200181
std::optional<std::string_view> peekKeyword() {
201182
return Lexer(*this).takeKeyword();
202183
}
203184

204-
bool takeKeyword(std::string_view expected) {
205-
if (curr) {
206-
if (auto keyword = curr->getKeyword()) {
207-
if (*keyword == expected) {
208-
advance();
209-
return true;
210-
}
211-
}
212-
}
213-
return false;
214-
}
215-
216-
std::optional<uint64_t> takeOffset() {
217-
using namespace std::string_view_literals;
218-
if (curr) {
219-
if (auto keyword = curr->getKeyword()) {
220-
if (keyword->substr(0, 7) != "offset="sv) {
221-
return {};
222-
}
223-
Lexer subLexer(keyword->substr(7));
224-
if (subLexer.empty()) {
225-
return {};
226-
}
227-
if (auto o = subLexer.curr->getU<uint64_t>()) {
228-
subLexer.advance();
229-
if (subLexer.empty()) {
230-
advance();
231-
return o;
232-
}
233-
}
234-
}
235-
}
236-
return std::nullopt;
237-
}
238-
239-
std::optional<uint32_t> takeAlign() {
240-
using namespace std::string_view_literals;
241-
if (curr) {
242-
if (auto keyword = curr->getKeyword()) {
243-
if (keyword->substr(0, 6) != "align="sv) {
244-
return {};
245-
}
246-
Lexer subLexer(keyword->substr(6));
247-
if (subLexer.empty()) {
248-
return {};
249-
}
250-
if (auto a = subLexer.curr->getU<uint32_t>()) {
251-
subLexer.advance();
252-
if (subLexer.empty()) {
253-
advance();
254-
return a;
255-
}
256-
}
257-
}
258-
}
259-
return {};
260-
}
185+
std::optional<uint64_t> takeOffset();
186+
std::optional<uint32_t> takeAlign();
261187

262188
template<typename T> std::optional<T> takeU() {
263189
if (curr) {

0 commit comments

Comments
 (0)