Skip to content

Commit fb535c3

Browse files
committed
[Parser] Do not eagerly lex strings
Lex them on demand instead to avoid wasted work.
1 parent cb497b7 commit fb535c3

File tree

2 files changed

+21
-49
lines changed

2 files changed

+21
-49
lines changed

src/parser/lexer.cpp

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,17 +1004,6 @@ std::optional<float> Token::getF32() const {
10041004
return {};
10051005
}
10061006

1007-
std::optional<std::string_view> Token::getString() const {
1008-
if (auto* tok = std::get_if<StringTok>(&data)) {
1009-
if (tok->str) {
1010-
return std::string_view(*tok->str);
1011-
}
1012-
// Remove quotes.
1013-
return span.substr(1, span.size() - 2);
1014-
}
1015-
return {};
1016-
}
1017-
10181007
void Lexer::skipSpace() {
10191008
while (true) {
10201009
if (auto ctx = annotation(next())) {
@@ -1054,6 +1043,22 @@ bool Lexer::takeRParen() {
10541043
return false;
10551044
}
10561045

1046+
std::optional<std::string> Lexer::takeString() {
1047+
if (curr) {
1048+
return std::nullopt;
1049+
}
1050+
if (auto result = str(next())) {
1051+
index += result->span.size();
1052+
advance();
1053+
if (result->str) {
1054+
return result->str;
1055+
}
1056+
// Remove quotes.
1057+
return std::string(result->span.substr(1, result->span.size() - 2));
1058+
}
1059+
return std::nullopt;
1060+
}
1061+
10571062
std::optional<Name> Lexer::takeID() {
10581063
if (curr) {
10591064
return std::nullopt;
@@ -1132,8 +1137,6 @@ void Lexer::lexToken() {
11321137
tok = Token{t->span, IntTok{t->n, t->sign}};
11331138
} else if (auto t = float_(next())) {
11341139
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
1135-
} else if (auto t = str(next())) {
1136-
tok = Token{t->span, StringTok{t->str}};
11371140
} else {
11381141
// TODO: Do something about lexing errors.
11391142
curr = std::nullopt;
@@ -1204,15 +1207,6 @@ std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
12041207
return os << tok.d;
12051208
}
12061209

1207-
std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
1208-
if (tok.str) {
1209-
os << '"' << *tok.str << '"';
1210-
} else {
1211-
os << "(raw string)";
1212-
}
1213-
return os;
1214-
}
1215-
12161210
std::ostream& operator<<(std::ostream& os, const Token& tok) {
12171211
std::visit([&](const auto& t) { os << t; }, tok.data);
12181212
return os << " \"" << tok.span << "\"";

src/parser/lexer.h

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,8 @@ struct FloatTok {
6868
friend std::ostream& operator<<(std::ostream&, const FloatTok&);
6969
};
7070

71-
struct StringTok {
72-
// If the string contains escapes, this is its contents.
73-
std::optional<std::string> str;
74-
75-
bool operator==(const StringTok& other) const { return str == other.str; }
76-
friend std::ostream& operator<<(std::ostream&, const StringTok&);
77-
};
78-
7971
struct Token {
80-
using Data = std::variant<IntTok, FloatTok, StringTok>;
72+
using Data = std::variant<IntTok, FloatTok>;
8173
std::string_view span;
8274
Data data;
8375

@@ -90,7 +82,6 @@ struct Token {
9082
template<typename T> std::optional<T> getI() const;
9183
std::optional<double> getF64() const;
9284
std::optional<float> getF32() const;
93-
std::optional<std::string_view> getString() const;
9485

9586
bool operator==(const Token&) const;
9687
friend std::ostream& operator<<(std::ostream& os, const Token&);
@@ -218,27 +209,14 @@ struct Lexer {
218209
return std::nullopt;
219210
}
220211

221-
std::optional<std::string> takeString() {
222-
if (curr) {
223-
if (auto s = curr->getString()) {
224-
std::string ret(*s);
225-
advance();
226-
return ret;
227-
}
228-
}
229-
return {};
230-
}
212+
std::optional<std::string> takeString();
231213

232214
std::optional<Name> takeName() {
233-
// TODO: Move this to lexer and validate UTF.
215+
// TODO: Validate UTF.
234216
if (auto str = takeString()) {
235-
// Copy to a std::string to make sure we have a null terminator, otherwise
236-
// the `Name` constructor won't work correctly.
237-
// TODO: Update `Name` to use string_view instead of char* and/or to take
238-
// rvalue strings to avoid this extra copy.
239-
return Name(std::string(*str));
217+
return Name(*str);
240218
}
241-
return {};
219+
return std::nullopt;
242220
}
243221

244222
bool takeSExprStart(std::string_view expected) {

0 commit comments

Comments
 (0)