Skip to content

Remove unnecessary methods/fields of Rust::Lexer #2347

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 50 additions & 78 deletions gcc/rust/lex/rust-lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ Lexer::build_token ()
Location loc = get_current_location ();

current_char = peek_input ();
current_char32 = peek_codepoint_input ();
skip_codepoint_input ();
skip_input ();

// detect shebang
// Must be the first thing on the first line, starting with #!
Expand Down Expand Up @@ -1080,7 +1079,7 @@ Lexer::build_token ()
}

// find identifiers and keywords.
if (is_identifier_start (current_char32.value))
if (is_identifier_start (current_char.value))
return parse_identifier_or_keyword (loc);

// int and float literals
Expand Down Expand Up @@ -1985,59 +1984,56 @@ Lexer::skip_broken_string_input (Codepoint current_char)
current_column);
}

// Parses a unicode string.
// Parses a string.
TokenPtr
Lexer::parse_string (Location loc)
{
Codepoint current_char32;

std::string str;
str.reserve (16); // some sensible default

int length = 1;
current_char32 = peek_codepoint_input ();
current_char = peek_input ();

// FIXME: This fails if the input ends. How do we check for EOF?
while (current_char32.value != '"' && !current_char32.is_eof ())
while (current_char.value != '"' && !current_char.is_eof ())
{
if (current_char32.value == '\\')
if (current_char.value == '\\')
{
// parse escape
auto utf8_escape_pair = parse_utf8_escape ();
current_char32 = std::get<0> (utf8_escape_pair);
current_char = std::get<0> (utf8_escape_pair);

if (current_char32 == Codepoint (0) && std::get<2> (utf8_escape_pair))
if (current_char == Codepoint (0) && std::get<2> (utf8_escape_pair))
length = std::get<1> (utf8_escape_pair) - 1;
else
length += std::get<1> (utf8_escape_pair);

if (current_char32 != Codepoint (0)
|| !std::get<2> (utf8_escape_pair))
str += current_char32;

// required as parsing utf8 escape only changes current_char
current_char32 = peek_codepoint_input ();
if (current_char != Codepoint (0) || !std::get<2> (utf8_escape_pair))
str += current_char.as_string ();

// FIXME: should remove this but can't.
// `parse_utf8_escape` does not update `current_char` correctly.
current_char = peek_input ();
Comment on lines +2014 to +2016
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line should be removed, but it can't be.
parse_utf8_escape() does not seem to update current_char properly.

continue;
}

length += get_input_codepoint_length ();
length++;

str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char;
skip_input ();
current_char = peek_input ();
}

current_column += length;

if (current_char32.value == '"')
if (current_char.value == '"')
{
current_column++;

skip_input ();
current_char = peek_input ();
}
else if (current_char32.is_eof ())
else if (current_char.is_eof ())
{
rust_error_at (get_current_location (), "unended string literal");
return Token::make (END_OF_FILE, get_current_location ());
Expand All @@ -2059,22 +2055,22 @@ Lexer::parse_identifier_or_keyword (Location loc)
{
std::string str;
str.reserve (16); // default
str += current_char32.as_string ();
str += current_char.as_string ();

bool first_is_underscore = current_char == '_';

int length = 1;
current_char32 = peek_codepoint_input ();
current_char = peek_input ();

// loop through entire name
while (is_identifier_continue (current_char32.value))
while (is_identifier_continue (current_char.value))
{
auto s = current_char32.as_string ();
auto s = current_char.as_string ();
length++;

str += current_char32.as_string ();
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
}

current_column += length;
Expand Down Expand Up @@ -2128,11 +2124,11 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)

length++;
skip_input ();
Codepoint current_char32 = peek_codepoint_input ();
current_char = peek_input ();

while (!current_char32.is_eof ())
while (!current_char.is_eof ())
{
if (current_char32.value == '"')
if (current_char.value == '"')
{
bool enough_hashes = true;

Expand All @@ -2157,9 +2153,9 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)

length++;

str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
}

current_column += length;
Expand Down Expand Up @@ -2411,29 +2407,27 @@ Lexer::parse_decimal_int_or_float (Location loc)
TokenPtr
Lexer::parse_char_or_lifetime (Location loc)
{
Codepoint current_char32;

int length = 1;

current_char32 = peek_codepoint_input ();
if (current_char32.is_eof ())
current_char = peek_input ();
if (current_char.is_eof ())
return nullptr;

// parse escaped char literal
if (current_char32.value == '\\')
if (current_char.value == '\\')
{
// parse escape
auto utf8_escape_pair = parse_utf8_escape ();
current_char32 = std::get<0> (utf8_escape_pair);
Codepoint escaped_char = std::get<0> (utf8_escape_pair);
length += std::get<1> (utf8_escape_pair);

if (peek_codepoint_input ().value != '\'')
if (peek_input ().value != '\'')
{
rust_error_at (get_current_location (), "unended character literal");
}
else
{
skip_codepoint_input ();
skip_input ();
current_char = peek_input ();
length++;
}
Expand All @@ -2442,15 +2436,16 @@ Lexer::parse_char_or_lifetime (Location loc)

loc += length - 1;

return Token::make_char (loc, current_char32);
return Token::make_char (loc, escaped_char);
}
else
{
skip_codepoint_input ();
skip_input ();

if (peek_codepoint_input ().value == '\'')
if (peek_input ().value == '\'')
{
// parse non-escaped char literal
Codepoint non_escaped_char = current_char;

// skip the ' character
skip_input ();
Expand All @@ -2461,21 +2456,21 @@ Lexer::parse_char_or_lifetime (Location loc)

loc += 2;

return Token::make_char (loc, current_char32);
return Token::make_char (loc, non_escaped_char);
}
else if (is_identifier_start (current_char32.value))
else if (is_identifier_start (current_char.value))
{
// parse lifetime name
std::string str;
str += current_char32;
str += current_char.as_string ();
length++;

current_char32 = peek_codepoint_input ();
while (is_identifier_continue (current_char32.value))
current_char = peek_input ();
while (is_identifier_continue (current_char.value))
{
str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
length++;
}

Expand All @@ -2499,29 +2494,6 @@ Lexer::parse_char_or_lifetime (Location loc)
}
}

// TODO remove this function
// Returns the length of the codepoint at the current position.
// The body does not inspect the input: it unconditionally reports a length
// of 1.
int
Lexer::get_input_codepoint_length ()
{
return 1;
}

// TODO remove this function
// Returns the codepoint at the current position.
// Pure forwarder: the result is exactly whatever peek_input () returns.
Codepoint
Lexer::peek_codepoint_input ()
{
return peek_input ();
}

// TODO remove this function
// Pure forwarder: does nothing beyond calling skip_input ().
void
Lexer::skip_codepoint_input ()
{
skip_input ();
}

void
Lexer::split_current_token (TokenId new_left, TokenId new_right)
{
Expand Down
5 changes: 0 additions & 5 deletions gcc/rust/lex/rust-lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,6 @@ class Lexer
std::pair<long, int> parse_partial_hex_escape ();
std::pair<Codepoint, int> parse_partial_unicode_escape ();

int get_input_codepoint_length ();
// Peeks the current utf-8 char
Codepoint peek_codepoint_input ();
void skip_codepoint_input ();
Comment on lines -139 to -142
Copy link
Contributor Author

@tamaroning tamaroning Jun 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions are unnecessary, so they are deleted in this PR (see #2307).

void skip_broken_string_input (Codepoint current_char);

TokenPtr parse_byte_char (Location loc);
Expand Down Expand Up @@ -393,7 +389,6 @@ class Lexer
int current_column;
// Current character.
Codepoint current_char;
Codepoint current_char32;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field was added to parse Unicode identifiers via #2284 but is unnecessary now.

// Line map.
Linemap *line_map;

Expand Down