From 5772f60de6398db6d82d8a59418bd30aede6b3cb Mon Sep 17 00:00:00 2001 From: nick evans Date: Thu, 6 Jun 2024 13:41:53 -0400 Subject: [PATCH 1/3] Drop reimplementation of Ripper lex state This code was for ruby 2.4 compatibility, but rdoc dropped support for ruby 2.4 about three years ago, in f480b970c. This code was almost half of the lines of code in rdoc/parser/ripper_state_lex. --- lib/rdoc/parser/ripper_state_lex.rb | 286 +--------------------------- 1 file changed, 1 insertion(+), 285 deletions(-) diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb index f6cefd0305..928f2011ed 100644 --- a/lib/rdoc/parser/ripper_state_lex.rb +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -7,9 +7,6 @@ class RDoc::Parser::RipperStateLex # :stopdoc: - # TODO: Remove this constants after Ruby 2.4 EOL - RIPPER_HAS_LEX_STATE = Ripper::Filter.method_defined?(:state) - Token = Struct.new(:line_no, :char_no, :kind, :text, :state) EXPR_NONE = 0 @@ -31,284 +28,6 @@ class RDoc::Parser::RipperStateLex EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG) EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) - class InnerStateLex < Ripper::Filter - attr_accessor :lex_state - - def initialize(code) - @lex_state = EXPR_BEG - @in_fname = false - @continue = false - reset - super(code) - end - - def reset - @command_start = false - @cmd_state = @command_start - end - - def on_nl(tok, data) - case @lex_state - when EXPR_FNAME, EXPR_DOT - @continue = true - else - @continue = false - @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - end - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_ignored_nl(tok, data) - case @lex_state - when EXPR_FNAME, EXPR_DOT - @continue = true - else - @continue = false - @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - end - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_op(tok, data) - case tok - when '&', '|', '!', '!=', '!~' - case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - when '<<' - # TODO next token? - case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - when '?' - @lex_state = EXPR_BEG - when '&&', '||', '+=', '-=', '*=', '**=', - '&=', '|=', '^=', '<<=', '>>=', '||=', '&&=' - @lex_state = EXPR_BEG - when '::' - case @lex_state - when EXPR_ARG, EXPR_CMDARG - @lex_state = EXPR_DOT - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - else - case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - end - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_kw(tok, data) - case tok - when 'class' - @lex_state = EXPR_CLASS - @in_fname = true - when 'def' - @lex_state = EXPR_FNAME - @continue = true - @in_fname = true - when 'if', 'unless', 'while', 'until' - if ((EXPR_MID | EXPR_END | EXPR_ENDARG | EXPR_ENDFN | EXPR_ARG | EXPR_CMDARG) & @lex_state) != 0 # postfix if - @lex_state = EXPR_BEG | EXPR_LABEL - else - @lex_state = EXPR_BEG - end - when 'begin', 'case', 'when' - @lex_state = EXPR_BEG - when 'return', 'break' - @lex_state = EXPR_MID - else - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END - else - @lex_state = EXPR_END - end - end - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_tstring_beg(tok, data) - @lex_state = EXPR_BEG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_tstring_end(tok, data) - @lex_state = EXPR_END | EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_CHAR(tok, data) - @lex_state = EXPR_END - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_period(tok, data) - @lex_state = EXPR_DOT - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_int(tok, data) - @lex_state = EXPR_END | EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_float(tok, data) - @lex_state = EXPR_END | EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_rational(tok, data) - @lex_state = EXPR_END | EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_imaginary(tok, data) - @lex_state = EXPR_END | EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_symbeg(tok, data) - @lex_state = EXPR_FNAME - @continue = true - @in_fname = true - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - private def on_variables(event, tok, data) - if @in_fname - @lex_state = EXPR_ENDFN - @in_fname = false - @continue = false - elsif @continue - case @lex_state - when EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_ENDFN - @continue = false - end - else - @lex_state = EXPR_CMDARG - end - data << Token.new(lineno, column, event, tok, @lex_state) - end - - def on_ident(tok, data) - on_variables(__method__, tok, data) - end - - def on_ivar(tok, data) - @lex_state = EXPR_END - on_variables(__method__, tok, data) - end - - def on_cvar(tok, data) - @lex_state = EXPR_END - on_variables(__method__, tok, data) - end - - def on_gvar(tok, data) - @lex_state = EXPR_END - on_variables(__method__, tok, data) - end - - def on_backref(tok, data) - @lex_state = EXPR_END - on_variables(__method__, tok, data) - end - - def on_lparen(tok, data) - @lex_state = EXPR_LABEL | EXPR_BEG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_rparen(tok, data) - @lex_state = EXPR_ENDFN - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_lbrace(tok, data) - @lex_state = EXPR_LABEL | EXPR_BEG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_rbrace(tok, data) - @lex_state = EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_lbracket(tok, data) - @lex_state = EXPR_LABEL | EXPR_BEG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_rbracket(tok, data) - @lex_state = EXPR_ENDARG - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_const(tok, data) - case @lex_state - when EXPR_FNAME - @lex_state = EXPR_ENDFN - when EXPR_CLASS, EXPR_CMDARG, EXPR_MID - @lex_state = EXPR_ARG - else - @lex_state = EXPR_CMDARG - end - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_sp(tok, data) - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_comma(tok, data) - @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0 - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_comment(tok, data) - @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_ignored_sp(tok, data) - @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - data << Token.new(lineno, column, __method__, tok, @lex_state) - end - - def on_heredoc_beg(tok, data) - data << Token.new(lineno, column, __method__, tok, @lex_state) - @lex_state = EXPR_END - data - end - - def on_heredoc_end(tok, data) - data << Token.new(lineno, column, __method__, tok, @lex_state) - @lex_state = EXPR_BEG - data - end - - def on_default(event, tok, data) - reset - data << Token.new(lineno, column, event, tok, @lex_state) - end - end unless RIPPER_HAS_LEX_STATE - class InnerStateLex < Ripper::Filter def initialize(code) super(code) @@ -317,7 +36,7 @@ def initialize(code) def on_default(event, tok, data) data << Token.new(lineno, column, event, tok, state) end - end if RIPPER_HAS_LEX_STATE + end def get_squashed_tk if @buf.empty? @@ -333,7 +52,6 @@ def get_squashed_tk tk = get_string_tk(tk) when :on_backtick then if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0 - @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE tk[:kind] = :on_ident tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG else @@ -345,7 +63,6 @@ def get_squashed_tk tk = get_embdoc_tk(tk) when :on_heredoc_beg then @heredoc_queue << retrieve_heredoc_info(tk) - @inner_lex.lex_state = EXPR_END unless RIPPER_HAS_LEX_STATE when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then if !@heredoc_queue.empty? get_heredoc_tk(*@heredoc_queue.shift) @@ -549,7 +266,6 @@ def get_squashed_tk private def get_op_tk(tk) redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~] if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then - @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG tk[:kind] = :on_ident elsif tk[:text] =~ /^[-+]$/ then From 59962557f45c4916415888dbdcbc92da1e32169e Mon Sep 17 00:00:00 2001 From: nick evans Date: Fri, 7 Jun 2024 10:54:13 -0400 Subject: [PATCH 2/3] Remove unused Ripper constants and const_defined? This was mostly copied from the diff in @st0012's PR comment. The remaining constants have been updated to get their value directly from Ripper. Co-authored-by: Stan Lo --- lib/rdoc/parser/ripper_state_lex.rb | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb index 928f2011ed..fe6320445a 100644 --- a/lib/rdoc/parser/ripper_state_lex.rb +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -9,24 +9,11 @@ class RDoc::Parser::RipperStateLex Token = Struct.new(:line_no, :char_no, :kind, :text, :state) - EXPR_NONE = 0 - EXPR_BEG = 1 - EXPR_END = 2 - EXPR_ENDARG = 4 - EXPR_ENDFN = 8 - EXPR_ARG = 16 - EXPR_CMDARG = 32 - EXPR_MID = 64 - EXPR_FNAME = 128 - EXPR_DOT = 256 - EXPR_CLASS = 512 - EXPR_LABEL = 1024 - EXPR_LABELED = 2048 - EXPR_FITEM = 4096 - EXPR_VALUE = EXPR_BEG - EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS) - EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG) - EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) + EXPR_END = Ripper::EXPR_END + EXPR_ENDFN = Ripper::EXPR_ENDFN + EXPR_ARG = Ripper::EXPR_ARG + EXPR_FNAME = Ripper::EXPR_FNAME + EXPR_LABEL = Ripper::EXPR_LABEL class InnerStateLex < Ripper::Filter def initialize(code) @@ -53,7 +40,7 @@ def get_squashed_tk when :on_backtick then if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0 tk[:kind] = :on_ident - tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + tk[:state] = Ripper::Lexer::State.new(EXPR_ARG) else tk = get_string_tk(tk) end @@ -266,7 +253,7 @@ def get_squashed_tk private def get_op_tk(tk) redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~] if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then - tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + tk[:state] = Ripper::Lexer::State.new(EXPR_ARG) tk[:kind] = :on_ident elsif tk[:text] =~ /^[-+]$/ then tk_ahead = get_squashed_tk From deefe9a22e150f109df9ce6abd9b62bf99bbc204 Mon Sep 17 00:00:00 2001 From: nick evans Date: Fri, 7 Jun 2024 15:01:17 -0400 Subject: [PATCH 3/3] Use Ripper::EXPR_LABEL directly Since this is only used from outside RipperStateLex, there's no longer any benefit to using the indirect reference rather than just going straight to Ripper. --- lib/rdoc/parser/ripper_state_lex.rb | 1 - lib/rdoc/parser/ruby.rb | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb index fe6320445a..2212906bbd 100644 --- a/lib/rdoc/parser/ripper_state_lex.rb +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -13,7 +13,6 @@ class RDoc::Parser::RipperStateLex EXPR_ENDFN = Ripper::EXPR_ENDFN EXPR_ARG = Ripper::EXPR_ARG EXPR_FNAME = Ripper::EXPR_FNAME - EXPR_LABEL = Ripper::EXPR_LABEL class InnerStateLex < Ripper::Filter def initialize(code) diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index 85f1cd0391..47ad770daf 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -513,7 +513,7 @@ def get_included_module_with_optional_parens when :on_comment, :on_embdoc then @read.pop if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and - (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + (!continue or (tk[:state] & Ripper::EXPR_LABEL) != 0) then break if !continue and nest <= 0 end when :on_comma then @@ -526,7 +526,7 @@ def get_included_module_with_optional_parens nest += 1 when 'if', 'unless', 'while', 'until', 'rescue' # postfix if/unless/while/until/rescue must be EXPR_LABEL - nest += 1 unless (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0 + nest += 1 unless (tk[:state] & Ripper::EXPR_LABEL) != 0 when 'end' nest -= 1 break if nest == 0 @@ -1041,7 +1041,7 @@ def parse_constant_body container, constant, is_array_or_hash # :nodoc: elsif (:on_kw == tk[:kind] && 'def' == tk[:text]) then nest += 1 elsif (:on_kw == tk[:kind] && %w{do if unless case begin}.include?(tk[:text])) then - if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + if (tk[:state] & Ripper::EXPR_LABEL) == 0 nest += 1 end elsif [:on_rparen, :on_rbrace, :on_rbracket].include?(tk[:kind]) || @@ -1662,7 +1662,7 @@ def parse_method_or_yield_parameters(method = nil, when :on_comment, :on_embdoc then @read.pop if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and - (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + (!continue or (tk[:state] & Ripper::EXPR_LABEL) != 0) then if method && method.block_params.nil? then unget_tk tk read_documentation_modifiers method, modifiers @@ -1882,7 +1882,7 @@ def parse_statements(container, single = NORMAL, current_method = nil, end when 'until', 'while' then - if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + if (tk[:state] & Ripper::EXPR_LABEL) == 0 nest += 1 skip_optional_do_after_expression end @@ -1898,7 +1898,7 @@ def parse_statements(container, single = NORMAL, current_method = nil, skip_optional_do_after_expression when 'case', 'do', 'if', 'unless', 'begin' then - if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + if (tk[:state] & Ripper::EXPR_LABEL) == 0 nest += 1 end