From 86723c6eccf2b35764baf571e5908607ddb63d56 Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Thu, 17 Dec 2015 16:55:29 -0500 Subject: [PATCH 1/3] Be specific about what exceptions can be skipped The Analyzer `#run` loop was rescuing *all* exceptions, which is excessive. I don't think we should hard-fail on all exceptions, either: the Ruby parser gem is definitely known to barf on some valid but esoteric Ruby code, and I wouldn't be surprised if the other parsers had similar edge cases. So this changes behavior to only skip files for a known set of catchable errors. The out-of-process parsers are a little tricky since all you can get from them is an exit code and output streams: for now wrapping those in our own exception class seems reasonable. I'm still catching all exceptions, but only so that we can log a message about which file is impacted before we re-raise. Since a raw exception we'll likely abort on may not contain helpful information about the file that triggered it, this would be helpful for debugging cases. --- lib/cc/engine/analyzers/analyzer_base.rb | 15 +++++++++-- lib/cc/engine/analyzers/javascript/parser.rb | 25 +++++++++++++----- lib/cc/engine/analyzers/parser_error.rb | 7 +++++ lib/cc/engine/analyzers/php/parser.rb | 21 ++++++++++----- lib/cc/engine/analyzers/python/parser.rb | 26 +++++++++++++------ .../engine/analyzers/javascript/main_spec.rb | 10 +++++++ spec/cc/engine/analyzers/php/main_spec.rb | 10 +++++++ spec/cc/engine/analyzers/python/main_spec.rb | 10 +++++++ 8 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 lib/cc/engine/analyzers/parser_error.rb diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 7e111348..87b0bf3f 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -1,16 +1,27 @@ +require "cc/engine/analyzers/parser_error" + module CC module Engine module Analyzers class Base + RESCUABLE_ERRORS = [ + ::CC::Engine::Analyzers::ParserError, + ::Racc::ParseError, + ::Timeout::Error, + ] + def initialize(engine_config:) @engine_config = engine_config end def run(file) process_file(file) + rescue *RESCUABLE_ERRORS => ex + $stderr.puts("Skipping file #{file} due to exception:") + $stderr.puts("(#{ex.class}) #{ex.message} #{ex.backtrace.join("\n")}") rescue => ex - $stderr.puts "Skipping file #{file} due to exception" - $stderr.puts "(#{ex.class}) #{ex.message} #{ex.backtrace.join("\n")}" + $stderr.puts("#{ex.class} error occurred processing file #{file}: aborting.") + raise ex end def files diff --git a/lib/cc/engine/analyzers/javascript/parser.rb b/lib/cc/engine/analyzers/javascript/parser.rb index 62ab6ca3..11788bd4 100644 --- a/lib/cc/engine/analyzers/javascript/parser.rb +++ b/lib/cc/engine/analyzers/javascript/parser.rb @@ -1,4 +1,6 @@ -require 'timeout' +require "open3" +require "timeout" + module CC module Engine @@ -55,14 +57,23 @@ def initialize(command, delegate) def run(input, timeout = DEFAULT_TIMEOUT) Timeout.timeout(timeout) do - IO.popen command, 'r+' do |io| - io.puts input - io.close_write + Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| + stdin.puts input + stdin.close + + exit_code = wait_thr.value + + output = stdout.gets + stdout.close - output = io.gets - io.close + err_output = stderr.gets + stderr.close - yield output if $?.to_i == 0 + if 0 == exit_code + yield output + else + raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" + end end end end diff --git a/lib/cc/engine/analyzers/parser_error.rb b/lib/cc/engine/analyzers/parser_error.rb new file mode 100644 index 00000000..f841c29d --- /dev/null +++ b/lib/cc/engine/analyzers/parser_error.rb @@ -0,0 +1,7 @@ +module CC + module Engine + module Analyzers + ParserError = Class.new(StandardError) + end + end +end diff --git a/lib/cc/engine/analyzers/php/parser.rb b/lib/cc/engine/analyzers/php/parser.rb index 9fd9a5d0..a764e936 100644 --- a/lib/cc/engine/analyzers/php/parser.rb +++ b/lib/cc/engine/analyzers/php/parser.rb @@ -48,14 +48,23 @@ def initialize(command) def run(input, timeout = DEFAULT_TIMEOUT) Timeout.timeout(timeout) do - IO.popen command, 'r+' do |io| - io.puts input - io.close_write + Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| + stdin.puts input + stdin.close - output = io.gets - io.close + exit_code = wait_thr.value - yield output if $?.to_i == 0 + output = stdout.gets + stdout.close + + err_output = stderr.gets + stderr.close + + if 0 == exit_code + yield output + else + raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" + end end end end diff --git a/lib/cc/engine/analyzers/python/parser.rb b/lib/cc/engine/analyzers/python/parser.rb index 737bbbf8..64497b7d 100644 --- a/lib/cc/engine/analyzers/python/parser.rb +++ b/lib/cc/engine/analyzers/python/parser.rb @@ -1,5 +1,6 @@ -require 'timeout' -require 'json' +require "timeout" +require "json" +require "open3" module CC module Engine @@ -41,14 +42,23 @@ def initialize(command, delegate) def run(input, timeout = DEFAULT_TIMEOUT) Timeout.timeout(timeout) do - IO.popen command, "r+" do |io| - io.puts input - io.close_write + Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| + stdin.puts input + stdin.close - output = io.gets - io.close + exit_code = wait_thr.value - yield output if $?.to_i == 0 + output = stdout.gets + stdout.close + + err_output = stderr.gets + stderr.close + + if 0 == exit_code + yield output + else + raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" + end end end end diff --git a/spec/cc/engine/analyzers/javascript/main_spec.rb b/spec/cc/engine/analyzers/javascript/main_spec.rb index 7e79c1c7..c7b7113f 100644 --- a/spec/cc/engine/analyzers/javascript/main_spec.rb +++ b/spec/cc/engine/analyzers/javascript/main_spec.rb @@ -36,6 +36,16 @@ expect(json["content"]["body"]).to match /This issue has a mass of `99`/ expect(json["fingerprint"]).to eq("55ae5d0990647ef496e9e0d315f9727d") end + + it "skips unparsable files" do + create_source_file("foo.js", <<-EOJS) + function () { do(); // missing closing brace + EOJS + + expect { + expect(run_engine(engine_conf)).to eq("") + }.to output(/Skipping file/).to_stderr + end end it "does not flag duplicate comments" do diff --git a/spec/cc/engine/analyzers/php/main_spec.rb b/spec/cc/engine/analyzers/php/main_spec.rb index 33e06340..b27134fa 100644 --- a/spec/cc/engine/analyzers/php/main_spec.rb +++ b/spec/cc/engine/analyzers/php/main_spec.rb @@ -48,6 +48,16 @@ expect(json["content"]["body"]).to match /This issue has a mass of `44`/ expect(json["fingerprint"]).to eq("667da0e2bab866aa2fe9d014a65d57d9") end + + it "skips unparsable files" do + create_source_file("foo.php", <<-EOPHP) + Date: Thu, 17 Dec 2015 17:26:55 -0500 Subject: [PATCH 2/3] refactor: extract CommandLineRunner Each of the out-of-process parser classes implemented its own CommandLineRunner, and they were all functionally the same (with some small differences that weren't actually used). This pulls the class up to one reused class. --- .../engine/analyzers/command_line_runner.rb | 45 +++++++++++++++++++ lib/cc/engine/analyzers/javascript/parser.rb | 45 +------------------ lib/cc/engine/analyzers/php/parser.rb | 38 ++-------------- lib/cc/engine/analyzers/python/parser.rb | 38 +--------------- 4 files changed, 52 insertions(+), 114 deletions(-) create mode 100644 lib/cc/engine/analyzers/command_line_runner.rb diff --git a/lib/cc/engine/analyzers/command_line_runner.rb b/lib/cc/engine/analyzers/command_line_runner.rb new file mode 100644 index 00000000..d75b06bf --- /dev/null +++ b/lib/cc/engine/analyzers/command_line_runner.rb @@ -0,0 +1,45 @@ +require "open3" +require "timeout" + +module CC + module Engine + module Analyzers + class CommandLineRunner + DEFAULT_TIMEOUT = 20 + + def initialize(command, timeout = DEFAULT_TIMEOUT) + @command = command + @timeout = timeout + end + + def run(input) + Timeout.timeout(timeout) do + Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| + stdin.puts input + stdin.close + + exit_code = wait_thr.value + + output = stdout.gets + stdout.close + + err_output = stderr.gets + stderr.close + + if 0 == exit_code + yield output + else + raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" + end + end + end + end + + private + + attr_reader :command, :timeout + end + end + end +end + diff --git a/lib/cc/engine/analyzers/javascript/parser.rb b/lib/cc/engine/analyzers/javascript/parser.rb index 11788bd4..9fe7ace4 100644 --- a/lib/cc/engine/analyzers/javascript/parser.rb +++ b/lib/cc/engine/analyzers/javascript/parser.rb @@ -1,6 +1,4 @@ -require "open3" -require "timeout" - +require "cc/engine/analyzers/command_line_runner" module CC module Engine @@ -15,7 +13,7 @@ def initialize(code, filename) end def parse - runner = CommandLineRunner.new(js_command, self) + runner = CommandLineRunner.new(js_command) runner.run(strip_shebang(code)) do |ast| json_ast = JSON.parse(ast) @syntax_tree = json_ast @@ -39,45 +37,6 @@ def strip_shebang(code) end end end - - class CommandLineRunner - attr_reader :command, :delegate - - DEFAULT_TIMEOUT = 20 - EXCEPTIONS = [ - StandardError, - Timeout::Error, - SystemStackError - ] - - def initialize(command, delegate) - @command = command - @delegate = delegate - end - - def run(input, timeout = DEFAULT_TIMEOUT) - Timeout.timeout(timeout) do - Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| - stdin.puts input - stdin.close - - exit_code = wait_thr.value - - output = stdout.gets - stdout.close - - err_output = stderr.gets - stderr.close - - if 0 == exit_code - yield output - else - raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" - end - end - end - end - end end end end diff --git a/lib/cc/engine/analyzers/php/parser.rb b/lib/cc/engine/analyzers/php/parser.rb index a764e936..989a87a7 100644 --- a/lib/cc/engine/analyzers/php/parser.rb +++ b/lib/cc/engine/analyzers/php/parser.rb @@ -1,5 +1,6 @@ -require 'cc/engine/analyzers/php/ast' -require 'cc/engine/analyzers/php/nodes' +require "cc/engine/analyzers/command_line_runner" +require "cc/engine/analyzers/php/ast" +require "cc/engine/analyzers/php/nodes" module CC module Engine @@ -36,39 +37,6 @@ def parser_path ) end end - - class CommandLineRunner - attr_reader :command, :delegate - - DEFAULT_TIMEOUT = 20 - - def initialize(command) - @command = command - end - - def run(input, timeout = DEFAULT_TIMEOUT) - Timeout.timeout(timeout) do - Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| - stdin.puts input - stdin.close - - exit_code = wait_thr.value - - output = stdout.gets - stdout.close - - err_output = stderr.gets - stderr.close - - if 0 == exit_code - yield output - else - raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" - end - end - end - end - end end end end diff --git a/lib/cc/engine/analyzers/python/parser.rb b/lib/cc/engine/analyzers/python/parser.rb index 64497b7d..37103224 100644 --- a/lib/cc/engine/analyzers/python/parser.rb +++ b/lib/cc/engine/analyzers/python/parser.rb @@ -1,6 +1,6 @@ +require "cc/engine/analyzers/command_line_runner" require "timeout" require "json" -require "open3" module CC module Engine @@ -15,7 +15,7 @@ def initialize(code, filename) end def parse - runner = CommandLineRunner.new(python_command, self) + runner = CommandLineRunner.new(python_command) runner.run(code) do |ast| json_ast = JSON.parse(ast) @syntax_tree = json_ast @@ -29,40 +29,6 @@ def python_command "python #{file}" end end - - class CommandLineRunner - DEFAULT_TIMEOUT = 20 - - attr_reader :command, :delegate - - def initialize(command, delegate) - @command = command - @delegate = delegate - end - - def run(input, timeout = DEFAULT_TIMEOUT) - Timeout.timeout(timeout) do - Open3.popen3 command, "r+" do |stdin, stdout, stderr, wait_thr| - stdin.puts input - stdin.close - - exit_code = wait_thr.value - - output = stdout.gets - stdout.close - - err_output = stderr.gets - stderr.close - - if 0 == exit_code - yield output - else - raise ::CC::Engine::Analyzers::ParserError, "Python parser exited with code #{exit_code}:\n#{err_output}" - end - end - end - end - end end end end From 724e48fbeac9d1c44a4048fe6bc8ace9a48521fe Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Thu, 17 Dec 2015 18:40:08 -0500 Subject: [PATCH 3/3] Timeout errors should be fatal Timeouts are slightly non-deterministic by their nature. So we should consider them fatal. They can still be useful, but should be for truly pathological cases since they are fatal, so I've upped the limit to 5 mintues. --- lib/cc/engine/analyzers/analyzer_base.rb | 1 - lib/cc/engine/analyzers/command_line_runner.rb | 2 +- lib/cc/engine/analyzers/ruby/main.rb | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 87b0bf3f..6860ea69 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -7,7 +7,6 @@ class Base RESCUABLE_ERRORS = [ ::CC::Engine::Analyzers::ParserError, ::Racc::ParseError, - ::Timeout::Error, ] def initialize(engine_config:) diff --git a/lib/cc/engine/analyzers/command_line_runner.rb b/lib/cc/engine/analyzers/command_line_runner.rb index d75b06bf..320dd728 100644 --- a/lib/cc/engine/analyzers/command_line_runner.rb +++ b/lib/cc/engine/analyzers/command_line_runner.rb @@ -5,7 +5,7 @@ module CC module Engine module Analyzers class CommandLineRunner - DEFAULT_TIMEOUT = 20 + DEFAULT_TIMEOUT = 300 def initialize(command, timeout = DEFAULT_TIMEOUT) @command = command diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index 3c6846bc..4e6e6da3 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -19,7 +19,7 @@ class Main < CC::Engine::Analyzers::Base ] DEFAULT_MASS_THRESHOLD = 18 BASE_POINTS = 10_000 - TIMEOUT = 10 + TIMEOUT = 300 private