Generalize fuzz testing tools (#379)

c42f · c42f · commit 81594b9109d9 · 2023-11-11T06:23:27.000+10:00
This rearrangement allows us to fuzz test the hooks (which use the
low-level parser API) as well as the high-level parser API.
diff --git a/Project.toml b/Project.toml
@@ -9,7 +9,8 @@ julia = "1.0"
 [deps]
 
 [extras]
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Test", "Logging"]
diff --git a/test/diagnostics.jl b/test/diagnostics.jl
@@ -227,12 +227,11 @@ end
         \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m"""
 
     if Sys.isunix()
-        mktempdir() do tempdirname
-            cd(tempdirname) do
-                rm(tempdirname)
-                # Test _file_url doesn't fail with nonexistant directories
-                @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl")))
-            end
+        tempdirname = mktempdir()
+        cd(tempdirname) do
+            rm(tempdirname)
+            # Test _file_url doesn't fail with nonexistent directories
+            @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistant__", "test.jl")))
         end
     end
 end
diff --git a/test/fuzz_test.jl b/test/fuzz_test.jl
@@ -1,5 +1,7 @@
 using JuliaSyntax
 using JuliaSyntax: tokenize
+import Logging
+import Test
 
 # Parser fuzz testing tools.
 
@@ -758,6 +760,7 @@ const cutdown_tokens = [
     "\t"
     "\n"
     "x"
+    "β"
     "@"
     ","
     ";"
@@ -884,33 +887,36 @@ const cutdown_tokens = [
 ]
 
 #-------------------------------------------------------------------------------
-
-# The parser should never throw an exception. To test whether this is true,
-# try passing randomly generated bad input data into it.
-function _fuzz_test(bad_input_iter)
-    error_strings = []
-    for str in bad_input_iter
-        try
-            JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true);
-        catch exc
-            !(exc isa InterruptException) || rethrow()
-            rstr = reduce_text(str, parser_throws_exception)
-            @error "Parser threw exception" rstr exception=current_exceptions()
-            push!(error_strings, rstr)
-        end
+# Parsing functions for use with fuzz_test
+
+function try_parseall_failure(str)
+    try
+        JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true);
+        return nothing
+    catch exc
+        !(exc isa InterruptException) || rethrow()
+        rstr = reduce_text(str, parser_throws_exception)
+        @error "Parser threw exception" rstr exception=current_exceptions()
+        return rstr
     end
-    return error_strings
 end
 
-"""
-Fuzz test parser against all tuples of length `N` with elements taken from
-`tokens`.
-"""
-function fuzz_tokens(tokens, N)
-    iter = (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...))
-    _fuzz_test(iter)
+function try_hook_failure(str)
+    try
+        test_logger = Test.TestLogger()
+        Logging.with_logger(test_logger) do
+            Meta_parseall(str)
+        end
+        if !isempty(test_logger.logs)
+            return str
+        end
+    catch exc
+        return str
+    end
+    return nothing
 end
 
+#-------------------------------------------------------------------------------
 """Delete `nlines` adjacent lines from code, at `niters` randomly chosen positions"""
 function delete_lines(lines, nlines, niters)
     selection = trues(length(lines))
@@ -953,29 +959,59 @@ function delete_tokens(code, tokens, ntokens, niters)
 end
 
 #-------------------------------------------------------------------------------
-# Fuzzer functions
+# Generators for "potentially bad input"
+
+"""
+Generate fuzz inputs: all strings formed by joining tuples of length `N` with
+elements taken from `tokens`.
+"""
+function product_token_fuzz(tokens, N)
+    (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...))
+end
 
 """
 Fuzz test parser against randomly generated binary strings
 """
-function fuzz_binary(nbytes, N)
-    bad_strs = _fuzz_test(String(rand(UInt8, nbytes)) for _ in 1:N)
-    reduce_text.(bad_strs, parser_throws_exception)
+function random_binary_fuzz(nbytes, N)
+    (String(rand(UInt8, nbytes)) for _ in 1:N)
 end
 
 """
 Fuzz test by deleting random lines of some given source `code`
 """
-function fuzz_lines(code, N; nlines=10, niters=10)
+function deleted_line_fuzz(code, N; nlines=10, niters=10)
     lines = split(code, '\n')
-    _fuzz_test(delete_lines(lines, nlines, niters) for _=1:N)
+    (delete_lines(lines, nlines, niters) for _=1:N)
 end
 
 """
 Fuzz test by deleting random tokens from given source `code`
 """
-function fuzz_tokens(code, N; ntokens=10, niters=10)
+function deleted_token_fuzz(code, N; ntokens=10, niters=10)
     ts = tokenize(code)
-    _fuzz_test(delete_tokens(code, ts, ntokens, niters) for _=1:N)
+    (delete_tokens(code, ts, ntokens, niters) for _=1:N)
 end
 
+"""
+Fuzz test a parsing function by trying it with many "bad" input strings.
+
+`try_parsefail` should return `nothing` when the parser succeeds, and return a
+string (or reduced string) when parsing fails.
+"""
+function fuzz_test(try_parsefail::Function, bad_input_iter)
+    error_strings = []
+    for str in bad_input_iter
+        res = try_parsefail(str)
+        if !isnothing(res)
+            push!(error_strings, res)
+        end
+    end
+    return error_strings
+end
+
+
+# Examples
+#
+# fuzz_test(try_hook_failure, product_token_fuzz(cutdown_tokens, 2))
+# fuzz_test(try_parseall_failure, product_token_fuzz(cutdown_tokens, 2))
+
diff --git a/test/test_utils.jl b/test/test_utils.jl
@@ -422,3 +422,51 @@ function parse_sexpr(code)
 end
 
 
+#-------------------------------------------------------------------------------
+# Tools copied from Base.Meta which call core_parser_hook as if called by
+# Meta.parse(), but without installing the global hook.
+
+function _Meta_parse_string(text::AbstractString, filename::AbstractString,
+                            lineno::Integer, index::Integer, options)
+    if index < 1 || index > ncodeunits(text) + 1
+        throw(BoundsError(text, index))
+    end
+    ex, offset::Int = JuliaSyntax.core_parser_hook(text, filename, lineno, index-1, options)
+    ex, offset+1
+end
+
+function Meta_parse(str::AbstractString, pos::Integer;
+               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true)
+    ex, pos = _Meta_parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom)
+    if raise && Meta.isexpr(ex, :error)
+        err = ex.args[1]
+        if err isa String
+            err = Meta.ParseError(err) # For flisp parser
+        end
+        throw(err)
+    end
+    return ex, pos
+end
+
+function Meta_parse(str::AbstractString;
+                    filename="none", raise::Bool=true, depwarn::Bool=true)
+    ex, pos = Meta_parse(str, 1; filename=filename, greedy=true, raise=raise, depwarn=depwarn)
+    if Meta.isexpr(ex, :error)
+        return ex
+    end
+    if pos <= ncodeunits(str)
+        raise && throw(Meta.ParseError("extra token after end of expression"))
+        return Expr(:error, "extra token after end of expression")
+    end
+    return ex
+end
+
+function Meta_parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1)
+    return _Meta_parse_string(text, String(filename), lineno, pos, :atom)
+end
+
+function Meta_parseall(text::AbstractString; filename="none", lineno=1)
+    ex,_ = _Meta_parse_string(text, String(filename), lineno, 1, :all)
+    return ex
+end
+