Add Error format support, and JSON output option (#11396)

tusharsadhwani · tushar-deepsource · web-flow · commit 35fbd2a852be · 2024-05-10T16:41:02.000-07:00
### Description Resolves #10816 The changes this PR makes are relatively small. It currently: - Adds an `--output` option to mypy CLI - Adds a `ErrorFormatter` abstract base class, which can be subclassed to create new output formats - Adds a `MypyError` class that represents the external format of a mypy error. - Adds a check for `--output` being `'json'`, in which case the `JSONFormatter` is used to produce the reported output. #### Demo: ```console $ mypy mytest.py mytest.py:2: error: Incompatible types in assignment (expression has type "str", variable has type "int") mytest.py:3: error: Name "z" is not defined Found 2 errors in 1 file (checked 1 source file) $ mypy mytest.py --output=json {"file": "mytest.py", "line": 2, "column": 4, "severity": "error", "message": "Incompatible types in assignment (expression has type \"str\", variable has type \"int\")", "code": "assignment"} {"file": "mytest.py", "line": 3, "column": 4, "severity": "error", "message": "Name \"z\" is not defined", "code": "name-defined"} ``` --- A few notes regarding the changes: - I chose to re-use the intermediate `ErrorTuple`s created during error reporting, instead of using the more general `ErrorInfo` class, because a lot of machinery already exists in mypy for sorting and removing duplicate error reports, which produces `ErrorTuple`s at the end. The error sorting and duplicate removal logic could perhaps be separated out from the rest of the code, to be able to use `ErrorInfo` objects more freely. - `ErrorFormatter` doesn't really need to be an abstract class, but I think it would be better this way. If there's a different method that would be preferred, I'd be happy to know. - The `--output` CLI option is, most probably, not added in the correct place. Any help in how to do it properly would be appreciated, the mypy option parsing code seems very complex. - The ability to add custom output formats can be simply added by subclassing the `ErrorFormatter` class inside a mypy plugin, and adding a `name` field to the formatters. The mypy runtime can then check through the `__subclasses__` of the formatter and determine if such a formatter is present. The "checking for the `name` field" part of this code might be appropriate to add within this PR itself, instead of hard-coding `JSONFormatter`. Does that sound like a good idea? --------- Co-authored-by: Tushar Sadhwani <86737547+tushar-deepsource@users.noreply.github.com> Co-authored-by: Tushar Sadhwani <tushar@deepsource.io>
diff --git a/mypy/build.py b/mypy/build.py
@@ -44,6 +44,7 @@
 
 import mypy.semanal_main
 from mypy.checker import TypeChecker
+from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
 from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
 from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
 from mypy.indirection import TypeIndirectionVisitor
@@ -253,6 +254,7 @@ def _build(
         plugin=plugin,
         plugins_snapshot=snapshot,
         errors=errors,
+        error_formatter=None if options.output is None else OUTPUT_CHOICES.get(options.output),
         flush_errors=flush_errors,
         fscache=fscache,
         stdout=stdout,
@@ -607,6 +609,7 @@ def __init__(
         fscache: FileSystemCache,
         stdout: TextIO,
         stderr: TextIO,
+        error_formatter: ErrorFormatter | None = None,
     ) -> None:
         self.stats: dict[str, Any] = {}  # Values are ints or floats
         self.stdout = stdout
@@ -615,6 +618,7 @@ def __init__(
         self.data_dir = data_dir
         self.errors = errors
         self.errors.set_ignore_prefix(ignore_prefix)
+        self.error_formatter = error_formatter
         self.search_paths = search_paths
         self.source_set = source_set
         self.reports = reports
@@ -3463,11 +3467,8 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> No
         for id in stale:
             graph[id].transitive_error = True
     for id in stale:
-        manager.flush_errors(
-            manager.errors.simplify_path(graph[id].xpath),
-            manager.errors.file_messages(graph[id].xpath),
-            False,
-        )
+        errors = manager.errors.file_messages(graph[id].xpath, formatter=manager.error_formatter)
+        manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
         graph[id].write_cache()
         graph[id].mark_as_rechecked()
 
diff --git a/mypy/error_formatter.py b/mypy/error_formatter.py
@@ -0,0 +1,37 @@
+"""Defines the different custom formats in which mypy can output."""
+
+import json
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mypy.errors import MypyError
+
+
+class ErrorFormatter(ABC):
+    """Base class to define how errors are formatted before being printed."""
+
+    @abstractmethod
+    def report_error(self, error: "MypyError") -> str:
+        raise NotImplementedError
+
+
+class JSONFormatter(ErrorFormatter):
+    """Formatter for basic JSON output format."""
+
+    def report_error(self, error: "MypyError") -> str:
+        """Prints out the errors as simple, static JSON lines."""
+        return json.dumps(
+            {
+                "file": error.file_path,
+                "line": error.line,
+                "column": error.column,
+                "message": error.message,
+                "hint": None if len(error.hints) == 0 else "\n".join(error.hints),
+                "code": None if error.errorcode is None else error.errorcode.code,
+                "severity": error.severity,
+            }
+        )
+
+
+OUTPUT_CHOICES = {"json": JSONFormatter()}
diff --git a/mypy/errors.py b/mypy/errors.py
@@ -8,6 +8,7 @@
 from typing_extensions import Literal, TypeAlias as _TypeAlias
 
 from mypy import errorcodes as codes
+from mypy.error_formatter import ErrorFormatter
 from mypy.errorcodes import IMPORT, IMPORT_NOT_FOUND, IMPORT_UNTYPED, ErrorCode, mypy_error_codes
 from mypy.message_registry import ErrorMessage
 from mypy.options import Options
@@ -834,7 +835,7 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn:
         )
 
     def format_messages(
-        self, error_info: list[ErrorInfo], source_lines: list[str] | None
+        self, error_tuples: list[ErrorTuple], source_lines: list[str] | None
     ) -> list[str]:
         """Return a string list that represents the error messages.
 
@@ -843,9 +844,6 @@ def format_messages(
         severity 'error').
         """
         a: list[str] = []
-        error_info = [info for info in error_info if not info.hidden]
-        errors = self.render_messages(self.sort_messages(error_info))
-        errors = self.remove_duplicates(errors)
         for (
             file,
             line,
@@ -856,7 +854,7 @@ def format_messages(
             message,
             allow_dups,
             code,
-        ) in errors:
+        ) in error_tuples:
             s = ""
             if file is not None:
                 if self.options.show_column_numbers and line >= 0 and column >= 0:
@@ -901,18 +899,28 @@ def format_messages(
                     a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker)
         return a
 
-    def file_messages(self, path: str) -> list[str]:
+    def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> list[str]:
         """Return a string list of new error messages from a given file.
 
         Use a form suitable for displaying to the user.
         """
         if path not in self.error_info_map:
             return []
+
+        error_info = self.error_info_map[path]
+        error_info = [info for info in error_info if not info.hidden]
+        error_tuples = self.render_messages(self.sort_messages(error_info))
+        error_tuples = self.remove_duplicates(error_tuples)
+
+        if formatter is not None:
+            errors = create_errors(error_tuples)
+            return [formatter.report_error(err) for err in errors]
+
         self.flushed_files.add(path)
         source_lines = None
         if self.options.pretty and self.read_source:
             source_lines = self.read_source(path)
-        return self.format_messages(self.error_info_map[path], source_lines)
+        return self.format_messages(error_tuples, source_lines)
 
     def new_messages(self) -> list[str]:
         """Return a string list of new error messages.
@@ -1278,3 +1286,56 @@ def report_internal_error(
     # Exit.  The caller has nothing more to say.
     # We use exit code 2 to signal that this is no ordinary error.
     raise SystemExit(2)
+
+
+class MypyError:
+    def __init__(
+        self,
+        file_path: str,
+        line: int,
+        column: int,
+        message: str,
+        errorcode: ErrorCode | None,
+        severity: Literal["error", "note"],
+    ) -> None:
+        self.file_path = file_path
+        self.line = line
+        self.column = column
+        self.message = message
+        self.errorcode = errorcode
+        self.severity = severity
+        self.hints: list[str] = []
+
+
+# (file_path, line, column)
+_ErrorLocation = Tuple[str, int, int]
+
+
+def create_errors(error_tuples: list[ErrorTuple]) -> list[MypyError]:
+    errors: list[MypyError] = []
+    latest_error_at_location: dict[_ErrorLocation, MypyError] = {}
+
+    for error_tuple in error_tuples:
+        file_path, line, column, _, _, severity, message, _, errorcode = error_tuple
+        if file_path is None:
+            continue
+
+        assert severity in ("error", "note")
+        if severity == "note":
+            error_location = (file_path, line, column)
+            error = latest_error_at_location.get(error_location)
+            if error is None:
+                # This is purely a note, with no error correlated to it
+                error = MypyError(file_path, line, column, message, errorcode, severity="note")
+                errors.append(error)
+                continue
+
+            error.hints.append(message)
+
+        else:
+            error = MypyError(file_path, line, column, message, errorcode, severity="error")
+            errors.append(error)
+            error_location = (file_path, line, column)
+            latest_error_at_location[error_location] = error
+
+    return errors
diff --git a/mypy/main.py b/mypy/main.py
@@ -18,6 +18,7 @@
     parse_version,
     validate_package_allow_list,
 )
+from mypy.error_formatter import OUTPUT_CHOICES
 from mypy.errorcodes import error_codes
 from mypy.errors import CompileError
 from mypy.find_sources import InvalidSourceList, create_source_list
@@ -72,7 +73,9 @@ def main(
     if clean_exit:
         options.fast_exit = False
 
-    formatter = util.FancyFormatter(stdout, stderr, options.hide_error_codes)
+    formatter = util.FancyFormatter(
+        stdout, stderr, options.hide_error_codes, hide_success=bool(options.output)
+    )
 
     if options.install_types and (stdout is not sys.stdout or stderr is not sys.stderr):
         # Since --install-types performs user input, we want regular stdout and stderr.
@@ -156,7 +159,9 @@ def run_build(
     stdout: TextIO,
     stderr: TextIO,
 ) -> tuple[build.BuildResult | None, list[str], bool]:
-    formatter = util.FancyFormatter(stdout, stderr, options.hide_error_codes)
+    formatter = util.FancyFormatter(
+        stdout, stderr, options.hide_error_codes, hide_success=bool(options.output)
+    )
 
     messages = []
     messages_by_file = defaultdict(list)
@@ -525,6 +530,14 @@ def add_invertible_flag(
         stdout=stdout,
     )
 
+    general_group.add_argument(
+        "-O",
+        "--output",
+        metavar="FORMAT",
+        help="Set a custom output format",
+        choices=OUTPUT_CHOICES,
+    )
+
     config_group = parser.add_argument_group(
         title="Config file",
         description="Use a config file instead of command line arguments. "
diff --git a/mypy/options.py b/mypy/options.py
@@ -376,10 +376,12 @@ def __init__(self) -> None:
 
         self.disable_bytearray_promotion = False
         self.disable_memoryview_promotion = False
-
         self.force_uppercase_builtins = False
         self.force_union_syntax = False
 
+        # Sets custom output format
+        self.output: str | None = None
+
     def use_lowercase_names(self) -> bool:
         if self.python_version >= (3, 9):
             return not self.force_uppercase_builtins
diff --git a/mypy/test/testoutput.py b/mypy/test/testoutput.py
@@ -0,0 +1,58 @@
+"""Test cases for `--output=json`.
+
+These cannot be run by the usual unit test runner because of the backslashes in
+the output, which get normalized to forward slashes by the test suite on Windows.
+"""
+
+from __future__ import annotations
+
+import os
+import os.path
+
+from mypy import api
+from mypy.defaults import PYTHON3_VERSION
+from mypy.test.config import test_temp_dir
+from mypy.test.data import DataDrivenTestCase, DataSuite
+
+
+class OutputJSONsuite(DataSuite):
+    files = ["outputjson.test"]
+
+    def run_case(self, testcase: DataDrivenTestCase) -> None:
+        test_output_json(testcase)
+
+
+def test_output_json(testcase: DataDrivenTestCase) -> None:
+    """Runs Mypy in a subprocess, and ensures that `--output=json` works as intended."""
+    mypy_cmdline = ["--output=json"]
+    mypy_cmdline.append(f"--python-version={'.'.join(map(str, PYTHON3_VERSION))}")
+
+    # Write the program to a file.
+    program_path = os.path.join(test_temp_dir, "main")
+    mypy_cmdline.append(program_path)
+    with open(program_path, "w", encoding="utf8") as file:
+        for s in testcase.input:
+            file.write(f"{s}\n")
+
+    output = []
+    # Type check the program.
+    out, err, returncode = api.run(mypy_cmdline)
+    # split lines, remove newlines, and remove directory of test case
+    for line in (out + err).rstrip("\n").splitlines():
+        if line.startswith(test_temp_dir + os.sep):
+            output.append(line[len(test_temp_dir + os.sep) :].rstrip("\r\n"))
+        else:
+            output.append(line.rstrip("\r\n"))
+
+    if returncode > 1:
+        output.append("!!! Mypy crashed !!!")
+
+    # Remove temp file.
+    os.remove(program_path)
+
+    # JSON encodes every `\` character into `\\`, so we need to remove `\\` from windows paths
+    # and `/` from POSIX paths
+    json_os_separator = os.sep.replace("\\", "\\\\")
+    normalized_output = [line.replace(test_temp_dir + json_os_separator, "") for line in output]
+
+    assert normalized_output == testcase.output
diff --git a/mypy/util.py b/mypy/util.py
@@ -563,8 +563,12 @@ class FancyFormatter:
     This currently only works on Linux and Mac.
     """
 
-    def __init__(self, f_out: IO[str], f_err: IO[str], hide_error_codes: bool) -> None:
+    def __init__(
+        self, f_out: IO[str], f_err: IO[str], hide_error_codes: bool, hide_success: bool = False
+    ) -> None:
         self.hide_error_codes = hide_error_codes
+        self.hide_success = hide_success
+
         # Check if we are in a human-facing terminal on a supported platform.
         if sys.platform not in ("linux", "darwin", "win32", "emscripten"):
             self.dummy_term = True
@@ -793,6 +797,9 @@ def format_success(self, n_sources: int, use_color: bool = True) -> str:
         n_sources is total number of files passed directly on command line,
         i.e. excluding stubs and followed imports.
         """
+        if self.hide_success:
+            return ""
+
         msg = f"Success: no issues found in {n_sources} source file{plural_s(n_sources)}"
         if not use_color:
             return msg
diff --git a/test-data/unit/outputjson.test b/test-data/unit/outputjson.test
@@ -0,0 +1,44 @@
+-- Test cases for `--output=json`.
+-- These cannot be run by the usual unit test runner because of the backslashes
+-- in the output, which get normalized to forward slashes by the test suite on
+-- Windows.
+
+[case testOutputJsonNoIssues]
+# flags: --output=json
+def foo() -> None:
+    pass
+
+foo()
+[out]
+
+[case testOutputJsonSimple]
+# flags: --output=json
+def foo() -> None:
+    pass
+
+foo(1)
+[out]
+{"file": "main", "line": 5, "column": 0, "message": "Too many arguments for \"foo\"", "hint": null, "code": "call-arg", "severity": "error"}
+
+[case testOutputJsonWithHint]
+# flags: --output=json
+from typing import Optional, overload
+
+@overload
+def foo() -> None: ...
+@overload
+def foo(x: int) -> None: ...
+
+def foo(x: Optional[int] = None) -> None:
+    ...
+
+reveal_type(foo)
+
+foo('42')
+
+def bar() -> None: ...
+bar('42')
+[out]
+{"file": "main", "line": 12, "column": 12, "message": "Revealed type is \"Overload(def (), def (x: builtins.int))\"", "hint": null, "code": "misc", "severity": "note"}
+{"file": "main", "line": 14, "column": 0, "message": "No overload variant of \"foo\" matches argument type \"str\"", "hint": "Possible overload variants:\n    def foo() -> None\n    def foo(x: int) -> None", "code": "call-overload", "severity": "error"}
+{"file": "main", "line": 17, "column": 0, "message": "Too many arguments for \"bar\"", "hint": null, "code": "call-arg", "severity": "error"}