"""Tests for stubs.

Verify that various things in stubs are consistent with how things behave at runtime.

"""

import argparse
import copy
import enum
import importlib
import inspect
import subprocess
import sys
import types
import warnings
from functools import singledispatch
from pathlib import Path
from typing import Any, Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union

from typing_extensions import Type

import mypy.build
import mypy.modulefinder
import mypy.types
from mypy import nodes
from mypy.options import Options
from mypy.util import FancyFormatter


class Missing:
    """Marker object for things that are missing (from a stub or the runtime)."""

    def __repr__(self) -> str:
        return "MISSING"


MISSING = Missing()

T = TypeVar("T")
MaybeMissing = Union[T, Missing]

_formatter = FancyFormatter(sys.stdout, sys.stderr, False)


def _style(message: str, **kwargs: Any) -> str:
    """Wrapper around mypy.util for fancy formatting.

    :param message: Text to style
    :param kwargs: Passed through to ``FancyFormatter.style``; ``color`` defaults to ``"none"``

    """
    kwargs.setdefault("color", "none")
    return _formatter.style(message, **kwargs)


class Error:
    """A single inconsistency between a stub and the runtime."""

    def __init__(
        self,
        object_path: List[str],
        message: str,
        stub_object: MaybeMissing[nodes.Node],
        runtime_object: MaybeMissing[Any],
        *,
        stub_desc: Optional[str] = None,
        runtime_desc: Optional[str] = None
    ) -> None:
        """Represents an error found by stubtest.

        :param object_path: Location of the object with the error,
            e.g. ``["module", "Class", "method"]``
        :param message: Error message
        :param stub_object: The mypy node representing the stub
        :param runtime_object: Actual object obtained from the runtime
        :param stub_desc: Specialised description for the stub object, should you wish
        :param runtime_desc: Specialised description for the runtime object, should you wish

        """
        self.object_desc = ".".join(object_path)
        self.message = message
        self.stub_object = stub_object
        self.runtime_object = runtime_object
        # Prefer the node's ``type`` attribute (when it has one) over the node itself
        self.stub_desc = stub_desc or str(getattr(stub_object, "type", stub_object))
        self.runtime_desc = runtime_desc or str(runtime_object)

    def is_missing_stub(self) -> bool:
        """Whether or not the error is for something missing from the stub."""
        return isinstance(self.stub_object, Missing)

    def is_positional_only_related(self) -> bool:
        """Whether or not the error is for something being (or not being) positional-only."""
        # TODO: This is hacky, use error codes or something more resilient
        return "leading double underscore" in self.message

    def get_description(self, concise: bool = False) -> str:
        """Returns a description of the error.

        :param concise: Whether to return a concise, one-line description

        """
        if concise:
            return _style(self.object_desc, bold=True) + " " + self.message

        stub_line = None
        stub_file = None
        if not isinstance(self.stub_object, Missing):
            stub_line = self.stub_object.line
        # TODO: Find a way of getting the stub file

        stub_loc_str = ""
        if stub_line:
            stub_loc_str += " at line {}".format(stub_line)
        if stub_file:
            stub_loc_str += " in file {}".format(stub_file)

        runtime_line = None
        runtime_file = None
        if not isinstance(self.runtime_object, Missing):
            # Source location is best-effort: inspect cannot locate many runtime objects
            try:
                runtime_line = inspect.getsourcelines(self.runtime_object)[1]
            except (OSError, TypeError):
                pass
            try:
                runtime_file = inspect.getsourcefile(self.runtime_object)
            except TypeError:
                pass

        runtime_loc_str = ""
        if runtime_line:
            runtime_loc_str += " at line {}".format(runtime_line)
        if runtime_file:
            runtime_loc_str += " in file {}".format(runtime_file)

        output = [
            _style("error: ", color="red", bold=True),
            _style(self.object_desc, bold=True),
            " ",
            self.message,
            "\n",
            "Stub:",
            _style(stub_loc_str, dim=True),
            "\n",
            _style(self.stub_desc + "\n", color="blue", dim=True),
            "Runtime:",
            _style(runtime_loc_str, dim=True),
            "\n",
            _style(self.runtime_desc + "\n", color="blue", dim=True),
        ]
        return "".join(output)


def test_module(module_name: str) -> Iterator[Error]:
    """Tests a given module's stub against introspecting it at runtime.

    Requires the stub to have been built already, accomplished by a call to ``build_stubs``.

    :param module_name: The module to test

    """
    stub = get_stub(module_name)
    if stub is None:
        yield Error([module_name], "failed to find stubs", MISSING, None)
        return

    try:
        runtime = importlib.import_module(module_name)
    except Exception as e:
        # Importing runs arbitrary module code, so any failure is reported rather than raised
        yield Error([module_name], "failed to import: {}".format(e), stub, MISSING)
        return

    # collections likes to warn us about the things we're doing
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        yield from verify(stub, runtime, [module_name])


@singledispatch
def verify(
    stub: nodes.Node, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Entry point for comparing a stub to a runtime object.

    We use single dispatch based on the type of ``stub``.

    :param stub: The mypy node representing a part of the stub
    :param runtime: The runtime object corresponding to ``stub``
    :param object_path: Location of the object, e.g. ``["module", "Class", "method"]``

    """
    yield Error(object_path, "is an unknown mypy node", stub, runtime)


@verify.register(nodes.MypyFile)
def verify_mypyfile(
    stub: nodes.MypyFile, runtime: MaybeMissing[types.ModuleType], object_path: List[str]
) -> Iterator[Error]:
    """Verifies a module stub, recursing into its public names and the runtime ``__all__``."""
    if isinstance(runtime, Missing):
        yield Error(object_path, "is not present at runtime", stub, runtime)
        return
    if not isinstance(runtime, types.ModuleType):
        yield Error(object_path, "is not a module", stub, runtime)
        return

    # Check things in the stub that are public
    to_check = {
        m
        for m, o in stub.names.items()
        if o.module_public and (not m.startswith("_") or hasattr(runtime, m))
    }
    # Check all things declared in module's __all__
    to_check.update(getattr(runtime, "__all__", []))
    to_check.difference_update({"__file__", "__doc__", "__name__", "__builtins__", "__package__"})
    # We currently don't check things in the module that aren't in the stub, other than things that
    # are in __all__, to avoid false positives.

    for entry in sorted(to_check):
        yield from verify(
            stub.names[entry].node if entry in stub.names else MISSING,
            getattr(runtime, entry, MISSING),
            object_path + [entry],
        )


@verify.register(nodes.TypeInfo)
def verify_typeinfo(
    stub: nodes.TypeInfo, runtime: MaybeMissing[Type[Any]], object_path: List[str]
) -> Iterator[Error]:
    """Verifies a class stub, recursing into stub names and public runtime attributes."""
    if isinstance(runtime, Missing):
        yield Error(object_path, "is not present at runtime", stub, runtime, stub_desc=repr(stub))
        return
    if not isinstance(runtime, type):
        yield Error(object_path, "is not a type", stub, runtime, stub_desc=repr(stub))
        return

    to_check = set(stub.names)
    to_check.update(m for m in vars(runtime) if not m.startswith("_"))

    for entry in sorted(to_check):
        yield from verify(
            # Resolve the entry through the stub's MRO, so inherited members are found
            next((t.names[entry].node for t in stub.mro if entry in t.names), MISSING),
            getattr(runtime, entry, MISSING),
            object_path + [entry],
        )


def _verify_static_class_methods(
    stub: nodes.FuncItem, runtime: types.FunctionType, object_path: List[str]
) -> Iterator[str]:
    """Checks that the stub and runtime agree on being a classmethod / staticmethod."""
    if runtime.__name__ == "__new__":
        # Special cased by Python, so never declared as staticmethod
        return
    if inspect.isbuiltin(runtime):
        # The isinstance checks don't work reliably for builtins, e.g. datetime.datetime.now, so do
        # something a little hacky that seems to work well
        probably_class_method = isinstance(getattr(runtime, "__self__", None), type)
        if probably_class_method and not stub.is_class:
            yield "runtime is a classmethod but stub is not"
        if not probably_class_method and stub.is_class:
            yield "stub is a classmethod but runtime is not"
        return

    # Look the object up statically, to avoid binding by the descriptor protocol
    static_runtime = importlib.import_module(object_path[0])
    for entry in object_path[1:]:
        static_runtime = inspect.getattr_static(static_runtime, entry)

    if isinstance(static_runtime, classmethod) and not stub.is_class:
        yield "runtime is a classmethod but stub is not"
    if not isinstance(static_runtime, classmethod) and stub.is_class:
        yield "stub is a classmethod but runtime is not"
    if isinstance(static_runtime, staticmethod) and not stub.is_static:
        yield "runtime is a staticmethod but stub is not"
    if not isinstance(static_runtime, staticmethod) and stub.is_static:
        yield "stub is a staticmethod but runtime is not"


def _verify_arg_name(
    stub_arg: nodes.Argument, runtime_arg: inspect.Parameter, function_name: str
) -> Iterator[str]:
    """Checks whether argument names match."""
    # Ignore exact names for all dunder methods other than __init__
    if is_dunder(function_name, exclude_init=True):
        return

    def strip_prefix(s: str, prefix: str) -> str:
        return s[len(prefix) :] if s.startswith(prefix) else s

    if strip_prefix(stub_arg.variable.name, "__") == runtime_arg.name:
        return

    def names_approx_match(a: str, b: str) -> bool:
        a = a.strip("_")
        b = b.strip("_")
        return a.startswith(b) or b.startswith(a) or len(a) == 1 or len(b) == 1

    # Be more permissive about names matching for positional-only arguments
    if runtime_arg.kind == inspect.Parameter.POSITIONAL_ONLY and names_approx_match(
        stub_arg.variable.name, runtime_arg.name
    ):
        return
    # This comes up with namedtuples, so ignore
    if stub_arg.variable.name == "_self":
        return
    yield (
        'stub argument "{}" differs from runtime argument "{}"'.format(
            stub_arg.variable.name, runtime_arg.name
        )
    )


def _verify_arg_default_value(
    stub_arg: nodes.Argument, runtime_arg: inspect.Parameter
) -> Iterator[str]:
    """Checks whether argument default values are compatible."""
    # Identity test: ``!=`` would invoke __ne__ on arbitrary user-supplied default objects
    if runtime_arg.default is not inspect.Parameter.empty:
        if stub_arg.kind not in (nodes.ARG_OPT, nodes.ARG_NAMED_OPT):
            yield (
                'runtime argument "{}" has a default value but stub argument does not'.format(
                    runtime_arg.name
                )
            )
        else:
            runtime_type = get_mypy_type_of_runtime_value(runtime_arg.default)
            # Fallback to the type annotation type if var type is missing. The type annotation
            # is an UnboundType, but I don't know enough to know what the pros and cons here are.
            # UnboundTypes have ugly question marks following them, so default to var type.
            # Note we do this same fallback when constructing signatures in from_overloadedfuncdef
            stub_type = stub_arg.variable.type or stub_arg.type_annotation
            if isinstance(stub_type, mypy.types.TypeVarType):
                stub_type = stub_type.upper_bound
            if (
                runtime_type is not None
                and stub_type is not None
                # Avoid false positives for marker objects
                and type(runtime_arg.default) is not object
                and not is_subtype_helper(runtime_type, stub_type)
            ):
                yield (
                    'runtime argument "{}" has a default value of type {}, '
                    "which is incompatible with stub argument type {}".format(
                        runtime_arg.name, runtime_type, stub_type
                    )
                )
    else:
        if stub_arg.kind in (nodes.ARG_OPT, nodes.ARG_NAMED_OPT):
            yield (
                'stub argument "{}" has a default value but runtime argument does not'.format(
                    stub_arg.variable.name
                )
            )


class Signature(Generic[T]):
    """A function signature split into positional, keyword-only, *args and **kwargs parts.

    ``T`` is the argument representation: ``nodes.Argument`` for stubs and
    ``inspect.Parameter`` for runtime objects.
    """

    def __init__(self) -> None:
        self.pos = []  # type: List[T]
        self.kwonly = {}  # type: Dict[str, T]
        self.varpos = None  # type: Optional[T]
        self.varkw = None  # type: Optional[T]

    def __str__(self) -> str:
        """Renders the signature in a ``def (...)`` form for error output."""

        def get_name(arg: Any) -> str:
            if isinstance(arg, inspect.Parameter):
                return arg.name
            if isinstance(arg, nodes.Argument):
                return arg.variable.name
            raise ValueError

        def get_type(arg: Any) -> Optional[str]:
            if isinstance(arg, inspect.Parameter):
                return None
            if isinstance(arg, nodes.Argument):
                return str(arg.variable.type or arg.type_annotation)
            raise ValueError

        def has_default(arg: Any) -> bool:
            if isinstance(arg, inspect.Parameter):
                # Identity test: ``!=`` would invoke __ne__ on arbitrary default objects
                return arg.default is not inspect.Parameter.empty
            if isinstance(arg, nodes.Argument):
                return arg.kind in (nodes.ARG_OPT, nodes.ARG_NAMED_OPT)
            raise ValueError

        def get_desc(arg: Any) -> str:
            arg_type = get_type(arg)
            return (
                get_name(arg)
                + (": {}".format(arg_type) if arg_type else "")
                + (" = ..." if has_default(arg) else "")
            )

        ret = "def ("
        ret += ", ".join(
            [get_desc(arg) for arg in self.pos]
            # A bare "*" separates keyword-only arguments when there is no *args
            + (["*" + get_name(self.varpos)] if self.varpos else (["*"] if self.kwonly else []))
            + [get_desc(arg) for arg in self.kwonly.values()]
            + (["**" + get_name(self.varkw)] if self.varkw else [])
        )
        ret += ")"
        return ret

    @staticmethod
    def from_funcitem(stub: nodes.FuncItem) -> "Signature[nodes.Argument]":
        """Builds a Signature from the arguments of a stub function."""
        stub_sig = Signature()  # type: Signature[nodes.Argument]
        for stub_arg in stub.arguments:
            if stub_arg.kind in (nodes.ARG_POS, nodes.ARG_OPT):
                stub_sig.pos.append(stub_arg)
            elif stub_arg.kind in (nodes.ARG_NAMED, nodes.ARG_NAMED_OPT):
                stub_sig.kwonly[stub_arg.variable.name] = stub_arg
            elif stub_arg.kind == nodes.ARG_STAR:
                stub_sig.varpos = stub_arg
            elif stub_arg.kind == nodes.ARG_STAR2:
                stub_sig.varkw = stub_arg
            else:
                raise ValueError
        return stub_sig

    @staticmethod
    def from_inspect_signature(signature: inspect.Signature) -> "Signature[inspect.Parameter]":
        """Builds a Signature from the parameters of an ``inspect.Signature``."""
        runtime_sig = Signature()  # type: Signature[inspect.Parameter]
        for runtime_arg in signature.parameters.values():
            if runtime_arg.kind in (
                inspect.Parameter.POSITIONAL_ONLY,
                inspect.Parameter.POSITIONAL_OR_KEYWORD,
            ):
                runtime_sig.pos.append(runtime_arg)
            elif runtime_arg.kind == inspect.Parameter.KEYWORD_ONLY:
                runtime_sig.kwonly[runtime_arg.name] = runtime_arg
            elif runtime_arg.kind == inspect.Parameter.VAR_POSITIONAL:
                runtime_sig.varpos = runtime_arg
            elif runtime_arg.kind == inspect.Parameter.VAR_KEYWORD:
                runtime_sig.varkw = runtime_arg
            else:
                raise ValueError
        return runtime_sig

    @staticmethod
    def from_overloadedfuncdef(stub: nodes.OverloadedFuncDef) -> "Signature[nodes.Argument]":
        """Returns a Signature from an OverloadedFuncDef.

        If life were simple, to verify_overloadedfuncdef, we'd just verify_funcitem for each of its
        items. Unfortunately, life isn't simple and overloads are pretty deceitful. So instead, we
        try and combine the overload's items into a single signature that is compatible with any
        lies it might try to tell.

        """
        # For all dunder methods other than __init__, just assume all args are positional-only
        assume_positional_only = is_dunder(stub.name, exclude_init=True)

        all_args = {}  # type: Dict[str, List[Tuple[nodes.Argument, int]]]
        for func in map(_resolve_funcitem_from_decorator, stub.items):
            assert func is not None
            for index, arg in enumerate(func.arguments):
                # For positional-only args, we allow overloads to have different names for the same
                # argument. To accomplish this, we just make up a fake index-based name.
                name = (
                    "__{}".format(index)
                    if arg.variable.name.startswith("__") or assume_positional_only
                    else arg.variable.name
                )
                all_args.setdefault(name, []).append((arg, index))

        def get_position(arg_name: str) -> int:
            # We just need this to return the positional args in the correct order.
            return max(index for _, index in all_args[arg_name])

        def get_type(arg_name: str) -> mypy.types.ProperType:
            with mypy.state.strict_optional_set(True):
                all_types = [
                    arg.variable.type or arg.type_annotation for arg, _ in all_args[arg_name]
                ]
                return mypy.typeops.make_simplified_union([t for t in all_types if t])

        def get_kind(arg_name: str) -> int:
            kinds = {arg.kind for arg, _ in all_args[arg_name]}
            if nodes.ARG_STAR in kinds:
                return nodes.ARG_STAR
            if nodes.ARG_STAR2 in kinds:
                return nodes.ARG_STAR2
            # The logic here is based on two tenets:
            # 1) If an arg is ever optional (or unspecified), it is optional
            # 2) If an arg is ever positional, it is positional
            is_opt = (
                len(all_args[arg_name]) < len(stub.items)
                or nodes.ARG_OPT in kinds
                or nodes.ARG_NAMED_OPT in kinds
            )
            is_pos = nodes.ARG_OPT in kinds or nodes.ARG_POS in kinds
            if is_opt:
                return nodes.ARG_OPT if is_pos else nodes.ARG_NAMED_OPT
            return nodes.ARG_POS if is_pos else nodes.ARG_NAMED

        sig = Signature()  # type: Signature[nodes.Argument]
        for arg_name in sorted(all_args, key=get_position):
            # example_arg_name gives us a real name (in case we had a fake index-based name)
            example_arg_name = all_args[arg_name][0][0].variable.name
            arg = nodes.Argument(
                nodes.Var(example_arg_name, get_type(arg_name)),
                type_annotation=None,
                initializer=None,
                kind=get_kind(arg_name),
            )
            if arg.kind in (nodes.ARG_POS, nodes.ARG_OPT):
                sig.pos.append(arg)
            elif arg.kind in (nodes.ARG_NAMED, nodes.ARG_NAMED_OPT):
                sig.kwonly[arg.variable.name] = arg
            elif arg.kind == nodes.ARG_STAR:
                sig.varpos = arg
            elif arg.kind == nodes.ARG_STAR2:
                sig.varkw = arg
            else:
                raise ValueError
        return sig


def _verify_signature(
    stub: Signature[nodes.Argument], runtime: Signature[inspect.Parameter], function_name: str
) -> Iterator[str]:
    """Yields a message for every way the stub signature disagrees with the runtime one.

    :param stub: Signature built from the stub
    :param runtime: Signature built from ``inspect.signature`` of the runtime object
    :param function_name: Name of the function, used to relax checks for dunder methods

    """
    # Check positional arguments match up
    for stub_arg, runtime_arg in zip(stub.pos, runtime.pos):
        yield from _verify_arg_name(stub_arg, runtime_arg, function_name)
        yield from _verify_arg_default_value(stub_arg, runtime_arg)
        if (
            runtime_arg.kind == inspect.Parameter.POSITIONAL_ONLY
            and not stub_arg.variable.name.startswith("__")
            and not stub_arg.variable.name.strip("_") == "self"
            and not is_dunder(function_name)  # noisy for dunder methods
        ):
            yield (
                'stub argument "{}" should be positional-only '
                '(rename with a leading double underscore, i.e. "__{}")'.format(
                    stub_arg.variable.name, runtime_arg.name
                )
            )
        if (
            runtime_arg.kind != inspect.Parameter.POSITIONAL_ONLY
            and stub_arg.variable.name.startswith("__")
        ):
            yield (
                'stub argument "{}" should be positional or keyword '
                "(remove leading double underscore)".format(stub_arg.variable.name)
            )

    # Checks involving *args
    if len(stub.pos) == len(runtime.pos):
        if stub.varpos is None and runtime.varpos is not None:
            yield 'stub does not have *args argument "{}"'.format(runtime.varpos.name)
        if stub.varpos is not None and runtime.varpos is None:
            yield 'runtime does not have *args argument "{}"'.format(stub.varpos.variable.name)
    elif len(stub.pos) > len(runtime.pos):
        if runtime.varpos is None:
            for stub_arg in stub.pos[len(runtime.pos) :]:
                # If the variable is in runtime.kwonly, it's just mislabelled as not a
                # keyword-only argument; we report the error while checking keyword-only arguments
                if stub_arg.variable.name not in runtime.kwonly:
                    yield 'runtime does not have argument "{}"'.format(stub_arg.variable.name)
        # We do not check whether stub takes *args when the runtime does, for cases where the stub
        # just listed out the extra parameters the function takes
    elif len(stub.pos) < len(runtime.pos):
        if stub.varpos is None:
            for runtime_arg in runtime.pos[len(stub.pos) :]:
                yield 'stub does not have argument "{}"'.format(runtime_arg.name)
        # Test ``varpos`` here: ``runtime.pos`` is always a list, so comparing it to None
        # could never be true and this branch was unreachable.
        elif runtime.varpos is None:
            yield 'runtime does not have *args argument "{}"'.format(stub.varpos.variable.name)

    # Check keyword-only args
    for arg in sorted(set(stub.kwonly) & set(runtime.kwonly)):
        stub_arg, runtime_arg = stub.kwonly[arg], runtime.kwonly[arg]
        yield from _verify_arg_name(stub_arg, runtime_arg, function_name)
        yield from _verify_arg_default_value(stub_arg, runtime_arg)

    # Checks involving **kwargs
    # Names of the stub's positional arguments, computed once for both checks below
    stub_pos_names = {pos_arg.variable.name for pos_arg in stub.pos}
    if stub.varkw is None and runtime.varkw is not None:
        # We do not check whether stub takes **kwargs when the runtime does, for cases where the
        # stub just listed out the extra keyword parameters the function takes
        # Also check against positional parameters, to avoid a nitpicky message when an argument
        # isn't marked as keyword-only
        if not set(runtime.kwonly).issubset(set(stub.kwonly) | stub_pos_names):
            yield 'stub does not have **kwargs argument "{}"'.format(runtime.varkw.name)
    if stub.varkw is not None and runtime.varkw is None:
        yield 'runtime does not have **kwargs argument "{}"'.format(stub.varkw.variable.name)
    if runtime.varkw is None or not set(runtime.kwonly).issubset(set(stub.kwonly)):
        for arg in sorted(set(stub.kwonly) - set(runtime.kwonly)):
            yield 'runtime does not have argument "{}"'.format(arg)
    if stub.varkw is None or not set(stub.kwonly).issubset(set(runtime.kwonly)):
        for arg in sorted(set(runtime.kwonly) - set(stub.kwonly)):
            if arg in stub_pos_names:
                yield 'stub argument "{}" is not keyword-only'.format(arg)
            else:
                yield 'stub does not have argument "{}"'.format(arg)


@verify.register(nodes.FuncItem)
def verify_funcitem(
    stub: nodes.FuncItem, runtime: MaybeMissing[types.FunctionType], object_path: List[str]
) -> Iterator[Error]:
    """Verifies a function stub: function-ness, class/static method kind and signature."""
    if isinstance(runtime, Missing):
        yield Error(object_path, "is not present at runtime", stub, runtime)
        return
    if (
        not isinstance(runtime, (types.FunctionType, types.BuiltinFunctionType))
        and not isinstance(runtime, (types.MethodType, types.BuiltinMethodType))
        and not inspect.ismethoddescriptor(runtime)
    ):
        yield Error(object_path, "is not a function", stub, runtime)
        return

    for message in _verify_static_class_methods(stub, runtime, object_path):
        yield Error(object_path, "is inconsistent, " + message, stub, runtime)

    try:
        signature = inspect.signature(runtime)
    except (ValueError, RuntimeError):
        # inspect.signature throws sometimes
        # catch RuntimeError because of https://bugs.python.org/issue39504
        return

    stub_sig = Signature.from_funcitem(stub)
    runtime_sig = Signature.from_inspect_signature(signature)

    for message in _verify_signature(stub_sig, runtime_sig, function_name=stub.name):
        yield Error(
            object_path,
            "is inconsistent, " + message,
            stub,
            runtime,
            runtime_desc="def " + str(signature),
        )


@verify.register(Missing)
def verify_none(
    stub: Missing, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Reports an object that exists at runtime but has no counterpart in the stub.

    :raises RuntimeError: If the object is missing from the runtime too and cannot be imported

    """
    if isinstance(runtime, Missing):
        try:
            # We shouldn't really get here since that would involve something not existing both in
            # the stub and the runtime, however, some modules like distutils.command have some
            # weird things going on. Try to see if we can find a runtime object by importing it,
            # otherwise crash.
            runtime = importlib.import_module(".".join(object_path))
        except ImportError as e:
            raise RuntimeError(
                "{} is missing from both the stub and the runtime".format(".".join(object_path))
            ) from e
    yield Error(object_path, "is not present in stub", stub, runtime)


@verify.register(nodes.Var)
def verify_var(
    stub: nodes.Var, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Verifies that the type of a runtime value is compatible with the stub's declared type."""
    if isinstance(runtime, Missing):
        # Don't always yield an error here, because we often can't find instance variables
        if len(object_path) <= 1:
            yield Error(object_path, "is not present at runtime", stub, runtime)
        return

    runtime_type = get_mypy_type_of_runtime_value(runtime)
    if (
        runtime_type is not None
        and stub.type is not None
        and not is_subtype_helper(runtime_type, stub.type)
    ):
        should_error = True
        # Avoid errors when defining enums, since runtime_type is the enum itself, but we'd
        # annotate it with the type of runtime.value
        if isinstance(runtime, enum.Enum):
            runtime_type = get_mypy_type_of_runtime_value(runtime.value)
            if runtime_type is not None and is_subtype_helper(runtime_type, stub.type):
                should_error = False

        if should_error:
            yield Error(
                object_path,
                "variable differs from runtime type {}".format(runtime_type),
                stub,
                runtime,
            )


@verify.register(nodes.OverloadedFuncDef)
def verify_overloadedfuncdef(
    stub: nodes.OverloadedFuncDef, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Verifies an overloaded stub by merging its overloads into one combined signature."""
    if isinstance(runtime, Missing):
        yield Error(object_path, "is not present at runtime", stub, runtime)
        return

    if stub.is_property:
        # We get here in cases of overloads from property.setter
        return

    try:
        signature = inspect.signature(runtime)
    except (ValueError, RuntimeError):
        # Same handling as verify_funcitem: inspect.signature throws sometimes, and
        # RuntimeError is caught because of https://bugs.python.org/issue39504
        return

    stub_sig = Signature.from_overloadedfuncdef(stub)
    runtime_sig = Signature.from_inspect_signature(signature)

    for message in _verify_signature(stub_sig, runtime_sig, function_name=stub.name):
        # TODO: This is a little hacky, but the addition here is super useful
        if "has a default value of type" in message:
            message += (
                ". This is often caused by overloads failing to account for explicitly passing "
                "in the default value."
            )
        yield Error(
            object_path,
            "is inconsistent, " + message,
            stub,
            runtime,
            stub_desc=str(stub.type) + "\nInferred signature: {}".format(stub_sig),
            runtime_desc="def " + str(signature),
        )


@verify.register(nodes.TypeVarExpr)
def verify_typevarexpr(
    stub: nodes.TypeVarExpr, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Type variables are not checked; yields nothing."""
    yield from ()


def _verify_property(stub: nodes.Decorator, runtime: Any) -> Iterator[str]:
    """Checks that a stub ``@property`` is backed by something property-like at runtime."""
    assert stub.func.is_property
    if isinstance(runtime, property):
        return
    if inspect.isdatadescriptor(runtime):
        # It's enough like a property...
        return
    # Sometimes attributes pretend to be properties, for instance, to express that they
    # are read only. So whitelist if runtime_type matches the return type of stub.
    runtime_type = get_mypy_type_of_runtime_value(runtime)
    func_type = (
        stub.func.type.ret_type if isinstance(stub.func.type, mypy.types.CallableType) else None
    )
    if (
        runtime_type is not None
        and func_type is not None
        and is_subtype_helper(runtime_type, func_type)
    ):
        return
    yield "is inconsistent, cannot reconcile @property on stub with runtime object"


def _resolve_funcitem_from_decorator(dec: nodes.OverloadPart) -> Optional[nodes.FuncItem]:
    """Returns a FuncItem that corresponds to the output of the decorator.

    Returns None if we can't figure out what that would be. For convenience, this function also
    accepts FuncItems.

    """
    if isinstance(dec, nodes.FuncItem):
        return dec
    if dec.func.is_property:
        return None

    def apply_decorator_to_funcitem(
        decorator: nodes.Expression, func: nodes.FuncItem
    ) -> Optional[nodes.FuncItem]:
        if not isinstance(decorator, nodes.NameExpr):
            return None
        if decorator.fullname is None:
            # Happens with namedtuple
            return None
        if decorator.fullname in (
            "builtins.staticmethod",
            "typing.overload",
            "abc.abstractmethod",
        ):
            return func
        if decorator.fullname == "builtins.classmethod":
            assert func.arguments[0].variable.name in ("cls", "metacls")
            ret = copy.copy(func)
            # Remove the cls argument, since it's not present in inspect.signature of classmethods
            ret.arguments = ret.arguments[1:]
            return ret
        # Just give up on any other decorators. After excluding properties, we don't run into
        # anything else when running on typeshed's stdlib.
        return None

    func = dec.func  # type: nodes.FuncItem
    for decorator in dec.original_decorators:
        resulting_func = apply_decorator_to_funcitem(decorator, func)
        if resulting_func is None:
            return None
        func = resulting_func
    return func


@verify.register(nodes.Decorator)
def verify_decorator(
    stub: nodes.Decorator, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Verifies a decorated stub function, treating properties separately."""
    if isinstance(runtime, Missing):
        yield Error(object_path, "is not present at runtime", stub, runtime)
        return
    if stub.func.is_property:
        for message in _verify_property(stub, runtime):
            yield Error(object_path, message, stub, runtime)
        return

    func = _resolve_funcitem_from_decorator(stub)
    if func is not None:
        yield from verify(func, runtime, object_path)


@verify.register(nodes.TypeAlias)
def verify_typealias(
    stub: nodes.TypeAlias, runtime: MaybeMissing[Any], object_path: List[str]
) -> Iterator[Error]:
    """Type aliases are not checked; yields nothing."""
    yield from ()


def is_dunder(name: str, exclude_init: bool = False) -> bool:
    """Returns whether name is a dunder name.

    :param exclude_init: Whether to return False for __init__

    """
    if exclude_init and name == "__init__":
        return False
    return name.startswith("__") and name.endswith("__")


def is_subtype_helper(left: mypy.types.Type, right: mypy.types.Type) -> bool:
    """Checks whether ``left`` is a subtype of ``right``."""
    left = mypy.types.get_proper_type(left)
    right = mypy.types.get_proper_type(right)
    if (
        isinstance(left, mypy.types.LiteralType)
        and isinstance(left.value, int)
        and left.value in (0, 1)
        and isinstance(right, mypy.types.Instance)
        and right.type.fullname == "builtins.bool"
    ):
        # Pretend Literal[0, 1] is a subtype of bool to avoid unhelpful errors.
        return True
    with mypy.state.strict_optional_set(True):
        return mypy.subtypes.is_subtype(left, right)


def get_mypy_type_of_runtime_value(runtime: Any) -> Optional[mypy.types.Type]:
    """Returns a mypy type object representing the type of ``runtime``.

    Returns None if we can't find something that works.

    """
    if runtime is None:
        return mypy.types.NoneType()
    if isinstance(runtime, property):
        # Give up on properties to avoid issues with things that are typed as attributes.
        # NOTE(review): the reviewed excerpt ends at the comment above. ``return None`` is
        # presumed from "give up" plus the docstring; any handling of other kinds of
        # runtime values lies beyond the excerpt -- confirm against the full file.
        return None
+ return None + if isinstance(runtime, (types.FunctionType, types.BuiltinFunctionType)): + # TODO: Construct a mypy.types.CallableType + return None + + # Try and look up a stub for the runtime object + stub = get_stub(type(runtime).__module__) + if stub is None: + return None + type_name = type(runtime).__name__ + if type_name not in stub.names: + return None + type_info = stub.names[type_name].node + if not isinstance(type_info, nodes.TypeInfo): + return None + def anytype() -> mypy.types.AnyType: + return mypy.types.AnyType(mypy.types.TypeOfAny.unannotated) -def main(args: List[str]) -> Iterator[Error]: - if len(args) == 1: - print('must provide at least one module to test') + if isinstance(runtime, tuple): + # Special case tuples so we construct a valid mypy.types.TupleType + opt_items = [get_mypy_type_of_runtime_value(v) for v in runtime] + items = [(i if i is not None else anytype()) for i in opt_items] + fallback = mypy.types.Instance(type_info, [anytype()]) + return mypy.types.TupleType(items, fallback) + + # Technically, Literals are supposed to be only bool, int, str or bytes, but this + # seems to work fine + return mypy.types.LiteralType( + value=runtime, + fallback=mypy.types.Instance(type_info, [anytype() for _ in type_info.type_vars]), + ) + + +_all_stubs = {} # type: Dict[str, nodes.MypyFile] + + +def build_stubs(modules: List[str], options: Options, find_submodules: bool = False) -> List[str]: + """Uses mypy to construct stub objects for the given modules. + + This sets global state that ``get_stub`` can access. + + Returns all modules we might want to check. If ``find_submodules`` is False, this is equal + to ``modules``. + + :param modules: List of modules to build stubs for. + :param options: Mypy options for finding and building stubs. + :param find_submodules: Whether to attempt to find submodules of the given modules as well. 
+ + """ + data_dir = mypy.build.default_data_dir() + search_path = mypy.modulefinder.compute_search_paths([], options, data_dir) + find_module_cache = mypy.modulefinder.FindModuleCache(search_path) + + all_modules = [] + sources = [] + for module in modules: + all_modules.append(module) + if not find_submodules: + module_path = find_module_cache.find_module(module) + if module_path is None: + # test_module will yield an error later when it can't find stubs + continue + sources.append(mypy.modulefinder.BuildSource(module_path, module, None)) + else: + found_sources = find_module_cache.find_modules_recursive(module) + sources.extend(found_sources) + all_modules.extend(s.module for s in found_sources if s.module not in all_modules) + + res = mypy.build.build(sources=sources, options=options) + if res.errors: + output = [_style("error: ", color="red", bold=True), " failed mypy build.\n"] + print("".join(output) + "\n".join(res.errors)) sys.exit(1) + + global _all_stubs + _all_stubs = res.files + + return all_modules + + +def get_stub(module: str) -> Optional[nodes.MypyFile]: + """Returns a stub object for the given module, if we've built one.""" + return _all_stubs.get(module) + + +def get_typeshed_stdlib_modules(custom_typeshed_dir: Optional[str]) -> List[str]: + """Returns a list of stdlib modules in typeshed (for current Python version).""" + # This snippet is based on code in mypy.modulefinder.default_lib_path + if custom_typeshed_dir: + typeshed_dir = Path(custom_typeshed_dir) else: - modules = args[1:] + typeshed_dir = Path(mypy.build.default_data_dir()) + if (typeshed_dir / "stubs-auto").exists(): + typeshed_dir /= "stubs-auto" + typeshed_dir /= "typeshed" + + versions = ["2and3", "3"] + for minor in range(sys.version_info.minor + 1): + versions.append("3.{}".format(minor)) + + modules = [] + for version in versions: + base = typeshed_dir / "stdlib" / version + if base.exists(): + output = subprocess.check_output(["find", str(base), "-type", 
"f"]).decode("utf-8") + paths = [Path(p) for p in output.splitlines()] + for path in paths: + if path.stem == "__init__": + path = path.parent + modules.append(".".join(path.relative_to(base).parts[:-1] + (path.stem,))) + return sorted(modules) + + +def get_whitelist_entries(whitelist_file: Optional[str]) -> Iterator[str]: + if not whitelist_file: + return + + def strip_comments(s: str) -> str: + try: + return s[: s.index("#")].strip() + except ValueError: + return s.strip() + + with open(whitelist_file) as f: + for line in f.readlines(): + entry = strip_comments(line) + if entry: + yield entry + + +def main() -> int: + assert sys.version_info >= (3, 5), "This script requires at least Python 3.5" + + parser = argparse.ArgumentParser( + description="Compares stubs to objects introspected from the runtime." + ) + parser.add_argument("modules", nargs="*", help="Modules to test") + parser.add_argument("--concise", action="store_true", help="Make output concise") + parser.add_argument( + "--ignore-missing-stub", + action="store_true", + help="Ignore errors for stub missing things that are present at runtime", + ) + parser.add_argument( + "--ignore-positional-only", + action="store_true", + help="Ignore errors for whether an argument should or shouldn't be positional-only", + ) + parser.add_argument( + "--custom-typeshed-dir", metavar="DIR", help="Use the custom typeshed in DIR" + ) + parser.add_argument( + "--check-typeshed", action="store_true", help="Check all stdlib modules in typeshed" + ) + parser.add_argument( + "--whitelist", + action="append", + metavar="FILE", + default=[], + help=( + "Use file as a whitelist. Can be passed multiple times to combine multiple " + "whitelists. Whitelist can be created with --generate-whitelist" + ), + ) + parser.add_argument( + "--generate-whitelist", + action="store_true", + help="Print a whitelist (to stdout) to be used with --whitelist", + ) + args = parser.parse_args() + + # Load the whitelist. 
This is a series of strings corresponding to Error.object_desc + # Values in the dict will store whether we used the whitelist entry or not. + whitelist = { + entry: False + for whitelist_file in args.whitelist + for entry in get_whitelist_entries(whitelist_file) + } + + # If we need to generate a whitelist, we store Error.object_desc for each error here. + generated_whitelist = set() + + modules = args.modules + if args.check_typeshed: + assert not args.modules, "Cannot pass both --check-typeshed and a list of modules" + modules = get_typeshed_stdlib_modules(args.custom_typeshed_dir) + modules.remove("antigravity") # it's super annoying + + assert modules, "No modules to check" options = Options() options.incremental = False - data_dir = default_data_dir() - search_path = compute_search_paths([], options, data_dir) - find_module_cache = FindModuleCache(search_path) + options.custom_typeshed_dir = args.custom_typeshed_dir + modules = build_stubs(modules, options, find_submodules=not args.check_typeshed) + + exit_code = 0 for module in modules: - for error in test_stub(options, find_module_cache, module): - yield error + for error in test_module(module): + # Filter errors + if args.ignore_missing_stub and error.is_missing_stub(): + continue + if args.ignore_positional_only and error.is_positional_only_related(): + continue + if error.object_desc in whitelist: + whitelist[error.object_desc] = True + continue + + # We have errors, so change exit code, and output whatever necessary + exit_code = 1 + if args.generate_whitelist: + generated_whitelist.add(error.object_desc) + continue + print(error.get_description(concise=args.concise)) + + # Print unused whitelist entries + for w in whitelist: + if not whitelist[w]: + exit_code = 1 + print("note: unused whitelist entry {}".format(w)) + + # Print the generated whitelist + if args.generate_whitelist: + for e in sorted(generated_whitelist): + print(e) + exit_code = 0 + return exit_code -if __name__ == '__main__': - for err 
in main(sys.argv): - print(messages[err.error_type].format(error=err)) +if __name__ == "__main__": + sys.exit(main())