"""Check for duplicate AST nodes after merge."""

from typing import Dict, List, Tuple

from mypy.nodes import SymbolNode, Var, Decorator, OverloadedFuncDef, FuncDef
from mypy.server.objgraph import get_reachable_graph, get_path


# If True, print more verbose output on failure.
DUMP_MISMATCH_NODES = False


def check_consistency(o: object) -> None:
    """Fail if there are two AST nodes with the same fullname reachable from 'o'.

    Every SymbolNode reachable from 'o' is indexed by its fullname(). The
    following nodes are ignored:

    * nodes whose fullname() is None (ambiguous),
    * Var and Decorator nodes, and FuncDef nodes with is_overload set
      (these are expected to have duplicate names),
    * a node whose type differs from the node already recorded under the
      same fullname (if the type changed, it shouldn't have been merged).

    On the first remaining duplicate, print the attribute paths leading to
    both nodes (and the nodes themselves if DUMP_MISMATCH_NODES is true).

    Raises:
        AssertionError: If a duplicate is found. It is raised explicitly
            rather than through an 'assert' statement so that the check
            still works when Python runs with -O.
    """
    seen, parents = get_reachable_graph(o)
    syms = [x for x in seen.values() if isinstance(x, SymbolNode)]

    m = {}  # type: Dict[str, SymbolNode]
    for sym in syms:
        fn = sym.fullname()
        # Skip None names, since they are ambiguous.
        # TODO: Everything should have a proper full name?
        if fn is None:
            continue
        # Skip stuff that should be expected to have duplicate names
        if isinstance(sym, (Var, Decorator)):
            continue
        if isinstance(sym, FuncDef) and sym.is_overload:
            continue

        if fn not in m:
            # First node seen with this name; remember it for later comparison.
            m[fn] = sym
            continue

        # We have trouble and need to decide what to do about it.
        sym1, sym2 = sym, m[fn]

        # If the type changed, then it shouldn't have been merged.
        if type(sym1) is not type(sym2):
            continue

        path1 = get_path(sym1, seen, parents)
        path2 = get_path(sym2, seen, parents)

        print('\nDuplicate %r nodes with fullname %r found:' % (type(sym).__name__, fn))
        print('[1] %d: %s' % (id(sym1), path_to_str(path1)))
        print('[2] %d: %s' % (id(sym2), path_to_str(path2)))

        if DUMP_MISMATCH_NODES:
            # Add verbose output with full AST node contents.
            print('---')
            print(id(sym1), sym1)
            print('---')
            print(id(sym2), sym2)

        raise AssertionError(
            'Duplicate %r nodes with fullname %r found' % (type(sym).__name__, fn))


def path_to_str(path: List[Tuple[object, object]]) -> str:
    """Render a path returned by get_path() as a compact string.

    Each path item is an (attr, obj) pair. Items whose object is a dict,
    tuple, SymbolTable or list are rendered as '[<repr of attr>]'; all other
    items are rendered as '.attr(TypeName)', with the Var name appended for
    Var objects and the type name omitted for the build manager classes.
    """
    parts = []  # type: List[str]
    for attr, obj in path:
        t = type(obj).__name__
        if t in ('dict', 'tuple', 'SymbolTable', 'list'):
            parts.append('[%s]' % repr(attr))
        elif isinstance(obj, Var):
            parts.append('.%s(%s:%s)' % (attr, t, obj.name()))
        elif t in ('BuildManager', 'FineGrainedBuildManager'):
            # Omit class name for some classes that aren't part of a class
            # hierarchy since there isn't much ambiguity.
            parts.append('.%s' % attr)
        else:
            parts.append('.%s(%s)' % (attr, t))
    return ''.join(parts)
"""Find all objects reachable from a root object."""

from collections import deque
from collections.abc import Iterable
from typing import List, Dict, Iterator, Optional, Tuple, Mapping
import weakref
import types


method_descriptor_type = type(object.__dir__)
method_wrapper_type = type(object().__ne__)
wrapper_descriptor_type = type(object.__ne__)

FUNCTION_TYPES = (types.BuiltinFunctionType,
                  types.FunctionType,
                  types.MethodType,
                  method_descriptor_type,
                  wrapper_descriptor_type,
                  method_wrapper_type)

ATTR_BLACKLIST = {
    '__doc__',
    '__name__',
    '__class__',
    '__dict__',

    # Mypy specific attribute blacklists
    'indirection_detector',
    'all_types',
    'type_maps',
    'semantic_analyzer',  # Semantic analyzer has stale caches
    'semantic_analyzer_pass3',  # Semantic analyzer has stale caches
}

# Instances of these types can't have references to other objects
ATOMIC_TYPE_BLACKLIST = {
    bool,
    int,
    float,
    str,
    type(None),
    object,
}

# Don't look at most attributes of these types
COLLECTION_TYPE_BLACKLIST = {
    list,
    set,
    dict,
    tuple,
}

# Don't return these objects
TYPE_BLACKLIST = {
    weakref.ReferenceType,
}


def isproperty(o: object, attr: str) -> bool:
    """Return True if 'attr' is defined as a property on the type of 'o'."""
    return isinstance(getattr(type(o), attr, None), property)


def get_edge_candidates(o: object) -> Iterator[Tuple[object, object]]:
    """Yield (label, value) pairs for everything directly referenced by 'o'.

    Attributes are yielded with the attribute name as label, except for
    names in ATTR_BLACKLIST, properties, attributes that raise
    AttributeError on access, and values whose exact type is in
    ATOMIC_TYPE_BLACKLIST. Attributes are not inspected at all for objects
    whose exact type is in COLLECTION_TYPE_BLACKLIST.

    In addition, mappings yield (key, value) for each item and other
    non-str iterables yield (index, element) for each element.
    """
    if type(o) not in COLLECTION_TYPE_BLACKLIST:
        for attr in dir(o):
            # Test for a property *before* touching the attribute: properties
            # are skipped anyway, and evaluating their getters could run
            # arbitrary code or raise something other than AttributeError.
            if attr in ATTR_BLACKLIST or isproperty(o, attr):
                continue
            try:
                e = getattr(o, attr)
            except AttributeError:
                # dir() may list names that can't actually be read.
                continue
            if type(e) not in ATOMIC_TYPE_BLACKLIST:
                yield attr, e
    if isinstance(o, Mapping):
        for k, v in o.items():
            yield k, v
    elif isinstance(o, Iterable) and not isinstance(o, str):
        for i, e in enumerate(o):
            yield i, e


def get_edges(o: object) -> Iterator[Tuple[object, object]]:
    """Yield (label, value) pairs for the graph edges leaving 'o'.

    Function-like values (instances of FUNCTION_TYPES) are not yielded
    themselves. Instead, the value of their '__closure__' attribute (if the
    attribute exists) and their '__self__' object (unless it is 'o' or the
    type of 'o') are yielded with a (label, attribute name) tuple as label.
    Other values are yielded unless their exact type is in TYPE_BLACKLIST.
    """
    for s, e in get_edge_candidates(o):
        if isinstance(e, FUNCTION_TYPES):
            # We don't want to collect methods, but do want to collect values
            # in closures and self pointers to other objects
            if hasattr(e, '__closure__'):
                yield (s, '__closure__'), getattr(e, '__closure__')
            if hasattr(e, '__self__'):
                se = getattr(e, '__self__')
                if se is not o and se is not type(o):
                    yield (s, '__self__'), se
        else:
            if type(e) not in TYPE_BLACKLIST:
                yield s, e


def get_reachable_graph(root: object) -> Tuple[Dict[int, object],
                                               Dict[int, Tuple[int, object]]]:
    """Collect all objects reachable from 'root' by following get_edges().

    Returns a pair (seen, parents):

    * seen maps id(obj) to obj for the root and every reachable object;
    * parents maps id(obj) to (id(parent), label) for the edge through which
      the object was first discovered. The root has no entry in parents.
    """
    parents = {}  # type: Dict[int, Tuple[int, object]]
    seen = {id(root): root}  # type: Dict[int, object]
    worklist = [root]
    while worklist:
        o = worklist.pop()
        for s, e in get_edges(o):
            if id(e) in seen:
                continue
            parents[id(e)] = (id(o), s)
            seen[id(e)] = e
            worklist.append(e)

    return seen, parents


def find_all_reachable(root: object) -> List[object]:
    """Return a list of all objects reachable from 'root' (including 'root')."""
    return list(get_reachable_graph(root)[0].values())


def aggregate_by_type(objs: List[object]) -> Dict[type, List[object]]:
    """Group 'objs' by exact type, preserving the input order within each group."""
    m = {}  # type: Dict[type, List[object]]
    for o in objs:
        m.setdefault(type(o), []).append(o)
    return m


def get_path(o: object,
             seen: Dict[int, object],
             parents: Dict[int, Tuple[int, object]]) -> List[Tuple[object, object]]:
    """Return the discovery path from the root to 'o'.

    'seen' and 'parents' are the values returned by get_reachable_graph().
    The result is a list of (label, parent object) pairs ordered from the
    root towards 'o'; it is empty if 'o' has no entry in 'parents'.
    """
    path = []  # type: List[Tuple[object, object]]
    while id(o) in parents:
        pid, attr = parents[id(o)]
        o = seen[pid]
        path.append((attr, o))
    path.reverse()
    return path
MypyFile<0> +target: + : TypeInfo<1> + g: FuncDef<2>