Skip to content

Optionally check that we don't have duplicate nodes after AST merge #4647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions mypy/server/mergecheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Check for duplicate AST nodes after merge."""

from typing import Dict, List, Tuple

from mypy.nodes import SymbolNode, Var, Decorator, OverloadedFuncDef, FuncDef
from mypy.server.objgraph import get_reachable_graph, get_path


# Debugging switch: if True, check_consistency() additionally prints the id and
# full contents of both conflicting AST nodes when it finds a duplicate.
DUMP_MISMATCH_NODES = False


def check_consistency(o: object) -> None:
    """Fail if there are two AST nodes with the same fullname reachable from 'o'.

    Every SymbolNode reachable from 'o' is indexed by its full name. The
    following nodes are ignored:

    * nodes whose full name is None (ambiguous);
    * Var and Decorator nodes, and FuncDef nodes with is_overload set
      (duplicate names are expected for these);
    * a node whose type differs from the node already recorded under the
      same name (if the type changed, it shouldn't have been merged).

    On the first remaining duplicate, print the attribute path from the root
    to each of the two nodes (plus the nodes themselves if
    DUMP_MISMATCH_NODES is set) and raise AssertionError.
    """
    seen, parents = get_reachable_graph(o)
    reachable = list(seen.values())
    syms = [x for x in reachable if isinstance(x, SymbolNode)]

    m = {}  # type: Dict[str, SymbolNode]
    for sym in syms:
        fn = sym.fullname()
        # Skip None names, since they are ambiguous.
        # TODO: Everything should have a proper full name?
        if fn is None:
            continue
        # Skip stuff that should be expected to have duplicate names
        if isinstance(sym, (Var, Decorator)):
            continue
        if isinstance(sym, FuncDef) and sym.is_overload:
            continue

        if fn not in m:
            m[fn] = sym
            continue

        # We have trouble and need to decide what to do about it.
        sym1, sym2 = sym, m[fn]

        # If the type changed, then it shouldn't have been merged.
        if type(sym1) is not type(sym2):
            continue

        path1 = get_path(sym1, seen, parents)
        path2 = get_path(sym2, seen, parents)

        print('\nDuplicate %r nodes with fullname %r found:' % (type(sym).__name__, fn))
        print('[1] %d: %s' % (id(sym1), path_to_str(path1)))
        print('[2] %d: %s' % (id(sym2), path_to_str(path2)))

        if DUMP_MISMATCH_NODES:
            # Add verbose output with full AST node contents.
            print('---')
            print(id(sym1), sym1)
            print('---')
            print(id(sym2), sym2)

        # Raise explicitly rather than via 'assert': assert statements are
        # removed when Python runs with -O, which would silently disable
        # this check.
        raise AssertionError('Duplicate %r nodes with fullname %r found'
                             % (type(sym).__name__, fn))


def path_to_str(path: List[Tuple[object, object]]) -> str:
    """Format a path of (attr, obj) steps as a single string starting at '<root>'.

    How each step is rendered depends on the type of its 'obj':

    * dict, tuple, list or SymbolTable: '[<repr of attr>]'
    * Var: '.<attr>(<type name>:<obj.name()>)'
    * BuildManager or FineGrainedBuildManager: '.<attr>'
    * anything else: '.<attr>(<type name>)'
    """
    subscripted = ('dict', 'tuple', 'SymbolTable', 'list')
    # Omit class name for some classes that aren't part of a class
    # hierarchy since there isn't much ambiguity.
    unambiguous = ('BuildManager', 'FineGrainedBuildManager')

    pieces = ['<root>']
    for attr, obj in path:
        type_name = type(obj).__name__
        if type_name in subscripted:
            piece = '[%s]' % repr(attr)
        elif isinstance(obj, Var):
            piece = '.%s(%s:%s)' % (attr, type_name, obj.name())
        elif type_name in unambiguous:
            piece = '.%s' % attr
        else:
            piece = '.%s(%s)' % (attr, type_name)
        pieces.append(piece)
    return ''.join(pieces)
132 changes: 132 additions & 0 deletions mypy/server/objgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Find all objects reachable from a root object."""

from collections import deque
from collections.abc import Iterable
from typing import List, Dict, Iterator, Optional, Tuple, Mapping
import weakref
import types


# Callable types obtained by inspecting attributes of 'object'.
method_descriptor_type = type(object.__dir__)
method_wrapper_type = type(object().__ne__)
wrapper_descriptor_type = type(object.__ne__)

# Values of these types are treated as functions/methods by get_edges().
FUNCTION_TYPES = (
    types.BuiltinFunctionType,
    types.FunctionType,
    types.MethodType,
    method_descriptor_type,
    wrapper_descriptor_type,
    method_wrapper_type,
)

# Attribute names that are never followed when looking for edges.
ATTR_BLACKLIST = {
    '__doc__',
    '__name__',
    '__class__',
    '__dict__',

    # Mypy specific attribute blacklists
    'indirection_detector',
    'all_types',
    'type_maps',
    'semantic_analyzer',  # Semantic analyzer has stale caches
    'semantic_analyzer_pass3',  # Semantic analyzer has stale caches
}

# Instances of these types can't have references to other objects
ATOMIC_TYPE_BLACKLIST = {bool, int, float, str, type(None), object}

# Don't look at most attributes of these types
COLLECTION_TYPE_BLACKLIST = {list, set, dict, tuple}

# Don't return these objects
TYPE_BLACKLIST = {weakref.ReferenceType}


def isproperty(o: object, attr: str) -> bool:
    """Return True if 'attr' is defined as a property on the type of 'o'.

    The lookup is done on type(o) rather than on 'o' itself, so the property
    getter is not invoked.
    """
    class_attr = getattr(type(o), attr, None)
    return isinstance(class_attr, property)


def get_edge_candidates(o: object) -> Iterator[Tuple[object, object]]:
    """Yield (label, value) pairs for objects directly referenced by 'o'.

    Unless type(o) is in COLLECTION_TYPE_BLACKLIST, yield (attribute name,
    value) for each name in dir(o) that is not in ATTR_BLACKLIST, is actually
    present on 'o', is not a property, and whose value's type is not in
    ATOMIC_TYPE_BLACKLIST.

    Then, for a Mapping, yield its (key, value) items; for any other
    non-str Iterable, yield (index, element) pairs.
    """
    if type(o) not in COLLECTION_TYPE_BLACKLIST:
        for name in dir(o):
            if name in ATTR_BLACKLIST:
                continue
            if not hasattr(o, name) or isproperty(o, name):
                continue
            value = getattr(o, name)
            if type(value) not in ATOMIC_TYPE_BLACKLIST:
                yield name, value

    if isinstance(o, Mapping):
        yield from o.items()
    elif isinstance(o, Iterable) and not isinstance(o, str):
        yield from enumerate(o)


def get_edges(o: object) -> Iterator[Tuple[object, object]]:
    """Yield the (label, object) edges of 'o' that should be followed.

    Candidates come from get_edge_candidates(). A candidate that is not one of
    FUNCTION_TYPES is yielded as is, unless its type is in TYPE_BLACKLIST.
    A function-like candidate is never yielded itself; instead, under the
    labels (label, '__closure__') and (label, '__self__'), yield its
    '__closure__' attribute (if present) and its '__self__' attribute (if
    present and neither 'o' nor type(o)).
    """
    for label, value in get_edge_candidates(o):
        if not isinstance(value, FUNCTION_TYPES):
            if type(value) not in TYPE_BLACKLIST:
                yield label, value
            continue

        # We don't want to collect methods, but do want to collect values
        # in closures and self pointers to other objects
        if hasattr(value, '__closure__'):
            yield (label, '__closure__'), getattr(value, '__closure__')
        if hasattr(value, '__self__'):
            owner = getattr(value, '__self__')
            if not (owner is o or owner is type(o)):
                yield (label, '__self__'), owner


def get_reachable_graph(root: object) -> Tuple[Dict[int, object],
                                               Dict[int, Tuple[int, object]]]:
    """Collect every object reachable from 'root' by following get_edges().

    Return a pair (seen, parents):

    * seen maps id(obj) to obj for the root and each reachable object;
    * parents maps id(obj) to (id of the object it was first reached from,
      label of the edge that was followed). The root has no entry.
    """
    seen = {id(root): root}
    parents = {}  # type: Dict[int, Tuple[int, object]]
    pending = [root]
    while pending:
        # Pop from the end: the most recently discovered object is expanded next.
        current = pending.pop()
        current_id = id(current)
        for label, target in get_edges(current):
            target_id = id(target)
            if target_id not in seen:
                parents[target_id] = (current_id, label)
                seen[target_id] = target
                pending.append(target)

    return seen, parents


def find_all_reachable(root: object) -> List[object]:
    """Return a list of the objects collected by get_reachable_graph(root)."""
    seen, _ = get_reachable_graph(root)
    return list(seen.values())


def aggregate_by_type(objs: List[object]) -> Dict[type, List[object]]:
    """Group 'objs' by their exact type.

    Return a dict mapping each type to the list of objects of that type, in
    the order they appear in 'objs'.
    """
    grouped = {}  # type: Dict[type, List[object]]
    for obj in objs:
        kind = type(obj)
        if kind not in grouped:
            grouped[kind] = []
        grouped[kind].append(obj)
    return grouped


def get_path(o: object,
             seen: Dict[int, object],
             parents: Dict[int, Tuple[int, object]]) -> List[Tuple[object, object]]:
    """Return the chain of (edge label, parent object) steps leading to 'o'.

    Follow 'parents' upwards from 'o' until an object with no recorded parent
    is reached, looking each parent up in 'seen' by id. The result is ordered
    from the topmost ancestor down to the direct parent of 'o'; it is empty
    if 'o' has no entry in 'parents'.
    """
    steps = []
    node = o
    while id(node) in parents:
        parent_id, label = parents[id(node)]
        node = seen[parent_id]
        steps.append((label, node))
    # Steps were collected bottom-up; present them top-down.
    return steps[::-1]
10 changes: 10 additions & 0 deletions mypy/test/testfinegrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,15 @@
from mypy.test.testtypegen import ignore_node
from mypy.types import TypeStrVisitor, Type
from mypy.util import short_type
from mypy.server.mergecheck import check_consistency

import pytest # type: ignore # no pytest in typeshed


# Set to True to perform (somewhat expensive) checks for duplicate AST nodes after merge
CHECK_CONSISTENCY = False


class FineGrainedSuite(DataSuite):
files = [
'fine-grained.test',
Expand Down Expand Up @@ -80,6 +86,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
fine_grained_manager = None
if not self.use_cache:
fine_grained_manager = FineGrainedBuildManager(manager, graph)
if CHECK_CONSISTENCY:
check_consistency(fine_grained_manager)

steps = testcase.find_steps()
all_triggered = []
Expand Down Expand Up @@ -107,6 +115,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
fine_grained_manager = FineGrainedBuildManager(manager, graph)

new_messages = fine_grained_manager.update(modules)
if CHECK_CONSISTENCY:
check_consistency(fine_grained_manager)
all_triggered.append(fine_grained_manager.triggered)
new_messages = normalize_messages(new_messages)

Expand Down
25 changes: 25 additions & 0 deletions test-data/unit/merge.test
Original file line number Diff line number Diff line change
Expand Up @@ -1333,3 +1333,28 @@ target:
C: TypeInfo<3>
D: TypeInfo<5>
NewType: Var<4>

[case testCallable_symtable-skip]
# The TypeInfo is currently not being merged correctly
import target
[file target.py]
def g(o: object) -> None:
if callable(o):
pass
[file target.py.next]
def g(o: object) -> None:
if callable(o):
o()
[builtins fixtures/callable.pyi]
[out]
__main__:
target: MypyFile<0>
target:
<callable subtype of object>: TypeInfo<1>
g: FuncDef<2>
==>
__main__:
target: MypyFile<0>
target:
<callable subtype of object>: TypeInfo<1>
g: FuncDef<2>