diff --git a/mypy/build.py b/mypy/build.py index 0201cd2c2186..97aa47f4f2b1 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -30,6 +30,8 @@ from typing import ClassVar from typing_extensions import Final +from mypy_extensions import TypedDict + from mypy.nodes import (MypyFile, ImportBase, Import, ImportFrom, ImportAll) from mypy.semanal_pass1 import SemanticAnalyzerPass1 from mypy.newsemanal.semanal_pass1 import ReachabilityAnalyzer @@ -241,11 +243,7 @@ def default_data_dir() -> str: ('hash', str), ('dependencies', List[str]), # names of imported modules ('data_mtime', int), # mtime of data_json - ('deps_mtime', Optional[int]), # mtime of deps_json ('data_json', str), # path of .data.json - # path of .deps.json, which we use to store fine-grained - # dependency information for fine-grained mode - ('deps_json', Optional[str]), ('suppressed', List[str]), # dependencies that weren't imported ('child_modules', List[str]), # all submodules of the given module ('options', Optional[Dict[str, object]]), # build options @@ -261,16 +259,16 @@ def default_data_dir() -> str: # suppressed contains those reachable imports that were prevented by # silent mode or simply not found. +# Metadata for the fine-grained dependencies file associated with a module. +FgDepMeta = TypedDict('FgDepMeta', {'path': str, 'mtime': int}) + -def cache_meta_from_dict(meta: Dict[str, Any], - data_json: str, deps_json: Optional[str]) -> CacheMeta: +def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta: """Build a CacheMeta object from a json metadata dictionary Args: meta: JSON metadata read from the metadata cache file data_json: Path to the .data.json file containing the AST trees - deps_json: Optionally, path to the .deps.json file containing - fine-grained dependency information. """ sentinel = None # type: Any # Values to be validated by the caller return CacheMeta( @@ -281,9 +279,7 @@ def cache_meta_from_dict(meta: Dict[str, Any], meta.get('hash', sentinel), meta.get('dependencies', []), int(meta['data_mtime']) if 'data_mtime' in meta else sentinel, - int(meta['deps_mtime']) if meta.get('deps_mtime') is not None else None, data_json, - deps_json, meta.get('suppressed', []), meta.get('child_modules', []), meta.get('options'), @@ -460,6 +456,7 @@ class BuildManager(BuildManagerBase): options: Build options missing_modules: Set of modules that could not be imported encountered so far stale_modules: Set of modules that needed to be rechecked (only used by tests) + fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules version_id: The current mypy version (based on commit id when possible) plugin: Active mypy plugin(s) plugins_snapshot: @@ -475,7 +472,7 @@ class BuildManager(BuildManagerBase): determine whether we write cache files or not. stats: Dict with various instrumentation numbers, it is used not only for debugging, but also required for correctness, - in particular to check consistency of the protocol dependency cache. + in particular to check consistency of the fine-grained dependency cache. 
      fscache: A file system cacher
     """

@@ -504,6 +501,7 @@ def __init__(self, data_dir: str,
         self.version_id = version_id
         self.modules = {}  # type: Dict[str, MypyFile]
         self.missing_modules = set()  # type: Set[str]
+        self.fg_deps_meta = {}  # type: Dict[str, FgDepMeta]
         self.plugin = plugin
         if options.new_semantic_analyzer:
             # Set of namespaces (module or class) that are being populated during semantic
@@ -704,6 +702,17 @@ def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> My
         self.errors.set_file_ignored_lines(path, tree.ignored_lines, ignore_errors)
         return tree

+    def load_fine_grained_deps(self, id: str) -> Dict[str, Set[str]]:
+        t0 = time.time()
+        if id in self.fg_deps_meta:
+            # TODO: Assert deps file wasn't changed.
+            deps = json.loads(self.metastore.read(self.fg_deps_meta[id]['path']))
+        else:
+            deps = {}
+        val = {k: set(v) for k, v in deps.items()}
+        self.add_stats(load_fg_deps_time=time.time() - t0)
+        return val
+
     def report_file(self,
                     file: MypyFile,
                     type_map: Dict[Expression, Type],
@@ -715,46 +724,148 @@ def stats_summary(self) -> Mapping[str, object]:
         return self.stats


-def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]],
-                              manager: BuildManager, graph: Graph) -> None:
-    """Write cache files for protocol dependencies.
+def deps_to_json(x: Dict[str, Set[str]]) -> str:
+    return json.dumps({k: list(v) for k, v in x.items()})
+
+
+# File for storing metadata about all the fine-grained dependency caches
+DEPS_META_FILE = '@deps.meta.json'  # type: Final
+# File for storing fine-grained dependencies that didn't have a parent in the build
+DEPS_ROOT_FILE = '@root.deps.json'  # type: Final

-    Serialize protocol dependencies map for fine grained mode. Also take the snapshot
-    of current sources to later check consistency between protocol cache and individual
-    cache files.
+# The name of the fake module used to store fine-grained dependencies that
+# have no other place to go.
+FAKE_ROOT_MODULE = '@root'  # type: Final

-    Out of three kinds of protocol dependencies described in TypeState._snapshot_protocol_deps,
-    only the last two kinds are stored in global protocol caches, dependencies of the first kind
-    (i.e. , -> ) are written to the normal
-    per-file fine grained dependency caches.
+
+def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]],
+                     manager: BuildManager, graph: Graph) -> None:
+    """Write cache files for fine-grained dependencies.
+
+    Serialize fine-grained dependencies map for fine grained mode.
+
+    Dependencies on some module 'm' are stored in the dependency cache
+    file m.deps.json. This entails some spooky action at a distance:
+    if module 'n' depends on 'm', that produces entries in m.deps.json.
+    When there is a dependency on a module that does not exist in the
+    build, it is stored with its first existing parent module. If no
+    such module exists, it is stored with the fake module FAKE_ROOT_MODULE.
+
+    This means that the validity of the fine-grained dependency caches
+    is a global property, so we store validity checking information for
+    fine-grained dependencies in a global cache file:
+     * We take a snapshot of current sources to later check consistency
+       between the fine-grained dependency cache and module cache metadata
+     * We store the mtime of all of the dependency files to verify they
+       haven't changed
     """
     metastore = manager.metastore
-    proto_meta, proto_cache = get_protocol_deps_cache_name()
-    meta_snapshot = {}  # type: Dict[str, str]
+    error = False
+
+    fg_deps_meta = manager.fg_deps_meta.copy()
+
+    for id in rdeps:
+        if id != FAKE_ROOT_MODULE:
+            _, _, deps_json = get_cache_names(id, graph[id].xpath, manager)
+        else:
+            deps_json = DEPS_ROOT_FILE
+        assert deps_json
+        manager.log("Writing deps cache", deps_json)
+        if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])):
+            manager.log("Error writing fine-grained deps JSON file {}".format(deps_json))
+            error = True
+        else:
+            fg_deps_meta[id] = {'path': deps_json, 'mtime': manager.getmtime(deps_json)}
+
+    meta_snapshot = {}  # type: Dict[str, str]
     for id, st in graph.items():
         # If we didn't parse a file (so it doesn't have a
         # source_hash), then it must be a module with a fresh cache,
         # so use the hash from that.
         if st.source_hash:
-            meta_snapshot[id] = st.source_hash
+            hash = st.source_hash
         else:
             assert st.meta, "Module must be either parsed or cached"
-            meta_snapshot[id] = st.meta.hash
+            hash = st.meta.hash
+        meta_snapshot[id] = hash

-    if not metastore.write(proto_meta, json.dumps(meta_snapshot)):
-        manager.log("Error writing protocol meta JSON file {}".format(proto_cache))
-        error = True
-    listed_proto_deps = {k: list(v) for (k, v) in proto_deps.items()}
-    if not metastore.write(proto_cache, json.dumps(listed_proto_deps)):
-        manager.log("Error writing protocol deps JSON file {}".format(proto_cache))
+    meta = {'snapshot': meta_snapshot, 'deps_meta': fg_deps_meta}
+
+    if not metastore.write(DEPS_META_FILE, json.dumps(meta)):
+        manager.log("Error writing fine-grained deps meta JSON file {}".format(DEPS_META_FILE))
         error = True
+
     if error:
         manager.errors.set_file(_cache_dir_prefix(manager), None)
-        manager.errors.report(0, 0, "Error writing protocol dependencies cache",
+        manager.errors.report(0, 0, "Error writing fine-grained dependencies cache",
                               blocker=True)


+def invert_deps(deps: Dict[str, Set[str]],
+                graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
+    """Splits fine-grained dependencies based on the module of the trigger.
+
+    Returns a dictionary from module ids to all dependencies on that
+    module. Dependencies not associated with a module in the build will be
+    associated with the nearest parent module that is in the build, or the
+    fake module FAKE_ROOT_MODULE if none are.
+    """
+    # Lazy import to speed up startup
+    from mypy.server.target import module_prefix, trigger_to_target
+
+    # Prepopulate the map for all the modules that have been processed,
+    # so that we always generate files for processed modules (even if
+    # there aren't any dependencies to them.)
+    rdeps = {id: {} for id, st in graph.items() if st.tree}  # type: Dict[str, Dict[str, Set[str]]]
+    for trigger, targets in deps.items():
+        module = module_prefix(graph, trigger_to_target(trigger))
+        if not module or not graph[module].tree:
+            module = FAKE_ROOT_MODULE
+
+        mod_rdeps = rdeps.setdefault(module, {})
+        mod_rdeps.setdefault(trigger, set()).update(targets)
+
+    return rdeps
+
+
+def generate_deps_for_cache(proto_deps: Dict[str, Set[str]],
+                            manager: BuildManager,
+                            graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
+    """Generate fine-grained dependencies into a form suitable for serializing.
+
+    This does a few things:
+    1. Computes all fine-grained deps from modules that were processed
+    2. Splits fine-grained deps based on the module of the trigger
+    3. For each module we generated fine-grained deps for, loads any previous
+       deps and merges them in.
+
+    Returns a dictionary from module ids to all dependencies on that
+    module. Dependencies not associated with a module in the build will be
+    associated with the nearest parent module that is in the build, or the
+    fake module FAKE_ROOT_MODULE if none are.
+    """
+    from mypy.server.update import merge_dependencies  # Lazy import to speed up startup
+
+    # Compute the full set of dependencies from everything we've processed.
+    deps = {}  # type: Dict[str, Set[str]]
+    things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps]
+    for st_deps in things:
+        merge_dependencies(st_deps, deps)
+
+    # Split the dependencies out based on the module that is depended on.
+    rdeps = invert_deps(deps, graph)
+
+    # We can't just clobber existing dependency information, so we
+    # load the deps for every module we've generated new dependencies
+    # to and merge the new deps into them.
+    for module, mdeps in rdeps.items():
+        old_deps = manager.load_fine_grained_deps(module)
+        merge_dependencies(old_deps, mdeps)
+
+    return rdeps
+
+
 PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json'  # type: Final
@@ -780,19 +891,21 @@ def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]:
     return snapshot


-def read_protocol_cache(manager: BuildManager,
-                        graph: Graph) -> Optional[Dict[str, Set[str]]]:
-    """Read and validate protocol dependencies cache.
+def read_deps_cache(manager: BuildManager,
+                    graph: Graph) -> Optional[Dict[str, FgDepMeta]]:
+    """Read and validate the fine-grained dependencies cache.
+
+    See the write_deps_cache documentation for more information on
+    the details of the cache.

-    See docstring for write_protocol_cache for details about which kinds of
-    dependencies are read.
+    Returns None if the cache was invalid in some way.
     """
-    proto_meta, proto_cache = get_protocol_deps_cache_name()
-    meta_snapshot = _load_json_file(proto_meta, manager,
-                                    log_sucess='Proto meta ',
-                                    log_error='Could not load protocol metadata: ')
-    if meta_snapshot is None:
+    deps_meta = _load_json_file(DEPS_META_FILE, manager,
+                                log_sucess='Deps meta ',
+                                log_error='Could not load fine-grained dependency metadata: ')
+    if deps_meta is None:
         return None
+    meta_snapshot = deps_meta['snapshot']
     # Take a snapshot of the source hashes from all of the metas we found.
     # (Including the ones we rejected because they were out of date.)
     # We use this to verify that they match up with the proto_deps.
@@ -802,18 +915,20 @@ def read_protocol_cache(manager: BuildManager, common = set(meta_snapshot.keys()) & set(current_meta_snapshot.keys()) if any(meta_snapshot[id] != current_meta_snapshot[id] for id in common): # TODO: invalidate also if options changed (like --strict-optional)? - manager.log('Protocol cache inconsistent, ignoring') + manager.log('Fine-grained dependencies cache inconsistent, ignoring') return None - deps = _load_json_file(proto_cache, manager, - log_sucess='Proto deps ', - log_error='Could not load protocol cache: ') - if deps is None: - return None - if not isinstance(deps, dict): - manager.log('Could not load protocol cache: cache is not a dict: {}' - .format(type(deps))) - return None - return {k: set(v) for (k, v) in deps.items()} + + module_deps_metas = deps_meta['deps_meta'] + for id, meta in module_deps_metas.items(): + try: + matched = manager.getmtime(meta['path']) == meta['mtime'] + except FileNotFoundError: + matched = False + if not matched: + manager.log('Invalid or missing fine-grained deps cache: {}'.format(meta['path'])) + return None + + return module_deps_metas def _load_json_file(file: str, manager: BuildManager, @@ -873,20 +988,6 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str return (prefix + '.meta.json', prefix + '.data.json', deps_json) -def get_protocol_deps_cache_name() -> Tuple[str, str]: - """Return file names for fine grained protocol dependencies cache. - - Since these dependencies represent a global state of the program, they - are serialized per program, not per module, and the corresponding files - live at the root of the cache folder for a given Python version. - Return a tuple ('meta file path', 'data file path'), where the meta file - contains hashes of all source files at the time the protocol dependencies - were written, and data file contains the protocol dependencies. - """ - name = '@proto_deps' - return name + '.meta.json', name + '.data.json' - - def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: """Find cache data for a module. @@ -900,7 +1001,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache valid; otherwise None. """ # TODO: May need to take more build options into account - meta_json, data_json, deps_json = get_cache_names(id, path, manager) + meta_json, data_json, _ = get_cache_names(id, path, manager) manager.trace('Looking for {} at {}'.format(id, meta_json)) t0 = time.time() meta = _load_json_file(meta_json, manager, @@ -912,14 +1013,13 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache manager.log('Could not load cache for {}: meta cache is not a dict: {}' .format(id, repr(meta))) return None - m = cache_meta_from_dict(meta, data_json, deps_json) + m = cache_meta_from_dict(meta, data_json) manager.add_stats(load_meta_time=time.time() - t0) # Don't check for path match, that is dealt with in validate_meta(). 
if (m.id != id or m.mtime is None or m.size is None or - m.dependencies is None or m.data_mtime is None or - (manager.options.cache_fine_grained and m.deps_mtime is None)): + m.dependencies is None or m.data_mtime is None): manager.log('Metadata abandoned for {}: attributes are missing'.format(id)) return None @@ -989,13 +1089,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], if data_mtime != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None - deps_mtime = None - if manager.options.cache_fine_grained: - assert meta.deps_json - deps_mtime = manager.getmtime(meta.deps_json) - if deps_mtime != meta.deps_mtime: - manager.log('Metadata abandoned for {}: deps cache is modified'.format(id)) - return None path = manager.normpath(path) try: @@ -1053,7 +1146,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], 'size': size, 'hash': source_hash, 'data_mtime': data_mtime, - 'deps_mtime': deps_mtime, 'dependencies': meta.dependencies, 'suppressed': meta.suppressed, 'child_modules': meta.child_modules, @@ -1095,7 +1187,6 @@ def json_dumps(obj: Any, debug_cache: bool) -> str: def write_cache(id: str, path: str, tree: MypyFile, - serialized_fine_grained_deps: Dict[str, List[str]], dependencies: List[str], suppressed: List[str], child_modules: List[str], dep_prios: List[int], dep_lines: List[int], old_interface_hash: str, source_hash: str, @@ -1131,9 +1222,9 @@ def write_cache(id: str, path: str, tree: MypyFile, # Obtain file paths. path = manager.normpath(path) - meta_json, data_json, deps_json = get_cache_names(id, path, manager) - manager.log('Writing {} {} {} {} {}'.format( - id, path, meta_json, data_json, deps_json)) + meta_json, data_json, _ = get_cache_names(id, path, manager) + manager.log('Writing {} {} {} {}'.format( + id, path, meta_json, data_json)) # Update tree.path so that in bazel mode it's made relative (since # sometimes paths leak out). @@ -1184,14 +1275,6 @@ def write_cache(id: str, path: str, tree: MypyFile, return interface_hash, None data_mtime = manager.getmtime(data_json) - deps_mtime = None - if deps_json: - deps_str = json_dumps(serialized_fine_grained_deps, manager.options.debug_cache) - if not metastore.write(deps_json, deps_str): - manager.log("Error writing deps JSON file {}".format(deps_json)) - return interface_hash, None - deps_mtime = manager.getmtime(deps_json) - mtime = 0 if bazel else int(st.st_mtime) size = st.st_size options = manager.options.clone_for_module(id) @@ -1202,7 +1285,6 @@ def write_cache(id: str, path: str, tree: MypyFile, 'size': size, 'hash': source_hash, 'data_mtime': data_mtime, - 'deps_mtime': deps_mtime, 'dependencies': dependencies, 'suppressed': suppressed, 'child_modules': child_modules, @@ -1222,7 +1304,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # The next run will simply find the cache entry out of date. manager.log("Error writing meta JSON file {}".format(meta_json)) - return interface_hash, cache_meta_from_dict(meta, data_json, deps_json) + return interface_hash, cache_meta_from_dict(meta, data_json) def delete_cache(id: str, path: str, manager: BuildManager) -> None: @@ -1442,12 +1524,12 @@ class State: # Whether the module has an error or any of its dependencies have one. transitive_error = False - fine_grained_deps = None # type: Dict[str, Set[str]] - # Type checker used for checking this file. Use type_checker() for # access and to construct this on demand. 
_type_checker = None # type: Optional[TypeChecker] + fine_grained_deps_loaded = False + def __init__(self, id: Optional[str], path: Optional[str], @@ -1478,7 +1560,6 @@ def __init__(self, self.id = id or '__main__' self.options = manager.options.clone_for_module(self.id) self._type_checker = None - self.fine_grained_deps = {} if not path and source is None: assert id is not None try: @@ -1609,14 +1690,8 @@ def wrap_context(self) -> Iterator[None]: self.manager.errors.set_import_context(save_import_context) self.check_blockers() - # Methods for processing cached modules. - def load_fine_grained_deps(self) -> None: - assert self.meta is not None, "Internal error: this method must be called only" \ - " for cached modules" - assert self.meta.deps_json - deps = json.loads(self.manager.metastore.read(self.meta.deps_json)) - # TODO: Assert deps file wasn't changed. - self.fine_grained_deps = {k: set(v) for k, v in deps.items()} + def load_fine_grained_deps(self) -> Dict[str, Set[str]]: + return self.manager.load_fine_grained_deps(self.id) def load_tree(self, temporary: bool = False) -> None: assert self.meta is not None, "Internal error: this method must be called only" \ @@ -1896,7 +1971,7 @@ def _patch_indirect_dependencies(self, elif dep not in self.suppressed and dep in self.manager.missing_modules: self.suppressed.append(dep) - def compute_fine_grained_deps(self) -> None: + def compute_fine_grained_deps(self) -> Dict[str, Set[str]]: assert self.tree is not None if '/typeshed/' in self.xpath or self.xpath.startswith('typeshed/'): # We don't track changes to typeshed -- the assumption is that they are only changed @@ -1904,12 +1979,12 @@ def compute_fine_grained_deps(self) -> None: # # TODO: Not a reliable test, as we could have a package named typeshed. # TODO: Consider relaxing this -- maybe allow some typeshed changes to be tracked. - return + return {} from mypy.server.deps import get_dependencies # Lazy import to speed up startup - self.fine_grained_deps = get_dependencies(target=self.tree, - type_map=self.type_map(), - python_version=self.options.python_version, - options=self.manager.options) + return get_dependencies(target=self.tree, + type_map=self.type_map(), + python_version=self.options.python_version, + options=self.manager.options) def valid_references(self) -> Set[str]: assert self.ancestors is not None @@ -1939,7 +2014,6 @@ def write_cache(self) -> None: assert self.source_hash is not None new_interface_hash, self.meta = write_cache( self.id, self.path, self.tree, - {k: list(v) for k, v in self.fine_grained_deps.items()}, list(self.dependencies), list(self.suppressed), list(self.child_modules), dep_prios, dep_lines, self.interface_hash, self.source_hash, self.ignore_all, self.manager) @@ -2256,31 +2330,31 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: dump_graph(graph) return graph - # Fine grained protocol dependencies are serialized separately, so we read them - # after we load the cache for whole graph. + # Fine grained dependencies that didn't have an associated module in the build + # are serialized separately, so we read them after we load the graph. # We need to read them both for running in daemon mode and if we are generating # a fine-grained cache (so that we can properly update them incrementally). - # The `read_protocol_cache` will also validate - # the protocol cache against the loaded individual cache files. + # The `read_deps_cache` will also validate + # the deps cache against the loaded individual cache files. 
if manager.options.cache_fine_grained or manager.use_fine_grained_cache(): - proto_deps = read_protocol_cache(manager, graph) - if proto_deps is not None: - TypeState.proto_deps = proto_deps + t2 = time.time() + fg_deps_meta = read_deps_cache(manager, graph) + manager.add_stats(load_fg_deps_time=time.time() - t2) + if fg_deps_meta is not None: + manager.fg_deps_meta = fg_deps_meta elif manager.stats.get('fresh_metas', 0) > 0: # Clear the stats so we don't infinite loop because of positive fresh_metas manager.stats.clear() - # There were some cache files read, but no protocol dependencies loaded. - manager.log("Error reading protocol dependencies cache -- aborting cache load") + # There were some cache files read, but no fine-grained dependencies loaded. + manager.log("Error reading fine-grained dependencies cache -- aborting cache load") manager.cache_enabled = False manager.log("Falling back to full run -- reloading graph...") return dispatch(sources, manager) # If we are loading a fine-grained incremental mode cache, we - # don't want to do a real incremental reprocess of the graph---we - # just want to load in all of the cache information. - if manager.use_fine_grained_cache(): - process_fine_grained_cache_graph(graph, manager) - else: + # don't want to do a real incremental reprocess of the + # graph---we'll handle it all later. + if not manager.use_fine_grained_cache(): process_graph(graph, manager) # Update plugins snapshot. write_plugins_snapshot(manager) @@ -2291,8 +2365,10 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: # Since these are a global property of the program, they are calculated after we # processed the whole graph. TypeState.update_protocol_deps() - if TypeState.proto_deps is not None and not manager.options.fine_grained_incremental: - write_protocol_deps_cache(TypeState.proto_deps, manager, graph) + if not manager.options.fine_grained_incremental: + proto_deps = TypeState.proto_deps or {} + rdeps = generate_deps_for_cache(proto_deps, manager, graph) + write_deps_cache(rdeps, manager, graph) if manager.options.dump_deps: # This speeds up startup a little when not using the daemon mode. @@ -2592,18 +2668,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.log("No fresh SCCs left in queue") -def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None: - """Finish loading everything for use in the fine-grained incremental cache""" - - # If we are running in fine-grained incremental mode with caching, - # we don't actually have much to do: just load the fine-grained - # deps. - t0 = time.time() - for id, state in graph.items(): - state.load_fine_grained_deps() - manager.add_stats(load_fg_deps_time=time.time() - t0) - - def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. 
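To make the bucketing scheme described in write_deps_cache() and invert_deps() concrete, here is a small, self-contained sketch of how triggers are grouped by the module they refer to, with dependencies that have no parent module in the build falling back to FAKE_ROOT_MODULE. This is an illustration, not mypy's code: the simplified module_prefix() (the real one lives in mypy.server.target and consults the build graph) and the toy trigger and module names are assumptions.

from typing import Dict, Set

FAKE_ROOT_MODULE = '@root'


def trigger_to_target(s: str) -> str:
    # '<a.f>' -> 'a.f'; also strip a trailing '[wildcard]' suffix if present.
    assert s[0] == '<'
    s = s[1:-1]
    if s[-1] == ']':
        s = s.split('[')[0]
    return s


def module_prefix(modules: Set[str], target: str) -> str:
    # Longest dotted prefix of target that names a module in the build, or '' if none.
    parts = target.split('.')
    for i in range(len(parts), 0, -1):
        candidate = '.'.join(parts[:i])
        if candidate in modules:
            return candidate
    return ''


def bucket_by_module(deps: Dict[str, Set[str]],
                     modules: Set[str]) -> Dict[str, Dict[str, Set[str]]]:
    # Prepopulate so every module in the build gets a (possibly empty) bucket,
    # mirroring the prepopulation step in invert_deps().
    rdeps = {id: {} for id in modules}  # type: Dict[str, Dict[str, Set[str]]]
    for trigger, targets in deps.items():
        module = module_prefix(modules, trigger_to_target(trigger)) or FAKE_ROOT_MODULE
        rdeps.setdefault(module, {}).setdefault(trigger, set()).update(targets)
    return rdeps


rdeps = bucket_by_module({'<a.f>': {'b', 'c'}, '<missing.g>': {'b'}}, {'a', 'b', 'c'})
print(rdeps['a'])               # the '<a.f>' entry; would be written to a.deps.json
print(rdeps[FAKE_ROOT_MODULE])  # the '<missing.g>' entry; would go to @root.deps.json

Under this scheme, the validity of any one m.deps.json cannot be judged in isolation, which is why write_deps_cache() records a global source snapshot and per-file mtimes in @deps.meta.json.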
diff --git a/mypy/server/target.py b/mypy/server/target.py index 78e28582f4b3..3bc5b748bf6e 100644 --- a/mypy/server/target.py +++ b/mypy/server/target.py @@ -1,6 +1,16 @@ from typing import Iterable, Tuple, List, Optional +def trigger_to_target(s: str) -> str: + assert s[0] == '<' + # Strip off the angle brackets + s = s[1:-1] + # If there is a [wildcard] or similar, strip that off too + if s[-1] == ']': + s = s.split('[')[0] + return s + + def module_prefix(modules: Iterable[str], target: str) -> Optional[str]: result = split_target(modules, target) if result is None: diff --git a/mypy/server/update.py b/mypy/server/update.py index 50d6157cdc49..f7ec00a67da7 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -121,6 +121,7 @@ from mypy.build import ( BuildManager, State, BuildResult, Graph, load_graph, process_fresh_modules, DEBUG_FINE_GRAINED, + FAKE_ROOT_MODULE, ) from mypy.modulefinder import BuildSource from mypy.checker import FineGrainedDeferredNode @@ -138,7 +139,7 @@ from mypy.server.astmerge import merge_asts from mypy.server.aststrip import strip_target from mypy.server.deps import get_dependencies_of_target -from mypy.server.target import module_prefix, split_target +from mypy.server.target import module_prefix, split_target, trigger_to_target from mypy.server.trigger import make_trigger, WILDCARD_TAG from mypy.typestate import TypeState @@ -328,6 +329,8 @@ def update_module(self, previous_modules = self.previous_modules graph = self.graph + ensure_deps_loaded(module, self.deps, graph) + # If this is an already existing module, make sure that we have # its tree loaded so that we can snapshot it for comparison. ensure_trees_loaded(manager, graph, [module]) @@ -358,7 +361,8 @@ def update_module(self, if not trigger.endswith('__>')] self.manager.log_fine_grained('triggered: %r' % sorted(filtered)) self.triggered.extend(triggered | self.previous_targets_with_errors) - collect_dependencies([module], self.deps, graph) + if module in graph: + merge_dependencies(graph[module].compute_fine_grained_deps(), self.deps) remaining += propagate_changes_using_dependencies( manager, graph, self.deps, triggered, {module}, @@ -402,6 +406,26 @@ def find_unloaded_deps(manager: BuildManager, graph: Dict[str, State], return unloaded +def ensure_deps_loaded(module: str, + deps: Dict[str, Set[str]], graph: Dict[str, State]) -> None: + """Ensure that the dependencies on a module are loaded. + + Dependencies are loaded into the 'deps' dictionary. + + This also requires loading dependencies from any parent modules, + since dependencies will get stored with parent modules when a module + doesn't exist. + """ + if module in graph and graph[module].fine_grained_deps_loaded: + return + parts = module.split('.') + for i in range(len(parts)): + base = '.'.join(parts[:i + 1]) + if base in graph and not graph[base].fine_grained_deps_loaded: + merge_dependencies(graph[base].load_fine_grained_deps(), deps) + graph[base].fine_grained_deps_loaded = True + + def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], initial: Sequence[str]) -> None: """Ensure that the modules in initial and their deps have loaded trees.""" @@ -416,8 +440,10 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]: """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. 
- deps = {} # type: Dict[str, Set[str]] - collect_dependencies(graph, deps, graph) + deps = manager.load_fine_grained_deps(FAKE_ROOT_MODULE) # type: Dict[str, Set[str]] + for id in graph: + if graph[id].tree is not None: + merge_dependencies(graph[id].compute_fine_grained_deps(), deps) TypeState.add_all_protocol_deps(deps) return deps @@ -653,14 +679,10 @@ def get_sources(fscache: FileSystemCache, return sources -def collect_dependencies(new_modules: Iterable[str], - deps: Dict[str, Set[str]], - graph: Dict[str, State]) -> None: - for id in new_modules: - if id not in graph: - continue - for trigger, targets in graph[id].fine_grained_deps.items(): - deps.setdefault(trigger, set()).update(targets) +def merge_dependencies(new_deps: Dict[str, Set[str]], + deps: Dict[str, Set[str]]) -> None: + for trigger, targets in new_deps.items(): + deps.setdefault(trigger, set()).update(targets) # Merge also the newly added protocol deps. TypeState.update_protocol_deps(deps) @@ -820,6 +842,10 @@ def find_targets_recursive( worklist = set() for target in current: if target.startswith('<'): + module_id = module_prefix(graph, trigger_to_target(target)) + if module_id: + ensure_deps_loaded(module_id, deps, graph) + worklist |= deps.get(target, set()) - processed else: module_id = module_prefix(graph, target) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 1818a0814a50..a4010956d5d5 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -3637,9 +3637,9 @@ import b [file a.py] [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering --- the proto deps file with something with hash mismatches. -[file ../.mypy_cache/3.6/@proto_deps.meta.json.2] -{"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} +-- the proto deps file with something with mtime mismatches. +[file ../.mypy_cache/3.6/@deps.meta.json.2] +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@root": {"path": "@root.deps.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} [file b.py.2] # uh -- Every file should get reloaded, since the cache was invalidated @@ -3666,7 +3666,7 @@ import b [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by deleting -- the proto deps file. 
-[delete ../.mypy_cache/3.6/@proto_deps.meta.json.2] +[delete ../.mypy_cache/3.6/@deps.meta.json.2] [file b.py.2] # uh -- Every file should get reloaded, since the cache was invalidated diff --git a/test-data/unit/fine-grained-cache-incremental.test b/test-data/unit/fine-grained-cache-incremental.test index 9387d6d1c3d6..79e8abdb9776 100644 --- a/test-data/unit/fine-grained-cache-incremental.test +++ b/test-data/unit/fine-grained-cache-incremental.test @@ -51,6 +51,34 @@ x = 'hi' == a.py:3: error: Unsupported operand types for + ("int" and "str") +[case testIncrCacheDoubleChange1] +# num_build_steps: 2 +import b +import c +[file a.py] +def f(x: int) -> None: + pass +[file b.py] +from a import f +f(10) +[file c.py] +from a import f +f(10) + +[file a.py.2] +def f(x: int) -> None: + pass +# nothing changed + +[file a.py.3] +def f(x: str) -> None: + pass +[out] +== +== +c.py:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" +b.py:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" + [case testIncrCacheProtocol1] # num_build_steps: 2 import a @@ -173,9 +201,10 @@ a.py:8: note: x: expected "int", got "str" [file a.py] [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering --- the proto deps file with something with hash mismatches. -[file ../.mypy_cache/3.6/@proto_deps.meta.json.2] -{"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} +-- the proto deps file with something with mtime mismatches. +[file ../.mypy_cache/3.6/@deps.meta.json.2] +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@root": {"path": "@root.deps.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} + [file b.py.2] # uh -- A full reload shows up as nothing getting rechecked by fine-grained mode.
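The @deps.meta.json blobs in the test fixtures above follow the shape that read_deps_cache() validates: a 'snapshot' mapping module ids to source hashes, and a 'deps_meta' mapping module ids to the path and mtime of their deps files. The standalone sketch below illustrates that validation under simplifying assumptions: the helper name, direct filesystem access, and plain-dict types are made up for illustration; mypy's real code goes through its metastore and BuildManager.getmtime().

import json
import os
from typing import Any, Dict, Optional


def validate_deps_meta(meta_path: str,
                       current_hashes: Dict[str, str]) -> Optional[Dict[str, Any]]:
    # Load the global deps metadata file; any read or parse failure invalidates the cache.
    try:
        with open(meta_path) as f:
            meta = json.load(f)
    except (OSError, ValueError):
        return None
    snapshot = meta['snapshot']    # module id -> source hash when the deps were written
    deps_meta = meta['deps_meta']  # module id -> {'path': ..., 'mtime': ...}
    # The recorded hashes must agree with the sources of the current build.
    common = set(snapshot) & set(current_hashes)
    if any(snapshot[id] != current_hashes[id] for id in common):
        return None
    # Every per-module deps file must still exist with its recorded mtime.
    for id, m in deps_meta.items():
        try:
            if int(os.path.getmtime(m['path'])) != m['mtime']:
                return None
        except FileNotFoundError:
            return None
    return deps_meta

Because the clobbered fixtures record 'mtime': 0 for every deps file, the mtime check fails, the whole fine-grained cache is treated as invalid, and the tests see a full reload.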