diff --git a/misc/incremental_checker.py b/misc/incremental_checker.py index 4bd30551a5cb..9d0c7209c48b 100755 --- a/misc/incremental_checker.py +++ b/misc/incremental_checker.py @@ -49,6 +49,7 @@ CACHE_PATH = ".incremental_checker_cache.json" MYPY_REPO_URL = "https://github.com/python/mypy.git" MYPY_TARGET_FILE = "mypy" +DAEMON_CMD = ["python3", "-m", "mypy.dmypy"] JsonDict = Dict[str, Any] @@ -121,23 +122,30 @@ def get_nth_commit(repo_folder_path, n: int) -> Tuple[str, str]: def run_mypy(target_file_path: Optional[str], mypy_cache_path: str, mypy_script: Optional[str], - incremental: bool = True, + *, + incremental: bool = False, + daemon: bool = False, verbose: bool = False) -> Tuple[float, str]: """Runs mypy against `target_file_path` and returns what mypy prints to stdout as a string. If `incremental` is set to True, this function will use store and retrieve all caching data inside `mypy_cache_path`. If `verbose` is set to True, this function will pass the "-v -v" flags to mypy to make it output debugging information. + + If `daemon` is True, we use daemon mode; the daemon must be started and stopped by the caller. """ - if mypy_script is None: - command = ["python3", "-m", "mypy"] + if daemon: + command = DAEMON_CMD + ["check", "-q"] else: - command = [mypy_script] - command.extend(["--cache-dir", mypy_cache_path]) - if incremental: - command.append("--incremental") - if verbose: - command.extend(["-v", "-v"]) + if mypy_script is None: + command = ["python3", "-m", "mypy"] + else: + command = [mypy_script] + command.extend(["--cache-dir", mypy_cache_path]) + if incremental: + command.append("--incremental") + if verbose: + command.extend(["-v", "-v"]) if target_file_path is not None: command.append(target_file_path) start = time.time() @@ -148,6 +156,21 @@ def run_mypy(target_file_path: Optional[str], return runtime, output +def start_daemon(mypy_cache_path: str, verbose: bool) -> None: + stdout, stderr, status = execute(DAEMON_CMD + ["status"], fail_on_error=False) + if status: + cmd = DAEMON_CMD + ["start", "--", "--cache-dir", mypy_cache_path] + if verbose: + cmd.extend(["-v", "-v"]) + execute(cmd) + + +def stop_daemon() -> None: + stdout, stderr, status = execute(DAEMON_CMD + ["status"], fail_on_error=False) + if status == 0: + execute(DAEMON_CMD + ["stop"]) + + def load_cache(incremental_cache_path: str = CACHE_PATH) -> JsonDict: if os.path.exists(incremental_cache_path): with open(incremental_cache_path, 'r') as stream: @@ -196,7 +219,9 @@ def test_incremental(commits: List[Tuple[str, str]], temp_repo_path: str, target_file_path: Optional[str], mypy_cache_path: str, - mypy_script: Optional[str]) -> None: + *, + mypy_script: Optional[str] = None, + daemon: bool = False) -> None: """Runs incremental mode on all `commits` to verify the output matches the expected output. This function runs mypy on the `target_file_path` inside the `temp_repo_path`. 
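[Editor's note] The daemon support added to incremental_checker.py boils down to wrapping the existing benchmark loop in a start/check/stop cycle against the "python3 -m mypy.dmypy" entry point. A rough standalone sketch of that cycle (not part of the patch; target and cache paths are illustrative):

    import subprocess

    DAEMON_CMD = ["python3", "-m", "mypy.dmypy"]

    def benchmark_with_daemon(target: str, cache_dir: str) -> None:
        # Start the daemon once; mypy flags go after "--", as dmypy requires.
        subprocess.run(DAEMON_CMD + ["start", "--", "--cache-dir", cache_dir], check=True)
        try:
            # Every subsequent check reuses the state the daemon keeps in memory.
            for _ in range(2):
                subprocess.run(DAEMON_CMD + ["check", "-q", target])
        finally:
            subprocess.run(DAEMON_CMD + ["stop"], check=True)

    benchmark_with_daemon("mypy", "tmp_mypy_cache")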
The @@ -208,7 +233,7 @@ def test_incremental(commits: List[Tuple[str, str]], print('Now testing commit {0}: "{1}"'.format(commit_id, message)) execute(["git", "-C", temp_repo_path, "checkout", commit_id]) runtime, output = run_mypy(target_file_path, mypy_cache_path, mypy_script, - incremental=True) + incremental=True, daemon=daemon) expected_runtime = cache[commit_id]['runtime'] # type: float expected_output = cache[commit_id]['output'] # type: str if output != expected_output: @@ -278,11 +303,15 @@ def test_repo(target_repo_url: str, temp_repo_path: str, save_cache(cache, incremental_cache_path) # Stage 4: Rewind and re-run mypy (with incremental mode enabled) + if params.daemon: + start_daemon(mypy_cache_path, False) test_incremental(commits, cache, temp_repo_path, target_file_path, mypy_cache_path, - mypy_script=params.mypy_script) + mypy_script=params.mypy_script, daemon=params.daemon) - # Stage 5: Remove temp files + # Stage 5: Remove temp files, stop daemon cleanup(temp_repo_path, mypy_cache_path) + if params.daemon: + stop_daemon() def main() -> None: @@ -309,6 +338,8 @@ def main() -> None: parser.add_argument("--sample", type=int, help="use a random sample of size SAMPLE") parser.add_argument("--seed", type=str, help="random seed") parser.add_argument("--mypy-script", type=str, help="alternate mypy script to run") + parser.add_argument("--daemon", action='store_true', + help="use mypy daemon instead of incremental (highly experimental)") if len(sys.argv[1:]) == 0: parser.print_help() diff --git a/mypy/build.py b/mypy/build.py index 891fc3c8d6dd..beb30544ad19 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -13,6 +13,7 @@ import binascii import collections import contextlib +import gc import hashlib import json import os.path @@ -23,8 +24,8 @@ from os.path import dirname, basename import errno -from typing import (AbstractSet, Dict, Iterable, Iterator, List, cast, Any, - NamedTuple, Optional, Set, Tuple, Union, Callable) +from typing import (AbstractSet, Any, cast, Dict, Iterable, Iterator, List, + Mapping, NamedTuple, Optional, Set, Tuple, Union, Callable) # Can't use TYPE_CHECKING because it's not in the Python 3.5.1 stdlib MYPY = False if MYPY: @@ -80,7 +81,7 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None: self.manager = manager self.graph = graph self.files = manager.modules - self.types = manager.all_types + self.types = manager.all_types # Non-empty for tests only self.errors = manager.errors.messages() @@ -119,10 +120,17 @@ def is_source(self, file: MypyFile) -> bool: return False +# A dict containing saved cache data from a previous run. This will +# be updated in place with newly computed cache data. See dmypy.py. +SavedCache = Dict[str, Tuple['CacheMeta', MypyFile]] + + def build(sources: List[BuildSource], options: Options, alt_lib_path: Optional[str] = None, - bin_dir: Optional[str] = None) -> BuildResult: + bin_dir: Optional[str] = None, + saved_cache: Optional[SavedCache] = None, + ) -> BuildResult: """Analyze a program. A single call to build performs parsing, semantic analysis and optionally @@ -138,7 +146,10 @@ def build(sources: List[BuildSource], (takes precedence over other directories) bin_dir: directory containing the mypy script, used for finding data directories; if omitted, use '.' as the data directory + saved_cache: optional dict with saved cache state for dmypy (read-write!) """ + # This seems the most reasonable place to tune garbage collection. 
+ gc.set_threshold(50000) data_dir = default_data_dir(bin_dir) @@ -195,16 +206,16 @@ def build(sources: List[BuildSource], options=options, version_id=__version__, plugin=plugin, - errors=errors) + errors=errors, + saved_cache=saved_cache) try: graph = dispatch(sources, manager) return BuildResult(manager, graph) finally: - manager.log("Build finished in %.3f seconds with %d modules, %d types, and %d errors" % + manager.log("Build finished in %.3f seconds with %d modules, and %d errors" % (time.time() - manager.start_time, len(manager.modules), - len(manager.all_types), manager.errors.num_messages())) # Finish the HTML or XML reports even if CompileError was raised. reports.finish() @@ -339,6 +350,27 @@ def default_lib_path(data_dir: str, # silent mode or simply not found. +def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta: + sentinel = None # type: Any # the values will be post-validated below + return CacheMeta( + meta.get('id', sentinel), + meta.get('path', sentinel), + int(meta['mtime']) if 'mtime' in meta else sentinel, + meta.get('size', sentinel), + meta.get('hash', sentinel), + meta.get('dependencies', []), + int(meta['data_mtime']) if 'data_mtime' in meta else sentinel, + data_json, + meta.get('suppressed', []), + meta.get('child_modules', []), + meta.get('options'), + meta.get('dep_prios', []), + meta.get('interface_hash', ''), + meta.get('version_id', sentinel), + meta.get('ignore_all', True), + ) + + # Priorities used for imports. (Here, top-level includes inside a class.) # These are used to determine a more predictable order in which the # nodes in an import cycle are processed. @@ -452,8 +484,6 @@ def find_config_file_line_number(path: str, section: str, setting_name: str) -> return -1 -# TODO: Get rid of all_types. It's not used except for one log message. -# Maybe we could instead publish a map from module ID to its type_map. class BuildManager: """This class holds shared state for building a mypy program. @@ -469,13 +499,15 @@ class BuildManager: Semantic analyzer, pass 2 semantic_analyzer_pass3: Semantic analyzer, pass 3 - all_types: Map {Expression: Type} collected from all modules + all_types: Map {Expression: Type} collected from all modules (tests only) options: Build options missing_modules: Set of modules that could not be imported encountered so far stale_modules: Set of modules that needed to be rechecked (only used by tests) version_id: The current mypy version (based on commit id when possible) plugin: Active mypy plugin(s) errors: Used for reporting all errors + saved_cache: Dict with saved cache state for dmypy (read-write!) 
+ stats: Dict with various instrumentation numbers """ def __init__(self, data_dir: str, @@ -486,7 +518,9 @@ def __init__(self, data_dir: str, options: Options, version_id: str, plugin: Plugin, - errors: Errors) -> None: + errors: Errors, + saved_cache: Optional[SavedCache] = None, + ) -> None: self.start_time = time.time() self.data_dir = data_dir self.errors = errors @@ -501,14 +535,15 @@ def __init__(self, data_dir: str, self.plugin = plugin self.semantic_analyzer = SemanticAnalyzerPass2(self.modules, self.missing_modules, lib_path, self.errors, self.plugin) - self.modules = self.semantic_analyzer.modules self.semantic_analyzer_pass3 = SemanticAnalyzerPass3(self.modules, self.errors, self.semantic_analyzer) - self.all_types = {} # type: Dict[Expression, Type] + self.all_types = {} # type: Dict[Expression, Type] # Used by tests only self.indirection_detector = TypeIndirectionVisitor() self.stale_modules = set() # type: Set[str] self.rechecked_modules = set() # type: Set[str] self.plugin = plugin + self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache + self.stats = {} # type: Dict[str, Any] # Values are ints or floats def maybe_swap_for_shadow_path(self, path: str) -> str: if (self.options.shadow_file and @@ -592,6 +627,9 @@ def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> My num_errs = self.errors.num_messages() tree = parse(source, path, self.errors, options=self.options) tree._fullname = id + self.add_stats(files_parsed=1, + modules_parsed=int(not tree.is_stub), + stubs_parsed=int(tree.is_stub)) if self.errors.num_messages() != num_errs: self.log("Bailing due to parse errors") @@ -626,7 +664,10 @@ def report_file(self, def log(self, *message: str) -> None: if self.options.verbosity >= 1: - print('LOG: ', *message, file=sys.stderr) + if message: + print('LOG: ', *message, file=sys.stderr) + else: + print(file=sys.stderr) sys.stderr.flush() def trace(self, *message: str) -> None: @@ -634,6 +675,16 @@ def trace(self, *message: str) -> None: print('TRACE:', *message, file=sys.stderr) sys.stderr.flush() + def add_stats(self, **kwds: Any) -> None: + for key, value in kwds.items(): + if key in self.stats: + self.stats[key] += value + else: + self.stats[key] = value + + def stats_summary(self) -> Mapping[str, object]: + return self.stats + def remove_cwd_prefix_from_path(p: str) -> str: """Remove current working directory prefix from p, if present. @@ -880,6 +931,14 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache A CacheMeta instance if the cache data was found and appears valid; otherwise None. """ + saved_cache = manager.saved_cache + if id in saved_cache: + m, t = saved_cache[id] + manager.add_stats(reused_metas=1) + manager.trace("Reusing saved metadata for %s" % id) + # Note: it could still be skipped if the mtime/size/hash mismatches. 
+ return m + # TODO: May need to take more build options into account meta_json, data_json = get_cache_names(id, path, manager) manager.trace('Looking for {} at {}'.format(id, meta_json)) @@ -894,24 +953,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache manager.log('Could not load cache for {}: meta cache is not a dict: {}' .format(id, repr(meta))) return None - sentinel = None # type: Any # the values will be post-validated below - m = CacheMeta( - meta.get('id', sentinel), - meta.get('path', sentinel), - int(meta['mtime']) if 'mtime' in meta else sentinel, - meta.get('size', sentinel), - meta.get('hash', sentinel), - meta.get('dependencies', []), - int(meta['data_mtime']) if 'data_mtime' in meta else sentinel, - data_json, - meta.get('suppressed', []), - meta.get('child_modules', []), - meta.get('options'), - meta.get('dep_prios', []), - meta.get('interface_hash', ''), - meta.get('version_id', sentinel), - meta.get('ignore_all', True), - ) + m = cache_meta_from_dict(meta, data_json) # Don't check for path match, that is dealt with in validate_meta(). if (m.id != id or m.mtime is None or m.size is None or @@ -948,6 +990,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache .format(key, cached_options.get(key), current_options.get(key))) return None + manager.add_stats(fresh_metas=1) return m @@ -971,7 +1014,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], ignore_all: bool, manager: BuildManager) -> Optional[CacheMeta]: '''Checks whether the cached AST of this module can be used. - Return: + Returns: None, if the cached AST is unusable. Original meta, if mtime/size matched. Meta with mtime updated to match source file, if hash/size matched but mtime/path didn't. @@ -999,6 +1042,10 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], # TODO: Share stat() outcome with find_module() path = os.path.abspath(path) + # TODO: Don't use isfile() but check st.st_mode + if not os.path.isfile(path): + manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path)) + return None st = manager.get_stat(path) # TODO: Errors size = st.st_size if size != meta.size: @@ -1044,7 +1091,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], return meta # It's a match on (id, path, size, hash, mtime). - manager.log('Metadata fresh for {}: file {}'.format(id, path)) + manager.trace('Metadata fresh for {}: file {}'.format(id, path)) return meta @@ -1059,7 +1106,7 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], suppressed: List[str], child_modules: List[str], dep_prios: List[int], old_interface_hash: str, source_hash: str, - ignore_all: bool, manager: BuildManager) -> str: + ignore_all: bool, manager: BuildManager) -> Tuple[str, Optional[CacheMeta]]: """Write cache files for a module. Note that this mypy's behavior is still correct when any given @@ -1076,8 +1123,10 @@ def write_cache(id: str, path: str, tree: MypyFile, old_interface_hash: the hash from the previous version of the data cache file manager: the build manager (for pyversion, log/trace) - Return: - The new interface hash based on the serialized tree + Returns: + A tuple containing the interface hash and CacheMeta + corresponding to the metadata that was written (the latter may + be None if the cache could not be written). 
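[Editor's note] The saved_cache plumbing above (find_cache_meta() consulting manager.saved_cache, and write_cache() now returning the CacheMeta it wrote) is what lets a long-lived process skip re-reading and re-validating cache files. A rough standalone sketch of the intended usage, assuming a hypothetical prog.py to check:

    import mypy.build
    import mypy.main

    # Build options the way the daemon does: incremental mode, no targets on the flag line.
    _, options = mypy.main.process_options(['-i'], False)
    sources = mypy.main.create_source_list(['prog.py'], options)  # hypothetical file
    saved_cache = {}  # type: mypy.build.SavedCache

    for run in (1, 2):
        # The dict is mutated in place; the second run can reuse metas and trees
        # (visible as reused_metas/reused_trees in the stats).
        result = mypy.build.build(sources, options, saved_cache=saved_cache)
        print("run", run, "errors:", len(result.errors), dict(result.manager.stats_summary()))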
""" # Obtain file paths path = os.path.abspath(path) @@ -1110,7 +1159,7 @@ def write_cache(id: str, path: str, tree: MypyFile, except OSError: pass # Still return the interface hash we computed. - return interface_hash + return interface_hash, None # Write data cache file, if applicable if old_interface_hash == interface_hash: @@ -1132,7 +1181,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # data_mtime field won't match the data file's mtime. # Both have the effect of slowing down the next run a # little bit due to an out-of-date cache file. - return interface_hash + return interface_hash, None data_mtime = getmtime(data_json) mtime = int(st.st_mtime) @@ -1166,7 +1215,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # The next run will simply find the cache entry out of date. manager.log("Error writing meta JSON file {}".format(meta_json)) - return interface_hash + return interface_hash, cache_meta_from_dict(meta, data_json) def delete_cache(id: str, path: str, manager: BuildManager) -> None: @@ -1179,6 +1228,8 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: path = os.path.abspath(path) meta_json, data_json = get_cache_names(id, path, manager) manager.log('Deleting {} {} {} {}'.format(id, path, meta_json, data_json)) + if id in manager.saved_cache: + del manager.saved_cache[id] for filename in [data_json, meta_json]: try: @@ -1385,6 +1436,9 @@ class State: # Whether to ignore all errors ignore_all = False + # Whether this module was found to have errors + has_errors = False + def __init__(self, id: Optional[str], path: Optional[str], @@ -1561,6 +1615,7 @@ def mark_interface_stale(self, *, on_errors: bool = False) -> None: """Marks this module as having a stale public interface, and discards the cache data.""" self.meta = None self.externally_same = False + self.has_errors = on_errors if not on_errors: self.manager.stale_modules.add(self.id) @@ -1593,6 +1648,7 @@ def load_tree(self) -> None: # TODO: Assert data file wasn't changed. self.tree = MypyFile.deserialize(data) self.manager.modules[self.id] = self.tree + self.manager.add_stats(fresh_trees=1) def fix_cross_refs(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" @@ -1788,7 +1844,9 @@ def finish_passes(self) -> None: if self.options.semantic_analysis_only: return with self.wrap_context(): - manager.all_types.update(self.type_checker.type_map) + # Some tests want to look at the set of all types. 
+ if manager.options.use_builtins_fixtures: + manager.all_types.update(self.type_checker.type_map) if self.options.incremental: self._patch_indirect_dependencies(self.type_checker.module_refs, @@ -1841,7 +1899,7 @@ def write_cache(self) -> None: self.mark_interface_stale(on_errors=True) return dep_prios = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies] - new_interface_hash = write_cache( + new_interface_hash, self.meta = write_cache( self.id, self.path, self.tree, list(self.dependencies), list(self.suppressed), list(self.child_modules), dep_prios, self.interface_hash, self.source_hash, self.ignore_all, @@ -1855,8 +1913,15 @@ def write_cache(self) -> None: def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: + manager.log() manager.log("Mypy version %s" % __version__) + t0 = time.time() graph = load_graph(sources, manager) + t1 = time.time() + manager.add_stats(graph_size=len(graph), + stubs_found=sum(g.path is not None and g.path.endswith('.pyi') + for g in graph.values()), + graph_load_time=(t1 - t0)) if not graph: print("Nothing to do?!") return graph @@ -1868,9 +1933,19 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: if manager.options.warn_unused_ignores: # TODO: This could also be a per-module option. manager.errors.generate_unused_ignore_notes() + manager.saved_cache.update(preserve_cache(graph)) return graph +def preserve_cache(graph: Graph) -> SavedCache: + saved_cache = {} + for id, state in graph.items(): + assert state.id == id + if state.meta is not None and state.tree is not None and not state.has_errors: + saved_cache[id] = (state.meta, state.tree) + return saved_cache + + class NodeInfo: """Some info about a node in the graph of SCCs.""" @@ -2092,11 +2167,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: scc_str = " ".join(scc) if fresh: - manager.log("Queuing %s SCC (%s)" % (fresh_msg, scc_str)) + manager.trace("Queuing %s SCC (%s)" % (fresh_msg, scc_str)) fresh_scc_queue.append(scc) else: if len(fresh_scc_queue) > 0: - manager.log("Processing the last {} queued SCCs".format(len(fresh_scc_queue))) + manager.log("Processing {} queued fresh SCCs".format(len(fresh_scc_queue))) # Defer processing fresh SCCs until we actually run into a stale SCC # and need the earlier modules to be loaded. # @@ -2107,7 +2182,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # TODO: see if it's possible to determine if we need to process only a # _subset_ of the past SCCs instead of having to process them all. 
for prev_scc in fresh_scc_queue: - process_fresh_scc(graph, prev_scc) + process_fresh_scc(graph, prev_scc, manager) fresh_scc_queue = [] size = len(scc) if size == 1: @@ -2117,8 +2192,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: process_stale_scc(graph, scc, manager) sccs_left = len(fresh_scc_queue) + nodes_left = sum(len(scc) for scc in fresh_scc_queue) + manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left) if sccs_left: - manager.log("{} fresh SCCs left in queue (and will remain unprocessed)".format(sccs_left)) + manager.log("{} fresh SCCs ({} nodes) left in queue (and will remain unprocessed)" + .format(sccs_left, nodes_left)) manager.trace(str(fresh_scc_queue)) else: manager.log("No fresh SCCs left in queue") @@ -2171,8 +2249,19 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)] -def process_fresh_scc(graph: Graph, scc: List[str]) -> None: +def process_fresh_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None: """Process the modules in one SCC from their cached data.""" + # TODO: Clean this up, it's ugly. + saved_cache = manager.saved_cache + if all(id in saved_cache for id in scc): + trees = {id: saved_cache[id][1] for id in scc} + if all(trees.values()): + for id, tree in trees.items(): + manager.add_stats(reused_trees=1) + manager.trace("Reusing saved tree %s" % id) + graph[id].tree = tree + manager.modules[id] = tree + return for id in scc: graph[id].load_tree() for id in scc: diff --git a/mypy/dmypy.py b/mypy/dmypy.py new file mode 100644 index 000000000000..87d9e85f8a00 --- /dev/null +++ b/mypy/dmypy.py @@ -0,0 +1,596 @@ +"""Client for mypy daemon mode. + +Highly experimental! Only supports UNIX-like systems. + +This manages a daemon process which keeps useful state in memory +rather than having to read it back from disk on each run. +""" + +import argparse +import gc +import io +import json +import os +import signal +import socket +import sys +import time + +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, TypeVar + +import psutil # type: ignore # It's not in typeshed yet + +# TODO: Import all mypy modules lazily to speed up client startup time. +import mypy.build +import mypy.errors +import mypy.main + +# Argument parser. Subparsers are tied to action functions by the +# @action(subparse) decorator. 
+ +parser = argparse.ArgumentParser(description="Client for mypy daemon mode", + fromfile_prefix_chars='@') +parser.set_defaults(action=None) +subparsers = parser.add_subparsers() + +start_parser = subparsers.add_parser('start', help="Start daemon") +start_parser.add_argument('flags', metavar='FLAG', nargs='*', type=str, + help="Regular mypy flags (precede with --)") + +status_parser = subparsers.add_parser('status', help="Show daemon status") + +stop_parser = subparsers.add_parser('stop', help="Stop daemon (asks it politely to go away)") + +kill_parser = subparsers.add_parser('kill', help="Kill daemon (kills the process)") + +restart_parser = subparsers.add_parser('restart', + help="Restart daemon (stop or kill followed by start)") +restart_parser.add_argument('flags', metavar='FLAG', nargs='*', type=str, + help="Regular mypy flags (precede with --)") + +check_parser = subparsers.add_parser('check', help="Check some files (requires running daemon)") +check_parser.add_argument('-q', '--quiet', action='store_true', + help="Suppress instrumentation stats") +check_parser.add_argument('files', metavar='FILE', nargs='+', help="File (or directory) to check") + +recheck_parser = subparsers.add_parser('recheck', + help="Check the same files as the most previous check run (requires running daemon)") +recheck_parser.add_argument('-q', '--quiet', action='store_true', + help="Suppress instrumentation stats") + +hang_parser = subparsers.add_parser('hang', help="Hang for 100 seconds") + +daemon_parser = subparsers.add_parser('daemon', help="Run daemon in foreground") +daemon_parser.add_argument('flags', metavar='FLAG', nargs='*', type=str, + help="Regular mypy flags (precede with --)") + +help_parser = subparsers.add_parser('help') + + +def main() -> None: + """The code is top-down.""" + args = parser.parse_args() + if not args.action: + parser.print_usage() + else: + args.action(args) + + +ActionFunction = Callable[[argparse.Namespace], None] + + +def action(subparser: argparse.ArgumentParser) -> Callable[[ActionFunction], None]: + """Decorator to tie an action function to a subparser.""" + def register(func: ActionFunction) -> None: + subparser.set_defaults(action=func) + return register + + +# Action functions (run in client from command line). +# TODO: Use a separate exception instead of SystemExit to indicate failures. + +@action(start_parser) +def do_start(args: argparse.Namespace) -> None: + """Start daemon (it must not already be running). + + This is where mypy flags are set. Setting flags is a bit awkward; + you have to use e.g.: + + dmypy start -- --strict + + since we don't want to duplicate mypy's huge list of flags. + """ + try: + pid, sockname = get_status() + except SystemExit as err: + if daemonize(Server(args.flags).serve): + sys.exit(1) + wait_for_server() + else: + sys.exit("Daemon is still alive") + + +@action(status_parser) +def do_status(args: argparse.Namespace) -> None: + """Print daemon status. + + This verifies that it is responsive to requests. 
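[Editor's note] The command registration above is plain argparse subparsers plus a small decorator that stores the handler as the subparser's default `action`. A self-contained sketch of the same pattern, with a hypothetical "ping" command:

    import argparse

    parser = argparse.ArgumentParser()
    parser.set_defaults(action=None)
    subparsers = parser.add_subparsers()
    ping_parser = subparsers.add_parser('ping', help="Hypothetical example command")

    def action(subparser):
        def register(func):
            subparser.set_defaults(action=func)
        return register

    @action(ping_parser)
    def do_ping(args):
        print("pong")

    # Like the real decorator, register() returns None, so the decorated name is
    # only reachable through args.action.
    args = parser.parse_args(['ping'])
    if args.action:
        args.action(args)  # prints "pong"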
+ """ + status = read_status() + show_stats(status) + check_status(status) + try: + response = request('status') + except Exception as err: + print("Daemon is stuck; consider %s kill" % sys.argv[0]) + else: + show_stats(response) + + +@action(stop_parser) +def do_stop(args: argparse.Namespace) -> None: + """Stop daemon politely (via a request).""" + try: + response = request('stop') + except Exception as err: + sys.exit("Daemon is stuck; consider %s kill" % sys.argv[0]) + else: + if response: + print("Stop response:", response) + else: + print("Daemon stopped") + + +@action(kill_parser) +def do_kill(args: argparse.Namespace) -> None: + """Kill daemon rudely (by killing the process).""" + pid, sockname = get_status() + try: + os.kill(pid, signal.SIGKILL) + except os.error as err: + sys.exit(str(err)) + else: + print("Daemon killed") + + +@action(restart_parser) +def do_restart(args: argparse.Namespace) -> None: + """Restart daemon. + + We first try to stop it politely if it's running. This also sets + mypy flags (and has the same issues as start). + """ + try: + response = request('stop') + except SystemExit: + pass + else: + if response: + sys.exit("Status: %s" % str(response)) + else: + print("Daemon stopped") + if daemonize(Server(args.flags).serve): + sys.exit(1) + wait_for_server() + + +def wait_for_server(timeout: float = 5.0) -> None: + """Wait until the server is up. + + Exit if it doesn't happen within the timeout. + """ + endtime = time.time() + timeout + while time.time() < endtime: + try: + data = read_status() + except SystemExit: + # If the file isn't there yet, retry later. + time.sleep(0.1) + continue + # If the file's content is bogus or the process is dead, fail. + pid, sockname = check_status(data) + print("Daemon started") + return + sys.exit("Timed out waiting for daemon to start") + + +@action(check_parser) +def do_check(args: argparse.Namespace) -> None: + """Ask the daemon to check a list of files.""" + t0 = time.time() + response = request('check', files=args.files) + t1 = time.time() + response['roundtrip_time'] = t1 - t0 + check_output(response, args.quiet) + + +@action(recheck_parser) +def do_recheck(args: argparse.Namespace) -> None: + """Ask the daemon to check the same list of files it checked most recently. + + This doesn't work across daemon restarts. 
+ """ + t0 = time.time() + response = request('recheck') + t1 = time.time() + response['roundtrip_time'] = t1 - t0 + check_output(response, args.quiet) + + +def check_output(response: Dict[str, Any], quiet: bool) -> None: + """Print the output from a check or recheck command.""" + try: + out, err, status = response['out'], response['err'], response['status'] + except KeyError: + sys.exit("Response: %s" % str(response)) + sys.stdout.write(out) + sys.stderr.write(err) + if not quiet: + show_stats(response) + if status: + sys.exit(status) + + +def show_stats(response: Mapping[str, object]) -> None: + for key, value in sorted(response.items()): + if key not in ('out', 'err'): + print("%20s: %10s" % (key, "%.3f" % value if isinstance(value, float) else value)) + + +@action(hang_parser) +def do_hang(args: argparse.Namespace) -> None: + """Hang for 100 seconds, as a debug hack.""" + request('hang') + + +@action(daemon_parser) +def do_daemon(args: argparse.Namespace) -> None: + """Serve requests in the foreground.""" + Server(args.flags).serve() + + +@action(help_parser) +def do_help(args: argparse.Namespace) -> None: + """Print full help (same as dmypy --help).""" + parser.print_help() + + +# Client-side infrastructure. + +STATUS_FILE = 'dmypy.json' + + +def request(command: str, **kwds: object) -> Dict[str, Any]: + """Send a request to the daemon. + + Return the JSON dict with the response. + """ + args = dict(kwds) + if command: + args.update(command=command) + data = json.dumps(args) + pid, sockname = get_status() + sock = socket.socket(socket.AF_UNIX) + sock.connect(sockname) + sock.sendall(data.encode('utf8')) + sock.shutdown(socket.SHUT_WR) + try: + response = receive(sock) + except OSError as err: + return {'error': str(err)} + else: + return response + finally: + sock.close() + + +def get_status() -> Tuple[int, str]: + """Read status file and check if the process is alive. + + Return (pid, sockname) on success. + + Raise SystemExit() if something's wrong. + """ + data = read_status() + return check_status(data) + + +def check_status(data: Dict[str, Any]) -> Tuple[int, str]: + """Check if the process is alive. + + Return (pid, sockname) on success. + + Raise SystemExit() if something's wrong. + """ + if 'pid' not in data: + raise SystemExit("Invalid status file (no pid field)") + pid = data['pid'] + if not isinstance(pid, int): + raise SystemExit("pid field is not an int") + try: + os.kill(pid, 0) + except OSError as err: + raise SystemExit("Daemon has died") + if 'sockname' not in data: + raise SystemExit("Invalid status file (no sockname field)") + sockname = data['sockname'] + if not isinstance(sockname, str): + raise SystemExit("sockname field is not a string") + return pid, sockname + + +def read_status() -> Dict[str, object]: + """Read status file.""" + if not os.path.isfile(STATUS_FILE): + raise SystemExit("No status file found") + with open(STATUS_FILE) as f: + try: + data = json.load(f) + except Exception as err: + raise SystemExit("Malformed status file (not JSON)") + if not isinstance(data, dict): + raise SystemExit("Invalid status file (not a dict)") + return data + + +def daemonize(func: Callable[[], None]) -> int: + """Arrange to call func() in a grandchild of the current process. + + Return 0 for success, exit status for failure, negative if + subprocess killed by signal. 
+ """ + # See https://stackoverflow.com/questions/473620/how-do-you-create-a-daemon-in-python + sys.stdout.flush() + sys.stderr.flush() + pid = os.fork() + if pid: + # Parent process: wait for child in case things go bad there. + npid, sts = os.waitpid(pid, 0) + sig = sts & 0xff + if sig: + print("Child killed by signal", sig) + return -sig + sts = sts >> 8 + if sts: + print("Child exit status", sts) + return sts + # Child process: do a bunch of UNIX stuff and then fork a grandchild. + try: + os.setsid() # Detach controlling terminal + os.umask(0o27) + devnull = os.open('/dev/null', os.O_RDWR) + os.dup2(devnull, 0) + os.dup2(devnull, 1) + os.dup2(devnull, 2) + os.close(devnull) + pid = os.fork() + if pid: + # Child is done, exit to parent. + os._exit(0) + # Grandchild: run the server. + func() + finally: + # Make sure we never get back into the caller. + os._exit(1) + + +# Server code. + +SOCKET_NAME = 'dmypy.sock' # In current directory. + + +class Server: + + # NOTE: the instance is constructed in the parent process but + # serve() is called in the grandchild (by daemonize()). + + def __init__(self, flags: List[str]) -> None: + """Initialize the server with the desired mypy flags.""" + self.saved_cache = {} # type: mypy.build.SavedCache + sources, options = mypy.main.process_options(['-i'] + flags, False) + if sources: + sys.exit("dmypy: start/restart does not accept sources") + if options.report_dirs: + sys.exit("dmypy: start/restart cannot generate reports") + if not options.incremental: + sys.exit("dmypy: start/restart should not disable incremental mode") + if options.quick_and_dirty: + sys.exit("dmypy: start/restart should not specify quick_and_dirty mode") + self.options = options + if os.path.isfile(STATUS_FILE): + os.unlink(STATUS_FILE) + + def serve(self) -> None: + """Serve requests, synchronously (no thread or fork).""" + try: + sock = self.create_listening_socket() + try: + with open(STATUS_FILE, 'w') as f: + json.dump({'pid': os.getpid(), 'sockname': sock.getsockname()}, f) + f.write('\n') # I like my JSON with trailing newline + while True: + conn, addr = sock.accept() + data = receive(conn) + resp = {} # type: Dict[str, Any] + if 'command' not in data: + resp = {'error': "No command found in request"} + else: + command = data['command'] + if not isinstance(command, str): + resp = {'error': "Command is not a string"} + else: + command = data.pop('command') + resp = self.run_command(command, data) + try: + conn.sendall(json.dumps(resp).encode('utf8')) + except OSError as err: + pass # Maybe the client hung up + conn.close() + if command == 'stop': + sock.close() + sys.exit(0) + finally: + os.unlink(STATUS_FILE) + finally: + os.unlink(self.sockname) + + def create_listening_socket(self) -> socket.socket: + """Create the socket and set it up for listening.""" + self.sockname = os.path.abspath(SOCKET_NAME) + if os.path.exists(self.sockname): + os.unlink(self.sockname) + sock = socket.socket(socket.AF_UNIX) + sock.bind(self.sockname) + sock.listen(1) + return sock + + def run_command(self, command: str, data: Mapping[str, object]) -> Dict[str, object]: + """Run a specific command from the registry.""" + key = 'cmd_' + command + method = getattr(self.__class__, key, None) + if method is None: + return {'error': "Unrecognized command '%s'" % command} + else: + return method(self, **data) + + # Command functions (run in the server via RPC). 
+ + def cmd_status(self) -> Dict[str, object]: + """Return daemon status.""" + res = {} # type: Dict[str, object] + res.update(get_meminfo()) + return res + + def cmd_stop(self) -> Dict[str, object]: + """Stop daemon.""" + return {} + + last_sources = None + + def cmd_check(self, files: Sequence[str]) -> Dict[str, object]: + """Check a list of files.""" + # TODO: Move this into check(), in case one of the args is a directory. + # Capture stdout/stderr and catch SystemExit while processing the source list. + save_stdout = sys.stdout + save_stderr = sys.stderr + try: + sys.stdout = stdout = io.StringIO() + sys.stderr = stderr = io.StringIO() + self.last_sources = mypy.main.create_source_list(files, self.options) + except SystemExit as err: + return {'out': stdout.getvalue(), 'err': stderr.getvalue(), 'status': err.code} + finally: + sys.stdout = save_stdout + sys.stderr = save_stderr + return self.check(self.last_sources) + + def cmd_recheck(self) -> Dict[str, object]: + """Check the same list of files we checked most recently.""" + if not self.last_sources: + return {'error': "Command 'recheck' is only valid after a 'check' command"} + return self.check(self.last_sources) + + last_mananager = None # type: Optional[mypy.build.BuildManager] + + def check(self, sources: List[mypy.build.BuildSource], + alt_lib_path: Optional[str] = None) -> Dict[str, Any]: + # TODO: Move stats handling code to make the logic here less cluttered. + bound_gc_callback = self.gc_callback + self.gc_start_time = None # type: Optional[float] + self.gc_time = 0.0 + self.gc_calls = 0 + self.gc_collected = 0 + self.gc_uncollectable = 0 + t0 = time.time() + try: + gc.callbacks.append(bound_gc_callback) + # saved_cache is mutated in place. + res = mypy.build.build(sources, self.options, + saved_cache=self.saved_cache, + alt_lib_path=alt_lib_path) + msgs = res.errors + self.last_manager = res.manager # type: Optional[mypy.build.BuildManager] + except mypy.errors.CompileError as err: + msgs = err.messages + self.last_manager = None + finally: + while bound_gc_callback in gc.callbacks: + gc.callbacks.remove(bound_gc_callback) + t1 = time.time() + if msgs: + msgs.append("") + response = {'out': "\n".join(msgs), 'err': "", 'status': 1} + else: + response = {'out': "", 'err': "", 'status': 0} + response['build_time'] = t1 - t0 + response['gc_time'] = self.gc_time + response['gc_calls'] = self.gc_calls + response['gc_collected'] = self.gc_collected + response['gc_uncollectable'] = self.gc_uncollectable + response.update(get_meminfo()) + if self.last_manager is not None: + response.update(self.last_manager.stats_summary()) + return response + + def cmd_hang(self) -> Dict[str, object]: + """Hang for 100 seconds, as a debug hack.""" + time.sleep(100) + return {} + + def gc_callback(self, phase: str, info: Mapping[str, int]) -> None: + if phase == 'start': + assert self.gc_start_time is None, "Start phase out of sequence" + self.gc_start_time = time.time() + elif phase == 'stop': + assert self.gc_start_time is not None, "Stop phase out of sequence" + self.gc_calls += 1 + self.gc_time += time.time() - self.gc_start_time + self.gc_start_time = None + self.gc_collected += info['collected'] + self.gc_uncollectable += info['uncollectable'] + else: + assert False, "Unrecognized gc phase (%r)" % (phase,) + + +# Misc utilities. 
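[Editor's note] The gc_callback hook above relies on the standard gc.callbacks API (each callback receives a 'start'/'stop' phase and an info dict with 'collected' and 'uncollectable' counts). A minimal standalone sketch of the same timing trick:

    import gc
    import time

    gc_time = 0.0
    gc_start = None

    def timing_callback(phase, info):
        global gc_time, gc_start
        if phase == 'start':
            gc_start = time.time()
        elif phase == 'stop' and gc_start is not None:
            gc_time += time.time() - gc_start
            gc_start = None

    gc.callbacks.append(timing_callback)
    try:
        junk = [[i] for i in range(500000)]  # enough allocations to trigger collections
    finally:
        gc.callbacks.remove(timing_callback)
    print("time spent in GC: %.3f seconds" % gc_time)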
+ +def receive(sock: socket.socket) -> Any: + """Receive JSON data from a socket until EOF.""" + bdata = bytearray() + while True: + more = sock.recv(100000) + if not more: + break + bdata.extend(more) + if not bdata: + raise OSError("No data received") + data = json.loads(bdata.decode('utf8')) + if not isinstance(data, dict): + raise OSError("Data received is not a dict (%s)" % str(type(data))) + return data + + +MiB = 2**20 + + +def get_meminfo() -> Mapping[str, float]: + # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process + import resource # Since it doesn't exist on Windows. + res = {} + rusage = resource.getrusage(resource.RUSAGE_SELF) + if sys.platform == 'darwin': + factor = 1 + else: + factor = 1024 # Linux + res['memory_maxrss_mib'] = rusage.ru_maxrss * factor / MiB + process = psutil.Process(os.getpid()) + meminfo = process.memory_info() + res['memory_rss_mib'] = meminfo.rss / MiB + res['memory_vms_mib'] = meminfo.vms / MiB + return res + + +# Run main(). + +if __name__ == '__main__': + main() diff --git a/mypy/main.py b/mypy/main.py index 60bf8a22e0bd..9de3054a927e 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -8,7 +8,7 @@ import sys import time -from typing import Any, Dict, List, Mapping, Optional, Set, Tuple +from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple from mypy import build from mypy import defaults @@ -28,6 +28,21 @@ class InvalidPackageName(Exception): """Exception indicating that a package name was invalid.""" +orig_stat = os.stat + + +def stat_proxy(path: str) -> os.stat_result: + try: + st = orig_stat(path) + except os.error as err: + print("stat(%r) -> %s" % (path, err)) + raise + else: + print("stat(%r) -> (st_mode=%o, st_mtime=%d, st_size=%d)" % + (path, st.st_mode, st.st_mtime, st.st_size)) + return st + + def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None: """Main entry point to the type checker. @@ -37,6 +52,7 @@ def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None: be used. 
""" t0 = time.time() + # To log stat() calls: os.stat = stat_proxy if script_path: bin_dir = find_bin_directory(script_path) # type: Optional[str] else: @@ -505,28 +521,33 @@ def add_invertible_flag(flag: str, targets = [BuildSource(None, None, '\n'.join(special_opts.command))] return targets, options else: - targets = [] - for f in special_opts.files: - if f.endswith(PY_EXTENSIONS): - try: - targets.append(BuildSource(f, crawl_up(f)[1], None)) - except InvalidPackageName as e: - fail(str(e)) - elif os.path.isdir(f): - try: - sub_targets = expand_dir(f) - except InvalidPackageName as e: - fail(str(e)) - if not sub_targets: - fail("There are no .py[i] files in directory '{}'" - .format(f)) - targets.extend(sub_targets) - else: - mod = os.path.basename(f) if options.scripts_are_modules else None - targets.append(BuildSource(f, mod, None)) + targets = create_source_list(special_opts.files, options) return targets, options +def create_source_list(files: Sequence[str], options: Options) -> List[BuildSource]: + targets = [] + for f in files: + if f.endswith(PY_EXTENSIONS): + try: + targets.append(BuildSource(f, crawl_up(f)[1], None)) + except InvalidPackageName as e: + fail(str(e)) + elif os.path.isdir(f): + try: + sub_targets = expand_dir(f) + except InvalidPackageName as e: + fail(str(e)) + if not sub_targets: + fail("There are no .py[i] files in directory '{}'" + .format(f)) + targets.extend(sub_targets) + else: + mod = os.path.basename(f) if options.scripts_are_modules else None + targets.append(BuildSource(f, mod, None)) + return targets + + def keyfunc(name: str) -> Tuple[int, str]: """Determines sort order for directory listing. diff --git a/mypy/test/testdmypy.py b/mypy/test/testdmypy.py new file mode 100644 index 000000000000..1f483e1bc315 --- /dev/null +++ b/mypy/test/testdmypy.py @@ -0,0 +1,310 @@ +"""Type checker test cases""" + +import os +import re +import shutil +import sys +import time +import typed_ast + +from typing import Dict, List, Optional, Set, Tuple + +from mypy import build +from mypy import defaults +from mypy.main import process_options +from mypy.myunit import AssertionFailure +from mypy.test.config import test_temp_dir, test_data_prefix +from mypy.test.data import parse_test_cases, DataDrivenTestCase, DataSuite +from mypy.test.helpers import ( + assert_string_arrays_equal, normalize_error_messages, + retry_on_error, testcase_pyversion, update_testcase_output, +) +from mypy.errors import CompileError +from mypy.options import Options + +from mypy import experiments +from mypy import dmypy + +# List of files that contain test case descriptions. +files = [ + 'check-enum.test', + 'check-incremental.test', + 'check-newtype.test', +] + + +class TypeCheckSuite(DataSuite): + + @classmethod + def cases(cls) -> List[DataDrivenTestCase]: + if sys.platform == 'win32': + return [] # Nothing here works on Windows. + c = [] # type: List[DataDrivenTestCase] + for f in files: + tc = parse_test_cases(os.path.join(test_data_prefix, f), + None, test_temp_dir, True) + c += [case for case in tc + if cls.has_stable_flags(case) and cls.is_incremental(case)] + return c + + def run_case(self, testcase: DataDrivenTestCase) -> None: + assert self.is_incremental(testcase), "Testcase is not incremental" + assert self.has_stable_flags(testcase), "Testcase has varying flags" + # All tests run once with a cold cache, then at least once + # with a warm cache and maybe changed files. Expected output + # is specified separately for each run. 
+ self.clear_cache() + num_steps = max([2] + list(testcase.output2.keys())) + # Check that there are no file changes beyond the last run (they would be ignored). + for dn, dirs, files in os.walk(os.curdir): + for file in files: + m = re.search(r'\.([2-9])$', file) + if m and int(m.group(1)) > num_steps: + raise ValueError( + 'Output file {} exists though test case only has {} runs'.format( + file, num_steps)) + self.server = None # type: Optional[dmypy.Server] + for step in range(1, num_steps + 1): + self.run_case_once(testcase, step) + + @classmethod + def is_incremental(cls, testcase: DataDrivenTestCase) -> bool: + return 'incremental' in testcase.name.lower() or 'incremental' in testcase.file + + @classmethod + def has_stable_flags(cls, testcase: DataDrivenTestCase) -> bool: + if any(re.match(r'# flags[2-9]:', line) for line in testcase.input): + return False + for filename, contents in testcase.files: + if os.path.basename(filename).startswith('mypy.ini.'): + return False + return True + + def clear_cache(self) -> None: + dn = defaults.CACHE_DIR + if os.path.exists(dn): + shutil.rmtree(dn) + + def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> None: + assert incremental_step >= 1 + build.find_module_clear_caches() + original_program_text = '\n'.join(testcase.input) + module_data = self.parse_module(original_program_text, incremental_step) + + if incremental_step == 1: + # In run 1, copy program text to program file. + for module_name, program_path, program_text in module_data: + if module_name == '__main__': + with open(program_path, 'w') as f: + f.write(program_text) + break + elif incremental_step > 1: + # In runs 2+, copy *.[num] files to * files. + for dn, dirs, files in os.walk(os.curdir): + for file in files: + if file.endswith('.' + str(incremental_step)): + full = os.path.join(dn, file) + target = full[:-2] + # Use retries to work around potential flakiness on Windows (AppVeyor). + retry_on_error(lambda: shutil.copy(full, target)) + + # In some systems, mtime has a resolution of 1 second which can cause + # annoying-to-debug issues when a file has the same size after a + # change. We manually set the mtime to circumvent this. + new_time = os.stat(target).st_mtime + 1 + os.utime(target, times=(new_time, new_time)) + # Delete files scheduled to be deleted in [delete .num] sections. + for path in testcase.deleted_paths.get(incremental_step, set()): + # Use retries to work around potential flakiness on Windows (AppVeyor). + retry_on_error(lambda: os.remove(path)) + + # Parse options after moving files (in case mypy.ini is being moved). 
+ options = self.parse_options(original_program_text, testcase, incremental_step) + if incremental_step == 1: + self.server = dmypy.Server([]) # TODO: Fix ugly API + self.server.options = options + + assert self.server is not None # Set in step 1 and survives into next steps + sources = [] + for module_name, program_path, program_text in module_data: + # Always set to none so we're forced to reread the module in incremental mode + sources.append(build.BuildSource(program_path, module_name, None)) + response = self.server.check(sources, alt_lib_path=test_temp_dir) + a = (response['out'] or response['err']).splitlines() + a = normalize_error_messages(a) + + # Make sure error messages match + if incremental_step == 1: + msg = 'Unexpected type checker output in incremental, run 1 ({}, line {})' + output = testcase.output + elif incremental_step > 1: + msg = ('Unexpected type checker output in incremental, run {}'.format( + incremental_step) + ' ({}, line {})') + output = testcase.output2.get(incremental_step, []) + else: + raise AssertionError() + + if output != a and self.update_data: + update_testcase_output(testcase, a) + assert_string_arrays_equal(output, a, msg.format(testcase.file, testcase.line)) + + manager = self.server.last_manager + if manager is not None: + if options.follow_imports == 'normal' and testcase.output is None: + self.verify_cache(module_data, a, manager) + if incremental_step > 1: + suffix = '' if incremental_step == 2 else str(incremental_step - 1) + self.check_module_equivalence( + 'rechecked' + suffix, + testcase.expected_rechecked_modules.get(incremental_step - 1), + manager.rechecked_modules) + self.check_module_equivalence( + 'stale' + suffix, + testcase.expected_stale_modules.get(incremental_step - 1), + manager.stale_modules) + + def check_module_equivalence(self, name: str, + expected: Optional[Set[str]], actual: Set[str]) -> None: + if expected is not None: + expected_normalized = sorted(expected) + actual_normalized = sorted(actual.difference({"__main__"})) + assert_string_arrays_equal( + expected_normalized, + actual_normalized, + ('Actual modules ({}) do not match expected modules ({}) ' + 'for "[{} ...]"').format( + ', '.join(actual_normalized), + ', '.join(expected_normalized), + name)) + + def verify_cache(self, module_data: List[Tuple[str, str, str]], a: List[str], + manager: build.BuildManager) -> None: + # There should be valid cache metadata for each module except + # those in error_paths; for those there should not be. + # + # NOTE: When A imports B and there's an error in B, the cache + # data for B is invalidated, but the cache data for A remains. + # However build.process_graphs() will ignore A's cache data. + # + # Also note that when A imports B, and there's an error in A + # _due to a valid change in B_, the cache data for B will be + # invalidated and updated, but the old cache data for A will + # remain unchanged. As before, build.process_graphs() will + # ignore A's (old) cache data. + error_paths = self.find_error_paths(a) + modules = self.find_module_files() + modules.update({module_name: path for module_name, path, text in module_data}) + missing_paths = self.find_missing_cache_files(modules, manager) + if not missing_paths.issubset(error_paths): + raise AssertionFailure("cache data discrepancy %s != %s" % + (missing_paths, error_paths)) + + def find_error_paths(self, a: List[str]) -> Set[str]: + hits = set() + for line in a: + m = re.match(r'([^\s:]+):\d+: error:', line) + if m: + # Normalize to Linux paths. 
+ p = m.group(1).replace(os.path.sep, '/') + hits.add(p) + return hits + + def find_module_files(self) -> Dict[str, str]: + modules = {} + for dn, dirs, files in os.walk(test_temp_dir): + dnparts = dn.split(os.sep) + assert dnparts[0] == test_temp_dir + del dnparts[0] + for file in files: + if file.endswith('.py'): + if file == "__init__.py": + # If the file path is `a/b/__init__.py`, exclude the file name + # and make sure the module id is just `a.b`, not `a.b.__init__`. + id = '.'.join(dnparts) + else: + base, ext = os.path.splitext(file) + id = '.'.join(dnparts + [base]) + modules[id] = os.path.join(dn, file) + return modules + + def find_missing_cache_files(self, modules: Dict[str, str], + manager: build.BuildManager) -> Set[str]: + ignore_errors = True + missing = {} + for id, path in modules.items(): + meta = build.find_cache_meta(id, path, manager) + if not build.validate_meta(meta, id, path, ignore_errors, manager): + missing[id] = path + return set(missing.values()) + + def parse_module(self, + program_text: str, + incremental_step: int) -> List[Tuple[str, str, str]]: + """Return the module and program names for a test case. + + Normally, the unit tests will parse the default ('__main__') + module and follow all the imports listed there. You can override + this behavior and instruct the tests to check multiple modules + by using a comment like this in the test case input: + + # cmd: mypy -m foo.bar foo.baz + + You can also use `# cmdN:` to have a different cmd for incremental + step N (2, 3, ...). + + Return a list of tuples (module name, file name, program text). + """ + m = re.search('# cmd: mypy -m ([a-zA-Z0-9_. ]+)$', program_text, flags=re.MULTILINE) + regex = '# cmd{}: mypy -m ([a-zA-Z0-9_. ]+)$'.format(incremental_step) + alt_m = re.search(regex, program_text, flags=re.MULTILINE) + if alt_m is not None and incremental_step > 1: + # Optionally return a different command if in a later step + # of incremental mode, otherwise default to reusing the + # original cmd. + m = alt_m + + if m: + # The test case wants to use a non-default main + # module. Look up the module and give it as the thing to + # analyze. 
+ module_names = m.group(1) + out = [] + for module_name in module_names.split(' '): + path = build.find_module(module_name, [test_temp_dir]) + assert path is not None, "Can't find ad hoc case file" + with open(path) as f: + program_text = f.read() + out.append((module_name, path, program_text)) + return out + else: + return [('__main__', 'main', program_text)] + + def parse_options(self, program_text: str, testcase: DataDrivenTestCase, + incremental_step: int) -> Options: + options = Options() + flags = re.search('# flags: (.*)$', program_text, flags=re.MULTILINE) + if incremental_step > 1: + flags2 = re.search('# flags{}: (.*)$'.format(incremental_step), program_text, + flags=re.MULTILINE) + if flags2: + flags = flags2 + + flag_list = None + if flags: + flag_list = flags.group(1).split() + targets, options = process_options(flag_list, require_targets=False) + if targets: + raise RuntimeError('Specifying targets via the flags pragma is not supported.') + else: + options = Options() + + # Allow custom python version to override testcase_pyversion + if (not flag_list or + all(flag not in flag_list for flag in ['--python-version', '-2', '--py2'])): + options.python_version = testcase_pyversion(testcase.file, testcase.name) + + options.use_builtins_fixtures = True + options.show_traceback = True + options.incremental = True + + return options diff --git a/runtests.py b/runtests.py index 2f1bc7aa94cc..d4712bbfbabb 100755 --- a/runtests.py +++ b/runtests.py @@ -204,6 +204,7 @@ def test_path(*names: str): PYTEST_FILES = test_path( 'testcheck', + 'testdmypy', 'testextensions', 'testdeps', 'testdiff', diff --git a/scripts/dmypy b/scripts/dmypy new file mode 100755 index 000000000000..b760d5a7cc1e --- /dev/null +++ b/scripts/dmypy @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Mypy daemon client. + +This is just a wrapper script. Look at mypy/dmypy.py for the actual +implementation. +""" + +import os +import os.path +import sys + +file_dir = os.path.dirname(__file__) +parent_dir = os.path.join(file_dir, os.pardir) +if os.path.exists(os.path.join(parent_dir, '.git')): + # We are running from a git clone. + sys.path.insert(0, parent_dir) + +import mypy.dmypy + +mypy.dmypy.main() diff --git a/setup.py b/setup.py index 64d5fac9d74e..364f75321615 100644 --- a/setup.py +++ b/setup.py @@ -103,11 +103,15 @@ def run(self): py_modules=[], packages=['mypy', 'mypy.test', 'mypy.myunit', 'mypy.server'], entry_points={'console_scripts': ['mypy=mypy.__main__:console_entry', - 'stubgen=mypy.stubgen:main']}, + 'stubgen=mypy.stubgen:main', + 'dmypy=mypy.dmypy:main', + ]}, data_files=data_files, classifiers=classifiers, cmdclass={'build_py': CustomPythonBuild}, - install_requires = ['typed-ast >= 1.1.0, < 1.2.0'], + install_requires = ['typed-ast >= 1.1.0, < 1.2.0', + 'psutil >= 5.4.0, < 5.5.0', + ], extras_require = { ':python_version < "3.5"': 'typing >= 3.5.3', }, diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 5c952c86a92d..525a4564fb25 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -3020,7 +3020,6 @@ tmp/mod.py:7: error: Revealed type is 'builtins.bytes' # cmd: mypy -m a # cmd2: mypy -m b # flags: --follow-imports=silent -# flags2: --follow-imports=silent [file a.py] import b
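[Editor's note] To tie the pieces together: a "check"/"recheck" reply is a flat JSON object in which Server.check() contributes output, status and timings, get_meminfo() adds memory figures, and BuildManager.stats_summary() passes along whatever add_stats() accumulated during the build. An illustrative response (values made up; not every key appears on every run):

    example_check_response = {
        'out': "prog.py:3: error: ...\n", 'err': "", 'status': 1,
        'roundtrip_time': 1.456, 'build_time': 1.234,
        'gc_time': 0.087, 'gc_calls': 6, 'gc_collected': 1543, 'gc_uncollectable': 0,
        'memory_rss_mib': 128.4, 'memory_vms_mib': 512.0, 'memory_maxrss_mib': 131.0,
        'graph_size': 120, 'stubs_found': 80, 'graph_load_time': 0.31,
        'files_parsed': 3, 'modules_parsed': 2, 'stubs_parsed': 1,
        'fresh_metas': 95, 'reused_metas': 22, 'fresh_trees': 4, 'reused_trees': 90,
        'sccs_left': 5, 'nodes_left': 37,
    }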