Skip to content

Commit 5ac30ea

Browse files
authored
Overhaul file-system operations to use FileSystemCache (#4623)
This overhauls our file-system accesses to (almost) always go through a FileSystemCache when stat()ing or reading source files. (We don't use it for reading cache json files, since we seem to only do a single read on each anyways and so caching doesn't help us any.) This eliminates a number of potential bugs having to do with seeing inconsistent file-system state and reduces the number of file reads+decodes in fine-grained incremental mode. This also allows us to eliminate a number of caches maintained by find_module that duplicated functionality of FileSystemCache. Since I was reluctant to make the FileSystemCache global, I took the opportunity to make find_module's caches non-global as well.
1 parent f456718 commit 5ac30ea

13 files changed

+273
-275
lines changed

mypy/build.py

+103-182
Large diffs are not rendered by default.

mypy/dmypy_server.py

+11-18
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,16 @@ def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str,
283283
return self.fine_grained_increment(sources)
284284

285285
def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
286-
self.fscache = FileSystemCache(self.options.python_version)
287-
self.fswatcher = FileSystemWatcher(self.fscache)
286+
# The file system cache we create gets passed off to
287+
# BuildManager, and thence to FineGrainedBuildManager, which
288+
# assumes responsibility for clearing it after updates.
289+
fscache = FileSystemCache(self.options.python_version)
290+
self.fswatcher = FileSystemWatcher(fscache)
288291
self.update_sources(sources)
289292
try:
290293
result = mypy.build.build(sources=sources,
291294
options=self.options,
295+
fscache=fscache,
292296
alt_lib_path=self.alt_lib_path)
293297
except mypy.errors.CompileError as e:
294298
output = ''.join(s + '\n' for s in e.messages)
@@ -298,9 +302,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
298302
out, err = '', output
299303
return {'out': out, 'err': err, 'status': 2}
300304
messages = result.errors
301-
manager = result.manager
302-
graph = result.graph
303-
self.fine_grained_manager = FineGrainedBuildManager(manager, graph)
305+
self.fine_grained_manager = FineGrainedBuildManager(result)
304306
self.previous_sources = sources
305307

306308
# If we are using the fine-grained cache, build hasn't actually done
@@ -317,7 +319,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
317319
state.path,
318320
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))
319321

320-
# Run an update
321322
changed, removed = self.find_changed(sources)
322323

323324
# Find anything that has had its dependency list change
@@ -326,16 +327,14 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
326327
assert state.path is not None
327328
changed.append((state.id, state.path))
328329

329-
if changed or removed:
330-
messages = self.fine_grained_manager.update(changed, removed)
330+
# Run an update
331+
messages = self.fine_grained_manager.update(changed, removed)
331332
else:
332333
# Stores the initial state of sources as a side effect.
333334
self.fswatcher.find_changed()
334335

335-
self.fscache.flush()
336-
336+
fscache.flush()
337337
status = 1 if messages else 0
338-
self.previous_messages = messages[:]
339338
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
340339

341340
def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
@@ -345,19 +344,13 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
345344
self.update_sources(sources)
346345
changed, removed = self.find_changed(sources)
347346
t1 = time.time()
348-
if not (changed or removed):
349-
# Nothing changed -- just produce the same result as before.
350-
messages = self.previous_messages
351-
else:
352-
messages = self.fine_grained_manager.update(changed, removed)
347+
messages = self.fine_grained_manager.update(changed, removed)
353348
t2 = time.time()
354349
self.fine_grained_manager.manager.log(
355350
"fine-grained increment: find_changed: {:.3f}s, update: {:.3f}s".format(
356351
t1 - t0, t2 - t1))
357352
status = 1 if messages else 0
358-
self.previous_messages = messages[:]
359353
self.previous_sources = sources
360-
self.fscache.flush()
361354
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
362355

363356
def update_sources(self, sources: List[mypy.build.BuildSource]) -> None:

mypy/errors.py

-7
Original file line numberDiff line numberDiff line change
@@ -616,13 +616,6 @@ def __init__(self,
616616
self.module_with_blocker = module_with_blocker
617617

618618

619-
class DecodeError(Exception):
620-
"""Exception raised when a file cannot be decoded due to an unknown encoding type.
621-
622-
Essentially a wrapper for the LookupError raised by `bytearray.decode`
623-
"""
624-
625-
626619
def remove_path_prefix(path: str, prefix: str) -> str:
627620
"""If path starts with prefix, return copy of path with the prefix removed.
628621
Otherwise, return path. If path is None, return None.

mypy/fscache.py

+68-31
Original file line numberDiff line numberDiff line change
@@ -30,45 +30,21 @@
3030

3131
import os
3232
import stat
33-
from typing import Tuple, Dict, List
33+
from typing import Tuple, Dict, List, Optional
34+
from mypy.util import read_with_python_encoding
3435

35-
from mypy.build import read_with_python_encoding
36-
from mypy.errors import DecodeError
3736

38-
39-
class FileSystemCache:
40-
def __init__(self, pyversion: Tuple[int, int]) -> None:
41-
self.pyversion = pyversion
37+
class FileSystemMetaCache:
38+
def __init__(self) -> None:
4239
self.flush()
4340

4441
def flush(self) -> None:
4542
"""Start another transaction and empty all caches."""
4643
self.stat_cache = {} # type: Dict[str, os.stat_result]
4744
self.stat_error_cache = {} # type: Dict[str, Exception]
48-
self.read_cache = {} # type: Dict[str, str]
49-
self.read_error_cache = {} # type: Dict[str, Exception]
50-
self.hash_cache = {} # type: Dict[str, str]
5145
self.listdir_cache = {} # type: Dict[str, List[str]]
5246
self.listdir_error_cache = {} # type: Dict[str, Exception]
53-
54-
def read_with_python_encoding(self, path: str) -> str:
55-
if path in self.read_cache:
56-
return self.read_cache[path]
57-
if path in self.read_error_cache:
58-
raise self.read_error_cache[path]
59-
60-
# Need to stat first so that the contents of file are from no
61-
# earlier instant than the mtime reported by self.stat().
62-
self.stat(path)
63-
64-
try:
65-
data, md5hash = read_with_python_encoding(path, self.pyversion)
66-
except Exception as err:
67-
self.read_error_cache[path] = err
68-
raise
69-
self.read_cache[path] = data
70-
self.hash_cache[path] = md5hash
71-
return data
47+
self.isfile_case_cache = {} # type: Dict[str, bool]
7248

7349
def stat(self, path: str) -> os.stat_result:
7450
if path in self.stat_cache:
@@ -97,11 +73,40 @@ def listdir(self, path: str) -> List[str]:
9773
return results
9874

9975
def isfile(self, path: str) -> bool:
100-
st = self.stat(path)
76+
try:
77+
st = self.stat(path)
78+
except OSError:
79+
return False
10180
return stat.S_ISREG(st.st_mode)
10281

82+
def isfile_case(self, path: str) -> bool:
83+
"""Return whether path exists and is a file.
84+
85+
On case-insensitive filesystems (like Mac or Windows) this returns
86+
False if the case of the path's last component does not exactly
87+
match the case found in the filesystem.
88+
TODO: We should maybe check the case for some directory components also,
89+
to avoid permitting wrongly-cased *packages*.
90+
"""
91+
if path in self.isfile_case_cache:
92+
return self.isfile_case_cache[path]
93+
head, tail = os.path.split(path)
94+
if not tail:
95+
res = False
96+
else:
97+
try:
98+
names = self.listdir(head)
99+
res = tail in names and self.isfile(path)
100+
except OSError:
101+
res = False
102+
self.isfile_case_cache[path] = res
103+
return res
104+
103105
def isdir(self, path: str) -> bool:
104-
st = self.stat(path)
106+
try:
107+
st = self.stat(path)
108+
except OSError:
109+
return False
105110
return stat.S_ISDIR(st.st_mode)
106111

107112
def exists(self, path: str) -> bool:
@@ -111,6 +116,38 @@ def exists(self, path: str) -> bool:
111116
return False
112117
return True
113118

119+
120+
class FileSystemCache(FileSystemMetaCache):
121+
def __init__(self, pyversion: Tuple[int, int]) -> None:
122+
self.pyversion = pyversion
123+
self.flush()
124+
125+
def flush(self) -> None:
126+
"""Start another transaction and empty all caches."""
127+
super().flush()
128+
self.read_cache = {} # type: Dict[str, str]
129+
self.read_error_cache = {} # type: Dict[str, Exception]
130+
self.hash_cache = {} # type: Dict[str, str]
131+
132+
def read_with_python_encoding(self, path: str) -> str:
133+
if path in self.read_cache:
134+
return self.read_cache[path]
135+
if path in self.read_error_cache:
136+
raise self.read_error_cache[path]
137+
138+
# Need to stat first so that the contents of file are from no
139+
# earlier instant than the mtime reported by self.stat().
140+
self.stat(path)
141+
142+
try:
143+
data, md5hash = read_with_python_encoding(path, self.pyversion)
144+
except Exception as err:
145+
self.read_error_cache[path] = err
146+
raise
147+
self.read_cache[path] = data
148+
self.hash_cache[path] = md5hash
149+
return data
150+
114151
def md5(self, path: str) -> str:
115152
if path not in self.hash_cache:
116153
self.read_with_python_encoding(path)

mypy/main.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,8 @@ def add_invertible_flag(flag: str,
535535
.format(special_opts.package))
536536
options.build_type = BuildType.MODULE
537537
lib_path = [os.getcwd()] + build.mypy_path()
538-
targets = build.find_modules_recursive(special_opts.package, lib_path)
538+
# TODO: use the same cache as the BuildManager will
539+
targets = build.FindModuleCache().find_modules_recursive(special_opts.package, lib_path)
539540
if not targets:
540541
fail("Can't find package '{}'".format(special_opts.package))
541542
return targets, options
@@ -548,6 +549,7 @@ def add_invertible_flag(flag: str,
548549
return targets, options
549550

550551

552+
# TODO: use a FileSystemCache for this
551553
def create_source_list(files: Sequence[str], options: Options) -> List[BuildSource]:
552554
targets = []
553555
for f in files:

mypy/server/update.py

+24-18
Original file line numberDiff line numberDiff line change
@@ -114,17 +114,17 @@
114114
Major todo items:
115115
116116
- Fully support multiple type checking passes
117-
- Use mypy.fscache to access file system
118117
"""
119118

119+
import os
120120
import time
121121
import os.path
122122
from typing import (
123123
Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable
124124
)
125125

126126
from mypy.build import (
127-
BuildManager, State, BuildSource, Graph, load_graph, find_module_clear_caches,
127+
BuildManager, State, BuildSource, BuildResult, Graph, load_graph,
128128
PRI_INDIRECT, DEBUG_FINE_GRAINED,
129129
)
130130
from mypy.checker import DeferredNode
@@ -135,6 +135,7 @@
135135
)
136136
from mypy.options import Options
137137
from mypy.types import Type
138+
from mypy.fscache import FileSystemCache
138139
from mypy.semanal import apply_semantic_analyzer_patches
139140
from mypy.server.astdiff import (
140141
snapshot_symbol_table, compare_symbol_table_snapshots, SnapshotItem
@@ -150,21 +151,23 @@
150151

151152

152153
class FineGrainedBuildManager:
153-
def __init__(self,
154-
manager: BuildManager,
155-
graph: Graph) -> None:
154+
def __init__(self, result: BuildResult) -> None:
156155
"""Initialize fine-grained build based on a batch build.
157156
158157
Args:
158+
result: Result from the initialized build.
159+
The manager and graph will be taken over by this class.
159160
manager: State of the build (mutated by this class)
160161
graph: Additional state of the build (only read to initialize state)
161162
"""
163+
manager = result.manager
162164
self.manager = manager
165+
self.graph = result.graph
163166
self.options = manager.options
164167
self.previous_modules = get_module_to_path_map(manager)
165-
self.deps = get_all_dependencies(manager, graph, self.options)
168+
self.deps = get_all_dependencies(manager, self.graph, self.options)
166169
self.previous_targets_with_errors = manager.errors.targets()
167-
self.graph = graph
170+
self.previous_messages = result.errors[:]
168171
# Module, if any, that had blocking errors in the last run as (id, path) tuple.
169172
# TODO: Handle blocking errors in the initial build
170173
self.blocking_error = None # type: Optional[Tuple[str, str]]
@@ -205,13 +208,15 @@ def update(self,
205208
A list of errors.
206209
"""
207210
changed_modules = changed_modules + removed_modules
208-
assert changed_modules or removed_modules, 'No changed modules'
209-
210211
removed_set = {module for module, _ in removed_modules}
211212
self.changed_modules = changed_modules
212213

213-
# Reset global caches for the new build.
214-
find_module_clear_caches()
214+
if not changed_modules:
215+
self.manager.fscache.flush()
216+
return self.previous_messages
217+
218+
# Reset find_module's caches for the new build.
219+
self.manager.find_module_cache.clear()
215220

216221
self.triggered = []
217222
self.updated_modules = []
@@ -249,8 +254,10 @@ def update(self,
249254
if blocker:
250255
self.blocking_error = (next_id, next_path)
251256
self.stale = changed_modules
252-
return messages
257+
break
253258

259+
self.manager.fscache.flush()
260+
self.previous_messages = messages[:]
254261
return messages
255262

256263
def update_single(self,
@@ -383,7 +390,7 @@ def update_single_isolated(module: str,
383390
manager.log_fine_grained('new module %r' % module)
384391

385392
old_modules = dict(manager.modules)
386-
sources = get_sources(previous_modules, [(module, path)])
393+
sources = get_sources(manager.fscache, previous_modules, [(module, path)])
387394

388395
if module in manager.missing_modules:
389396
manager.missing_modules.remove(module)
@@ -407,7 +414,7 @@ def update_single_isolated(module: str,
407414
remaining_modules = []
408415
return BlockedUpdate(err.module_with_blocker, path, remaining_modules, err.messages)
409416

410-
if not os.path.isfile(path) or force_removed:
417+
if not manager.fscache.isfile(path) or force_removed:
411418
delete_module(module, graph, manager)
412419
return NormalUpdate(module, path, [], None)
413420

@@ -537,13 +544,12 @@ def get_module_to_path_map(manager: BuildManager) -> Dict[str, str]:
537544
for module, node in manager.modules.items()}
538545

539546

540-
def get_sources(modules: Dict[str, str],
547+
def get_sources(fscache: FileSystemCache,
548+
modules: Dict[str, str],
541549
changed_modules: List[Tuple[str, str]]) -> List[BuildSource]:
542-
# TODO: Race condition when reading from the file system; we should only read each
543-
# bit of external state once during a build to have a consistent view of the world
544550
sources = []
545551
for id, path in changed_modules:
546-
if os.path.isfile(path):
552+
if fscache.isfile(path):
547553
sources.append(BuildSource(path, id, None))
548554
return sources
549555

mypy/stubgen.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def find_module_path_and_all(module: str, pyversion: Tuple[int, int],
156156
module_all = getattr(mod, '__all__', None)
157157
else:
158158
# Find module by going through search path.
159-
module_path = mypy.build.find_module(module, ['.'] + search_path)
159+
module_path = mypy.build.FindModuleCache().find_module(module, ['.'] + search_path)
160160
if not module_path:
161161
raise SystemExit(
162162
"Can't find module '{}' (consider using --search-path)".format(module))
@@ -201,7 +201,7 @@ def generate_stub(path: str,
201201
include_private: bool = False
202202
) -> None:
203203

204-
source, _ = mypy.build.read_with_python_encoding(path, pyversion)
204+
source, _ = mypy.util.read_with_python_encoding(path, pyversion)
205205
options = MypyOptions()
206206
options.python_version = pyversion
207207
try:

0 commit comments

Comments
 (0)