Skip to content

Significantly speed up file handling error paths #17920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 1 commit on Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,8 +736,8 @@ def maybe_swap_for_shadow_path(self, path: str) -> str:
shadow_file = self.shadow_equivalence_map.get(path)
return shadow_file if shadow_file else path

def get_stat(self, path: str) -> os.stat_result:
return self.fscache.stat(self.maybe_swap_for_shadow_path(path))
def get_stat(self, path: str) -> os.stat_result | None:
return self.fscache.stat_or_none(self.maybe_swap_for_shadow_path(path))

def getmtime(self, path: str) -> int:
"""Return a file's mtime; but 0 in bazel mode.
Expand Down Expand Up @@ -1394,9 +1394,9 @@ def validate_meta(
if bazel:
# Normalize path under bazel to make sure it isn't absolute
path = normpath(path, manager.options)
try:
st = manager.get_stat(path)
except OSError:

st = manager.get_stat(path)
if st is None:
return None
if not stat.S_ISDIR(st.st_mode) and not stat.S_ISREG(st.st_mode):
manager.log(f"Metadata abandoned for {id}: file or directory {path} does not exist")
Expand Down Expand Up @@ -1572,10 +1572,9 @@ def write_cache(
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))

# Obtain and set up metadata
try:
st = manager.get_stat(path)
except OSError as err:
manager.log(f"Cannot get stat for {path}: {err}")
st = manager.get_stat(path)
if st is None:
manager.log(f"Cannot get stat for {path}")
# Remove apparently-invalid cache files.
# (This is purely an optimization.)
for filename in [data_json, meta_json]:
Expand Down
59 changes: 26 additions & 33 deletions mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def set_package_root(self, package_root: list[str]) -> None:

def flush(self) -> None:
"""Start another transaction and empty all caches."""
self.stat_cache: dict[str, os.stat_result] = {}
self.stat_error_cache: dict[str, OSError] = {}
self.stat_or_none_cache: dict[str, os.stat_result | None] = {}

self.listdir_cache: dict[str, list[str]] = {}
self.listdir_error_cache: dict[str, OSError] = {}
self.isfile_case_cache: dict[str, bool] = {}
Expand All @@ -62,24 +62,21 @@ def flush(self) -> None:
self.hash_cache: dict[str, str] = {}
self.fake_package_cache: set[str] = set()

def stat(self, path: str) -> os.stat_result:
if path in self.stat_cache:
return self.stat_cache[path]
if path in self.stat_error_cache:
raise copy_os_error(self.stat_error_cache[path])
def stat_or_none(self, path: str) -> os.stat_result | None:
if path in self.stat_or_none_cache:
return self.stat_or_none_cache[path]

st = None
try:
st = os.stat(path)
except OSError as err:
except OSError:
if self.init_under_package_root(path):
try:
return self._fake_init(path)
st = self._fake_init(path)
except OSError:
pass
# Take a copy to get rid of associated traceback and frame objects.
# Just assigning to __traceback__ doesn't free them.
self.stat_error_cache[path] = copy_os_error(err)
raise err
self.stat_cache[path] = st

self.stat_or_none_cache[path] = st
return st

def init_under_package_root(self, path: str) -> bool:
Expand Down Expand Up @@ -112,9 +109,9 @@ def init_under_package_root(self, path: str) -> bool:
if not os.path.basename(dirname).isidentifier():
# Can't put an __init__.py in a place that's not an identifier
return False
try:
st = self.stat(dirname)
except OSError:

st = self.stat_or_none(dirname)
if st is None:
return False
else:
if not stat.S_ISDIR(st.st_mode):
Expand Down Expand Up @@ -145,15 +142,14 @@ def _fake_init(self, path: str) -> os.stat_result:
assert basename == "__init__.py", path
assert not os.path.exists(path), path # Not cached!
dirname = os.path.normpath(dirname)
st = self.stat(dirname) # May raise OSError
st = os.stat(dirname) # May raise OSError
# Get stat result as a list so we can modify it.
seq: list[float] = list(st)
seq[stat.ST_MODE] = stat.S_IFREG | 0o444
seq[stat.ST_INO] = 1
seq[stat.ST_NLINK] = 1
seq[stat.ST_SIZE] = 0
st = os.stat_result(seq)
self.stat_cache[path] = st
# Make listdir() and read() also pretend this file exists.
self.fake_package_cache.add(dirname)
return st
Expand Down Expand Up @@ -181,9 +177,8 @@ def listdir(self, path: str) -> list[str]:
return results

def isfile(self, path: str) -> bool:
try:
st = self.stat(path)
except OSError:
st = self.stat_or_none(path)
if st is None:
return False
return stat.S_ISREG(st.st_mode)

Expand Down Expand Up @@ -248,18 +243,14 @@ def exists_case(self, path: str, prefix: str) -> bool:
return res

def isdir(self, path: str) -> bool:
try:
st = self.stat(path)
except OSError:
st = self.stat_or_none(path)
if st is None:
return False
return stat.S_ISDIR(st.st_mode)

def exists(self, path: str) -> bool:
try:
self.stat(path)
except FileNotFoundError:
return False
return True
st = self.stat_or_none(path)
return st is not None

def read(self, path: str) -> bytes:
if path in self.read_cache:
Expand All @@ -269,7 +260,7 @@ def read(self, path: str) -> bytes:

# Need to stat first so that the contents of file are from no
# earlier instant than the mtime recorded by the stat call below.
self.stat(path)
self.stat_or_none(path)

dirname, basename = os.path.split(path)
dirname = os.path.normpath(dirname)
Expand All @@ -294,8 +285,10 @@ def hash_digest(self, path: str) -> str:
return self.hash_cache[path]

def samefile(self, f1: str, f2: str) -> bool:
s1 = self.stat(f1)
s2 = self.stat(f2)
s1 = self.stat_or_none(f1)
s2 = self.stat_or_none(f2)
if s1 is None or s2 is None:
return False
return os.path.samestat(s1, s2)


Expand Down
13 changes: 6 additions & 7 deletions mypy/fswatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import os
from typing import AbstractSet, Iterable, NamedTuple

from mypy.fscache import FileSystemCache
Expand Down Expand Up @@ -56,18 +57,16 @@ def remove_watched_paths(self, paths: Iterable[str]) -> None:
del self._file_data[path]
self._paths -= set(paths)

def _update(self, path: str) -> None:
st = self.fs.stat(path)
def _update(self, path: str, st: os.stat_result) -> None:
hash_digest = self.fs.hash_digest(path)
self._file_data[path] = FileData(st.st_mtime, st.st_size, hash_digest)

def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
changed = set()
for path in paths:
old = self._file_data[path]
try:
st = self.fs.stat(path)
except FileNotFoundError:
st = self.fs.stat_or_none(path)
if st is None:
if old is not None:
# File was deleted.
changed.add(path)
Expand All @@ -76,13 +75,13 @@ def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
if old is None:
# File is new.
changed.add(path)
self._update(path)
self._update(path, st)
# Round mtimes down, to match the mtimes we write to meta files
elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime):
# Only look for changes if size or mtime has changed as an
# optimization, since calculating hash is expensive.
new_hash = self.fs.hash_digest(path)
self._update(path)
self._update(path, st)
if st.st_size != old.st_size or new_hash != old.hash:
# Changed file.
changed.add(path)
Expand Down
Loading