Skip to content

Commit a21e5e1

Browse files
authored
Speed up case sensitive is-file check in file system cache (#10093)
This speeds up resolving imports somewhat. Add fast paths to `isfile_case`. On Linux we skip all of the case-checking logic, since the file system is almost always case-sensitive. Cache the results for parent directories as well. Add tests.
1 parent 3412fab commit a21e5e1

File tree

2 files changed

+141
-18
lines changed

2 files changed

+141
-18
lines changed

mypy/fscache.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import os
3232
import stat
33+
import sys
3334
from typing import Dict, List, Set
3435
from mypy.util import hash_digest
3536
from mypy_extensions import mypyc_attr
@@ -53,6 +54,7 @@ def flush(self) -> None:
5354
self.listdir_cache = {} # type: Dict[str, List[str]]
5455
self.listdir_error_cache = {} # type: Dict[str, OSError]
5556
self.isfile_case_cache = {} # type: Dict[str, bool]
57+
self.exists_case_cache = {} # type: Dict[str, bool]
5658
self.read_cache = {} # type: Dict[str, bytes]
5759
self.read_error_cache = {} # type: Dict[str, Exception]
5860
self.hash_cache = {} # type: Dict[str, str]
@@ -197,32 +199,53 @@ def isfile_case(self, path: str, prefix: str) -> bool:
197199
198200
The caller must ensure that prefix is a valid file system prefix of path.
199201
"""
202+
if sys.platform == "linux":
203+
# Assume that the file system on Linux is case sensitive
204+
return self.isfile(path)
205+
if not self.isfile(path):
206+
# Fast path
207+
return False
200208
if path in self.isfile_case_cache:
201209
return self.isfile_case_cache[path]
202210
head, tail = os.path.split(path)
203211
if not tail:
212+
self.isfile_case_cache[path] = False
213+
return False
214+
try:
215+
names = self.listdir(head)
216+
# This allows one to check file name case sensitively in
217+
# case-insensitive filesystems.
218+
res = tail in names
219+
except OSError:
204220
res = False
205-
else:
206-
try:
207-
names = self.listdir(head)
208-
# This allows one to check file name case sensitively in
209-
# case-insensitive filesystems.
210-
res = tail in names and self.isfile(path)
211-
except OSError:
212-
res = False
213-
214-
# Also check the other path components in case sensitive way.
215-
head, dir = os.path.split(head)
216-
while res and head and dir and head.startswith(prefix):
217-
try:
218-
res = dir in self.listdir(head)
219-
except OSError:
220-
res = False
221-
head, dir = os.path.split(head)
222-
221+
if res:
222+
# Also recursively check the other path components in case sensitive way.
223+
res = self._exists_case(head, prefix)
223224
self.isfile_case_cache[path] = res
224225
return res
225226

227+
def _exists_case(self, path: str, prefix: str) -> bool:
    """Check the components of path case sensitively, stopping at prefix.

    The final component of path must appear verbatim in the listing of its
    parent directory, and the same must hold recursively for the parent.
    A path whose parent does not start with prefix, or whose final
    component is empty, is accepted without any check. Results are
    memoized in self.exists_case_cache, keyed by path.
    """
    # Cached values are always bools, so None can only mean "not cached".
    known = self.exists_case_cache.get(path)
    if known is not None:
        return known
    parent, name = os.path.split(path)
    if not (parent.startswith(prefix) and name):
        # Only paths under prefix are subject to the check.
        found = True
    else:
        try:
            # Comparing against the real directory listing makes the
            # check case sensitive even on case-insensitive file systems.
            found = name in self.listdir(parent)
        except OSError:
            found = False
        # Walk up and verify the remaining components as well.
        found = found and self._exists_case(parent, prefix)
    self.exists_case_cache[path] = found
    return found
248+
226249
def isdir(self, path: str) -> bool:
227250
try:
228251
st = self.stat(path)

mypy/test/testfscache.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Unit tests for file system cache."""
2+
3+
import os
4+
import shutil
5+
import sys
6+
import tempfile
7+
import unittest
8+
from typing import Optional
9+
10+
from mypy.fscache import FileSystemCache
11+
12+
13+
class TestFileSystemCache(unittest.TestCase):
    """Tests for FileSystemCache.isfile_case run against a scratch directory.

    Each test builds a small file tree in a temporary directory and then
    queries it in a specific order. The three numbered tests use the same
    tree but differ in the order of the queries.
    """

    def setUp(self) -> None:
        # Run every test from inside a fresh temporary directory and keep
        # the previous working directory so that tearDown can restore it.
        self.tempdir = tempfile.mkdtemp()
        self.oldcwd = os.getcwd()
        os.chdir(self.tempdir)
        self.fscache = FileSystemCache()

    def tearDown(self) -> None:
        # Leave the scratch directory before deleting it.
        os.chdir(self.oldcwd)
        shutil.rmtree(self.tempdir)

    def _make_sample_tree(self) -> None:
        """Create the file layout shared by the numbered tests."""
        for rel_path in ('bar.py',
                         'pkg/sub_package/__init__.py',
                         'pkg/sub_package/foo.py'):
            self.make_file(rel_path)

    def _check_twice(self, expectations) -> None:
        """Assert each (path, expected) pair in order, for two full passes.

        Running the whole sequence twice covers both the non-cached and
        the cached code paths.
        """
        for _ in range(2):
            for rel_path, expected in expectations:
                assert bool(self.isfile_case(rel_path)) == expected, rel_path

    def test_isfile_case_1(self) -> None:
        self._make_sample_tree()
        self._check_twice((
            ('bar.py', True),
            ('pkg/sub_package/__init__.py', True),
            ('pkg/sub_package/foo.py', True),
            ('non_existent.py', False),
            ('pkg/non_existent.py', False),
            ('pkg/', False),
            ('bar.py/', False),
        ))
        self._check_twice((
            ('Bar.py', False),
            ('pkg/sub_package/__init__.PY', False),
            ('pkg/Sub_Package/foo.py', False),
            ('Pkg/sub_package/foo.py', False),
        ))

    def test_isfile_case_2(self) -> None:
        self._make_sample_tree()
        # Same tree as test_isfile_case_1, but the wrong-case queries are
        # issued before the exact-case ones.
        self._check_twice((
            ('Bar.py', False),
            ('pkg/sub_package/__init__.PY', False),
            ('pkg/Sub_Package/foo.py', False),
            ('Pkg/sub_package/foo.py', False),
        ))
        self._check_twice((
            ('bar.py', True),
            ('pkg/sub_package/__init__.py', True),
            ('pkg/sub_package/foo.py', True),
            ('non_existent.py', False),
            ('pkg/non_existent.py', False),
        ))

    def test_isfile_case_3(self) -> None:
        self._make_sample_tree()
        # Exact-case and wrong-case queries interleaved in a single sequence.
        self._check_twice((
            ('bar.py', True),
            ('non_existent.py', False),
            ('pkg/non_existent.py', False),
            ('Bar.py', False),
            ('pkg/sub_package/__init__.PY', False),
            ('pkg/Sub_Package/foo.py', False),
            ('Pkg/sub_package/foo.py', False),
            ('pkg/sub_package/__init__.py', True),
            ('pkg/sub_package/foo.py', True),
        ))

    def test_isfile_case_other_directory(self) -> None:
        self.make_file('bar.py')
        with tempfile.TemporaryDirectory() as other:

            def in_other(name: str) -> str:
                return os.path.join(other, name)

            self.make_file('other_dir.py', base=other)
            self.make_file('pkg/other_dir.py', base=other)
            assert self.isfile_case(in_other('other_dir.py'))
            assert not self.isfile_case(in_other('Other_Dir.py'))
            assert not self.isfile_case(in_other('bar.py'))
            if sys.platform in ('win32', 'darwin'):
                # We only check case for directories under our prefix, and since
                # this path is not under the prefix, case difference is fine.
                assert self.isfile_case(in_other('PKG/other_dir.py'))

    def make_file(self, path: str, base: Optional[str] = None) -> None:
        """Create path under base (default: the test's temporary directory).

        Missing parent directories are created. A path ending in '/' only
        gets its directories created; no file is written.
        """
        root = self.tempdir if base is None else base
        target = os.path.join(root, path)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if path.endswith('/'):
            return
        with open(target, 'w') as f:
            f.write('# test file')

    def isfile_case(self, path: str) -> bool:
        """Query the cache for path joined onto the temporary directory,
        using the temporary directory as the prefix."""
        absolute = os.path.join(self.tempdir, path)
        return self.fscache.isfile_case(absolute, self.tempdir)

0 commit comments

Comments
 (0)