16
16
from typing_extensions import Final , TypeAlias as _TypeAlias
17
17
18
18
from mypy .fscache import FileSystemCache
19
+ from mypy .nodes import MypyFile
19
20
from mypy .options import Options
20
21
from mypy .stubinfo import is_legacy_bundled_package
21
22
from mypy import pyinfo
@@ -115,6 +116,33 @@ def __repr__(self) -> str:
115
116
self .base_dir )
116
117
117
118
119
class BuildSourceSet:
    """Index of build sources for quick "is this file a build source?" checks.

    Attributes:
        source_text_present: True when at least one source has a ``text``
            that is not None.
        source_modules: maps each truthy source module name to that source's
            path ('' when the source has no path).
        source_paths: every truthy source path.
    """

    def __init__(self, sources: List[BuildSource]) -> None:
        """Build the lookup tables from ``sources`` in a single pass."""
        self.source_text_present = False
        self.source_modules = {}  # type: Dict[str, str]
        self.source_paths = set()  # type: Set[str]

        for entry in sources:
            path, module = entry.path, entry.module
            if entry.text is not None:
                self.source_text_present = True
            if path:
                self.source_paths.add(path)
            if module:
                # A later source with the same module name overrides an earlier one.
                self.source_modules[module] = path if path else ''

    def is_source(self, file: MypyFile) -> bool:
        """Return True if ``file`` should be treated as one of the build sources.

        A file matches by path, then by its ``_fullname``; when any source
        was given with text, every file matches.
        """
        if file.path and file.path in self.source_paths:
            return True
        if file._fullname in self.source_modules:
            return True
        return self.source_text_present
144
+
145
+
118
146
class FindModuleCache :
119
147
"""Module finder with integrated cache.
120
148
@@ -130,8 +158,10 @@ def __init__(self,
130
158
search_paths : SearchPaths ,
131
159
fscache : Optional [FileSystemCache ],
132
160
options : Optional [Options ],
133
- stdlib_py_versions : Optional [StdlibVersions ] = None ) -> None :
161
+ stdlib_py_versions : Optional [StdlibVersions ] = None ,
162
+ source_set : Optional [BuildSourceSet ] = None ) -> None :
134
163
self .search_paths = search_paths
164
+ self .source_set = source_set
135
165
self .fscache = fscache or FileSystemCache ()
136
166
# Cache for get_toplevel_possibilities:
137
167
# search_paths -> (toplevel_id -> list(package_dirs))
@@ -153,6 +183,52 @@ def clear(self) -> None:
153
183
self .initial_components .clear ()
154
184
self .ns_ancestors .clear ()
155
185
186
def find_module_via_source_set(self, id: str) -> Optional[ModuleSearchResult]:
    """Try to resolve module ``id`` using only the build source set.

    Returns:
        * the source path, when ``id`` is in the source set, the file exists,
          and every ancestor package directory still contains an
          ``__init__`` file;
        * ``ModuleNotFoundReason.NOT_FOUND``, when ``id`` itself is not
          resolved but its parent resolves to a plain module file (not an
          ``__init__`` file, and with no directory of the same name);
        * ``None`` in every other case, including when no source set is
          configured.
    """
    source_set = self.source_set
    if not source_set:
        return None
    fscache = self.fscache

    path = source_set.source_modules.get(id)
    if path and fscache.isfile(path):
        # Each dotted component must still be backed by an __init__ file.
        # Without this check the result could differ from the slow path
        # after an __init__ file has been deleted (some tests cover that).
        # TODO: is there a combination of flags under which this check
        # should be skipped?
        pkg_dir = os.path.dirname(path)
        for _ in range(id.count('.')):
            for suffix in PYTHON_EXTENSIONS:
                if fscache.isfile(os.path.join(pkg_dir, '__init__' + suffix)):
                    break
            else:
                # No __init__ file at this level.
                return None
            pkg_dir = os.path.dirname(pkg_dir)
        return path

    parent_id, dot, _ = id.rpartition('.')
    if not dot:
        # Top-level name that is not in the source set.
        return None

    # Looking for foo.bar.baz without a match in the source set: see whether
    # foo.bar is known instead.
    parent = self.find_module_via_source_set(parent_id)
    if not isinstance(parent, str):
        return None

    stem, ext = os.path.splitext(parent)
    if parent.endswith(tuple('__init__' + suffix for suffix in PYTHON_EXTENSIONS)):
        # The parent is a package; 'baz' could be a real submodule.
        return None
    if ext not in PYTHON_EXTENSIONS:
        return None
    if fscache.isdir(stem):
        # A directory named like the parent module exists next to it.
        return None

    # The parent is a plain *module*, so 'baz' is either a top-level name
    # inside foo.bar or does not exist at all. Searching the other search
    # paths for a 'foo.bar.baz' module should be avoided because:
    #   1. if one were found, it would most likely be unrelated to the
    #      source being typechecked and lead to erroneous results;
    #   2. the search itself can waste a significant amount of time.
    return ModuleNotFoundReason.NOT_FOUND
231
+
156
232
def find_lib_path_dirs (self , id : str , lib_path : Tuple [str , ...]) -> PackageDirs :
157
233
"""Find which elements of a lib_path have the directory a module needs to exist.
158
234
@@ -218,7 +294,7 @@ def find_module(self, id: str, *, fast_path: bool = False) -> ModuleSearchResult
218
294
elif top_level in self .stdlib_py_versions :
219
295
use_typeshed = self ._typeshed_has_version (top_level )
220
296
self .results [id ] = self ._find_module (id , use_typeshed )
221
- if (not fast_path
297
+ if (not ( fast_path or ( self . options is not None and self . options . fast_module_lookup ))
222
298
and self .results [id ] is ModuleNotFoundReason .NOT_FOUND
223
299
and self ._can_find_module_in_parent_dir (id )):
224
300
self .results [id ] = ModuleNotFoundReason .WRONG_WORKING_DIRECTORY
@@ -284,6 +360,39 @@ def _can_find_module_in_parent_dir(self, id: str) -> bool:
284
360
def _find_module (self , id : str , use_typeshed : bool ) -> ModuleSearchResult :
285
361
fscache = self .fscache
286
362
363
+ # Fast path for any modules in the current source set.
364
+ # This is particularly important when there are a large number of search
365
+ # paths which share the first (few) component(s) due to the use of namespace
366
+ # packages, for instance:
367
+ # foo/
368
+ # company/
369
+ # __init__.py
370
+ # foo/
371
+ # bar/
372
+ # company/
373
+ # __init__.py
374
+ # bar/
375
+ # baz/
376
+ # company/
377
+ # __init__.py
378
+ # baz/
379
+ #
380
+ # mypy gets [foo/company/foo, bar/company/bar, baz/company/baz, ...] as input
381
+ # and computes [foo, bar, baz, ...] as the module search path.
382
+ #
383
+ # This would result in O(n) search for every import of company.*, leading to
384
+ # O(n**2) behavior in load_graph as such imports are unsurprisingly present
385
+ # at least once, and usually many more times than that, in each and every file
386
+ # being parsed.
387
+ #
388
+ # Thankfully, such cases are efficiently handled by looking up the module path
389
+ # via BuildSourceSet.
390
+ p = (self .find_module_via_source_set (id )
391
+ if (self .options is not None and self .options .fast_module_lookup )
392
+ else None )
393
+ if p :
394
+ return p
395
+
287
396
# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
288
397
# many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
289
398
# that only once and cache it for when we look for modules like 'foo.bar.blah'
0 commit comments