Skip to content

Commit b027308

Browse files
authored
New semantic analyzer: Support multiple passes over functions. (#6280)
This allows supporting forward references within function namespaces. The logic is very simple: go over each top-level function/method again and again until everything inside it is resolved, plus one more pass to report errors. The big diff in _visit_func_def() is because of whitespace; I didn't change any function body analysis logic apart from removing the previous two-pass special-casing for functions (GitHub has an option to ignore whitespace in the diff).
1 parent 9ffb7ca commit b027308

File tree

4 files changed

+189
-126
lines changed

4 files changed

+189
-126
lines changed

mypy/newsemanal/semanal.py

+68-93
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,6 @@
125125
'builtins.bytearray': 'builtins.str',
126126
})
127127

128-
# When analyzing a function, should we analyze the whole function in one go, or
129-
# should we only perform one phase of the analysis? The latter is used for
130-
# nested functions. In the first phase we add the function to the symbol table
131-
# but don't process body. In the second phase we process function body. This
132-
# way we can have mutually recursive nested functions.
133-
FUNCTION_BOTH_PHASES = 0 # type: Final # Everything in one go
134-
FUNCTION_FIRST_PHASE_POSTPONE_SECOND = 1 # type: Final # Add to symbol table but postpone body
135-
FUNCTION_SECOND_PHASE = 2 # type: Final # Only analyze body
136-
137128
# Map from the full name of a missing definition to the test fixture (under
138129
# test-data/unit/fixtures/) that provides the definition. This is used for
139130
# generating better error messages when running mypy tests only.
@@ -193,14 +184,6 @@ class NewSemanticAnalyzer(NodeVisitor[None],
193184
# Stack of functions being analyzed
194185
function_stack = None # type: List[FuncItem]
195186

196-
# Status of postponing analysis of nested function bodies. By using this we
197-
# can have mutually recursive nested functions. Values are FUNCTION_x
198-
# constants. Note that separate phasea are not used for methods.
199-
postpone_nested_functions_stack = None # type: List[int]
200-
# Postponed functions collected if
201-
# postpone_nested_functions_stack[-1] == FUNCTION_FIRST_PHASE_POSTPONE_SECOND.
202-
postponed_functions_stack = None # type: List[List[Node]]
203-
204187
loop_depth = 0 # Depth of breakable loops
205188
cur_mod_id = '' # Current module id (or None) (phase 2)
206189
is_stub_file = False # Are we analyzing a stub file?
@@ -228,6 +211,11 @@ def __init__(self,
228211
errors: Report analysis errors using this instance
229212
"""
230213
self.locals = [None]
214+
# Saved namespaces from previous iteration. Every top-level function/method body is
215+
# analyzed in several iterations until all names are resolved. We need to save
216+
# the local namespaces for the top level function and all nested functions between
217+
# these iterations. See also semanal_main.process_top_level_function().
218+
self.saved_locals = {} # type: Dict[FuncItem, SymbolTable]
231219
self.imports = set()
232220
self.type = None
233221
self.type_stack = []
@@ -243,8 +231,6 @@ def __init__(self,
243231
# missing name in these namespaces, we need to defer the current analysis target,
244232
# since it's possible that the name will be there once the namespace is complete.
245233
self.incomplete_namespaces = incomplete_namespaces
246-
self.postpone_nested_functions_stack = [FUNCTION_BOTH_PHASES]
247-
self.postponed_functions_stack = []
248234
self.all_exports = [] # type: List[str]
249235
# Map from module id to list of explicitly exported names (i.e. names in __all__).
250236
self.export_map = {} # type: Dict[str, List[str]]
@@ -451,67 +437,56 @@ def add_func_to_symbol_table(self, func: Union[FuncDef, OverloadedFuncDef]) -> N
451437
self.add_symbol(func.name(), func, func)
452438

453439
def _visit_func_def(self, defn: FuncDef) -> None:
454-
phase_info = self.postpone_nested_functions_stack[-1]
455-
if phase_info != FUNCTION_SECOND_PHASE:
456-
self.function_stack.append(defn)
457-
# First phase of analysis for function.
458-
if not defn._fullname:
459-
defn._fullname = self.qualified_name(defn.name())
460-
if defn.type:
461-
assert isinstance(defn.type, CallableType)
462-
self.update_function_type_variables(defn.type, defn)
463-
self.function_stack.pop()
440+
self.function_stack.append(defn)
464441

465-
defn.is_conditional = self.block_depth[-1] > 0
442+
if defn.type:
443+
assert isinstance(defn.type, CallableType)
444+
self.update_function_type_variables(defn.type, defn)
445+
self.function_stack.pop()
466446

467-
if self.is_class_scope():
468-
# Method definition
469-
assert self.type is not None
470-
defn.info = self.type
471-
if defn.type is not None and defn.name() in ('__init__', '__init_subclass__'):
472-
assert isinstance(defn.type, CallableType)
473-
if isinstance(defn.type.ret_type, AnyType):
474-
defn.type = defn.type.copy_modified(ret_type=NoneTyp())
475-
self.prepare_method_signature(defn, self.type)
476-
477-
# Analyze function signature and initializers in the first phase
478-
# (at least this mirrors what happens at runtime).
479-
with self.tvar_scope_frame(self.tvar_scope.method_frame()):
480-
if defn.type:
481-
self.check_classvar_in_signature(defn.type)
447+
defn.is_conditional = self.block_depth[-1] > 0
448+
449+
if self.is_class_scope():
450+
# Method definition
451+
assert self.type is not None
452+
defn.info = self.type
453+
if defn.type is not None and defn.name() in ('__init__', '__init_subclass__'):
454+
assert isinstance(defn.type, CallableType)
455+
if isinstance(defn.type.ret_type, AnyType):
456+
defn.type = defn.type.copy_modified(ret_type=NoneTyp())
457+
self.prepare_method_signature(defn, self.type)
458+
459+
# Analyze function signature and initializers first.
460+
with self.tvar_scope_frame(self.tvar_scope.method_frame()):
461+
if defn.type:
462+
self.check_classvar_in_signature(defn.type)
463+
assert isinstance(defn.type, CallableType)
464+
# Signature must be analyzed in the surrounding scope so that
465+
# class-level imported names and type variables are in scope.
466+
analyzer = self.type_analyzer()
467+
defn.type = analyzer.visit_callable_type(defn.type, nested=False)
468+
self.add_type_alias_deps(analyzer.aliases_used)
469+
self.check_function_signature(defn)
470+
if isinstance(defn, FuncDef):
482471
assert isinstance(defn.type, CallableType)
483-
# Signature must be analyzed in the surrounding scope so that
484-
# class-level imported names and type variables are in scope.
485-
analyzer = self.type_analyzer()
486-
defn.type = analyzer.visit_callable_type(defn.type, nested=False)
487-
self.add_type_alias_deps(analyzer.aliases_used)
488-
self.check_function_signature(defn)
489-
if isinstance(defn, FuncDef):
490-
assert isinstance(defn.type, CallableType)
491-
defn.type = set_callable_name(defn.type, defn)
492-
for arg in defn.arguments:
493-
if arg.initializer:
494-
arg.initializer.accept(self)
495-
496-
if phase_info == FUNCTION_FIRST_PHASE_POSTPONE_SECOND:
497-
# Postpone this function (for the second phase).
498-
self.postponed_functions_stack[-1].append(defn)
499-
return
500-
if phase_info != FUNCTION_FIRST_PHASE_POSTPONE_SECOND:
501-
# Second phase of analysis for function.
502-
self.analyze_function(defn)
503-
if defn.is_coroutine and isinstance(defn.type, CallableType):
504-
if defn.is_async_generator:
505-
# Async generator types are handled elsewhere
506-
pass
507-
else:
508-
# A coroutine defined as `async def foo(...) -> T: ...`
509-
# has external return type `Coroutine[Any, Any, T]`.
510-
any_type = AnyType(TypeOfAny.special_form)
511-
ret_type = self.named_type_or_none('typing.Coroutine',
512-
[any_type, any_type, defn.type.ret_type])
513-
assert ret_type is not None, "Internal error: typing.Coroutine not found"
514-
defn.type = defn.type.copy_modified(ret_type=ret_type)
472+
defn.type = set_callable_name(defn.type, defn)
473+
for arg in defn.arguments:
474+
if arg.initializer:
475+
arg.initializer.accept(self)
476+
477+
self.analyze_function(defn)
478+
if defn.is_coroutine and isinstance(defn.type, CallableType):
479+
if defn.is_async_generator:
480+
# Async generator types are handled elsewhere
481+
pass
482+
else:
483+
# A coroutine defined as `async def foo(...) -> T: ...`
484+
# has external return type `Coroutine[Any, Any, T]`.
485+
any_type = AnyType(TypeOfAny.special_form)
486+
ret_type = self.named_type_or_none('typing.Coroutine',
487+
[any_type, any_type, defn.type.ret_type])
488+
assert ret_type is not None, "Internal error: typing.Coroutine not found"
489+
defn.type = defn.type.copy_modified(ret_type=ret_type)
515490

516491
def prepare_method_signature(self, func: FuncDef, info: TypeInfo) -> None:
517492
"""Check basic signature validity and tweak annotation of self/cls argument."""
@@ -630,6 +605,10 @@ def analyze_overload_sigs_and_impl(
630605
types = []
631606
non_overload_indexes = []
632607
impl = None # type: Optional[OverloadPart]
608+
# TODO: This is really bad; we should not modify defn.items either here or above.
609+
if defn.impl:
610+
# We are visiting this for the second time.
611+
defn.items.append(defn.impl)
633612
for i, item in enumerate(defn.items):
634613
if i != 0:
635614
# Assume that the first item was already visited
@@ -780,7 +759,7 @@ def analyze_function(self, defn: FuncItem) -> None:
780759
a = self.type_analyzer()
781760
a.bind_function_type_variables(cast(CallableType, defn.type), defn)
782761
self.function_stack.append(defn)
783-
self.enter()
762+
self.enter(defn)
784763
for arg in defn.arguments:
785764
self.add_local(arg.variable, defn)
786765

@@ -790,18 +769,7 @@ def analyze_function(self, defn: FuncItem) -> None:
790769
if is_method and not defn.is_static and not defn.is_class and defn.arguments:
791770
defn.arguments[0].variable.is_self = True
792771

793-
# First analyze body of the function but ignore nested functions.
794-
self.postpone_nested_functions_stack.append(FUNCTION_FIRST_PHASE_POSTPONE_SECOND)
795-
self.postponed_functions_stack.append([])
796772
defn.body.accept(self)
797-
798-
# Analyze nested functions (if any) as a second phase.
799-
self.postpone_nested_functions_stack[-1] = FUNCTION_SECOND_PHASE
800-
for postponed in self.postponed_functions_stack[-1]:
801-
postponed.accept(self)
802-
self.postpone_nested_functions_stack.pop()
803-
self.postponed_functions_stack.pop()
804-
805773
self.leave()
806774
self.function_stack.pop()
807775

@@ -977,12 +945,10 @@ def enter_class(self, info: TypeInfo) -> None:
977945
self.type_stack.append(self.type)
978946
self.locals.append(None) # Add class scope
979947
self.block_depth.append(-1) # The class body increments this to 0
980-
self.postpone_nested_functions_stack.append(FUNCTION_BOTH_PHASES)
981948
self.type = info
982949

983950
def leave_class(self) -> None:
984951
""" Restore analyzer state. """
985-
self.postpone_nested_functions_stack.pop()
986952
self.block_depth.pop()
987953
self.locals.pop()
988954
self.type = self.type_stack.pop()
@@ -3797,8 +3763,17 @@ def qualified_name(self, n: str) -> str:
37973763
base = self.cur_mod_id
37983764
return base + '.' + n
37993765

3800-
def enter(self) -> None:
3801-
self.locals.append(SymbolTable())
3766+
def enter(self, function: Optional[FuncItem] = None) -> None:
3767+
"""Enter the function scope.
3768+
3769+
The argument can be omitted for temporary scopes (like comprehensions
3770+
and generator expressions) that can't have incomplete definitions.
3771+
"""
3772+
if function:
3773+
names = self.saved_locals.setdefault(function, SymbolTable())
3774+
else:
3775+
names = SymbolTable()
3776+
self.locals.append(names)
38023777
self.global_decls.append(set())
38033778
self.nonlocal_decls.append(set())
38043779
# -1 since entering block will increment this to 0.

mypy/newsemanal/semanal_main.py

+34-8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
MYPY = False
3434
if MYPY:
3535
from mypy.build import Graph, State
36+
from mypy.newsemanal.semanal import NewSemanticAnalyzer
3637

3738

3839
# Perform up to this many semantic analysis iterations until giving up trying to bind all names.
@@ -53,7 +54,6 @@ def semantic_analysis_for_scc(graph: 'Graph', scc: List[str]) -> None:
5354

5455
def process_top_levels(graph: 'Graph', scc: List[str]) -> None:
5556
# Process top levels until everything has been bound.
56-
# TODO: Limit the number of iterations
5757

5858
# Initialize ASTs and symbol tables.
5959
for id in scc:
@@ -86,19 +86,46 @@ def process_top_levels(graph: 'Graph', scc: List[str]) -> None:
8686

8787

8888
def process_functions(graph: 'Graph', scc: List[str]) -> None:
89-
# TODO: This doesn't quite work yet
9089
# Process functions.
91-
deferred = [] # type: List[str]
9290
for module in scc:
9391
tree = graph[module].tree
9492
assert tree is not None
93+
analyzer = graph[module].manager.new_semantic_analyzer
9594
symtable = tree.names
9695
targets = get_all_leaf_targets(symtable, module, None)
9796
for target, node, active_type in targets:
98-
deferred, incomplete = semantic_analyze_target(target, graph[module], node,
99-
active_type)
100-
assert not deferred # There can't be cross-function forward refs
101-
assert not incomplete # Ditto
97+
assert isinstance(node, (FuncDef, OverloadedFuncDef, Decorator))
98+
process_top_level_function(analyzer, graph[module], module, node, active_type)
99+
100+
101+
def process_top_level_function(analyzer: 'NewSemanticAnalyzer',
102+
state: 'State', module: str,
103+
node: Union[FuncDef, OverloadedFuncDef, Decorator],
104+
active_type: Optional[TypeInfo]) -> None:
105+
"""Analyze single top-level function or method.
106+
107+
Process the body of the function (including nested functions) again and again,
108+
until all names have been resolved (or the iteration limit is reached).
109+
"""
110+
iteration = 0
111+
# We need one more iteration after incomplete is False (e.g. to report errors, if any).
112+
more_iterations = incomplete = True
113+
# Start in the incomplete state (no missing names will be reported on first pass).
114+
# Note that we use module name, since functions don't create qualified names.
115+
deferred = [module]
116+
analyzer.incomplete_namespaces.add(module)
117+
while deferred and more_iterations:
118+
iteration += 1
119+
if not incomplete or iteration == MAX_ITERATIONS:
120+
# OK, this is one last pass, now missing names will be reported.
121+
more_iterations = False
122+
analyzer.incomplete_namespaces.discard(module)
123+
deferred, incomplete = semantic_analyze_target(module, state, node,
124+
active_type)
125+
126+
# After semantic analysis is done, discard local namespaces
127+
# to avoid memory hoarding.
128+
analyzer.saved_locals.clear()
102129

103130

104131
TargetInfo = Tuple[str, Union[MypyFile, FuncDef, OverloadedFuncDef, Decorator], Optional[TypeInfo]]
@@ -124,7 +151,6 @@ def semantic_analyze_target(target: str,
124151
state: 'State',
125152
node: Union[MypyFile, FuncDef, OverloadedFuncDef, Decorator],
126153
active_type: Optional[TypeInfo]) -> Tuple[List[str], bool]:
127-
# TODO: Support refreshing function targets (currently only works for module top levels)
128154
tree = state.tree
129155
assert tree is not None
130156
analyzer = state.manager.new_semantic_analyzer

mypy/test/testcheck.py

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ def run_case_once(self, testcase: DataDrivenTestCase,
150150
options.show_traceback = True
151151
if 'optional' in testcase.file:
152152
options.strict_optional = True
153+
if 'newsemanal' in testcase.file:
154+
options.new_semantic_analyzer = True
153155
if incremental_step and options.incremental:
154156
# Don't overwrite # flags: --no-incremental in incremental test cases
155157
options.incremental = True

0 commit comments

Comments
 (0)