
Commit c84fe5c

It's time we started using mypy again
I found a PR for mypy that adds support for the walrus operator: python/mypy#6899. (This commit fixes some of the more egregious errors, but about 50 more remain.)
Parent: 357e482
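Pegen targets Python 3.8 (its generated modules start with `#!/usr/bin/env python3.8`) and uses assignment expressions (PEP 572), which released mypy versions at the time could not parse; the linked PR adds that support. An illustrative fragment of the construct, not a line from this repo:

import re

# The walrus operator binds and tests in one expression; pre-PEP-572
# mypy releases rejected this syntax outright.
if (m := re.match(r"[A-Za-z_]\w*", "rule_name: expr")) is not None:
    print(m.group())  # -> rule_name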

6 files changed (+59, -43 lines)

pegen/__main__.py

Lines changed: 3 additions & 1 deletion
@@ -14,6 +14,8 @@
 import tokenize
 import traceback
 
+from typing import Final
+
 from pegen.parser_generator import ParserGenerator
 from pegen.tokenizer import Tokenizer
 from pegen.tokenizer import grammar_tokenizer
@@ -23,7 +25,7 @@
 def print_memstats() -> bool:
     MiB: Final = 2 ** 20
     try:
-        import psutil
+        import psutil  # type: ignore
     except ImportError:
         return False
     print("Memory stats:")

pegen/grammar.py

Lines changed: 32 additions & 17 deletions
@@ -7,11 +7,14 @@
 import token
 import tokenize
 import traceback
-from typing import AbstractSet, Callable, Dict, Generic, Iterable, Optional, List, TypeVar, Union
+from typing import AbstractSet, Callable, Dict, Generic, Iterable, List, Optional, Tuple, TYPE_CHECKING, TypeVar, Union
 
 from pegen.parser import memoize, Parser
 from pegen.tokenizer import exact_token_types
 
+if TYPE_CHECKING:
+    from pegen.parser_generator import ParserGenerator
+
 
 def dedupe(name: str, names: List[str]) -> str:
     origname = name
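The `TYPE_CHECKING` guard above exists because grammar.py and parser_generator.py import each other (this very commit adds `from pegen.grammar import Plain` to the generator): the guarded import runs only under the type checker, never at runtime, and the `ParserGenerator` annotations added below still resolve because annotation evaluation in this file is evidently postponed (it already uses forward references such as `Rhs`). A minimal standalone sketch of the pattern, with a hypothetical module name:

from __future__ import annotations  # postpone annotation evaluation

from typing import TYPE_CHECKING, Tuple

if TYPE_CHECKING:
    # Runs only under mypy, so the runtime circular import never happens.
    # (Hypothetical module name, for the sketch only.)
    from mygen.generator import Generator


class Node:
    def make_call(self, gen: Generator, cpython: bool) -> Tuple[str, str]:
        # The annotation is never evaluated at runtime, so the guarded
        # import above is enough for the checker to resolve Generator.
        raise NotImplementedError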
@@ -24,12 +27,12 @@ def dedupe(name: str, names: List[str]) -> str:
 
 
 class Rule:
-    def __init__(self, name: str, type: str, rhs: Rhs):
+    def __init__(self, name: str, type: Optional[str], rhs: Rhs):
         self.name = name
         self.type = type
         self.rhs = rhs
         self.visited = False
-        self.nullable = None
+        self.nullable = False
         self.left_recursive = False
         self.leader = False
 
@@ -45,13 +48,12 @@ def __str__(self):
     def __repr__(self):
         return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         if self.visited:
             # A left-recursive rule is considered non-nullable.
             return False
         self.visited = True
         self.nullable = self.rhs.visit(rules)
-        assert self.nullable is not None
         return self.nullable
 
     def initial_names(self) -> AbstractSet[str]:
@@ -188,6 +190,15 @@ def __init__(self, value: str):
     def __str__(self):
         return self.value
 
+    def visit(self, rules: Dict[str, Rule]) -> bool:
+        raise NotImplementedError
+
+    def initial_names(self) -> AbstractSet[str]:
+        raise NotImplementedError
+
+    def make_call(self, gen: ParserGenerator, cpython: bool) -> Tuple[str, str]:
+        raise NotImplementedError
+
 
 class NameLeaf(Leaf):
     """The value is the name."""
@@ -202,7 +213,7 @@ def __str__(self):
     def __repr__(self):
         return f"NameLeaf({self.value!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         if self.value in rules:
             return rules[self.value].visit(rules)
         # Token or unknown; never empty.
@@ -237,7 +248,7 @@ class StringLeaf(Leaf):
     def __repr__(self):
         return f"StringLeaf({self.value!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         # The string token '' is considered empty.
         return not self.value
 
@@ -269,7 +280,7 @@ def __str__(self):
     def __repr__(self):
         return f"Rhs({self.alts!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         for alt in self.alts:
             if alt.visit(rules):
                 return True
@@ -341,7 +352,7 @@ def __repr__(self):
             args.append(f"action={self.action!r}")
         return f"Alt({', '.join(args)})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         for item in self.items:
             if not item.visit(rules):
                 return False
@@ -446,7 +457,7 @@ class NamedItem:
     def __init__(self, name: Optional[str], item: Item):
         self.name = name
         self.item = item
-        self.nullable = None
+        self.nullable = False
 
     def __str__(self):
         if self.name:
@@ -457,9 +468,8 @@ def __str__(self):
     def __repr__(self):
         return f"NamedItem({self.name!r}, {self.item!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         self.nullable = self.item.visit(rules)
-        assert self.nullable is not None
         return self.nullable
 
     def initial_names(self) -> AbstractSet[str]:
@@ -582,7 +592,7 @@ def make_call(self, gen: ParserGenerator, cpython: bool) -> Tuple[str, str]:
         else:
             return "opt", f"{call},"  # Note trailing comma!
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         return True
 
     def initial_names(self) -> AbstractSet[str]:
@@ -596,6 +606,12 @@ def __init__(self, node: Plain):
         self.node = node
         self.memo = None
 
+    def visit(self, rules: Dict[str, Rule]) -> bool:
+        raise NotImplementedError
+
+    def make_call(self, gen: ParserGenerator, cpython: bool) -> Tuple[str, str]:
+        raise NotImplementedError
+
     def initial_names(self) -> AbstractSet[str]:
         return self.node.initial_names()
 
@@ -607,7 +623,7 @@ def __str__(self):
     def __repr__(self):
         return f"Repeat0({self.node!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         return True
 
     def make_call(self, gen: ParserGenerator, cpython: bool) -> Tuple[str, str]:
@@ -628,8 +644,7 @@ def __str__(self):
     def __repr__(self):
         return f"Repeat1({self.node!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
-        # TODO: What if self.node is itself nullable?
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         return False
 
     def make_call(self, gen: ParserGenerator, cpython: bool) -> Tuple[str, str]:
@@ -653,7 +668,7 @@ def __str__(self):
     def __repr__(self):
         return f"Group({self.rhs!r})"
 
-    def visit(self, rules: Dict[str, Rule]) -> Optional[bool]:
+    def visit(self, rules: Dict[str, Rule]) -> bool:
         return self.rhs.visit(rules)
 
     def initial_names(self) -> AbstractSet[str]:
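Two patterns recur through this file: `nullable` now starts as `False` rather than `None`, so mypy infers a plain `bool` and the `assert self.nullable is not None` guards can go; and the base classes (`Leaf`, and the `Repeat` base further down) gain `raise NotImplementedError` stubs so that calls such as `self.rhs.visit(rules)` type-check against a declared signature. A condensed sketch of both patterns, with hypothetical class names rather than the full hierarchy:

from typing import Dict, Optional


class Node:
    # Base-class stub: declares the signature mypy checks calls against;
    # concrete subclasses override it.
    def visit(self, rules: Dict[str, "Rule"]) -> bool:
        raise NotImplementedError


class Rule:
    def __init__(self, name: str, type: Optional[str], rhs: Node) -> None:
        self.name = name
        self.type = type
        self.rhs = rhs
        self.visited = False
        # False rather than None: the attribute stays a plain bool, so no
        # "assert self.nullable is not None" is needed before returning it.
        self.nullable = False

    def visit(self, rules: Dict[str, "Rule"]) -> bool:
        if self.visited:
            return False  # a left-recursive rule counts as non-nullable
        self.visited = True
        self.nullable = self.rhs.visit(rules)
        return self.nullable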

pegen/parser.py

Lines changed: 11 additions & 13 deletions
@@ -5,7 +5,7 @@
 import time
 import token
 import tokenize
-from typing import TypeVar, Generic, Dict, Tuple, Callable, Optional, NoReturn
+from typing import Callable, Dict, Generic, Optional, Tuple, TypeVar
 
 from pegen.tokenizer import CURLY_STUFF
 from pegen.tokenizer import exact_token_types
@@ -16,7 +16,7 @@
 T = TypeVar('T')
 
 
-def memoize(method: Callable[[Parser], T]):
+def memoize(method: Callable[[Parser], T]) -> Callable[[Parser], T]:
     """Memoize a symbol method."""
     method_name = method.__name__
 
@@ -56,11 +56,11 @@ def symbol_wrapper(self: Parser) -> T:
         self.reset(endmark)
         return tree
 
-    symbol_wrapper.__wrapped__ = method
+    symbol_wrapper.__wrapped__ = method  # type: ignore
     return symbol_wrapper
 
 
-def memoize_left_rec(method: Callable[[Parser], T]):
+def memoize_left_rec(method: Callable[[Parser], T]) -> Callable[[Parser], T]:
     """Memoize a left-recursive symbol method."""
     method_name = method.__name__
 
@@ -134,14 +134,14 @@ def left_rec_symbol_wrapper(self: Parser) -> T:
         self.reset(endmark)
         return tree
 
-    left_rec_symbol_wrapper.__wrapped__ = method
+    left_rec_symbol_wrapper.__wrapped__ = method  # type: ignore
     return left_rec_symbol_wrapper
 
 
-def memoize_expect(method: Callable[[Parser], Optional[tokenize.TokenInfo]]) -> bool:
+def memoize_expect(method: Callable[[Parser, str], T]) -> Callable[[Parser, str], T]:
     """Memoize the expect() method."""
 
-    def expect_wrapper(self: Parser, type: str) -> Optional[tokenize.TokenInfo]:
+    def expect_wrapper(self: Parser, type: str) -> T:
         mark = self.mark()
         key = mark, type
         # Fast path: cache hit.
@@ -167,7 +167,7 @@ def expect_wrapper(self: Parser, type: str) -> Optional[tokenize.TokenInfo]:
         self.reset(endmark)
         return res
 
-    expect_wrapper.__wrapped__ = method
+    expect_wrapper.__wrapped__ = method  # type: ignore
     return expect_wrapper
 
 
@@ -178,10 +178,8 @@ def __init__(self, tokenizer: Tokenizer, *, verbose=False):
         self._tokenizer = tokenizer
         self._verbose = verbose
         self._level = 0
-        self._symbol_cache: Dict[Tuple[Mark,
-                                       Callable[[Parser], Optional[T]]],
-                                 Tuple[Optional[T], Mark]] = {}
-        self._token_cache: Dict[Tuple[Mark, str], bool] = {}
+        self._symbol_cache: Dict[Tuple[Mark, str], Tuple[Optional[T], Mark]] = {}
+        self._token_cache: Dict[Tuple[Mark, str], Tuple[Optional[T], Mark]] = {}
         # Pass through common tokeniser methods.
         # TODO: Rename to _mark and _reset.
         self.mark = self._tokenizer.mark
@@ -252,7 +250,7 @@ def negative_lookahead(self, func: Callable[..., T], *args) -> bool:
         self.reset(mark)
         return not ok
 
-    def make_syntax_error(self, filename="<unknown>") -> NoReturn:
+    def make_syntax_error(self, filename="<unknown>") -> SyntaxError:
         tok = self._tokenizer.diagnose()
         return SyntaxError("pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line))
 
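The memoization decorators now advertise that they return the same callable shape they accept, so a decorated rule method keeps a checkable `(Parser) -> T` signature; the `__wrapped__` assignments need `# type: ignore` because plain function objects don't declare that attribute. (The old `make_syntax_error` annotation was simply wrong: the method returns a `SyntaxError`, it doesn't raise.) A simplified standalone sketch of the decorator typing pattern; pegen's real wrappers also manage backtracking marks and verbose logging:

from typing import Callable, Dict, Tuple, TypeVar

T = TypeVar("T")


class Parser:
    def mark(self) -> int:
        return 0


def memoize(method: Callable[[Parser], T]) -> Callable[[Parser], T]:
    # Same shape in and out: mypy sees the decorated method unchanged.
    cache: Dict[Tuple[int, int], T] = {}

    def wrapper(self: Parser) -> T:
        key = (id(self), self.mark())
        if key not in cache:
            cache[key] = method(self)
        return cache[key]

    # Function objects have no declared __wrapped__ attribute, hence the ignore.
    wrapper.__wrapped__ = method  # type: ignore
    return wrapper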

pegen/parser_generator.py

Lines changed: 5 additions & 4 deletions
@@ -8,6 +8,7 @@
 from pegen.grammar import Rhs
 from pegen.grammar import Alt
 from pegen.grammar import NamedItem
+from pegen.grammar import Plain
 
 MODULE_PREFIX = """\
 #!/usr/bin/env python3.8
@@ -83,7 +84,7 @@ def __init__(self, rules: Dict[str, Rule], file: Optional[IO[Text]]):
         self.counter = 0  # For name_rule()/name_loop()
 
     @contextlib.contextmanager
-    def indent(self) -> None:
+    def indent(self) -> Iterator[None]:
         self.level += 1
         try:
             yield
@@ -172,7 +173,7 @@ def compute_nullables(rules: Dict[str, Rule]) -> None:
         rule.visit(rules)
 
 
-def compute_left_recursives(rules: Dict[str, Rule]) -> Tuple[Dict[str, Set[str]], List[Set[str]]]:
+def compute_left_recursives(rules: Dict[str, Rule]) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
     graph = make_first_graph(rules)
     sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
     for scc in sccs:
@@ -199,7 +200,7 @@ def compute_left_recursives(rules: Dict[str, Rule]) -> Tuple[Dict[str, Set[str]]
     return graph, sccs
 
 
-def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, str]:
+def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
     """Compute the graph of left-invocations.
 
     There's an edge from A to B if A may invoke B at its initial
@@ -208,7 +209,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
     Note that this requires the nullable flags to have been computed.
     """
     graph = {}
-    vertices = set()
+    vertices: Set[str] = set()
     for rulename, rhs in rules.items():
         graph[rulename] = names = rhs.initial_names()
         vertices |= names
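`indent()` was annotated `-> None`, but the function that `@contextlib.contextmanager` decorates is a generator, and mypy wants its declared return type to be `Iterator[None]`; the decorator then produces the actual context manager. A minimal sketch, with a hypothetical stand-in for ParserGenerator:

import contextlib
from typing import Iterator


class Emitter:  # hypothetical stand-in for ParserGenerator
    def __init__(self) -> None:
        self.level = 0

    @contextlib.contextmanager
    def indent(self) -> Iterator[None]:
        # The generator itself is an Iterator[None]; contextmanager()
        # wraps it into an object usable in a `with` statement.
        self.level += 1
        try:
            yield
        finally:
            self.level -= 1


gen = Emitter()
with gen.indent():
    print(gen.level)  # -> 1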

pegen/sccutils.py

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@
 from typing import *
 
 def strongly_connected_components(vertices: AbstractSet[str],
-                                  edges: Dict[str, Iterable[str]]) -> Iterator[Set[str]]:
+                                  edges: Dict[str, AbstractSet[str]]) -> Iterator[AbstractSet[str]]:
     """Compute Strongly Connected Components of a directed graph.
 
     Args:
@@ -48,7 +48,7 @@ def dfs(v: str) -> Iterator[Set[str]]:
 
 
 def topsort(data: Dict[AbstractSet[str],
-                       Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]:
+                       Set[AbstractSet[str]]]) -> Iterable[AbstractSet[AbstractSet[str]]]:
     """Topological sort.
 
     Args:
@@ -96,7 +96,7 @@ def topsort(data: Dict[AbstractSet[str],
     assert not data, "A cyclic dependency exists amongst %r" % data
 
 
-def find_cycles_in_scc(graph: Dict[str, Set[str]], scc: Set[str], start: str) -> Iterable[List[str]]:
+def find_cycles_in_scc(graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str) -> Iterable[List[str]]:
    """Find cycles in SCC emanating from start.
 
     Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
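The widening from `Set[str]` to `AbstractSet[str]` in these signatures is about invariance: `make_first_graph()` builds its values from `initial_names()`, which returns `AbstractSet[str]`, and mypy will not accept a `Dict[str, AbstractSet[str]]` where `Dict[str, Set[str]]` or `Dict[str, Iterable[str]]` is declared, because `Dict` is invariant in its value type. A minimal illustration of the mismatch:

from typing import AbstractSet, Dict, Set


def takes_concrete(graph: Dict[str, Set[str]]) -> None:
    pass


def takes_abstract(graph: Dict[str, AbstractSet[str]]) -> None:
    pass


graph: Dict[str, AbstractSet[str]] = {"start": frozenset({"expr"})}

takes_abstract(graph)  # OK
# takes_concrete(graph)  # mypy error: Dict is invariant in its value
#                        # type, and AbstractSet[str] is not Set[str]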

pegen/tokenizer.py

Lines changed: 5 additions & 5 deletions
@@ -2,7 +2,7 @@
 
 import token
 import tokenize
-from typing import List, Iterable
+from typing import List, Iterator
 
 Mark = int  # NewType('Mark', int)
 
@@ -13,7 +13,7 @@
 exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore
 
 
-def shorttok(tok: tokenizer.TokenInfo) -> str:
+def shorttok(tok: tokenize.TokenInfo) -> str:
     return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
 
 
@@ -25,7 +25,7 @@ class Tokenizer:
 
     _tokens: List[tokenize.TokenInfo]
 
-    def __init__(self, tokengen: Iterable[TokenInfo], *, verbose=False):
+    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose=False):
         self._tokengen = tokengen
         self._tokens = []
         self._index = 0
@@ -38,7 +38,7 @@ def getnext(self) -> tokenize.TokenInfo:
         cached = True
         while self._index == len(self._tokens):
             tok = next(self._tokengen)
-            if tok.type in (token.NL, token.COMMENT):
+            if tok.type in (tokenize.NL, tokenize.COMMENT):
                 continue
             if tok.type == token.ERRORTOKEN and tok.string.isspace():
                 continue
@@ -54,7 +54,7 @@ def peek(self) -> tokenize.TokenInfo:
         """Return the next token *without* updating the index."""
         while self._index == len(self._tokens):
             tok = next(self._tokengen)
-            if tok.type in (token.NL, token.COMMENT):
+            if tok.type in (tokenize.NL, tokenize.COMMENT):
                 continue
             if tok.type == token.ERRORTOKEN and tok.string.isspace():
                 continue
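`Iterable` became `Iterator` here because `getnext()` and `peek()` call `next(self._tokengen)` directly, and mypy only accepts `next()` on an `Iterator`; callers are unaffected since `tokenize.generate_tokens()` already returns an iterator. (The other fixes in this file are plain corrections: `tokenizer.TokenInfo` was a typo for `tokenize.TokenInfo`, and `NL`/`COMMENT` live in the `tokenize` module, not `token`, on 3.8.) A small standalone illustration:

import io
import tokenize
from typing import Iterator


def first_real_token(tokengen: Iterator[tokenize.TokenInfo]) -> tokenize.TokenInfo:
    # next() requires an Iterator; with Iterable[...] mypy rejects this call.
    tok = next(tokengen)
    while tok.type in (tokenize.NL, tokenize.COMMENT):
        tok = next(tokengen)
    return tok


tokens = tokenize.generate_tokens(io.StringIO("x = 1\n").readline)
print(first_real_token(tokens))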
