
Commit f6eee9a

Add bound types throughout layers and Ops (#599)
* Fixed typing problems mostly in with... methods
* Sorted out flatten and unflatten
* Iterable and Concatenatable types
* Corrections
* More corrections
* Corrections to layers
* Moved type definitions from types to ops
* Updated documentation
* Simplified ops type declarations
* Fixed mypy backwards compatibility issue
* Correct type-ignore comment
* Updated Mypy version in azure-pipelines
* Added CI checks with Python 3.7
* Any as first parameter of with_... layers
* Revert "Any as first parameter of with_... layers". This reverts commit aa55834.
* Tidied up init methods
* Removed unnecessary imports
* Put import statement on one line
* Changes based on PR review comments
* Improvements after PR feedback
* Went through ignore statements in layers
* Removed unnecessary covariance
* Improvements based on PR review
* Remove Python 3.7 additions
* Reverted lstm_tagger.py changes
* Added ArrayTXd_co
* Final changes before review
* Cast in main rather than in type-specific forward methods
* Added empty line
* Corrections
* More corrections
* Corrections
* Returned to ListXd types
* More corrections
* Further corrections
* Corrected model typing
* Further corrections
* Corrections
* Tidying up
* Corrections
* Removed line
* Made imports clearer
* Readded line
* Reformatted
* Readded line
* Corrected residual.py
* Changed imports back to original order
* Changes in response to review comments
* Update thinc/layers/dropout.py (Co-authored-by: Sofie Van Landeghem <[email protected]>)
* Update thinc/layers/embed.py (Co-authored-by: Sofie Van Landeghem <[email protected]>)
* Changes responding to Github review
* Reversed changes to init() return types
* Reversed changes to init() return types
* Corrected embed.py and hashembed.py
* Corrections based on Github review
* Fixed chain.py
* Further correction to chain.py
* Removed unnecessary cast
* Updated documentation
* Changes based on review
* Added @overload signatures in ops
* Added comment
* Changes based on review comments
* Final corrections
* Bumped mypy version
* Changes based on review comments
* Added space to trigger CI
* Corrected Pydantic version ranges
* Fixed mypy version range
* Correct documentation for clone

Co-authored-by: Sofie Van Landeghem <[email protected]>
1 parent 4fc5ded commit f6eee9a

38 files changed, +581 -444 lines changed
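The unifying pattern in this commit is the bound TypeVar: rather than typing ops and layers against the broad ArrayXd union, a TypeVar bound to ArrayXd lets the type checker carry the caller's concrete array type through to the return value. A minimal sketch of the idea (illustrative only, not code from this diff):

    from typing import TypeVar

    from thinc.types import ArrayXd

    # Bound TypeVar: accepts any concrete array type (Floats2d, Ints1d, ...)
    # and ties the return type to whatever the caller passed in.
    ArrayT = TypeVar("ArrayT", bound=ArrayXd)

    def identity(array: ArrayT) -> ArrayT:
        # Hypothetical helper: Floats2d in gives Floats2d out,
        # instead of widening the result to ArrayXd.
        return array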

azure-pipelines.yml

Lines changed: 0 additions & 1 deletion
@@ -61,7 +61,6 @@ jobs:
     displayName: 'Build sdist'
 
   - script: |
-      python -m pip install mypy==0.910
       python -m mypy thinc
     displayName: 'Run mypy'

requirements.txt

Lines changed: 2 additions & 3 deletions
@@ -8,7 +8,7 @@ wasabi>=0.8.1,<1.1.0
 catalogue>=2.0.4,<2.1.0
 ml_datasets>=0.2.0,<0.3.0
 # Third-party dependencies
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
 numpy>=1.15.0
 # Backports of modern Python features
 dataclasses>=0.6,<1.0; python_version < "3.7"
@@ -22,8 +22,7 @@ pytest-cov>=2.7.0,<2.8.0
 coverage>=5.0.0,<6.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.5.0,<3.6.0
-# restricting mypy until faster 3.10 wheels are available
-mypy>=0.901,<0.920; python_version < "3.10"
+mypy>=0.901,<0.960
 types-mock>=0.1.1
 types-contextvars>=0.1.2; python_version < "3.7"
 types-dataclasses>=0.1.3; python_version < "3.7"

setup.cfg

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ python_requires = >=3.6
 setup_requires =
     cython>=0.25,<3.0
     numpy>=1.15.0
-# We also need our Cython packages here to compile against
+    # We also need our Cython packages here to compile against
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=1.0.2,<1.1.0
@@ -48,7 +48,7 @@ install_requires =
     # Third-party dependencies
     setuptools
     numpy>=1.15.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
     # Backports of modern Python features
     dataclasses>=0.6,<1.0; python_version < "3.7"
     typing_extensions>=3.7.4.1,<4.0.0.0; python_version < "3.8"

thinc/backends/ops.py

Lines changed: 115 additions & 35 deletions
@@ -5,9 +5,9 @@
 import numpy
 import itertools
 
-from .. import registry
 from ..types import Xp, Shape, DTypes, DTypesInt, DTypesFloat, List2d, ArrayXd
-from ..types import Array3d, Floats1d, Floats2d, Floats3d, Floats4d
+from ..types import Floats1d, Floats2d, Floats3d, Floats4d
+from ..types import Array1d, Array2d, Array3d, Array4d, ListXd
 from ..types import FloatsXd, Ints1d, Ints2d, Ints3d, Ints4d, IntsXd, _Floats
 from ..types import DeviceTypes, Generator, Padded, Batchable, SizedGenerator
 from ..util import get_array_module, is_xp_array, to_numpy
@@ -135,13 +135,11 @@ def _get_batch(self, sequence, indices):
         if isinstance(sequence, list):
             subseq = [sequence[i] for i in indices]
         elif isinstance(sequence, tuple):
-            subseq = tuple(sequence[i] for i in indices)  # type: ignore
+            subseq = tuple(sequence[i] for i in indices)
         else:
-            subseq = sequence[indices]  # type: ignore
+            subseq = sequence[indices]
         if is_xp_array(subseq):
-            subseq = self.as_contig(
-                cast(ArrayXd, self.xp.asarray(subseq))
-            )  # type: ignore
+            subseq = self.as_contig(self.xp.asarray(subseq))
         return subseq
 
     def _get_batch_sizes(self, length: int, sizes: Iterator[int]):
@@ -225,13 +223,65 @@ def affine(self, X: Floats2d, W: Floats2d, b: Floats1d) -> Floats2d:
         Y += b
         return Y
 
+    @overload
     def flatten(
         self,
-        X: Sequence[ArrayT],
+        X: List[Floats2d],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayT:
+    ) -> Floats2d:
+        ...
+
+    @overload
+    def flatten(
+        self,
+        X: List[Ints1d],
+        dtype: Optional[DTypes] = None,
+        pad: int = 0,
+        ndim_if_empty: int = 2,
+    ) -> Ints1d:
+        ...
+
+    @overload
+    def flatten(
+        self,
+        X: List2d,
+        dtype: Optional[DTypes] = None,
+        pad: int = 0,
+        ndim_if_empty: int = 2,
+    ) -> Array2d:
+        ...
+
+    # further specific typed signatures can be added as necessary
+
+    @overload
+    def flatten(
+        self,
+        X: ListXd,
+        dtype: Optional[DTypes] = None,
+        pad: int = 0,
+        ndim_if_empty: int = 2,
+    ) -> ArrayXd:
+        ...
+
+    @overload
+    def flatten(
+        self,
+        X: Sequence[ArrayXd],
+        dtype: Optional[DTypes] = None,
+        pad: int = 0,
+        ndim_if_empty: int = 2,
+    ) -> ArrayXd:
+        ...
+
+    def flatten(
+        self,
+        X: Sequence[ArrayXd],
+        dtype: Optional[DTypes] = None,
+        pad: int = 0,
+        ndim_if_empty: int = 2,
+    ) -> ArrayXd:
         """Flatten a list of arrays into one large array."""
         if X is None or len(X) == 0:
             return self.alloc((0,) * ndim_if_empty, dtype=dtype or "f")
@@ -252,7 +302,25 @@ def flatten(
         result = xp.asarray(result, dtype=dtype)
         return result
 
+    @overload
     def unflatten(self, X: Floats2d, lengths: Ints1d, pad: int = 0) -> List[Floats2d]:
+        ...
+
+    @overload
+    def unflatten(self, X: Ints1d, lengths: Ints1d, pad: int = 0) -> List[Ints1d]:
+        ...
+
+    @overload
+    def unflatten(self, X: Array2d, lengths: Ints1d, pad: int = 0) -> List2d:
+        ...
+
+    # further specific typed signatures can be added as necessary
+
+    @overload
+    def unflatten(self, X: ArrayXd, lengths: Ints1d, pad: int = 0) -> ListXd:
+        ...
+
+    def unflatten(self, X: ArrayXd, lengths: Ints1d, pad: int = 0) -> ListXd:
         """The reverse/backward operation of the `flatten` function: unflatten
         a large array into a list of arrays according to the given lengths.
         """
@@ -302,7 +370,7 @@ def pad(  # noqa: F811
         output: Array3d = self.alloc(final_shape, dtype=seqs[0].dtype)
         for i, arr in enumerate(seqs):
             # It's difficult to convince this that the dtypes will match.
-            output[i, : arr.shape[0]] = arr  # type: ignore
+            output[i, : arr.shape[0]] = arr  # type: ignore[assignment, call-overload]
         return output
 
     def unpad(self, padded: Array3d, lengths: List[int]) -> List2d:
@@ -314,14 +382,14 @@ def unpad(self, padded: Array3d, lengths: List[int]) -> List2d:
             output.append(padded[i, :length])
         return cast(List2d, output)
 
-    def list2padded(self, seqs: List[Floats2d]) -> Padded:
+    def list2padded(self, seqs: List2d) -> Padded:
         """Pack a sequence of 2d arrays into a Padded datatype."""
         if not seqs:
             return Padded(
                 self.alloc3f(0, 0, 0), self.alloc1i(0), self.alloc1i(0), self.alloc1i(0)
             )
         elif len(seqs) == 1:
-            data = self.reshape3f(seqs[0], seqs[0].shape[0], 1, seqs[0].shape[1])
+            data = self.reshape3(seqs[0], seqs[0].shape[0], 1, seqs[0].shape[1])
             size_at_t = self.asarray1i([1] * data.shape[0])
             lengths = self.asarray1i([data.shape[0]])
             indices = self.asarray1i([0])
@@ -336,8 +404,8 @@ def list2padded(self, seqs: List2d) -> Padded:
         # Reorder the sequences, by length. This looks the same in either
         # direction: you're swapping elements between their original and sorted
         # position.
-        seqs = [seqs[i] for i in indices_]
-        arr: Floats3d = self.pad(seqs)
+        seqs = cast(List2d, [seqs[i] for i in indices_])
+        arr: Array3d = self.pad(seqs)
         assert arr.shape == (nB, nS, nO), (nB, nS, nO)
         arr = self.as_contig(arr.transpose((1, 0, 2)))
         assert arr.shape == (nS, nB, nO)
@@ -350,7 +418,7 @@ def list2padded(self, seqs: List2d) -> Padded:
             batch_size_at_t_[t] = current_size
         assert sum(lengths_) == sum(batch_size_at_t_)
         return Padded(
-            cast(Floats3d, arr),
+            arr,
             self.asarray1i(batch_size_at_t_),
             self.asarray1i(lengths_),
             self.asarray1i(indices_),
@@ -361,7 +429,7 @@ def padded2list(self, padded: Padded) -> List2d:
         data = padded.data
         indices = to_numpy(padded.indices)
         lengths = to_numpy(padded.lengths)
-        unpadded: List[Optional[Floats2d]] = [None] * len(lengths)
+        unpadded: List[Optional[Array2d]] = [None] * len(lengths)
         # Transpose from (length, batch, data) to (batch, length, data)
         data = self.as_contig(data.transpose((1, 0, 2)))
         for i in range(data.shape[0]):
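list2padded and padded2list are now typed against the generic List2d/Array3d instead of the float-specific types, so the annotations no longer assume float data. A sketch of the round trip:

    from thinc.api import NumpyOps

    ops = NumpyOps()
    padded = ops.list2padded([ops.alloc2f(3, 5), ops.alloc2f(2, 5)])
    seqs = ops.padded2list(padded)  # the 2d arrays back, in the original order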
@@ -500,6 +568,18 @@ def alloc(
         else:
             return self.xp.empty(shape, dtype=dtype)
 
+    def reshape1(self, array: ArrayXd, d0: int) -> Array1d:
+        return cast(Array1d, self.reshape(array, (d0,)))
+
+    def reshape2(self, array: ArrayXd, d0: int, d1: int) -> Array2d:
+        return cast(Array2d, self.reshape(array, (d0, d1)))
+
+    def reshape3(self, array: ArrayXd, d0: int, d1: int, d2: int) -> Array3d:
+        return cast(Array3d, self.reshape(array, (d0, d1, d2)))
+
+    def reshape4(self, array: ArrayXd, d0: int, d1: int, d2: int, d3: int) -> Array4d:
+        return cast(Array4d, self.reshape(array, (d0, d1, d2, d3)))
+
     def reshape1f(self, array: FloatsXd, d0: int) -> Floats1d:
         return cast(Floats1d, self.reshape(array, (d0,)))
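The new reshape1 through reshape4 helpers mirror the existing dtype-specific variants (reshape1f, reshape2i, and so on) but accept any ArrayXd and return the generically typed array of the requested rank. For example, a sketch:

    from thinc.api import NumpyOps

    ops = NumpyOps()
    x = ops.alloc1f(12)         # Floats1d
    y = ops.reshape2(x, 3, 4)   # typed as Array2d, works for float or int input
    z = ops.reshape2f(x, 3, 4)  # typed as Floats2d, float input only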

@@ -619,7 +699,7 @@ def asarray(
             return self.xp.asarray(data, dtype=dtype)
         elif hasattr(data, "numpy"):
             # Handles PyTorch Tensor
-            return data.numpy()  # type: ignore
+            return data.numpy()  # type: ignore[union-attr]
         elif dtype is not None:
             return self.xp.array(data, dtype=dtype)
         else:
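Throughout the diff, bare # type: ignore comments are narrowed to error-code-specific ones such as # type: ignore[union-attr]. The bracketed codes are the ones mypy prints with --show-error-codes, and the scoped form suppresses only that diagnostic, so a new, unrelated error on the same line would still be reported. A small illustration:

    x: int = 0
    x = "a"  # type: ignore[assignment]  # silences only the assignment error
    x = "b"  # type: ignore              # bare form hides every error on this line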
@@ -641,8 +721,8 @@ def sigmoid(self, X: FloatsType, *, inplace: bool = False) -> FloatsType:
 
         if inplace:
             self.xp.exp(-X, out=X)
-            X += 1.0  # type: ignore
-            X **= -1.0  # type: ignore
+            X += 1.0  # type: ignore[assignment]
+            X **= -1.0  # type: ignore[assignment]
             return cast(FloatsType, X)
         else:
             return cast(FloatsType, 1.0 / (1.0 + self.xp.exp(-X)))
@@ -786,10 +866,10 @@ def clipped_linear(
         inplace: bool = False,
     ) -> FloatsType:
         if inplace:
-            X *= slope  # type: ignore
-            X += offset  # type: ignore
+            X *= slope  # type: ignore[assignment]
+            X += offset  # type: ignore[assignment]
             return cast(FloatsType, self.xp.clip(X, min_val, max_val, out=X))
-        out = X * slope + offset  # type: ignore
+        out = X * slope + offset  # type: ignore[assignment]
         return cast(FloatsType, self.xp.clip(out, min_val, max_val))
 
     def backprop_clipped_linear(
@@ -840,27 +920,27 @@ def backprop_hard_tanh(
 
     def swish(self, X: FloatsType, inplace: bool = False) -> FloatsType:
         if inplace:
-            X *= self.sigmoid(X)  # type: ignore
+            X *= self.sigmoid(X)  # type: ignore[operator, assignment]
             return cast(FloatsType, X)
-        out = X * self.sigmoid(X)  # type: ignore
+        out = X * self.sigmoid(X)  # type: ignore[operator]
         return cast(FloatsType, out)
 
     def backprop_swish(
         self, dY: FloatsType, X: FloatsType, Y: FloatsType, inplace: bool = False
     ) -> FloatsType:
-        Y = Y + self.sigmoid(X) * (1 - Y)  # type: ignore
+        Y = Y + self.sigmoid(X) * (1 - Y)  # type: ignore[operator]
         if inplace:
-            dY *= Y  # type: ignore
+            dY *= Y  # type: ignore[operator, assignment]
             return cast(FloatsType, dY)
-        out = dY * Y  # type: ignore
+        out = dY * Y  # type: ignore[operator]
         return cast(FloatsType, out)
 
     # Following https://www.scitepress.org/Papers/2019/74696/74696.pdf
     def hard_swish(self, X: FloatsType, inplace: bool = False) -> FloatsType:
         if inplace:
-            X *= self.hard_sigmoid(X)  # type: ignore
+            X *= self.hard_sigmoid(X)  # type: ignore[operator, assignment]
             return cast(FloatsType, X)
-        out = X * self.hard_sigmoid(X)  # type: ignore
+        out = X * self.hard_sigmoid(X)  # type: ignore[operator]
         return cast(FloatsType, out)
 
     def backprop_hard_swish(
@@ -927,7 +1007,7 @@ def gelu_approx(self, X: FloatsType, inplace: bool = False) -> FloatsType:
         else:
             Y = self.xp.array(X)
             Y *= tmp
-            return cast(FloatsType, Y)
+            return Y
 
     def backprop_gelu_approx(
         self, dY: FloatsType, X: FloatsType, inplace: bool = False
@@ -949,15 +1029,15 @@ def gelu(self, X: FloatsType, inplace: bool = False) -> FloatsType:
         # GELU(x) = x · Φ(x)
         cdf = gaussian_cdf(self, X)
         if inplace:
-            X *= cdf  # type: ignore
+            X *= cdf  # type: ignore[operator, assignment]
             return X
-        return X * cdf  # type: ignore
+        return X * cdf  # type: ignore[operator, return-value]
 
     def backprop_gelu(
         self, dY: FloatsType, X: FloatsType, inplace: bool = False
     ) -> FloatsType:
         # GELU'(x) = Φ(x) + x · PDF(x)
-        dX = gaussian_cdf(self, X) + X * gaussian_pdf(self, X)  # type: ignore
+        dX = gaussian_cdf(self, X) + X * gaussian_pdf(self, X)  # type: ignore[operator]
         if inplace:
             dY *= dX
             return dY
@@ -1239,8 +1319,8 @@ def lstm_forward_training(
         for d in range(dirs):
             # The inits are shaped (depth, dirs, nO). We add the internal dimension
             # to make them set correctly.
-            Yt2 = h_init[i, d].reshape((1, nO))  # type: ignore
-            Ct2 = c_init[i, d].reshape((1, nO))  # type: ignore
+            Yt2 = h_init[i, d].reshape((1, nO))  # type: ignore[assignment]
+            Ct2 = c_init[i, d].reshape((1, nO))  # type: ignore[assignment]
             layer_params, params_i = _split_weights(params, i, nO, nI, params_i)
             Wx, Wh, bias = _transpose_weights(layer_params)
             G[i, d] += xp.dot(X, Wx.T)
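The activation hunks above share one cause: with X typed as the TypeVar FloatsType, mypy cannot prove that augmented assignments like X *= cdf preserve the concrete type, so the suppressions are narrowed to the exact operator/assignment codes instead of being removed. Calling the ops is unchanged, as in this sketch:

    from thinc.api import NumpyOps

    ops = NumpyOps()
    X = ops.alloc2f(2, 3)
    Y = ops.sigmoid(X, inplace=True)  # updates and returns the same Floats2d buffer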

thinc/config.py

Lines changed: 6 additions & 5 deletions
@@ -1,4 +1,4 @@
-from typing import Union, Dict, Any, Optional, List, Tuple, Callable, Type
+from typing import Union, Dict, Any, Optional, List, Tuple, Callable, Type, Mapping
 from typing import Iterable, Sequence, cast
 from types import GeneratorType
 from dataclasses import dataclass
@@ -550,7 +550,7 @@ def __init__(
         self,
         *,
         config: Optional[Union[Config, Dict[str, Dict[str, Any]], str]] = None,
-        errors: Iterable[Dict[str, Any]] = tuple(),
+        errors: Union[Sequence[Mapping[str, Any]], Iterable[Dict[str, Any]]] = tuple(),
         title: Optional[str] = "Config validation error",
         desc: Optional[str] = None,
         parent: Optional[str] = None,
@@ -560,9 +560,10 @@ def __init__(
 
         config (Union[Config, Dict[str, Dict[str, Any]], str]): The
             config the validation error refers to.
-        errors (Iterable[Dict[str, Any]]): A list of errors as dicts with keys
-            "loc" (list of strings describing the path of the value), "msg"
-            (validation message to show) and optional "type" (mostly internals).
+        errors (Union[Sequence[Mapping[str, Any]], Iterable[Dict[str, Any]]]):
+            A list of errors as dicts with keys "loc" (list of strings
+            describing the path of the value), "msg" (validation message
+            to show) and optional "type" (mostly internals).
             Same format as produced by pydantic's validation error (e.errors()).
         title (str): The error title.
         desc (str): Optional error description, displayed below the title.
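The widened errors parameter accepts pydantic's e.errors() output as well as hand-built sequences of mappings. A sketch of raising the error directly (illustrative values):

    from thinc.config import ConfigValidationError

    raise ConfigValidationError(
        config={"section": {"key": "value"}},
        errors=[{"loc": ["section", "key"], "msg": "value is not allowed"}],
    )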

thinc/layers/array_getitem.py

Lines changed: 3 additions & 2 deletions
@@ -1,13 +1,14 @@
-from typing import Union, Sequence, Tuple
+from typing import Union, Sequence, Tuple, TypeVar
 from ..types import ArrayXd, FloatsXd, IntsXd
 from ..model import Model
 
 
 AxisIndex = Union[int, slice, Sequence[int]]
 Index = Union[AxisIndex, Tuple[AxisIndex, ...]]
+ArrayTXd = TypeVar("ArrayTXd", bound=ArrayXd)
 
 
-def array_getitem(index: Index) -> Model[ArrayXd, ArrayXd]:
+def array_getitem(index: Index) -> Model[ArrayTXd, ArrayTXd]:
     """Index into input arrays, and return the subarrays.
 
     index:
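With ArrayTXd bound to ArrayXd, the model built by array_getitem can be pinned to a concrete array type end to end instead of collapsing to ArrayXd. A usage sketch:

    from thinc.api import NumpyOps
    from thinc.layers.array_getitem import array_getitem
    from thinc.model import Model
    from thinc.types import Floats2d

    ops = NumpyOps()
    # Annotating the model binds the TypeVar to Floats2d:
    model: Model[Floats2d, Floats2d] = array_getitem((slice(0, 2),))
    Y = model.predict(ops.alloc2f(4, 5))  # Y is Floats2d, not ArrayXd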
