Commit d9c5c9a

Upgrade mypy to latest version. Turn off strict mode.

1 parent: 93af9a2

10 files changed: +34 / -31 lines

10 files changed

+34
-31
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions

@@ -22,8 +22,8 @@ repos:
   - id: flake8
     language_version: python3
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.782
+  rev: v0.931
   hooks:
   - id: mypy
-    args: ["--strict", "--show-error-codes"]
-    additional_dependencies: ["numpy>=1.21.2", "xarray", "dask[array]", "scipy", "typing-extensions", "zarr", "numba", "dask-ml", "networkx"]
+    args: ["--show-error-codes", "--ignore-missing-imports"]
+    additional_dependencies: ["numpy>=1.21.2", "xarray", "dask[array]", "scipy", "typing-extensions", "zarr", "numba", "dask-ml", "networkx", "types-PyYAML"]

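Two changes matter here beyond the version bump: --strict is dropped in favor of per-line, error-code-scoped suppressions (enabled by --show-error-codes), and types-PyYAML supplies the library stubs that mypy 0.931 requires for yaml. A minimal sketch of why the stubs package is needed; the error text is paraphrased:

    import yaml

    # Without types-PyYAML installed, mypy reports something like:
    #   error: Library stubs not installed for "yaml"  [import]
    # With the stubs added to additional_dependencies, this checks cleanly.
    config = yaml.safe_load("retries: 3")
    print(config["retries"])
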
sgkit/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-from pkg_resources import DistributionNotFound, get_distribution
+from pkg_resources import DistributionNotFound, get_distribution  # type: ignore[import]

 from .display import display_genotypes
 from .distance.api import pairwise_distance

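pkg_resources ships without type stubs, hence the narrow ignore[import]. Presumably the import feeds the usual version-lookup idiom; a sketch of that common pattern (assumed, since the diff shows only the import line):

    from pkg_resources import DistributionNotFound, get_distribution  # type: ignore[import]

    try:
        __version__ = get_distribution("sgkit").version
    except DistributionNotFound:  # running from a source checkout, not an install
        __version__ = "unknown"
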
sgkit/io/bgen/bgen_reader.py

Lines changed: 2 additions & 2 deletions

@@ -145,7 +145,7 @@ def __getitem__(self, idx: Any) -> NDArray:
         res = np.zeros((len(all_vaddr), len(probs), 3), dtype=self.dtype)
         res[i] = probs
         res = res[..., idx[2]]  # type: ignore[index]
-        return np.squeeze(res, axis=squeeze_dims)
+        return np.squeeze(res, axis=squeeze_dims)  # type: ignore[arg-type]


 def _split_alleles(allele_ids: bytes) -> List[bytes]:
@@ -520,7 +520,7 @@ def rechunk_bgen(

     zarr.consolidate_metadata(output)

-    ds: Dataset = xr.open_zarr(output, concat_characters=False)  # type: ignore[no-untyped-call]
+    ds = xr.open_zarr(output, concat_characters=False)  # type: ignore[no-untyped-call]
     if pack:
         ds = unpack_variables(ds)

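The new ignore[arg-type] suggests numpy's updated stubs type the axis parameter of np.squeeze more strictly than the runtime does. A self-contained sketch of the well-typed case (names are illustrative, not sgkit's):

    import numpy as np

    res = np.zeros((1, 4, 1))
    # A tuple of ints satisfies the stubs' signature for axis; a dynamically
    # built or optional value may not, which is when a narrow ignore[arg-type]
    # like the one above becomes necessary.
    squeeze_dims = tuple(i for i, n in enumerate(res.shape) if n == 1)  # (0, 2)
    out = np.squeeze(res, axis=squeeze_dims)  # shape (4,)
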
sgkit/io/utils.py

Lines changed: 2 additions & 2 deletions

@@ -132,9 +132,9 @@ def _slice_zarrs(
 ) -> ArrayLike:
     """Slice concatenated zarrs by locs"""
     # convert array locations to slices
-    locs = [slice(*loc) for loc in locs]
+    locs = [slice(*loc) for loc in locs]  # type: ignore[misc]
     # determine which zarr files are needed
-    start, stop = locs[0].start, locs[0].stop  # stack on first axis
+    start, stop = locs[0].start, locs[0].stop  # type: ignore[attr-defined] # stack on first axis
     i0 = _zarr_index(offsets, start)
     i1 = _zarr_index(offsets, stop)
     if i0 == i1:  # within a single zarr file

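Both ignores come from rebinding locs: mypy keeps the parameter's original element type, so assigning a list of slice objects to the same name fails [misc], and .start on the rebound value fails [attr-defined]. A hypothetical helper showing the rename alternative (not sgkit code):

    from typing import List, Sequence, Tuple

    def first_span(locs: Sequence[Tuple[int, int]]) -> Tuple[int, int]:
        # Binding the slices to a new name keeps each variable at one type,
        # so no ignore comments are needed.
        slices: List[slice] = [slice(*loc) for loc in locs]
        return slices[0].start, slices[0].stop
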
sgkit/stats/pca.py

Lines changed: 5 additions & 3 deletions

@@ -126,12 +126,14 @@ def pca_stats(ds: Dataset, est: BaseEstimator, *, merge: bool = True) -> Dataset
             _get(est, "explained_variance_ratio_"),
         ),
     }
-    new_ds = Dataset({k: v for k, v in new_ds.items() if v[1] is not None})
+    new_ds = Dataset({k: v for k, v in new_ds.items() if v[1] is not None})  # type: ignore[assignment]
     if "sample_pca_component" in new_ds and "sample_pca_explained_variance" in new_ds:
         new_ds[variables.sample_pca_loading] = new_ds[
             variables.sample_pca_component
-        ] * np.sqrt(new_ds[variables.sample_pca_explained_variance].data)
-    return conditional_merge_datasets(ds, variables.validate(new_ds), merge)
+        ] * np.sqrt(
+            new_ds[variables.sample_pca_explained_variance].data  # type: ignore[attr-defined]
+        )
+    return conditional_merge_datasets(ds, variables.validate(new_ds), merge)  # type: ignore[call-overload]


 def pca(

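Note why the multiplication was reflowed: an ignore comment binds to one physical line, so splitting the expression lets ignore[attr-defined] target only the .data access. A toy illustration of the placement rule:

    import numpy as np

    a = np.arange(4.0)
    result = a * np.sqrt(
        a.mean()  # an ignore comment here would suppress errors on this line only
    )
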
sgkit/stats/popgen.py

Lines changed: 7 additions & 7 deletions

@@ -265,7 +265,7 @@ def divergence(
     n_variants = ds.dims["variants"]
     n_cohorts = ds.dims["cohorts"]
     ac = da.asarray(ac)
-    shape = (ac.chunks[0], n_cohorts, n_cohorts)
+    shape = (ac.chunks[0], n_cohorts, n_cohorts)  # type: ignore[index]
     d = da.map_blocks(_divergence, ac, chunks=shape, dtype=np.float64)
     assert_array_shape(d, n_variants, n_cohorts, n_cohorts)

@@ -741,7 +741,7 @@ def pbs(

     # calculate PBS triples
     t = da.asarray(t)
-    shape = (t.chunks[0], n_cohorts, n_cohorts, n_cohorts)
+    shape = (t.chunks[0], n_cohorts, n_cohorts, n_cohorts)  # type: ignore[attr-defined]

     cohorts = cohorts or list(itertools.combinations(range(n_cohorts), 3))  # type: ignore
     ct = _cohorts_to_array(cohorts, ds.indexes.get("cohorts_0", None))
@@ -763,23 +763,23 @@
 def _Garud_h(haplotypes: ArrayLike) -> ArrayLike:
     # find haplotype counts (sorted in descending order)
     counts = sorted(collections.Counter(haplotypes.tolist()).values(), reverse=True)
-    counts = np.array(counts)
+    counts = np.array(counts)  # type: ignore[assignment]

     # find haplotype frequencies
     n = haplotypes.shape[0]
-    f = counts / n
+    f = counts / n  # type: ignore[operator]

     # compute H1
     h1 = np.sum(f ** 2)

     # compute H12
-    h12 = np.sum(f[:2]) ** 2 + np.sum(f[2:] ** 2)
+    h12 = np.sum(f[:2]) ** 2 + np.sum(f[2:] ** 2)  # type: ignore[index]

     # compute H123
-    h123 = np.sum(f[:3]) ** 2 + np.sum(f[3:] ** 2)
+    h123 = np.sum(f[:3]) ** 2 + np.sum(f[3:] ** 2)  # type: ignore[index]

     # compute H2/H1
-    h2 = h1 - f[0] ** 2
+    h2 = h1 - f[0] ** 2  # type: ignore[index]
     h2_h1 = h2 / h1

     return np.array([h1, h12, h123, h2_h1])

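The cluster of ignores in _Garud_h all stem from reusing counts and f across types (a list of ints, then ndarrays). A sketch of the rename-based fix applied elsewhere in this commit, with behavior assumed equivalent for 1-D input:

    import collections

    import numpy as np

    def haplotype_frequencies(haplotypes: np.ndarray) -> np.ndarray:
        # Each name gets exactly one type, so no ignore comments are required.
        count_list = sorted(collections.Counter(haplotypes.tolist()).values(), reverse=True)
        counts = np.array(count_list)
        return counts / haplotypes.shape[0]  # descending haplotype frequencies
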
sgkit/stats/regenie.py

Lines changed: 7 additions & 7 deletions

@@ -151,7 +151,7 @@ def ridge_regression(
     # https://github.com/projectglow/glow/issues/266
     diag[:n_zero_reg] = 1
     diags.append(np.diag(diag))  # type: ignore[no-untyped-call]
-    diags = np.stack(diags)
+    diags = np.stack(diags)  # type: ignore[assignment]
     B = np.linalg.inv(XtX + diags) @ XtY  # type: ignore[no-untyped-call]
     B = B.astype(dtype or XtX.dtype)
     assert_array_shape(B, n_alpha, n_obs, n_outcome)
@@ -363,8 +363,8 @@ def _stage_2(
     alphas = get_alphas(n_variant_block * n_alpha_1)
     n_alpha_2 = alphas.size

-    YR = []
-    BR = []
+    YR_list = []
+    BR_list = []
     for i in range(n_outcome):
         # Slice and reshape to new 2D covariate matrix;
         # The order of raveling in trailing dimensions is important
@@ -382,11 +382,11 @@ def _stage_2(
         BB, YPB = _ridge_regression_cv(XPB, YB, alphas, n_zero_reg=n_covar)[-2:]
         assert_array_shape(BB, n_alpha_2, n_sample_block * n_indvar, 1)
         assert_array_shape(YPB, n_alpha_2, n_sample, 1)
-        BR.append(BB)
-        YR.append(YPB)
+        BR_list.append(BB)
+        YR_list.append(YPB)

     # Concatenate predictions along outcome dimension
-    YR = da.concatenate(YR, axis=2)
+    YR = da.concatenate(YR_list, axis=2)
     assert_block_shape(YR, 1, n_sample_block, n_outcome)
     assert_chunk_shape(YR, n_alpha_2, sample_chunks[0], 1)
     assert_array_shape(YR, n_alpha_2, n_sample, n_outcome)
@@ -399,7 +399,7 @@ def _stage_2(
     assert YR.shape[1:] == Y.T.shape

     # Concatenate betas along outcome dimension
-    BR = da.concatenate(BR, axis=2)
+    BR = da.concatenate(BR_list, axis=2)
     assert_block_shape(BR, 1, n_sample_block, n_outcome)
     assert_chunk_shape(BR, n_alpha_2, n_indvar, 1)
     assert_array_shape(BR, n_alpha_2, n_sample_block * n_indvar, n_outcome)

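The YR/BR renames fix the same class of error without suppressions: a name mypy first infers as a list cannot later be rebound to the dask array built from it. A minimal sketch with illustrative shapes:

    import dask.array as da

    # Accumulate per-outcome blocks in a list, then concatenate under a new name;
    # rebinding the list variable itself would be an [assignment] error.
    blocks = [da.ones((2, 3, 1)) for _ in range(4)]
    combined = da.concatenate(blocks, axis=2)  # shape (2, 3, 4)
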
sgkit/tests/test_association.py

Lines changed: 5 additions & 5 deletions

@@ -146,14 +146,14 @@ def ds() -> Dataset:
 def _sm_statistics(
     ds: Dataset, i: int, add_intercept: bool
 ) -> RegressionResultsWrapper:
-    X = []
+    X_list = []
     # Make sure first independent variable is variant
-    X.append(ds["dosage"].values[i])
+    X_list.append(ds["dosage"].values[i])
     for v in [c for c in list(ds.keys()) if c.startswith("covar_")]:
-        X.append(ds[v].values)
+        X_list.append(ds[v].values)
     if add_intercept:
-        X.append(np.ones(ds.dims["samples"]))
-    X = np.stack(X).T
+        X_list.append(np.ones(ds.dims["samples"]))
+    X = np.stack(X_list).T
     y = ds[f"trait_{i}"].values

     return sm.OLS(y, X, hasconst=True).fit()

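Same rename pattern as in regenie.py; np.stack(X_list).T then assembles the samples-by-predictors design matrix. A tiny worked example with made-up values:

    import numpy as np

    dosage = np.array([0.0, 1.0, 2.0])  # variant dosages come first
    covar = np.array([0.5, 0.3, 0.9])   # one covariate
    X = np.stack([dosage, covar, np.ones(3)]).T  # shape (3, 3): one row per sample, intercept last
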
sgkit/tests/test_distance.py

Lines changed: 1 addition & 1 deletion

@@ -80,7 +80,7 @@ def create_distance_matrix(
             chunks,
             device,
             metric,
-            marks=pytest.mark.gpu if device == "gpu" else "",
+            marks=pytest.mark.gpu if device == "gpu" else "",  # type: ignore[arg-type]
         )
         for shape in [(30, 30), (15, 30), (30, 15)]
         for chunks in [(10, 10), (5, 10), (10, 5)]

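The ignore[arg-type] is needed because pytest types the marks argument as a mark (or collection of marks), not a string. One typed alternative, assumed rather than taken from the commit, is an empty tuple for the no-marks case:

    import pytest

    device = "cpu"
    # An empty tuple is a valid "no marks" value under pytest's annotations,
    # so no ignore comment is required.
    case = pytest.param(1, 2, marks=pytest.mark.gpu if device == "gpu" else ())
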
validation/gwas/method/regenie/glow_wgr.py

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
+# type: ignore
 import os
 import sys

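A # type: ignore placed before any statement tells mypy to skip the whole module, a pragmatic choice for a validation script built on untyped dependencies. A sketch of the scoping rule:

    # type: ignore
    # As the first line of a file, this one comment silences every mypy error
    # in the module; on any later line it would cover that line alone.
    import os
    import sys

    print(os.getcwd(), sys.version)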