Fix coverage: add tests for untested code paths and mark untestable lines with `# pragma: no cover`

tomwhite · tomwhite · commit 6b38be390843 · 2020-11-12T17:57:21.000Z
diff --git a/sgkit/io/bgen/__init__.py b/sgkit/io/bgen/__init__.py
@@ -2,7 +2,7 @@
     from .bgen_reader import bgen_to_zarr, read_bgen, rechunk_bgen
 
     __all__ = ["read_bgen", "bgen_to_zarr", "rechunk_bgen"]
-except ImportError as e:
+except ImportError as e:  # pragma: no cover
     msg = (
         "sgkit bgen requirements are not installed.\n\n"
         "Please install them via pip :\n\n"
diff --git a/sgkit/io/plink/__init__.py b/sgkit/io/plink/__init__.py
@@ -2,7 +2,7 @@
     from .plink_reader import read_plink
 
     __all__ = ["read_plink"]
-except ImportError as e:
+except ImportError as e:  # pragma: no cover
     msg = (
         "sgkit plink requirements are not installed.\n\n"
         "Please install them via pip :\n\n"
diff --git a/sgkit/io/vcf/__init__.py b/sgkit/io/vcf/__init__.py
@@ -11,7 +11,7 @@
         "vcf_to_zarrs",
         "zarrs_to_dataset",
     ]
-except ImportError as e:
+except ImportError as e:  # pragma: no cover
     if platform.system() == "Windows":
         msg = (
             "sgkit-vcf is not supported on Windows.\n"
diff --git a/sgkit/io/vcfzarr_reader.py b/sgkit/io/vcfzarr_reader.py
@@ -169,7 +169,7 @@ def _vcfzarr_to_dataset(
     if "variants/ID" in vcfzarr:
         variants_id = da.from_zarr(vcfzarr["variants/ID"]).astype(str)
     else:
-        variants_id = None
+        variants_id = None  # pragma: no cover
 
     ds = create_genotype_call_dataset(
         variant_contig_names=variant_contig_names,
diff --git a/sgkit/tests/data/sample-grouped.vcf.zarr.zip b/sgkit/tests/data/sample-grouped.vcf.zarr.zip
diff --git a/sgkit/tests/io/test_utils.py b/sgkit/tests/io/test_utils.py
@@ -35,6 +35,25 @@ def test_concatenate_and_rechunk__2d():
     np.testing.assert_array_equal(out.compute(), np.arange(30).reshape(10, 3))
 
 
+def test_concatenate_and_rechunk__tiny_file():
+    z1 = zarr.zeros(4, chunks=3, dtype="i4")
+    z1[:] = np.arange(4)
+
+    # this zarr array lies entirely within the second chunk
+    z2 = zarr.zeros(1, chunks=3, dtype="i4")
+    z2[:] = np.arange(4, 5)
+
+    z3 = zarr.zeros(5, chunks=3, dtype="i4")
+    z3[:] = np.arange(5, 10)
+
+    zarrs = [z1, z2, z3]
+
+    out = concatenate_and_rechunk(zarrs)
+
+    assert out.chunks == ((3, 3, 3, 1),)
+    np.testing.assert_array_equal(out.compute(), np.arange(10))
+
+
 def test_concatenate_and_rechunk__shape_mismatch():
     z1 = zarr.zeros((5, 3), chunks=(2, 3), dtype="i4")
     z2 = zarr.zeros((5, 4), chunks=(2, 4), dtype="i4")
diff --git a/sgkit/tests/test_hwe.py b/sgkit/tests/test_hwe.py
@@ -143,6 +143,24 @@ def test_hwep_dataset__precomputed_counts(ds_neq: Dataset) -> None:
     assert np.all(p < 1e-8)
 
 
+def test_hwep_dataset__raise_on_missing_ploidy():
+    with pytest.raises(
+        ValueError,
+        match="`ploidy` parameter must be set when not present as dataset dimension.",
+    ):
+        ds = xr.Dataset({"x": (("alleles"), np.zeros((2,)))})
+        hwep_test(ds)
+
+
+def test_hwep_dataset__raise_on_missing_alleles():
+    with pytest.raises(
+        ValueError,
+        match="`alleles` parameter must be set when not present as dataset dimension.",
+    ):
+        ds = xr.Dataset({"x": (("ploidy"), np.zeros((2,)))})
+        hwep_test(ds)
+
+
 def test_hwep_dataset__raise_on_nondiploid():
     with pytest.raises(
         NotImplementedError, match="HWE test only implemented for diploid genotypes"
diff --git a/sgkit/tests/test_testing.py b/sgkit/tests/test_testing.py
@@ -1,3 +1,6 @@
+import re
+
+import pytest
 import xarray as xr
 
 from sgkit.testing import simulate_genotype_call_dataset
@@ -8,3 +11,10 @@ def test_simulate_genotype_call_dataset__zarr(tmp_path):
     ds = simulate_genotype_call_dataset(n_variant=10, n_sample=10)
     ds.to_zarr(path)
     xr.testing.assert_equal(ds, xr.open_zarr(path, concat_characters=False))  # type: ignore[no-untyped-call]
+
+
+def test_simulate_genotype_call_dataset__invalid_missing_pct():
+    with pytest.raises(
+        ValueError, match=re.escape("missing_pct must be within [0.0, 1.0]")
+    ):
+        simulate_genotype_call_dataset(n_variant=10, n_sample=10, missing_pct=-1.0)
diff --git a/sgkit/tests/test_variables.py b/sgkit/tests/test_variables.py
@@ -77,6 +77,8 @@ def test_variables__whole_ds(dummy_ds: xr.Dataset) -> None:
     spec_bar = ArrayLikeSpec("bar", kind="i", ndim=1)
     try:
         SgkitVariables.register_variable(spec_foo)
+        with pytest.raises(ValueError, match="`foo` already registered"):
+            SgkitVariables.register_variable(spec_foo)
         with pytest.raises(ValueError, match="No array spec registered for bar"):
             variables.validate(dummy_ds)
         SgkitVariables.register_variable(spec_bar)
diff --git a/sgkit/tests/test_vcfzarr_reader.py b/sgkit/tests/test_vcfzarr_reader.py
@@ -73,22 +73,32 @@ def test_read_vcfzarr(shared_datadir):
 
 
 @pytest.mark.parametrize(
-    "vcfzarr_filename, grouped_by_contig",
-    [("sample.vcf.zarr.zip", False), ("sample-grouped.vcf.zarr.zip", True)],
+    "vcfzarr_filename, grouped_by_contig, consolidated",
+    [
+        ("sample.vcf.zarr.zip", False, False),
+        ("sample-grouped.vcf.zarr.zip", True, False),
+        ("sample-grouped.vcf.zarr.zip", True, True),
+    ],
 )
 @pytest.mark.parametrize(
     "concat_algorithm",
     [None, "xarray_internal"],
 )
 def test_vcfzarr_to_zarr(
-    shared_datadir, tmp_path, vcfzarr_filename, grouped_by_contig, concat_algorithm
+    shared_datadir,
+    tmp_path,
+    vcfzarr_filename,
+    grouped_by_contig,
+    consolidated,
+    concat_algorithm,
 ):
     # The file sample-grouped.vcf.zarr.zip was created by running the following
     # in a python session with the scikit-allel package installed.
     #
     # import allel
     # for contig in ["19", "20", "X"]:
     #   allel.vcf_to_zarr("sample.vcf", "sample-grouped.vcf.zarr", group=contig, region=contig)
+    # zarr.consolidate_metadata("sample-grouped.vcf.zarr")
     #
     # Then (in a shell):
     # (cd sample-grouped.vcf.zarr; zip -r ../sample-grouped.vcf.zarr.zip .)
@@ -100,6 +110,7 @@ def test_vcfzarr_to_zarr(
         output,
         grouped_by_contig=grouped_by_contig,
         concat_algorithm=concat_algorithm,
+        consolidated=consolidated,
     )
 
     ds = xr.open_zarr(output)  # type: ignore[no-untyped-call]
diff --git a/sgkit/tests/test_window.py b/sgkit/tests/test_window.py
@@ -1,3 +1,5 @@
+import re
+
 import allel
 import dask.array as da
 import numpy as np
@@ -16,7 +18,8 @@
 
 
 @pytest.mark.parametrize(
-    "length, chunks, size, step", [(12, 6, 4, 4), (12, 6, 4, 2), (12, 5, 4, 4)]
+    "length, chunks, size, step",
+    [(12, 6, 4, 4), (12, 6, 4, 2), (12, 5, 4, 4), (12, 12, 4, 4)],
 )
 @pytest.mark.parametrize("dtype", [np.int64, np.float32, np.float64])
 def test_moving_statistic_1d(length, chunks, size, step, dtype):
@@ -59,6 +62,15 @@ def sum_cols(x):
     np.testing.assert_equal(stat, stat_sa)
 
 
+def test_moving_statistic__min_chunksize_smaller_than_size():
+    values = da.from_array(np.arange(10), chunks=2)
+    with pytest.raises(
+        ValueError,
+        match=re.escape("Minimum chunk size (2) must not be smaller than size (3)."),
+    ):
+        moving_statistic(values, np.sum, size=3, step=3, dtype=values.dtype)
+
+
 def test_window():
     ds = simulate_genotype_call_dataset(n_variant=10, n_sample=3, seed=0)
     assert not has_windows(ds)
diff --git a/sgkit/variables.py b/sgkit/variables.py
@@ -50,7 +50,7 @@ def _validate(
         Validate that xr_dataset contains array(s) of interest with alternative
         variable name(s). To validate all variables in the dataset, skip `specs`.
         """
-        ...
+        ...  # pragma: no cover
 
     @classmethod
     @overload
@@ -59,7 +59,7 @@ def _validate(cls, xr_dataset: xr.Dataset, *specs: Spec) -> xr.Dataset:
         Validate that xr_dataset contains array(s) of interest with default
         variable name(s). To validate all variables in the dataset, skip `specs`.
         """
-        ...
+        ...  # pragma: no cover
 
     @classmethod
     @overload
@@ -69,7 +69,7 @@ def _validate(cls, xr_dataset: xr.Dataset, *specs: Hashable) -> xr.Dataset:
         name(s). Variable must be registered in `SgkitVariables.registered_variables`.
         To validate all variables in the dataset, skip `specs`.
         """
-        ...
+        ...  # pragma: no cover
 
     @classmethod
     def _validate(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -11,7 +11,7 @@` |
| `11` | `11` | `"vcf_to_zarrs",` |
| `12` | `12` | `"zarrs_to_dataset",` |
| `13` | `13` | `]` |
| `14` | | `-except ImportError as e:` |
| | `14` | `+except ImportError as e:  # pragma: no cover` |
| `15` | `15` | `if platform.system() == "Windows":` |
| `16` | `16` | `msg = (` |
| `17` | `17` | `"sgkit-vcf is not supported on Windows.\n"` |