From d60c459e65124fed1effa64350dba348c7396a11 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 9 Oct 2022 23:09:28 +0200 Subject: [PATCH 01/31] Type test_xarray.py --- tests/test_xarray.py | 119 +++++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 38 deletions(-) diff --git a/tests/test_xarray.py b/tests/test_xarray.py index 6669830b5..3d97092d4 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np import pandas as pd import pytest @@ -23,12 +27,17 @@ except ValueError: pass +if TYPE_CHECKING: + from flox.core import T_Agg, T_Engine + @pytest.mark.parametrize("reindex", [None, False, True]) @pytest.mark.parametrize("min_count", [None, 1, 3]) @pytest.mark.parametrize("add_nan", [True, False]) @pytest.mark.parametrize("skipna", [True, False]) -def test_xarray_reduce(skipna, add_nan, min_count, engine, reindex): +def test_xarray_reduce( + skipna: bool, add_nan: bool, min_count: int | None, engine: T_Engine, reindex: bool | None +) -> None: arr = np.ones((4, 12)) if add_nan: @@ -76,7 +85,9 @@ def test_xarray_reduce(skipna, add_nan, min_count, engine, reindex): # TODO: sort @pytest.mark.parametrize("pass_expected_groups", [True, False]) @pytest.mark.parametrize("chunk", (True, False)) -def test_xarray_reduce_multiple_groupers(pass_expected_groups, chunk, engine): +def test_xarray_reduce_multiple_groupers( + pass_expected_groups: bool, chunk: bool, engine: T_Engine +) -> None: if not has_dask and chunk: pytest.skip() @@ -100,32 +111,43 @@ def test_xarray_reduce_multiple_groupers(pass_expected_groups, chunk, engine): coords={"labels": ["a", "b", "c", "f"], "labels2": [1, 2]}, ).expand_dims(z=4) - kwargs = dict(func="count", engine=engine) + func = "count" + expected_groups = None if pass_expected_groups: - kwargs["expected_groups"] = (expected.labels.data, expected.labels2.data) + expected_groups = (expected.labels.data, expected.labels2.data) with raise_if_dask_computes(): - actual = xarray_reduce(da, da.labels, da.labels2, **kwargs) + actual = xarray_reduce( + da, da.labels, da.labels2, func=func, expected_groups=expected_groups, engine=engine + ) xr.testing.assert_identical(expected, actual) with raise_if_dask_computes(): - actual = xarray_reduce(da, "labels", da.labels2, **kwargs) + actual = xarray_reduce( + da, "labels", da.labels2, func=func, expected_groups=expected_groups, engine=engine + ) xr.testing.assert_identical(expected, actual) with raise_if_dask_computes(): - actual = xarray_reduce(da, "labels", "labels2", **kwargs) + actual = xarray_reduce( + da, "labels", "labels2", func=func, expected_groups=expected_groups, engine=engine + ) xr.testing.assert_identical(expected, actual) if pass_expected_groups: - kwargs["expected_groups"] = (expected.labels2.data, expected.labels.data) + expected_groups = (expected.labels2.data, expected.labels.data) with raise_if_dask_computes(): - actual = xarray_reduce(da, "labels2", "labels", **kwargs) + actual = xarray_reduce( + da, "labels2", "labels", func=func, expected_groups=expected_groups, engine=engine + ) xr.testing.assert_identical(expected.transpose("z", "labels2", "labels"), actual) @pytest.mark.parametrize("pass_expected_groups", [True, False]) @pytest.mark.parametrize("chunk", (True, False)) -def test_xarray_reduce_multiple_groupers_2(pass_expected_groups, chunk, engine): +def test_xarray_reduce_multiple_groupers_2( + pass_expected_groups: bool, chunk: bool, engine: T_Engine +) -> None: if not has_dask and chunk: pytest.skip() @@ -151,20 +173,31 @@ def test_xarray_reduce_multiple_groupers_2(pass_expected_groups, chunk, engine): }, ).expand_dims(z=4, x=2) - kwargs = dict(func="count", engine=engine) + func = "count" + expected_groups = None if pass_expected_groups: - kwargs["expected_groups"] = (expected.labels.data, expected.labels.data) + expected_groups = (expected.labels.data, expected.labels.data) with raise_if_dask_computes(): - actual = xarray_reduce(da, "labels", "labels2", **kwargs) + actual = xarray_reduce( + da, "labels", "labels2", func=func, expected_groups=expected_groups, engine=engine + ) xr.testing.assert_identical(expected, actual) with pytest.raises(NotImplementedError): - xarray_reduce(da, "labels", "labels2", dim=..., **kwargs) + xarray_reduce( + da, + "labels", + "labels2", + dim=..., + func=func, + expected_groups=expected_groups, + engine=engine, + ) @requires_dask -def test_dask_groupers_error(): +def test_dask_groupers_error() -> None: da = xr.DataArray( [1.0, 2.0], dims="x", coords={"labels": ("x", [1, 2]), "labels2": ("x", [1, 2])} ) @@ -173,7 +206,7 @@ def test_dask_groupers_error(): @requires_dask -def test_xarray_reduce_single_grouper(engine): +def test_xarray_reduce_single_grouper(engine: T_Engine) -> None: # DataArray ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9}) @@ -218,7 +251,7 @@ def test_xarray_reduce_single_grouper(engine): xr.testing.assert_allclose(actual, expected) -def test_xarray_reduce_errors(): +def test_xarray_reduce_errors() -> None: da = xr.DataArray(np.ones((12,)), dims="x") by = xr.DataArray(np.ones((12,)), dims="x") @@ -238,7 +271,7 @@ def test_xarray_reduce_errors(): @pytest.mark.parametrize("isdask", [True, False]) @pytest.mark.parametrize("dataarray", [True, False]) @pytest.mark.parametrize("chunklen", [27, 4 * 31 + 1, 4 * 31 + 20]) -def test_xarray_resample(chunklen, isdask, dataarray, engine): +def test_xarray_resample(chunklen: int, isdask: bool, dataarray: bool, engine: T_Engine) -> None: if isdask: if not has_dask: pytest.skip() @@ -256,7 +289,7 @@ def test_xarray_resample(chunklen, isdask, dataarray, engine): @requires_dask -def test_xarray_resample_dataset_multiple_arrays(engine): +def test_xarray_resample_dataset_multiple_arrays(engine: T_Engine) -> None: # regression test for #35 times = pd.date_range("2000", periods=5) foo = xr.DataArray(range(5), dims=["time"], coords=[times], name="foo") @@ -289,7 +322,7 @@ def test_xarray_resample_dataset_multiple_arrays(engine): [(10,), (10,)], ], ) -def test_rechunk_for_blockwise(inchunks, expected): +def test_rechunk_for_blockwise(inchunks: tuple[int, ...], expected: tuple[int, ...]) -> None: labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5]) da = xr.DataArray(dask.array.ones((10,), chunks=inchunks), dims="x", name="foo") @@ -310,7 +343,7 @@ def test_rechunk_for_blockwise(inchunks, expected): # TODO: dim=None, dim=Ellipsis, groupby unindexed dim -def test_groupby_duplicate_coordinate_labels(engine): +def test_groupby_duplicate_coordinate_labels(engine: T_Engine) -> None: # fix for http://stackoverflow.com/questions/38065129 array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])]) expected = xr.DataArray([3, 3], [("x", [1, 2])]) @@ -318,7 +351,7 @@ def test_groupby_duplicate_coordinate_labels(engine): assert_equal(expected, actual) -def test_multi_index_groupby_sum(engine): +def test_multi_index_groupby_sum(engine: T_Engine) -> None: # regression test for xarray GH873 ds = xr.Dataset( {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))}, @@ -342,7 +375,7 @@ def test_multi_index_groupby_sum(engine): @pytest.mark.parametrize("chunks", (None, 2)) -def test_xarray_groupby_bins(chunks, engine): +def test_xarray_groupby_bins(chunks, engine: T_Engine) -> None: array = xr.DataArray([1, 1, 1, 1, 1], dims="x") labels = xr.DataArray([1, 1.5, 1.9, 2, 3], dims="x", name="labels") @@ -352,16 +385,17 @@ def test_xarray_groupby_bins(chunks, engine): array = array.chunk({"x": chunks}) labels = labels.chunk({"x": chunks}) - kwargs = dict( - dim="x", - func="count", - engine=engine, - expected_groups=np.array([1, 2, 4, 5]), - isbin=True, - fill_value=0, - ) with raise_if_dask_computes(): - actual = xarray_reduce(array, labels, **kwargs) + actual = xarray_reduce( + array, + labels, + dim="x", + func="count", + engine=engine, + expected_groups=np.array([1, 2, 4, 5]), + isbin=True, + fill_value=0, + ) expected = xr.DataArray( np.array([3, 1, 0]), dims="labels_bins", @@ -374,7 +408,16 @@ def test_xarray_groupby_bins(chunks, engine): labels = labels.expand_dims(y=2).copy() labels.data[-1, -1] = np.nan with raise_if_dask_computes(): - actual = xarray_reduce(array, labels, **kwargs) + actual = xarray_reduce( + array, + labels, + dim="x", + func="count", + engine=engine, + expected_groups=np.array([1, 2, 4, 5]), + isbin=True, + fill_value=0, + ) expected = xr.DataArray( np.array([[[3, 1, 0]] * 3, [[3, 0, 0]] * 3]), dims=("y", "z", "labels_bins"), @@ -384,7 +427,7 @@ def test_xarray_groupby_bins(chunks, engine): @requires_dask -def test_func_is_aggregation(): +def test_func_is_aggregation() -> None: from flox.aggregations import mean ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9}) @@ -400,7 +443,7 @@ def test_func_is_aggregation(): @requires_dask -def test_cache(): +def test_cache() -> None: pytest.importorskip("cachey") from flox.cache import cache @@ -423,7 +466,7 @@ def test_cache(): @pytest.mark.parametrize("use_cftime", [True, False]) @pytest.mark.parametrize("func", ["count", "mean"]) -def test_datetime_array_reduce(use_cftime, func, engine): +def test_datetime_array_reduce(use_cftime: bool, func: str, engine: T_Engine) -> None: time = xr.DataArray( xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime), @@ -436,7 +479,7 @@ def test_datetime_array_reduce(use_cftime, func, engine): @requires_dask -def test_groupby_bins_indexed_coordinate(): +def test_groupby_bins_indexed_coordinate() -> None: ds = ( xr.tutorial.open_dataset("air_temperature") .isel(time=slice(100)) @@ -457,7 +500,7 @@ def test_groupby_bins_indexed_coordinate(): @pytest.mark.parametrize("chunk", (True, False)) -def test_mixed_grouping(chunk): +def test_mixed_grouping(chunk: bool) -> None: if not has_dask and chunk: pytest.skip() # regression test for https://github.com/xarray-contrib/flox/pull/111 From 6d22bb00b692cde412505b4f1f154550f237b374 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 9 Oct 2022 23:09:45 +0200 Subject: [PATCH 02/31] Fix typing --- flox/xarray.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 5f87bafe6..59047492d 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -19,6 +19,8 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: + from .core import T_Engine, T_Method + from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset @@ -63,8 +65,8 @@ def xarray_reduce( dim: Dims | ellipsis = None, split_out: int = 1, fill_value=None, - method: str = "map-reduce", - engine: str = "numpy", + method: T_Method = "map-reduce", + engine: T_Engine = "numpy", keep_attrs: bool | None = True, skipna: bool | None = None, min_count: int | None = None, From 42826a840fe63279b7e0ef91e98afa987c5ad0da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 9 Oct 2022 21:10:49 +0000 Subject: [PATCH 03/31] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 59047492d..dddd5b363 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -19,11 +19,11 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: - from .core import T_Engine, T_Method - from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset + from .core import T_Engine, T_Method + Dims = Union[str, Iterable[Hashable], None] From 91a5bd2ad6aee74fcda522f5f2f0378854cbdf35 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:22:03 +0200 Subject: [PATCH 04/31] Add type hints to test_core.py --- tests/test_core.py | 320 +++++++++++++++++++++++++++++++++------------ 1 file changed, 239 insertions(+), 81 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 25660e734..d232d0871 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,7 +1,7 @@ from __future__ import annotations from functools import reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd @@ -67,7 +67,7 @@ def dask_array_ones(*args): ) if TYPE_CHECKING: - from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Func2 + from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Agg, T_Method def test_alignment_error(): @@ -108,7 +108,7 @@ def test_alignment_error(): ) def test_groupby_reduce( engine: T_Engine, - func: T_Func2, + func: T_Agg, array: np.ndarray, by: np.ndarray, expected: list[float], @@ -146,10 +146,12 @@ def test_groupby_reduce( assert_equal(expected_result, result) -def gen_array_by(size, func): +def gen_array_by( + size: tuple[int, ...], func: str +) -> tuple[np.ndarray[Any, Any], np.ndarray[Any, Any]]: by = np.ones(size[-1]) rng = np.random.default_rng(12345) - array = rng.random(size) + array: np.ndarray[Any, Any] = rng.random(size) if "nan" in func and "nanarg" not in func: array[[1, 4, 5], ...] = np.nan elif "nanarg" in func and len(size) > 1: @@ -164,7 +166,9 @@ def gen_array_by(size, func): @pytest.mark.parametrize("size", ((12,), (12, 9))) @pytest.mark.parametrize("add_nan_by", [True, False]) @pytest.mark.parametrize("func", ALL_FUNCS) -def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): +def test_groupby_reduce_all( + nby, size: tuple[int, ...], chunks, func: str, add_nan_by: bool, engine: T_Engine +) -> None: if chunks is not None and not has_dask: pytest.skip() if "arg" in func and engine == "flox": @@ -173,15 +177,14 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): array, by = gen_array_by(size, func) if chunks: array = dask.array.from_array(array, chunks=chunks) - by = (by,) * nby - by = [b + idx for idx, b in enumerate(by)] + bys = [by] * nby + bys = [b + idx for idx, b in enumerate(bys)] if add_nan_by: for idx in range(nby): - by[idx][2 * idx : 2 * idx + 3] = np.nan - by = tuple(by) - nanmask = reduce(np.logical_or, (np.isnan(b) for b in by)) + bys[idx][2 * idx : 2 * idx + 3] = np.nan + nanmask = reduce(np.logical_or, (np.isnan(b) for b in bys)) - finalize_kwargs = [{}] + finalize_kwargs: list[dict[str, Any]] = [{}] if "var" in func or "std" in func: finalize_kwargs = finalize_kwargs + [{"ddof": 1}, {"ddof": 0}] fill_value = np.nan @@ -189,7 +192,6 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): fill_value = None for kwargs in finalize_kwargs: - flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): if "arg" in func and add_nan_by: array[..., nanmask] = np.nan @@ -199,7 +201,9 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): for _ in range(nby): expected = np.expand_dims(expected, -1) - actual, *groups = groupby_reduce(array, *by, **flox_kwargs) + actual, *groups = groupby_reduce( + array, *bys, func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value + ) assert actual.ndim == (array.ndim + nby - 1) assert expected.ndim == (array.ndim + nby - 1) expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby)) @@ -211,10 +215,20 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): if not has_dask: continue - for method in ["map-reduce", "cohorts", "split-reduce"]: + + methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"] + for method in methods: if "arg" in func and method != "map-reduce": continue - actual, *groups = groupby_reduce(array, *by, method=method, **flox_kwargs) + actual, *groups = groupby_reduce( + array, + *bys, + method=method, + func=func, + engine=engine, + finalize_kwargs=kwargs, + fill_value=fill_value, + ) for actual_group, expect in zip(groups, expected_groups): assert_equal(actual_group, expect) if "arg" in func: @@ -225,7 +239,7 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): @requires_dask @pytest.mark.parametrize("size", ((12,), (12, 5))) @pytest.mark.parametrize("func", ("argmax", "nanargmax", "argmin", "nanargmin")) -def test_arg_reduction_dtype_is_int(size, func): +def test_arg_reduction_dtype_is_int(size: tuple[int, ...], func: str) -> None: """avoid bugs being hidden by the xfail in the above test.""" rng = np.random.default_rng(12345) @@ -245,14 +259,14 @@ def test_arg_reduction_dtype_is_int(size, func): assert actual.dtype.kind == "i" -def test_groupby_reduce_count(): +def test_groupby_reduce_count() -> None: array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1]) labels = np.array(["a", "b", "b", "b", "c", "c", "c"]) result, _ = groupby_reduce(array, labels, func="count") assert_equal(result, [1, 1, 2]) -def test_func_is_aggregation(): +def test_func_is_aggregation() -> None: from flox.aggregations import mean array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1]) @@ -265,14 +279,14 @@ def test_func_is_aggregation(): @requires_dask @pytest.mark.parametrize("func", ("sum", "prod")) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) -def test_groupby_reduce_preserves_dtype(dtype, func): +def test_groupby_reduce_preserves_dtype(dtype, func: str) -> None: array = np.ones((2, 12), dtype=dtype) by = np.array([labels] * 2) result, _ = groupby_reduce(from_array(array, chunks=(-1, 4)), by, func=func) assert result.dtype == array.dtype -def test_numpy_reduce_nd_md(): +def test_numpy_reduce_nd_md() -> None: array = np.ones((2, 12)) by = np.array([labels] * 2) @@ -319,7 +333,16 @@ def test_numpy_reduce_nd_md(): ((10, 12), (3, 3), 3), # form 3 ], ) -def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtype, engine, reindex): +def test_groupby_agg_dask( + func: str, + shape: tuple[int, ...], + array_chunks: tuple[int, ...], + group_chunks, + add_nan: bool, + dtype, + engine: T_Engine, + reindex: bool | None, +) -> None: """Tests groupby_reduce with dask arrays against groupby_reduce with numpy arrays""" rng = np.random.default_rng(12345) @@ -341,67 +364,115 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp kwargs = dict( func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123 ) + expected_groups = [0, 1, 2] + fill_value = False if func in ["all", "any"] else 123 - expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs) - actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs) + expected, _ = groupby_reduce( + array.compute(), + labels, + engine="numpy", + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + ) + actual, _ = groupby_reduce( + array.compute(), + labels, + engine=engine, + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + ) assert_equal(actual, expected) with raise_if_dask_computes(): - actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs) + actual, _ = groupby_reduce( + array, + labels, + engine=engine, + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + ) assert_equal(actual, expected) by = from_array(labels, group_chunks) with raise_if_dask_computes(): - actual, _ = groupby_reduce(array, by, engine=engine, **kwargs) + actual, _ = groupby_reduce( + array, + by, + engine=engine, + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + ) assert_equal(expected, actual) - kwargs["expected_groups"] = [0, 2, 1] + expected_groups = [0, 2, 1] with raise_if_dask_computes(): - actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False) + actual, groups = groupby_reduce( + array, + by, + engine=engine, + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + sort=False, + ) assert_equal(groups, [0, 2, 1]) assert_equal(expected, actual[..., [0, 2, 1]]) kwargs["expected_groups"] = [0, 2, 1] with raise_if_dask_computes(): - actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True) + actual, groups = groupby_reduce( + array, + by, + engine=engine, + func=func, + expected_groups=expected_groups, + fill_value=fill_value, + sort=True, + ) assert_equal(groups, [0, 1, 2]) assert_equal(expected, actual) -def test_numpy_reduce_axis_subset(engine): +def test_numpy_reduce_axis_subset(engine: T_Engine) -> None: # TODO: add NaNs by = labels2d array = np.ones_like(by) kwargs = dict(func="count", engine=engine, fill_value=0) - result, _ = groupby_reduce(array, by, **kwargs, axis=1) + result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1) assert_equal(result, [[2, 3], [2, 3]]) by = np.broadcast_to(labels2d, (3, *labels2d.shape)) array = np.ones_like(by) - result, _ = groupby_reduce(array, by, **kwargs, axis=1) + result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1) subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]]) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, **kwargs, axis=2) + result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=2) subarr = np.array([[2, 3], [2, 3]]) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2)) + result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(1, 2)) expected = np.array([[4, 6], [4, 6], [4, 6]]) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1)) + result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(2, 1)) assert_equal(result, expected) - result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2)) + result, _ = groupby_reduce( + array, by[0, ...], func="count", engine=engine, fill_value=0, axis=(1, 2) + ) expected = np.array([[4, 6], [4, 6], [4, 6]]) assert_equal(result, expected) @requires_dask -def test_dask_reduce_axis_subset(): +def test_dask_reduce_axis_subset() -> None: by = labels2d array = np.ones_like(by) @@ -456,12 +527,13 @@ def test_dask_reduce_axis_subset(): @pytest.mark.parametrize( "axis", [None, (0, 1, 2), (0, 1), (0, 2), (1, 2), 0, 1, 2, (0,), (1,), (2,)] ) -def test_groupby_reduce_axis_subset_against_numpy(func, axis, engine): +def test_groupby_reduce_axis_subset_against_numpy(func: str, axis, engine: T_Engine) -> None: if "arg" in func and engine == "flox": pytest.skip() if not isinstance(axis, int) and "arg" in func and (axis is None or len(axis) > 1): pytest.skip() + fill_value: bool | float if func in ["all", "any"]: fill_value = False else: @@ -496,7 +568,9 @@ def test_groupby_reduce_axis_subset_against_numpy(func, axis, engine): (None, [0], (1,)), # global reduction; 0 shaped group axis; 1 group ], ) -def test_groupby_reduce_nans(chunks, axis, groups, expected_shape, engine): +def test_groupby_reduce_nans( + chunks, axis, groups, expected_shape: tuple[int, ...], engine: T_Engine +) -> None: def _maybe_chunk(arr): if chunks: if not has_dask: @@ -531,7 +605,7 @@ def _maybe_chunk(arr): @requires_dask -def test_groupby_all_nan_blocks(engine): +def test_groupby_all_nan_blocks(engine: T_Engine) -> None: labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0]) nan_labels = labels.astype(float) # copy nan_labels[:5] = np.nan @@ -553,7 +627,7 @@ def test_groupby_all_nan_blocks(engine): @pytest.mark.parametrize("axis", (0, 1, 2, -1)) -def test_reindex(axis): +def test_reindex(axis: int) -> None: shape = [2, 2, 2] fill_value = 0 @@ -573,7 +647,7 @@ def test_reindex(axis): @pytest.mark.xfail -def test_bad_npg_behaviour(): +def test_bad_npg_behaviour() -> None: labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0], dtype=int) # fmt: off array = np.array([[1] * 12, [1] * 12]) @@ -590,7 +664,7 @@ def test_bad_npg_behaviour(): @pytest.mark.xfail @pytest.mark.parametrize("func", ("nanargmax", "nanargmin")) -def test_npg_nanarg_bug(func): +def test_npg_nanarg_bug(func: str) -> None: array = np.array([1, 1, 2, 1, 1, np.nan, 6, 1]) labels = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - 1 @@ -602,7 +676,9 @@ def test_npg_nanarg_bug(func): @pytest.mark.parametrize("method", ["split-reduce", "cohorts", "map-reduce"]) @pytest.mark.parametrize("chunk_labels", [False, True]) @pytest.mark.parametrize("chunks", ((), (1,), (2,))) -def test_groupby_bins(chunk_labels, chunks, engine, method) -> None: +def test_groupby_bins( + chunk_labels: bool, chunks: tuple[int, ...], engine: T_Engine, method: T_Method +) -> None: array = [1, 1, 1, 1, 1, 1] labels = [0.2, 1.5, 1.9, 2, 3, 20] @@ -650,7 +726,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None: [(10,), (10,)], ], ) -def test_rechunk_for_blockwise(inchunks, expected): +def test_rechunk_for_blockwise(inchunks: tuple[int, ...], expected: tuple[int, ...]) -> None: labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5]) assert _get_optimal_chunks_for_groups(inchunks, labels) == expected @@ -673,7 +749,7 @@ def test_rechunk_for_blockwise(inchunks, expected): ], ], ) -def test_find_group_cohorts(expected, labels, chunks, merge): +def test_find_group_cohorts(expected, labels, chunks: tuple[int, ...], merge: bool) -> None: actual = list(find_group_cohorts(labels, (chunks,), merge, method="cohorts")) assert actual == expected, (actual, expected) @@ -691,7 +767,7 @@ def test_find_group_cohorts(expected, labels, chunks, merge): [3, ((3, 4, 3, 4, 3, 4, 3, 4, 2),)], ], ) -def test_rechunk_for_cohorts(chunk_at, expected): +def test_rechunk_for_cohorts(chunk_at: int, expected) -> None: array = dask.array.ones((30,), chunks=7) labels = np.arange(0, 30) % 7 rechunked = rechunk_for_cohorts(array, axis=-1, force_new_chunk_at=chunk_at, labels=labels) @@ -701,7 +777,7 @@ def test_rechunk_for_cohorts(chunk_at, expected): @pytest.mark.parametrize("chunks", [None, 3]) @pytest.mark.parametrize("fill_value", [123, np.nan]) @pytest.mark.parametrize("func", ALL_FUNCS) -def test_fill_value_behaviour(func, chunks, fill_value, engine): +def test_fill_value_behaviour(func: str, chunks, fill_value: float, engine: T_Engine) -> None: # fill_value = np.nan tests promotion of int counts to float # This is used by xarray if func in ["all", "any"] or "arg" in func: @@ -732,7 +808,8 @@ def npfunc(x): @requires_dask @pytest.mark.parametrize("func", ["mean", "sum"]) @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64"]) -def test_dtype_preservation(dtype, func, engine): +def test_dtype_preservation(dtype: str, func: str, engine: T_Engine) -> None: + expected: np.typing.DTypeLike if func == "sum" or (func == "mean" and "float" in dtype): expected = np.dtype(dtype) elif func == "mean" and "int" in dtype: @@ -749,7 +826,7 @@ def test_dtype_preservation(dtype, func, engine): @requires_dask @pytest.mark.parametrize("method", ["split-reduce", "map-reduce", "cohorts"]) -def test_cohorts(method): +def test_cohorts(method: T_Method) -> None: repeats = [4, 4, 12, 2, 3, 4] labels = np.repeat(np.arange(6), repeats) array = dask.array.from_array(labels, chunks=(4, 8, 4, 9, 4)) @@ -763,7 +840,7 @@ def test_cohorts(method): @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("axis", (-1, None)) @pytest.mark.parametrize("method", ["blockwise", "cohorts", "map-reduce", "split-reduce"]) -def test_cohorts_nd_by(func, method, axis, engine): +def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_Engine) -> None: o = dask.array.ones((3,), chunks=-1) o2 = dask.array.ones((2, 3), chunks=-1) @@ -777,6 +854,7 @@ def test_cohorts_nd_by(func, method, axis, engine): if "arg" in func and (axis is None or engine == "flox"): pytest.skip() + fill_value: bool | int if func in ["any", "all"]: fill_value = False else: @@ -785,13 +863,31 @@ def test_cohorts_nd_by(func, method, axis, engine): if axis is not None and method != "map-reduce": pytest.xfail() - kwargs = dict(func=func, engine=engine, method=method, axis=axis, fill_value=fill_value) - actual, groups = groupby_reduce(array, by, **kwargs) - expected, sorted_groups = groupby_reduce(array.compute(), by, **kwargs) + actual, groups = groupby_reduce( + array, by, func=func, engine=engine, method=method, axis=axis, fill_value=fill_value + ) + expected, sorted_groups = groupby_reduce( + array.compute(), + by, + func=func, + engine=engine, + method=method, + axis=axis, + fill_value=fill_value, + ) assert_equal(groups, sorted_groups) assert_equal(actual, expected) - actual, groups = groupby_reduce(array, by, sort=False, **kwargs) + actual, groups = groupby_reduce( + array, + by, + sort=False, + func=func, + engine=engine, + method=method, + axis=axis, + fill_value=fill_value, + ) if method == "cohorts": assert_equal(groups, [4, 3, 40, 2, 31, 1, 30]) elif method in ("split-reduce", "map-reduce"): @@ -804,7 +900,9 @@ def test_cohorts_nd_by(func, method, axis, engine): @pytest.mark.parametrize("func", ["sum", "count"]) @pytest.mark.parametrize("fill_value, expected", ((0, np.integer), (np.nan, np.floating))) -def test_dtype_promotion(func, fill_value, expected, engine): +def test_dtype_promotion( + func: str, fill_value: int, expected: np.typing.DTypeLike, engine: T_Engine +) -> None: array = np.array([1, 1]) by = [0, 1] @@ -815,7 +913,7 @@ def test_dtype_promotion(func, fill_value, expected, engine): @pytest.mark.parametrize("func", ["mean", "nanmean"]) -def test_empty_bins(func, engine): +def test_empty_bins(func: str, engine: T_Engine) -> None: array = np.ones((2, 3, 2)) by = np.broadcast_to([0, 1], array.shape) @@ -832,7 +930,7 @@ def test_empty_bins(func, engine): assert_equal(actual, expected) -def test_datetime_binning(): +def test_datetime_binning() -> None: time_bins = pd.date_range(start="2010-08-01", end="2010-08-15", freq="24H") by = pd.date_range("2010-08-01", "2010-08-15", freq="15min") @@ -848,7 +946,7 @@ def test_datetime_binning(): @pytest.mark.parametrize("func", ALL_FUNCS) -def test_bool_reductions(func, engine): +def test_bool_reductions(func: str, engine: T_Engine) -> None: if "arg" in func and engine == "flox": pytest.skip() groups = np.array([1, 1, 1]) @@ -874,17 +972,18 @@ def test_map_reduce_blockwise_mixed() -> None: @requires_dask @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"]) -def test_group_by_datetime(engine, method): - kwargs = dict( - func="mean", - method=method, - engine=engine, - ) +def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None: t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() data = t.dt.dayofyear daskarray = dask.array.from_array(data.values, chunks=30) - actual, _ = groupby_reduce(daskarray, t, **kwargs) + actual, _ = groupby_reduce( + daskarray, + t, + func="mean", + method=method, + engine=engine, + ) expected = data.to_numpy().astype(float) assert_equal(expected, actual) @@ -892,7 +991,15 @@ def test_group_by_datetime(engine, method): return None edges = pd.date_range("1999-12-31", "2000-12-31", freq="M").to_series().to_numpy() - actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs) + actual, _ = groupby_reduce( + daskarray, + t.to_numpy(), + isbin=True, + expected_groups=edges, + func="mean", + method=method, + engine=engine, + ) expected = data.resample("M").mean().to_numpy() assert_equal(expected, actual) @@ -901,13 +1008,15 @@ def test_group_by_datetime(engine, method): t.to_numpy(), isbin=True, expected_groups=edges, - **kwargs, + func="mean", + method=method, + engine=engine, ) expected = np.broadcast_to(expected, (2, 3, expected.shape[-1])) assert_equal(expected, actual) -def test_factorize_values_outside_bins(): +def test_factorize_values_outside_bins() -> None: vals = factorize_( (np.arange(10).reshape(5, 2), np.arange(10).reshape(5, 2)), @@ -941,50 +1050,98 @@ def test_multiple_groupers() -> None: assert_equal(expected, actual) -def test_factorize_reindex_sorting_strings(): +def test_factorize_reindex_sorting_strings() -> None: kwargs = dict( by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), axis=-1, expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), ) - expected = factorize_(**kwargs, reindex=True, sort=True)[0] + expected = factorize_( + by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), + axis=-1, + expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), + reindex=True, + sort=True, + )[0] assert_equal(expected, [0, 1, 4, 2]) - expected = factorize_(**kwargs, reindex=True, sort=False)[0] + expected = factorize_( + by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), + axis=-1, + expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), + reindex=True, + sort=False, + )[0] assert_equal(expected, [0, 3, 4, 1]) - expected = factorize_(**kwargs, reindex=False, sort=False)[0] + expected = factorize_( + by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), + axis=-1, + expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), + reindex=False, + sort=False, + )[0] assert_equal(expected, [0, 1, 2, 3]) - expected = factorize_(**kwargs, reindex=False, sort=True)[0] + expected = factorize_( + by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), + axis=-1, + expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), + reindex=False, + sort=True, + )[0] assert_equal(expected, [0, 1, 3, 2]) -def test_factorize_reindex_sorting_ints(): +def test_factorize_reindex_sorting_ints() -> None: kwargs = dict( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), ) - expected = factorize_(**kwargs, reindex=True, sort=True)[0] + expected = factorize_( + by=(np.array([-10, 1, 10, 2, 3, 5]),), + axis=-1, + expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + reindex=True, + sort=True, + )[0] assert_equal(expected, [6, 1, 6, 2, 3, 5]) - expected = factorize_(**kwargs, reindex=True, sort=False)[0] + expected = factorize_( + by=(np.array([-10, 1, 10, 2, 3, 5]),), + axis=-1, + expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + reindex=True, + sort=False, + )[0] assert_equal(expected, [6, 1, 6, 2, 3, 5]) kwargs["expected_groups"] = (np.arange(5, -1, -1),) - expected = factorize_(**kwargs, reindex=True, sort=True)[0] + expected = factorize_( + by=(np.array([-10, 1, 10, 2, 3, 5]),), + axis=-1, + expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + reindex=True, + sort=True, + )[0] assert_equal(expected, [6, 1, 6, 2, 3, 5]) - expected = factorize_(**kwargs, reindex=True, sort=False)[0] + expected = factorize_( + by=(np.array([-10, 1, 10, 2, 3, 5]),), + axis=-1, + expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + reindex=True, + sort=False, + )[0] assert_equal(expected, [6, 4, 6, 3, 2, 0]) @requires_dask -def test_custom_aggregation_blockwise(): +def test_custom_aggregation_blockwise() -> None: def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): return aggregate( group_idx, @@ -1007,7 +1164,8 @@ def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dty expected = np.median(array, axis=-1, keepdims=True) assert_equal(expected, actual) - for method in ["map-reduce", "cohorts", "split-reduce"]: + methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"] + for method in methods: with pytest.raises(NotImplementedError): groupby_reduce( dask.array.from_array(array, chunks=(1, -1)), From 203adb09e5c3eb5ca77089b452f407738d79e8d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:22:31 +0000 Subject: [PATCH 05/31] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index d232d0871..5c62f1814 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -67,7 +67,7 @@ def dask_array_ones(*args): ) if TYPE_CHECKING: - from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Agg, T_Method + from flox.core import T_Agg, T_Engine, T_ExpectedGroupsOpt, T_Method def test_alignment_error(): From 492963ad04390692e77e1d1a391782be7522cf14 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:24:32 +0200 Subject: [PATCH 06/31] Update test_core.py --- tests/test_core.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index d232d0871..b9066864b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -441,7 +441,6 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None: # TODO: add NaNs by = labels2d array = np.ones_like(by) - kwargs = dict(func="count", engine=engine, fill_value=0) result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1) assert_equal(result, [[2, 3], [2, 3]]) @@ -1051,12 +1050,6 @@ def test_multiple_groupers() -> None: def test_factorize_reindex_sorting_strings() -> None: - kwargs = dict( - by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), - axis=-1, - expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), - ) - expected = factorize_( by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), axis=-1, From 071c8694510662f5dbd7a66a7280380e5d8333fe Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:31:43 +0200 Subject: [PATCH 07/31] Update test_core.py --- tests/test_core.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 827e70332..00d513a8e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1088,16 +1088,11 @@ def test_factorize_reindex_sorting_strings() -> None: def test_factorize_reindex_sorting_ints() -> None: - kwargs = dict( - by=(np.array([-10, 1, 10, 2, 3, 5]),), - axis=-1, - expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), - ) - + expected_groups = (np.array([0, 1, 2, 3, 4, 5]),) expected = factorize_( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, - expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + expected_groups=expected_groups, reindex=True, sort=True, )[0] @@ -1106,18 +1101,18 @@ def test_factorize_reindex_sorting_ints() -> None: expected = factorize_( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, - expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + expected_groups=expected_groups, reindex=True, sort=False, )[0] assert_equal(expected, [6, 1, 6, 2, 3, 5]) - kwargs["expected_groups"] = (np.arange(5, -1, -1),) + expected_groups = (np.arange(5, -1, -1),) expected = factorize_( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, - expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + expected_groups=expected_groups, reindex=True, sort=True, )[0] @@ -1126,7 +1121,7 @@ def test_factorize_reindex_sorting_ints() -> None: expected = factorize_( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, - expected_groups=(np.array([0, 1, 2, 3, 4, 5]),), + expected_groups=expected_groups, reindex=True, sort=False, )[0] From 4e9db600efe56dd251a77a18d5eacd155b802272 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 09:09:18 +0100 Subject: [PATCH 08/31] Don't add type hints to kwargs for readability --- tests/test_core.py | 128 ++++++++++----------------------------------- 1 file changed, 27 insertions(+), 101 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 163b54193..5827ab29b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -361,78 +361,32 @@ def test_groupby_agg_dask( labels[:3] = np.nan # entire block is NaN when group_chunks=3 labels[-2:] = np.nan - kwargs = dict( + kwargs: dict[str, Any] = dict( func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123 ) - expected_groups = [0, 1, 2] - fill_value = False if func in ["all", "any"] else 123 - expected, _ = groupby_reduce( - array.compute(), - labels, - engine="numpy", - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - ) - actual, _ = groupby_reduce( - array.compute(), - labels, - engine=engine, - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - ) + expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs) + actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs) assert_equal(actual, expected) with raise_if_dask_computes(): - actual, _ = groupby_reduce( - array, - labels, - engine=engine, - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - ) + actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs) assert_equal(actual, expected) by = from_array(labels, group_chunks) with raise_if_dask_computes(): - actual, _ = groupby_reduce( - array, - by, - engine=engine, - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - ) + actual, _ = groupby_reduce(array, by, engine=engine, **kwargs) assert_equal(expected, actual) expected_groups = [0, 2, 1] with raise_if_dask_computes(): - actual, groups = groupby_reduce( - array, - by, - engine=engine, - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - sort=False, - ) + actual, groups = groupby_reduce(array, by, **kwargs, sort=False) assert_equal(groups, [0, 2, 1]) assert_equal(expected, actual[..., [0, 2, 1]]) kwargs["expected_groups"] = [0, 2, 1] with raise_if_dask_computes(): - actual, groups = groupby_reduce( - array, - by, - engine=engine, - func=func, - expected_groups=expected_groups, - fill_value=fill_value, - sort=True, - ) + actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True) assert_equal(groups, [0, 1, 2]) assert_equal(expected, actual) @@ -441,31 +395,30 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None: # TODO: add NaNs by = labels2d array = np.ones_like(by) - result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1) + kwargs: dict[str, Any] = dict(func="count", engine=engine, fill_value=0) + result, _ = groupby_reduce(array, by, *[], axis=1, **kwargs) assert_equal(result, [[2, 3], [2, 3]]) by = np.broadcast_to(labels2d, (3, *labels2d.shape)) array = np.ones_like(by) - result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1) + result, _ = groupby_reduce(array, by, **kwargs, axis=1) subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]]) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=2) + result, _ = groupby_reduce(array, by, **kwargs, axis=2) subarr = np.array([[2, 3], [2, 3]]) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(1, 2)) + result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2)) expected = np.array([[4, 6], [4, 6], [4, 6]]) assert_equal(result, expected) - result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(2, 1)) + result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1)) assert_equal(result, expected) - result, _ = groupby_reduce( - array, by[0, ...], func="count", engine=engine, fill_value=0, axis=(1, 2) - ) + result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2)) expected = np.array([[4, 6], [4, 6], [4, 6]]) assert_equal(result, expected) @@ -863,31 +816,15 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_ if axis is not None and method != "map-reduce": pytest.xfail() - actual, groups = groupby_reduce( - array, by, func=func, engine=engine, method=method, axis=axis, fill_value=fill_value - ) - expected, sorted_groups = groupby_reduce( - array.compute(), - by, - func=func, - engine=engine, - method=method, - axis=axis, - fill_value=fill_value, + kwargs: dict[str, Any] = dict( + func=func, engine=engine, method=method, axis=axis, fill_value=fill_value ) + actual, groups = groupby_reduce(array, by, **kwargs) + expected, sorted_groups = groupby_reduce(array.compute(), by, **kwargs) assert_equal(groups, sorted_groups) assert_equal(actual, expected) - actual, groups = groupby_reduce( - array, - by, - sort=False, - func=func, - engine=engine, - method=method, - axis=axis, - fill_value=fill_value, - ) + actual, groups = groupby_reduce(array, by, sort=False, **kwargs) if method == "map-reduce": assert_equal(groups, [1, 30, 2, 31, 3, 4, 40]) else: @@ -971,17 +908,16 @@ def test_map_reduce_blockwise_mixed() -> None: @requires_dask @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"]) def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None: - t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() - data = t.dt.dayofyear - daskarray = dask.array.from_array(data.values, chunks=30) - - actual, _ = groupby_reduce( - daskarray, - t, + kwargs: dict[str, Any] = dict( func="mean", method=method, engine=engine, ) + t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() + data = t.dt.dayofyear + daskarray = dask.array.from_array(data.values, chunks=30) + + actual, _ = groupby_reduce(daskarray, t, **kwargs) expected = data.to_numpy().astype(float) assert_equal(expected, actual) @@ -989,15 +925,7 @@ def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None: return None edges = pd.date_range("1999-12-31", "2000-12-31", freq="M").to_series().to_numpy() - actual, _ = groupby_reduce( - daskarray, - t.to_numpy(), - isbin=True, - expected_groups=edges, - func="mean", - method=method, - engine=engine, - ) + actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs) expected = data.resample("M").mean().to_numpy() assert_equal(expected, actual) @@ -1006,9 +934,7 @@ def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None: t.to_numpy(), isbin=True, expected_groups=edges, - func="mean", - method=method, - engine=engine, + **kwargs, ) expected = np.broadcast_to(expected, (2, 3, expected.shape[-1])) assert_equal(expected, actual) From 4b134fc3972b0f3db2ce4d80674717ba8d31ca48 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 09:43:42 +0100 Subject: [PATCH 09/31] fix merge errors --- tests/test_core.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 788d217bf..272198564 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -232,6 +232,9 @@ def test_groupby_reduce_all( params = list(itertools.product(["map-reduce"], [True, False, None])) params.extend(itertools.product(["cohorts"], [False, None])) for method, reindex in params: + flox_kwargs = dict( + func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value + ) call = partial( groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs ) @@ -395,7 +398,7 @@ def test_groupby_agg_dask( actual, _ = groupby_reduce(array, by, engine=engine, **kwargs) assert_equal(expected, actual) - expected_groups = [0, 2, 1] + kwargs["expected_groups"] = [0, 2, 1] with raise_if_dask_computes(): actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False) assert_equal(groups, np.array([0, 2, 1], dtype=np.intp)) @@ -1005,7 +1008,7 @@ def test_multiple_groupers() -> None: def test_factorize_reindex_sorting_strings() -> None: - expected = factorize_( + kwargs = dict( by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), axis=-1, expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), @@ -1025,8 +1028,7 @@ def test_factorize_reindex_sorting_strings() -> None: def test_factorize_reindex_sorting_ints() -> None: - expected_groups = (np.array([0, 1, 2, 3, 4, 5]),) - expected = factorize_( + kwargs = dict( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),), @@ -1038,7 +1040,7 @@ def test_factorize_reindex_sorting_ints() -> None: expected = factorize_(**kwargs, reindex=True, sort=False)[0] assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64)) - expected_groups = (np.arange(5, -1, -1),) + kwargs["expected_groups"] = (np.arange(5, -1, -1),) expected = factorize_(**kwargs, reindex=True, sort=True)[0] assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64)) From af5c912000d45b94cfddd6d300af052bd30bd7f6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 09:54:40 +0100 Subject: [PATCH 10/31] Update test_core.py --- tests/test_core.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 272198564..ed982422d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -205,6 +205,7 @@ def test_groupby_reduce_all( tolerance = None for kwargs in finalize_kwargs: + flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): if "arg" in func and add_nan_by: array[..., nanmask] = np.nan @@ -214,9 +215,7 @@ def test_groupby_reduce_all( for _ in range(nby): expected = np.expand_dims(expected, -1) - actual, *groups = groupby_reduce( - array, *bys, func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value - ) + actual, *groups = groupby_reduce(array, *by, **flox_kwargs) assert actual.ndim == (array.ndim + nby - 1) assert expected.ndim == (array.ndim + nby - 1) expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby)) From 6656b7fd8ee90fdd2ebe0750f2b421c72328f14a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 09:56:27 +0100 Subject: [PATCH 11/31] Update test_core.py --- tests/test_core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ed982422d..ec168665f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -231,9 +231,6 @@ def test_groupby_reduce_all( params = list(itertools.product(["map-reduce"], [True, False, None])) params.extend(itertools.product(["cohorts"], [False, None])) for method, reindex in params: - flox_kwargs = dict( - func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value - ) call = partial( groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs ) From 51f448c560bc8a2c2dc8389d3ecd0d2e99ddb8ed Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 10:11:18 +0100 Subject: [PATCH 12/31] Update test_core.py --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ec168665f..3d0eea885 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -215,7 +215,7 @@ def test_groupby_reduce_all( for _ in range(nby): expected = np.expand_dims(expected, -1) - actual, *groups = groupby_reduce(array, *by, **flox_kwargs) + actual, *groups = groupby_reduce(array, *bys, **flox_kwargs) assert actual.ndim == (array.ndim + nby - 1) assert expected.ndim == (array.ndim + nby - 1) expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby)) @@ -232,7 +232,7 @@ def test_groupby_reduce_all( params.extend(itertools.product(["cohorts"], [False, None])) for method, reindex in params: call = partial( - groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs + groupby_reduce, array, *bys, method=method, reindex=reindex, **flox_kwargs ) if "arg" in func and reindex is True: # simple_combine with argreductions not supported right now From 83e3cec010a4d5f78ea62847b8b1463d9ea8d7c4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 10:58:02 +0100 Subject: [PATCH 13/31] Update mypy ci --- .github/workflows/ci-additional.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index fd7cc8242..c7d7be20e 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -100,6 +100,7 @@ jobs: environment-name: xarray-tests extra-specs: | python=${{env.PYTHON_VERSION}} + conda cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - name: Install xarray @@ -115,4 +116,13 @@ jobs: - name: Run mypy run: | - python -m mypy --install-types --non-interactive + python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report + - name: Upload mypy coverage to Codecov + uses: codecov/codecov-action@v3.1.1 + with: + file: mypy_report/cobertura.xml + flags: mypy + env_vars: PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false + From 1698ed225a5a219ca9d99d8308a4cbe9a9fdf8b0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:09:09 +0100 Subject: [PATCH 14/31] Update ci-additional.yaml --- .github/workflows/ci-additional.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index c7d7be20e..fc1608773 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -59,6 +59,7 @@ jobs: environment-name: flox-tests extra-specs: | python=${{env.PYTHON_VERSION}} + lxml cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" From 87c81169b35f4d25e01e6339c02c3248ff69eb18 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:11:38 +0100 Subject: [PATCH 15/31] Update ci-additional.yaml --- .github/workflows/ci-additional.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index fc1608773..818ce5b40 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -59,7 +59,6 @@ jobs: environment-name: flox-tests extra-specs: | python=${{env.PYTHON_VERSION}} - lxml cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" @@ -102,6 +101,7 @@ jobs: extra-specs: | python=${{env.PYTHON_VERSION}} conda + lxml cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - name: Install xarray From 7d619ea9ca6b8865a933d84549e61955b1d2f412 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:22:14 +0100 Subject: [PATCH 16/31] Move tests to /flox so mypy finds it --- {tests => flox/tests}/__init__.py | 0 {tests => flox/tests}/test_core.py | 0 {tests => flox/tests}/test_xarray.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {tests => flox/tests}/__init__.py (100%) rename {tests => flox/tests}/test_core.py (100%) rename {tests => flox/tests}/test_xarray.py (100%) diff --git a/tests/__init__.py b/flox/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to flox/tests/__init__.py diff --git a/tests/test_core.py b/flox/tests/test_core.py similarity index 100% rename from tests/test_core.py rename to flox/tests/test_core.py diff --git a/tests/test_xarray.py b/flox/tests/test_xarray.py similarity index 100% rename from tests/test_xarray.py rename to flox/tests/test_xarray.py From 6117bc2632eaf403c86b6ab48b65048639bee8d3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:31:10 +0100 Subject: [PATCH 17/31] Update ci-additional.yaml --- .github/workflows/ci-additional.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 818ce5b40..cdbe8c410 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -97,14 +97,14 @@ jobs: uses: mamba-org/provision-with-micromamba@v14 with: environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-tests + environment-name: flox-tests extra-specs: | python=${{env.PYTHON_VERSION}} conda lxml cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - - name: Install xarray + - name: Install flox run: | python -m pip install --no-deps -e . - name: Version info From 391c7b5269b1782d3dfc7fae27556cf0583f9afe Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:46:01 +0100 Subject: [PATCH 18/31] include more files --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32e55d712..b10f1a26e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,8 @@ known_third_party = [ [tool.mypy] allow_redefinition = true -exclude = "properties|asv_bench|doc|tests|flycheck" -files = "flox/*.py" +exclude = "properties|doc|flycheck" +files = "flox" show_error_codes = true [[tool.mypy.overrides]] From 90bf14f09b596f73d1a408d5ca3823331420dc66 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:46:14 +0100 Subject: [PATCH 19/31] move back tests --- {flox/tests => tests}/__init__.py | 0 {flox/tests => tests}/test_core.py | 0 {flox/tests => tests}/test_xarray.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {flox/tests => tests}/__init__.py (100%) rename {flox/tests => tests}/test_core.py (100%) rename {flox/tests => tests}/test_xarray.py (100%) diff --git a/flox/tests/__init__.py b/tests/__init__.py similarity index 100% rename from flox/tests/__init__.py rename to tests/__init__.py diff --git a/flox/tests/test_core.py b/tests/test_core.py similarity index 100% rename from flox/tests/test_core.py rename to tests/test_core.py diff --git a/flox/tests/test_xarray.py b/tests/test_xarray.py similarity index 100% rename from flox/tests/test_xarray.py rename to tests/test_xarray.py From 3bab2f084bcf77eeeb9beb217af156e7651fca43 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 11:48:23 +0100 Subject: [PATCH 20/31] Revert "move back tests" This reverts commit 90bf14f09b596f73d1a408d5ca3823331420dc66. --- {tests => flox/tests}/__init__.py | 0 {tests => flox/tests}/test_core.py | 0 {tests => flox/tests}/test_xarray.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {tests => flox/tests}/__init__.py (100%) rename {tests => flox/tests}/test_core.py (100%) rename {tests => flox/tests}/test_xarray.py (100%) diff --git a/tests/__init__.py b/flox/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to flox/tests/__init__.py diff --git a/tests/test_core.py b/flox/tests/test_core.py similarity index 100% rename from tests/test_core.py rename to flox/tests/test_core.py diff --git a/tests/test_xarray.py b/flox/tests/test_xarray.py similarity index 100% rename from tests/test_xarray.py rename to flox/tests/test_xarray.py From 3e4831917dfa81d7621dda06fa2119d31fb25d3b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 12:11:30 +0100 Subject: [PATCH 21/31] fix type errors --- flox/tests/test_core.py | 26 ++++++++++++++++---------- flox/tests/test_xarray.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py index 3d0eea885..c657fa31b 100644 --- a/flox/tests/test_core.py +++ b/flox/tests/test_core.py @@ -80,6 +80,10 @@ def dask_array_ones(*args): if TYPE_CHECKING: from flox.core import T_Agg, T_Engine, T_ExpectedGroupsOpt, T_Method + # Let anything through in kwargs for code readability, will likely miss a lot of + # type errors within these dicts though: + T_Kwargs = dict[str, Any] + def test_alignment_error(): da = np.ones((12,)) @@ -195,7 +199,7 @@ def test_groupby_reduce_all( bys[idx][2 * idx : 2 * idx + 3] = np.nan nanmask = reduce(np.logical_or, (np.isnan(b) for b in bys)) - finalize_kwargs: list[dict[str, Any]] = [{}] + finalize_kwargs: list[T_Kwargs] = [{}] if "var" in func or "std" in func: finalize_kwargs = finalize_kwargs + [{"ddof": 1}, {"ddof": 0}] fill_value = np.nan @@ -205,7 +209,9 @@ def test_groupby_reduce_all( tolerance = None for kwargs in finalize_kwargs: - flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value) + flox_kwargs: T_Kwargs = dict( + func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value + ) with np.errstate(invalid="ignore", divide="ignore"): if "arg" in func and add_nan_by: array[..., nanmask] = np.nan @@ -377,7 +383,7 @@ def test_groupby_agg_dask( labels[:3] = np.nan # entire block is NaN when group_chunks=3 labels[-2:] = np.nan - kwargs: dict[str, Any] = dict( + kwargs: T_Kwargs = dict( func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123 ) @@ -410,7 +416,7 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None: # TODO: add NaNs by = labels2d array = np.ones_like(by, dtype=np.int64) - kwargs = dict(func="count", engine=engine, fill_value=0) + kwargs: T_Kwargs = dict(func="count", engine=engine, fill_value=0) result, _ = groupby_reduce(array, by, **kwargs, axis=1) assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64)) @@ -845,7 +851,7 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_ if axis is not None and method != "map-reduce": pytest.xfail() - kwargs: dict[str, Any] = dict( + kwargs: T_Kwargs = dict( func=func, engine=engine, method=method, axis=axis, fill_value=fill_value ) actual, groups = groupby_reduce(array, by, **kwargs) @@ -928,7 +934,7 @@ def test_map_reduce_blockwise_mixed() -> None: dask.array.from_array(data.values, chunks=365), t.dt.month, func="mean", - method="split-reduce", + method="cohorts", ) expected, _ = groupby_reduce(data, t.dt.month, func="mean") assert_equal(expected, actual) @@ -937,7 +943,7 @@ def test_map_reduce_blockwise_mixed() -> None: @requires_dask @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"]) def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None: - kwargs: dict[str, Any] = dict( + kwargs: T_Kwargs = dict( func="mean", method=method, engine=engine, @@ -1004,7 +1010,7 @@ def test_multiple_groupers() -> None: def test_factorize_reindex_sorting_strings() -> None: - kwargs = dict( + kwargs: T_Kwargs = dict( by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), axis=-1, expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), @@ -1024,7 +1030,7 @@ def test_factorize_reindex_sorting_strings() -> None: def test_factorize_reindex_sorting_ints() -> None: - kwargs = dict( + kwargs: T_Kwargs = dict( by=(np.array([-10, 1, 10, 2, 3, 5]),), axis=-1, expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),), @@ -1069,7 +1075,7 @@ def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dty expected = np.median(array, axis=-1, keepdims=True) assert_equal(expected, actual) - methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"] + methods: list[T_Method] = ["map-reduce", "cohorts"] for method in methods: with pytest.raises(NotImplementedError): groupby_reduce( diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py index 9fce115c7..5ba94af16 100644 --- a/flox/tests/test_xarray.py +++ b/flox/tests/test_xarray.py @@ -503,7 +503,7 @@ def test_groupby_bins_indexed_coordinate() -> None: expected_groups=([40, 50, 60, 70],), isbin=(True,), func="mean", - method="split-reduce", + method="cohorts", ) xr.testing.assert_allclose(expected, actual) From 797a50e372416ec31c6bc60a7486d97560134ca3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 12:16:49 +0100 Subject: [PATCH 22/31] numba numpy_groupies to ignore --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b10f1a26e..885626285 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,8 @@ module=[ "cftime", "dask.*", "importlib_metadata", - "numpy_groupies", + "numba.*", + "numpy_groupies.*", "matplotlib.*", "pandas", "setuptools", From 3026276aaed518ae648bece08cf7acb8b4508a6c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 12:31:57 +0100 Subject: [PATCH 23/31] Update test_core.py --- flox/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py index c657fa31b..0a2f7c9b1 100644 --- a/flox/tests/test_core.py +++ b/flox/tests/test_core.py @@ -837,7 +837,7 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_ by[0, 1] = 30 by[2, 1] = 40 by[0, 4] = 31 - array = np.broadcast_to(array, (2, 3) + array.shape) + array = dask.array.broadcast_to(array, (2, 3) + array.shape) if "arg" in func and (axis is None or engine == "flox"): pytest.skip() From d38239363e3c8076a2ddc8f3918604bebcfc64d6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 12:56:20 +0100 Subject: [PATCH 24/31] move engine fixture to conftest --- conftest.py | 49 +++++++++++++++++++++++++++++++++++++++++ flox/tests/__init__.py | 10 --------- flox/tests/test_core.py | 1 - 3 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 conftest.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..d427ecc64 --- /dev/null +++ b/conftest.py @@ -0,0 +1,49 @@ +"""Configuration for pytest.""" + +import pytest + + +def pytest_addoption(parser): + """Add command-line flags for pytest.""" + parser.addoption("--run-flaky", action="store_true", help="runs flaky tests") + parser.addoption( + "--run-network-tests", + action="store_true", + help="runs tests requiring a network connection", + ) + + +def pytest_runtest_setup(item): + # based on https://stackoverflow.com/questions/47559524 + if "flaky" in item.keywords and not item.config.getoption("--run-flaky"): + pytest.skip("set --run-flaky option to run flaky tests") + if "network" in item.keywords and not item.config.getoption("--run-network-tests"): + pytest.skip("set --run-network-tests to run test requiring an internet connection") + + +@pytest.fixture(autouse=True) +def add_standard_imports(doctest_namespace, tmpdir): + import numpy as np + import pandas as pd + + import xarray as xr + + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + doctest_namespace["xr"] = xr + + # always seed numpy.random to make the examples deterministic + np.random.seed(0) + + # always switch to the temporary directory, so files get written there + tmpdir.chdir() + + +@pytest.fixture(scope="module", params=["flox", "numpy", "numba"]) +def engine(request): + if request.param == "numba": + try: + import numba # noqa: F401 + except ImportError: + pytest.xfail() + return request.param diff --git a/flox/tests/__init__.py b/flox/tests/__init__.py index b1a266652..e2b8d8584 100644 --- a/flox/tests/__init__.py +++ b/flox/tests/__init__.py @@ -125,13 +125,3 @@ def assert_equal_tuple(a, b): np.testing.assert_array_equal(a_, b_) else: assert a_ == b_ - - -@pytest.fixture(scope="module", params=["flox", "numpy", "numba"]) -def engine(request): - if request.param == "numba": - try: - import numba - except ImportError: - pytest.xfail() - return request.param diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py index 0a2f7c9b1..48fd7365c 100644 --- a/flox/tests/test_core.py +++ b/flox/tests/test_core.py @@ -26,7 +26,6 @@ from . import ( assert_equal, assert_equal_tuple, - engine, has_dask, raise_if_dask_computes, requires_dask, From 8a52b4f756a524f0893b27dd3fddfe16f2334445 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 12:59:22 +0100 Subject: [PATCH 25/31] Update test_xarray.py --- flox/tests/test_xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py index 5ba94af16..fcbf4f22c 100644 --- a/flox/tests/test_xarray.py +++ b/flox/tests/test_xarray.py @@ -12,7 +12,7 @@ from flox.xarray import rechunk_for_blockwise, resample_reduce, xarray_reduce -from . import assert_equal, engine, has_dask, raise_if_dask_computes, requires_dask +from . import assert_equal, has_dask, raise_if_dask_computes, requires_dask # isort: off if has_dask: From 1a02d23ec258c0f7cef6729bfae5f69dbe80d1b0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 13:02:55 +0100 Subject: [PATCH 26/31] comment out for now --- conftest.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/conftest.py b/conftest.py index d427ecc64..906a70949 100644 --- a/conftest.py +++ b/conftest.py @@ -21,22 +21,22 @@ def pytest_runtest_setup(item): pytest.skip("set --run-network-tests to run test requiring an internet connection") -@pytest.fixture(autouse=True) -def add_standard_imports(doctest_namespace, tmpdir): - import numpy as np - import pandas as pd +# @pytest.fixture(autouse=True) +# def add_standard_imports(doctest_namespace, tmpdir): +# import numpy as np +# import pandas as pd - import xarray as xr +# import xarray as xr - doctest_namespace["np"] = np - doctest_namespace["pd"] = pd - doctest_namespace["xr"] = xr +# doctest_namespace["np"] = np +# doctest_namespace["pd"] = pd +# doctest_namespace["xr"] = xr - # always seed numpy.random to make the examples deterministic - np.random.seed(0) +# # always seed numpy.random to make the examples deterministic +# np.random.seed(0) - # always switch to the temporary directory, so files get written there - tmpdir.chdir() +# # always switch to the temporary directory, so files get written there +# tmpdir.chdir() @pytest.fixture(scope="module", params=["flox", "numpy", "numba"]) From 988956f1f28a3c3f4896000101eb9594608999b6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 13:07:17 +0100 Subject: [PATCH 27/31] Update test_xarray.py --- flox/tests/test_xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py index fcbf4f22c..98375b5c0 100644 --- a/flox/tests/test_xarray.py +++ b/flox/tests/test_xarray.py @@ -475,7 +475,7 @@ def test_cache() -> None: @pytest.mark.parametrize("use_cftime", [True, False]) @pytest.mark.parametrize("func", ["count", "mean"]) -def test_datetime_array_reduce(use_cftime: bool, func: str, engine: T_Engine) -> None: +def test_datetime_array_reduce(use_cftime: bool, func: T_Agg, engine: T_Engine) -> None: time = xr.DataArray( xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime), From a4d5a9e02f4929b76cb43226a944cfcc37b7e1b4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 13:09:32 +0100 Subject: [PATCH 28/31] Update ci-additional.yaml --- .github/workflows/ci-additional.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index cdbe8c410..4a1f397a2 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -126,4 +126,3 @@ jobs: env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false - From e0308b8935757d06db96a86401a57a475cf135dc Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Nov 2022 13:12:52 +0100 Subject: [PATCH 29/31] Update test_xarray.py --- flox/tests/test_xarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py index 98375b5c0..963fe9c1c 100644 --- a/flox/tests/test_xarray.py +++ b/flox/tests/test_xarray.py @@ -28,7 +28,7 @@ pass if TYPE_CHECKING: - from flox.core import T_Agg, T_Engine + from flox.core import T_Engine tolerance64 = {"rtol": 1e-15, "atol": 1e-18} @@ -475,7 +475,7 @@ def test_cache() -> None: @pytest.mark.parametrize("use_cftime", [True, False]) @pytest.mark.parametrize("func", ["count", "mean"]) -def test_datetime_array_reduce(use_cftime: bool, func: T_Agg, engine: T_Engine) -> None: +def test_datetime_array_reduce(use_cftime: bool, func, engine: T_Engine) -> None: time = xr.DataArray( xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime), From 472db7823bd4d9df1e3e88cf9bc88b3f8f87abfe Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 26 Nov 2022 20:50:53 -0700 Subject: [PATCH 30/31] Clean up conftest --- conftest.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/conftest.py b/conftest.py index 906a70949..c5c121d32 100644 --- a/conftest.py +++ b/conftest.py @@ -3,42 +3,6 @@ import pytest -def pytest_addoption(parser): - """Add command-line flags for pytest.""" - parser.addoption("--run-flaky", action="store_true", help="runs flaky tests") - parser.addoption( - "--run-network-tests", - action="store_true", - help="runs tests requiring a network connection", - ) - - -def pytest_runtest_setup(item): - # based on https://stackoverflow.com/questions/47559524 - if "flaky" in item.keywords and not item.config.getoption("--run-flaky"): - pytest.skip("set --run-flaky option to run flaky tests") - if "network" in item.keywords and not item.config.getoption("--run-network-tests"): - pytest.skip("set --run-network-tests to run test requiring an internet connection") - - -# @pytest.fixture(autouse=True) -# def add_standard_imports(doctest_namespace, tmpdir): -# import numpy as np -# import pandas as pd - -# import xarray as xr - -# doctest_namespace["np"] = np -# doctest_namespace["pd"] = pd -# doctest_namespace["xr"] = xr - -# # always seed numpy.random to make the examples deterministic -# np.random.seed(0) - -# # always switch to the temporary directory, so files get written there -# tmpdir.chdir() - - @pytest.fixture(scope="module", params=["flox", "numpy", "numba"]) def engine(request): if request.param == "numba": From c5596b5a415484b34ab8e3af9e66789ce72edea6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 26 Nov 2022 20:59:53 -0700 Subject: [PATCH 31/31] Cleanup ci-additional --- .github/workflows/ci-additional.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 4a1f397a2..a097c606c 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -65,10 +65,6 @@ jobs: - name: Install flox run: | python -m pip install --no-deps -e . - - name: Version info - run: | - conda info -a - conda list - name: Run doctests run: | python -m pytest --doctest-modules flox --ignore flox/tests @@ -100,17 +96,12 @@ jobs: environment-name: flox-tests extra-specs: | python=${{env.PYTHON_VERSION}} - conda lxml cache-env: true cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - name: Install flox run: | python -m pip install --no-deps -e . - - name: Version info - run: | - conda info -a - conda list - name: Install mypy run: | python -m pip install mypy